diff --git a/api/v1beta1/types.go b/api/v1beta1/types.go index abaf2e40dbde..705059b59388 100644 --- a/api/v1beta1/types.go +++ b/api/v1beta1/types.go @@ -873,3 +873,14 @@ type UserManagedBootDiagnostics struct { // +kubebuilder:validation:MaxLength=1024 StorageAccountURI string `json:"storageAccountURI"` } + +// OrchestrationModeType represents the orchestration mode for a Virtual Machine Scale Set backing an AzureMachinePool. +// +kubebuilder:validation:Enum=Flexible;Uniform +type OrchestrationModeType string + +const ( + // FlexibleOrchestrationMode treats VMs as individual resources accessible by standard VM APIs. + FlexibleOrchestrationMode OrchestrationModeType = "Flexible" + // UniformOrchestrationMode treats VMs as identical instances accessible by the VMSS VM API. + UniformOrchestrationMode OrchestrationModeType = "Uniform" +) diff --git a/azure/converters/vmss.go b/azure/converters/vmss.go index 9863e637fca6..4c72ae1be32a 100644 --- a/azure/converters/vmss.go +++ b/azure/converters/vmss.go @@ -62,6 +62,38 @@ func SDKToVMSS(sdkvmss compute.VirtualMachineScaleSet, sdkinstances []compute.Vi return vmss } +// SDKVMToVMSSVM converts an Azure SDK VM to a VMSS VM. 
+func SDKVMToVMSSVM(sdkInstance compute.VirtualMachine) *azure.VMSSVM { + instance := azure.VMSSVM{ + ID: to.String(sdkInstance.ID), + } + + if sdkInstance.VirtualMachineProperties == nil { + return &instance + } + + instance.State = infrav1.Creating + if sdkInstance.ProvisioningState != nil { + instance.State = infrav1.ProvisioningState(to.String(sdkInstance.ProvisioningState)) + } + + if sdkInstance.OsProfile != nil && sdkInstance.OsProfile.ComputerName != nil { + instance.Name = *sdkInstance.OsProfile.ComputerName + } + + if sdkInstance.StorageProfile != nil && sdkInstance.StorageProfile.ImageReference != nil { + imageRef := sdkInstance.StorageProfile.ImageReference + instance.Image = SDKImageToImage(imageRef, sdkInstance.Plan != nil) + } + + if sdkInstance.Zones != nil && len(*sdkInstance.Zones) > 0 { + // An instance should have only 1 zone, so use the first item of the slice. + instance.AvailabilityZone = to.StringSlice(sdkInstance.Zones)[0] + } + + return &instance +} + // SDKToVMSSVM converts an Azure SDK VirtualMachineScaleSetVM into an infrav1exp.VMSSVM. func SDKToVMSSVM(sdkInstance compute.VirtualMachineScaleSetVM) *azure.VMSSVM { // Convert resourceGroup Name ID ( ProviderID in capz objects ) @@ -117,3 +149,11 @@ func SDKImageToImage(sdkImageRef *compute.ImageReference, isThirdPartyImage bool }, } } + +// GetOrchestrationMode returns the compute.OrchestrationMode for the given infrav1.OrchestrationModeType. 
+func GetOrchestrationMode(modeType infrav1.OrchestrationModeType) compute.OrchestrationMode { + if modeType == infrav1.FlexibleOrchestrationMode { + return compute.OrchestrationModeFlexible + } + return compute.OrchestrationModeUniform +} diff --git a/azure/converters/vmss_test.go b/azure/converters/vmss_test.go index 57c383c77be0..604a0df28174 100644 --- a/azure/converters/vmss_test.go +++ b/azure/converters/vmss_test.go @@ -267,3 +267,102 @@ func Test_SDKImageToImage(t *testing.T) { }) } } + +func Test_SDKVMToVMSSVM(t *testing.T) { + cases := []struct { + Name string + Subject compute.VirtualMachine + Expected *azure.VMSSVM + }{ + { + Name: "minimal VM", + Subject: compute.VirtualMachine{ + ID: to.StringPtr("vmID1"), + }, + Expected: &azure.VMSSVM{ + ID: "vmID1", + }, + }, + { + Name: "VM with zones", + Subject: compute.VirtualMachine{ + ID: to.StringPtr("vmID2"), + VirtualMachineProperties: &compute.VirtualMachineProperties{ + OsProfile: &compute.OSProfile{ + ComputerName: to.StringPtr("vmwithzones"), + }, + }, + Zones: to.StringSlicePtr([]string{"zone0", "zone1"}), + }, + Expected: &azure.VMSSVM{ + ID: "vmID2", + Name: "vmwithzones", + State: "Creating", + AvailabilityZone: "zone0", + }, + }, + { + Name: "VM with storage", + Subject: compute.VirtualMachine{ + ID: to.StringPtr("vmID3"), + VirtualMachineProperties: &compute.VirtualMachineProperties{ + OsProfile: &compute.OSProfile{ + ComputerName: to.StringPtr("vmwithstorage"), + }, + StorageProfile: &compute.StorageProfile{ + ImageReference: &compute.ImageReference{ + ID: to.StringPtr("imageID"), + }, + }, + }, + }, + Expected: &azure.VMSSVM{ + ID: "vmID3", + Image: infrav1.Image{ + ID: to.StringPtr("imageID"), + Marketplace: &infrav1.AzureMarketplaceImage{}, + }, + Name: "vmwithstorage", + State: "Creating", + }, + }, + { + Name: "VM with provisioning state", + Subject: compute.VirtualMachine{ + ID: to.StringPtr("vmID4"), + VirtualMachineProperties: &compute.VirtualMachineProperties{ + OsProfile: 
&compute.OSProfile{ + ComputerName: to.StringPtr("vmwithstate"), + }, + ProvisioningState: to.StringPtr("Succeeded"), + }, + }, + Expected: &azure.VMSSVM{ + ID: "vmID4", + Name: "vmwithstate", + State: "Succeeded", + }, + }, + } + + for _, c := range cases { + c := c + t.Run(c.Name, func(t *testing.T) { + t.Parallel() + g := gomega.NewGomegaWithT(t) + subject := converters.SDKVMToVMSSVM(c.Subject) + g.Expect(subject).To(gomega.Equal(c.Expected)) + }) + } +} + +func Test_GetOrchestrationMode(t *testing.T) { + g := gomega.NewGomegaWithT(t) + + g.Expect(converters.GetOrchestrationMode(infrav1.FlexibleOrchestrationMode)). + To(gomega.Equal(compute.OrchestrationModeFlexible)) + g.Expect(converters.GetOrchestrationMode(infrav1.UniformOrchestrationMode)). + To(gomega.Equal(compute.OrchestrationModeUniform)) + g.Expect(converters.GetOrchestrationMode("invalid")). + To(gomega.Equal(compute.OrchestrationModeUniform)) +} diff --git a/azure/scope/machinepool.go b/azure/scope/machinepool.go index 8262eadff4a3..5a575734133c 100644 --- a/azure/scope/machinepool.go +++ b/azure/scope/machinepool.go @@ -19,8 +19,11 @@ package scope import ( "context" "encoding/base64" + "fmt" + "strings" "time" + azureautorest "github.com/Azure/go-autorest/autorest/azure" "github.com/Azure/go-autorest/autorest/to" "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" @@ -136,6 +139,7 @@ func (m *MachinePoolScope) ScaleSetSpec() azure.ScaleSetSpec { FailureDomains: m.MachinePool.Spec.FailureDomains, TerminateNotificationTimeout: m.AzureMachinePool.Spec.Template.TerminateNotificationTimeout, NetworkInterfaces: m.AzureMachinePool.Spec.Template.NetworkInterfaces, + OrchestrationMode: m.AzureMachinePool.Spec.OrchestrationMode, } } @@ -339,17 +343,18 @@ func (m *MachinePoolScope) applyAzureMachinePoolMachines(ctx context.Context) er } func (m *MachinePoolScope) createMachine(ctx context.Context, machine azure.VMSSVM) error { - if machine.InstanceID == "" { - return errors.New("machine.InstanceID must not be 
empty") - } + ctx, _, done := tele.StartSpanWithLogger(ctx, "scope.MachinePoolScope.createMachine") + defer done() - if machine.Name == "" { - return errors.New("machine.Name must not be empty") + parsed, err := azureautorest.ParseResourceID(machine.ID) + if err != nil { + return errors.Wrap(err, fmt.Sprintf("failed to parse resource id %q", machine.ID)) } + instanceID := strings.ReplaceAll(parsed.ResourceName, "_", "-") ampm := infrav1exp.AzureMachinePoolMachine{ ObjectMeta: metav1.ObjectMeta{ - Name: m.AzureMachinePool.Name + "-" + machine.InstanceID, + Name: m.AzureMachinePool.Name + "-" + instanceID, Namespace: m.AzureMachinePool.Namespace, OwnerReferences: []metav1.OwnerReference{ { diff --git a/azure/services/scalesets/scalesets.go b/azure/services/scalesets/scalesets.go index 891cd2bb0362..b0ce282c5e2b 100644 --- a/azure/services/scalesets/scalesets.go +++ b/azure/services/scalesets/scalesets.go @@ -277,10 +277,21 @@ func (s *Service) patchVMSSIfNeeded(ctx context.Context, infraVMSS *azure.VMSS) } hasModelChanges := hasModelModifyingDifferences(infraVMSS, vmss) - if maxSurge > 0 && (hasModelChanges || !infraVMSS.HasEnoughLatestModelOrNotMixedModel()) { + var isFlex bool + for _, instance := range infraVMSS.Instances { + if instance.IsFlex() { + isFlex = true + break + } + } + updated := true + if !isFlex { + updated = infraVMSS.HasEnoughLatestModelOrNotMixedModel() + } + if maxSurge > 0 && (hasModelChanges || !updated) { // surge capacity with the intention of lowering during instance reconciliation surge := spec.Capacity + int64(maxSurge) - log.V(4).Info("surging...", "surge", surge) + log.V(4).Info("surging...", "surge", surge, "hasModelChanges", hasModelChanges, "updated", updated) patch.Sku.Capacity = to.Int64Ptr(surge) } @@ -468,6 +479,7 @@ func (s *Service) buildVMSSFromSpec(ctx context.Context, vmssSpec azure.ScaleSet return compute.VirtualMachineScaleSet{}, err } + orchestrationMode := 
converters.GetOrchestrationMode(s.Scope.ScaleSetSpec().OrchestrationMode) vmss := compute.VirtualMachineScaleSet{ Location: to.StringPtr(s.Scope.Location()), Sku: &compute.Sku{ @@ -478,11 +490,8 @@ func (s *Service) buildVMSSFromSpec(ctx context.Context, vmssSpec azure.ScaleSet Zones: to.StringSlicePtr(vmssSpec.FailureDomains), Plan: s.generateImagePlan(ctx), VirtualMachineScaleSetProperties: &compute.VirtualMachineScaleSetProperties{ + OrchestrationMode: orchestrationMode, SinglePlacementGroup: to.BoolPtr(false), - UpgradePolicy: &compute.UpgradePolicy{ - Mode: compute.UpgradeModeManual, - }, - Overprovision: to.BoolPtr(false), VirtualMachineProfile: &compute.VirtualMachineScaleSetVMProfile{ OsProfile: osProfile, StorageProfile: storageProfile, @@ -523,6 +532,20 @@ func (s *Service) buildVMSSFromSpec(ctx context.Context, vmssSpec azure.ScaleSet }, } + // Set properties specific to VMSS orchestration mode + switch orchestrationMode { + case compute.OrchestrationModeUniform: + vmss.VirtualMachineScaleSetProperties.Overprovision = to.BoolPtr(false) + vmss.VirtualMachineScaleSetProperties.UpgradePolicy = &compute.UpgradePolicy{Mode: compute.UpgradeModeManual} + case compute.OrchestrationModeFlexible: + vmss.VirtualMachineScaleSetProperties.VirtualMachineProfile.NetworkProfile.NetworkAPIVersion = + compute.NetworkAPIVersionTwoZeroTwoZeroHyphenMinusOneOneHyphenMinusZeroOne + vmss.VirtualMachineScaleSetProperties.PlatformFaultDomainCount = to.Int32Ptr(1) + if len(vmssSpec.FailureDomains) > 1 { + vmss.VirtualMachineScaleSetProperties.PlatformFaultDomainCount = to.Int32Ptr(int32(len(vmssSpec.FailureDomains))) + } + } + // Use custom NIC definitions in VMSS if set if len(vmssSpec.NetworkInterfaces) > 0 { nicConfigs := []compute.VirtualMachineScaleSetNetworkConfiguration{} diff --git a/azure/services/scalesets/scalesets_test.go b/azure/services/scalesets/scalesets_test.go index 06c84adf070b..5ca9b2497ee0 100644 --- a/azure/services/scalesets/scalesets_test.go +++ 
b/azure/services/scalesets/scalesets_test.go @@ -1267,7 +1267,8 @@ func newDefaultVMSS(vmSize string) compute.VirtualMachineScaleSet { UpgradePolicy: &compute.UpgradePolicy{ Mode: compute.UpgradeModeManual, }, - Overprovision: to.BoolPtr(false), + Overprovision: to.BoolPtr(false), + OrchestrationMode: compute.OrchestrationModeUniform, VirtualMachineProfile: &compute.VirtualMachineScaleSetVMProfile{ OsProfile: &compute.VirtualMachineScaleSetOSProfile{ ComputerNamePrefix: to.StringPtr(defaultVMSSName), diff --git a/azure/services/scalesetvms/mock_scalesetvms/scalesetvms_mock.go b/azure/services/scalesetvms/mock_scalesetvms/scalesetvms_mock.go index 0df504dfd9ac..d7af6996ae40 100644 --- a/azure/services/scalesetvms/mock_scalesetvms/scalesetvms_mock.go +++ b/azure/services/scalesetvms/mock_scalesetvms/scalesetvms_mock.go @@ -261,6 +261,20 @@ func (mr *MockScaleSetVMScopeMockRecorder) Location() *gomock.Call { return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Location", reflect.TypeOf((*MockScaleSetVMScope)(nil).Location)) } +// ProviderID mocks base method. +func (m *MockScaleSetVMScope) ProviderID() string { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "ProviderID") + ret0, _ := ret[0].(string) + return ret0 +} + +// ProviderID indicates an expected call of ProviderID. +func (mr *MockScaleSetVMScopeMockRecorder) ProviderID() *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ProviderID", reflect.TypeOf((*MockScaleSetVMScope)(nil).ProviderID)) +} + // ResourceGroup mocks base method. 
func (m *MockScaleSetVMScope) ResourceGroup() string { m.ctrl.T.Helper() diff --git a/azure/services/scalesetvms/scalesetvms.go b/azure/services/scalesetvms/scalesetvms.go index 076374652919..462251e8bbbb 100644 --- a/azure/services/scalesetvms/scalesetvms.go +++ b/azure/services/scalesetvms/scalesetvms.go @@ -18,12 +18,17 @@ package scalesetvms import ( "context" + "fmt" + "strings" "time" + azureautorest "github.com/Azure/go-autorest/autorest/azure" + "github.com/go-logr/logr" "github.com/pkg/errors" infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1" "sigs.k8s.io/cluster-api-provider-azure/azure" "sigs.k8s.io/cluster-api-provider-azure/azure/converters" + "sigs.k8s.io/cluster-api-provider-azure/azure/services/virtualmachines" "sigs.k8s.io/cluster-api-provider-azure/util/tele" ) @@ -35,22 +40,25 @@ type ( azure.ClusterDescriber azure.AsyncStatusUpdater InstanceID() string + ProviderID() string ScaleSetName() string SetVMSSVM(vmssvm *azure.VMSSVM) } // Service provides operations on Azure resources. Service struct { - Client client - Scope ScaleSetVMScope + Client client + VMClient virtualmachines.Client + Scope ScaleSetVMScope } ) // NewService creates a new service. func NewService(scope ScaleSetVMScope) *Service { return &Service{ - Client: newClient(scope), - Scope: scope, + Client: newClient(scope), + VMClient: virtualmachines.NewClient(scope), + Scope: scope, } } @@ -61,16 +69,34 @@ func (s *Service) Name() string { // Reconcile idempotently gets, creates, and updates a scale set. 
func (s *Service) Reconcile(ctx context.Context) error { - ctx, _, done := tele.StartSpanWithLogger(ctx, "scalesetvms.Service.Reconcile") + ctx, log, done := tele.StartSpanWithLogger(ctx, "scalesetvms.Service.Reconcile") defer done() var ( resourceGroup = s.Scope.ResourceGroup() vmssName = s.Scope.ScaleSetName() instanceID = s.Scope.InstanceID() + providerID = s.Scope.ProviderID() ) - // fetch the latest data about the instance -- model mutations are handled by the AzureMachinePoolReconciler + // Fetch the latest instance or VM data. AzureMachinePoolReconciler handles model mutations. + if isFlex(instanceID) { + resourceID := strings.TrimPrefix(providerID, azure.ProviderIDPrefix) + log.V(4).Info("VMSS is flex", "vmssName", vmssName, "providerID", providerID, "resourceID", resourceID) + // Using VMSS Flex, so fetch by resource ID. + vm, err := s.VMClient.GetByID(ctx, resourceID) + if err != nil { + if azure.ResourceNotFound(err) { + return azure.WithTransientError(errors.New("vm does not exist yet"), 30*time.Second) + } + return errors.Wrap(err, "failed getting vm") + } + s.Scope.SetVMSSVM(converters.SDKVMToVMSSVM(vm)) + return nil + } + + log.V(4).Info("VMSS is uniform", "vmssName", vmssName, "providerID", providerID, "instanceID", instanceID) + // Using VMSS Uniform, so fetch by instance ID. 
instance, err := s.Client.Get(ctx, resourceGroup, vmssName, instanceID) if err != nil { if azure.ResourceNotFound(err) { @@ -89,6 +115,7 @@ func (s *Service) Delete(ctx context.Context) error { resourceGroup = s.Scope.ResourceGroup() vmssName = s.Scope.ScaleSetName() instanceID = s.Scope.InstanceID() + providerID = s.Scope.ProviderID() ) ctx, log, done := tele.StartSpanWithLogger( @@ -100,6 +127,89 @@ func (s *Service) Delete(ctx context.Context) error { ) defer done() + if isFlex(instanceID) { + return s.deleteVMSSFlexVM(ctx, strings.TrimPrefix(providerID, azure.ProviderIDPrefix)) + } + return s.deleteVMSSUniformInstance(ctx, resourceGroup, vmssName, instanceID, log) +} + +func isFlex(instanceID string) bool { + return instanceID == "" +} + +func (s *Service) deleteVMSSFlexVM(ctx context.Context, resourceID string) error { + ctx, log, done := tele.StartSpanWithLogger(ctx, "scalesetvms.Service.deleteVMSSFlexVM") + defer done() + + defer func() { + if vm, err := s.VMClient.GetByID(ctx, resourceID); err == nil && vm.VirtualMachineProperties != nil { + log.V(4).Info("vmss vm delete in progress", "state", vm.ProvisioningState) + s.Scope.SetVMSSVM(converters.SDKVMToVMSSVM(vm)) + } + }() + + parsed, err := azureautorest.ParseResourceID(resourceID) + if err != nil { + return errors.Wrap(err, fmt.Sprintf("failed to parse resource id %q", resourceID)) + } + + resourceGroup := parsed.ResourceGroup + resourceName := strings.TrimPrefix(s.Scope.ProviderID(), azure.ProviderIDPrefix) + resourceNameSplits := strings.Split(resourceName, "/") + resourceName = resourceNameSplits[len(resourceNameSplits)-3] + "_" + resourceNameSplits[len(resourceNameSplits)-1] + + log.V(4).Info("entering delete") + future := s.Scope.GetLongRunningOperationState(resourceName, serviceName, infrav1.DeleteFuture) + if future != nil { + if future.Type != infrav1.DeleteFuture { + return azure.WithTransientError(errors.New("attempting to delete, non-delete operation in progress"), 30*time.Second) + } + + 
log.V(4).Info("checking if the vm is done deleting")
+		if _, err := s.VMClient.GetResultIfDone(ctx, future); err != nil {
+			// fetch vm to update status
+			return errors.Wrap(err, "failed to get result of long running operation")
+		}
+
+		// there was no error in fetching the result, the future has been completed
+		log.V(4).Info("successfully deleted the vm")
+		s.Scope.DeleteLongRunningOperationState(resourceName, serviceName, infrav1.DeleteFuture)
+		return nil
+	}
+	// since the future was nil, there is no ongoing activity; start deleting the vm
+	log.V(4).Info("vmss delete vm future is nil")
+
+	vmGetter := &VMSSFlexVMGetter{
+		Name:          resourceName,
+		ResourceGroup: resourceGroup,
+	}
+
+	sdkFuture, err := s.VMClient.DeleteAsync(ctx, vmGetter)
+	if err != nil {
+		if azure.ResourceNotFound(err) {
+			// already deleted
+			return nil
+		}
+		return errors.Wrapf(err, "failed to delete vm %s/%s", resourceGroup, resourceName)
+	}
+
+	if sdkFuture != nil {
+		future, err = converters.SDKToFuture(sdkFuture, infrav1.DeleteFuture, serviceName, vmGetter.ResourceName(), vmGetter.ResourceGroupName())
+		if err != nil {
+			return errors.Wrapf(err, "failed to convert SDK to Future %s/%s", resourceGroup, resourceName)
+		}
+		s.Scope.SetLongRunningOperationState(future)
+		return nil
+	}
+
+	s.Scope.DeleteLongRunningOperationState(resourceName, serviceName, infrav1.DeleteFuture)
+	return nil
+}
+
+func (s *Service) deleteVMSSUniformInstance(ctx context.Context, resourceGroup string, vmssName string, instanceID string, log logr.Logger) error {
+	ctx, log, done := tele.StartSpanWithLogger(ctx, "scalesetvms.Service.deleteVMSSUniformInstance")
+	defer done()
+
 	defer func() {
 		if instance, err := s.Client.Get(ctx, resourceGroup, vmssName, instanceID); err == nil && instance.VirtualMachineScaleSetVMProperties != nil {
 			log.V(4).Info("updating vmss vm state", "state", instance.ProvisioningState)
@@ -110,6 +220,10 @@ func (s *Service) Delete(ctx context.Context) error {
log.V(4).Info("entering delete") future := s.Scope.GetLongRunningOperationState(instanceID, serviceName, infrav1.DeleteFuture) if future != nil { + if future.Type != infrav1.DeleteFuture { + return azure.WithTransientError(errors.New("attempting to delete, non-delete operation in progress"), 30*time.Second) + } + log.V(4).Info("checking if the instance is done deleting") if _, err := s.Client.GetResultIfDone(ctx, future); err != nil { // fetch instance to update status @@ -143,3 +257,32 @@ func (s *Service) Delete(ctx context.Context) error { s.Scope.DeleteLongRunningOperationState(instanceID, serviceName, infrav1.DeleteFuture) return nil } + +// VMSSFlexVMGetter gets the information required to create, update, or delete an Azure resource. +type VMSSFlexVMGetter struct { + Name string + ResourceGroup string +} + +// ResourceName returns the name of the resource. +func (vm *VMSSFlexVMGetter) ResourceName() string { + return vm.Name +} + +// OwnerResourceName returns the name of the resource that owns this Azure subresource. +func (vm *VMSSFlexVMGetter) OwnerResourceName() string { + return "" +} + +// ResourceGroupName returns the name of the resource group the resource is in. +func (vm *VMSSFlexVMGetter) ResourceGroupName() string { + return vm.ResourceGroup +} + +// Parameters takes the existing resource and returns the desired parameters of the resource. +// If the resource does not exist, or we do not care about existing parameters to update the resource, existing should be `nil`. +// If no update is needed on the resource, Parameters should return `nil`. +// NOTE: Not yet implemented, see kubernetes-sigs/cluster-api-provider-azure#2720. 
+func (vm *VMSSFlexVMGetter) Parameters(ctx context.Context, existing interface{}) (params interface{}, err error) { + return nil, nil +} diff --git a/azure/services/scalesetvms/scalesetvms_test.go b/azure/services/scalesetvms/scalesetvms_test.go index 0728b0c805b8..cb206ab83fe9 100644 --- a/azure/services/scalesetvms/scalesetvms_test.go +++ b/azure/services/scalesetvms/scalesetvms_test.go @@ -103,6 +103,7 @@ func TestService_Reconcile(t *testing.T) { Setup: func(s *mock_scalesetvms.MockScaleSetVMScopeMockRecorder, m *mock_scalesetvms.MockclientMockRecorder) { s.ResourceGroup().Return("rg") s.InstanceID().Return("0") + s.ProviderID().Return("foo") s.ScaleSetName().Return("scaleset") vm := compute.VirtualMachineScaleSetVM{ InstanceID: to.StringPtr("0"), @@ -116,6 +117,7 @@ func TestService_Reconcile(t *testing.T) { Setup: func(s *mock_scalesetvms.MockScaleSetVMScopeMockRecorder, m *mock_scalesetvms.MockclientMockRecorder) { s.ResourceGroup().Return("rg") s.InstanceID().Return("0") + s.ProviderID().Return("foo") s.ScaleSetName().Return("scaleset") m.Get(gomock2.AContext(), "rg", "scaleset", "0").Return(compute.VirtualMachineScaleSetVM{}, autorest404) }, @@ -127,6 +129,7 @@ func TestService_Reconcile(t *testing.T) { Setup: func(s *mock_scalesetvms.MockScaleSetVMScopeMockRecorder, m *mock_scalesetvms.MockclientMockRecorder) { s.ResourceGroup().Return("rg") s.InstanceID().Return("0") + s.ProviderID().Return("foo") s.ScaleSetName().Return("scaleset") m.Get(gomock2.AContext(), "rg", "scaleset", "0").Return(compute.VirtualMachineScaleSetVM{}, errors.New("boom")) }, @@ -144,9 +147,9 @@ func TestService_Reconcile(t *testing.T) { ) defer mockCtrl.Finish() - scopeMock.EXPECT().SubscriptionID().Return("subID") - scopeMock.EXPECT().BaseURI().Return("https://localhost/") - scopeMock.EXPECT().Authorizer().Return(nil) + scopeMock.EXPECT().SubscriptionID().Return("subID").AnyTimes() + scopeMock.EXPECT().BaseURI().Return("https://localhost/").AnyTimes() + 
scopeMock.EXPECT().Authorizer().Return(nil).AnyTimes() service := NewService(scopeMock) service.Client = clientMock @@ -177,6 +180,7 @@ func TestService_Delete(t *testing.T) { Setup: func(s *mock_scalesetvms.MockScaleSetVMScopeMockRecorder, m *mock_scalesetvms.MockclientMockRecorder) { s.ResourceGroup().Return("rg") s.InstanceID().Return("0") + s.ProviderID().Return("foo") s.ScaleSetName().Return("scaleset") s.GetLongRunningOperationState("0", serviceName, infrav1.DeleteFuture).Return(nil) future := &infrav1.Future{ @@ -197,6 +201,7 @@ func TestService_Delete(t *testing.T) { Setup: func(s *mock_scalesetvms.MockScaleSetVMScopeMockRecorder, m *mock_scalesetvms.MockclientMockRecorder) { s.ResourceGroup().Return("rg") s.InstanceID().Return("0") + s.ProviderID().Return("foo") s.ScaleSetName().Return("scaleset") future := &infrav1.Future{ Type: infrav1.DeleteFuture, @@ -212,6 +217,7 @@ func TestService_Delete(t *testing.T) { Setup: func(s *mock_scalesetvms.MockScaleSetVMScopeMockRecorder, m *mock_scalesetvms.MockclientMockRecorder) { s.ResourceGroup().Return("rg") s.InstanceID().Return("0") + s.ProviderID().Return("foo") s.ScaleSetName().Return("scaleset") s.GetLongRunningOperationState("0", serviceName, infrav1.DeleteFuture).Return(nil) m.DeleteAsync(gomock2.AContext(), "rg", "scaleset", "0").Return(nil, autorest404) @@ -223,6 +229,7 @@ func TestService_Delete(t *testing.T) { Setup: func(s *mock_scalesetvms.MockScaleSetVMScopeMockRecorder, m *mock_scalesetvms.MockclientMockRecorder) { s.ResourceGroup().Return("rg") s.InstanceID().Return("0") + s.ProviderID().Return("foo") s.ScaleSetName().Return("scaleset") s.GetLongRunningOperationState("0", serviceName, infrav1.DeleteFuture).Return(nil) m.DeleteAsync(gomock2.AContext(), "rg", "scaleset", "0").Return(nil, errors.New("boom")) @@ -235,6 +242,7 @@ func TestService_Delete(t *testing.T) { Setup: func(s *mock_scalesetvms.MockScaleSetVMScopeMockRecorder, m *mock_scalesetvms.MockclientMockRecorder) { 
s.ResourceGroup().Return("rg") s.InstanceID().Return("0") + s.ProviderID().Return("foo") s.ScaleSetName().Return("scaleset") future := &infrav1.Future{ Type: infrav1.DeleteFuture, @@ -257,9 +265,9 @@ func TestService_Delete(t *testing.T) { ) defer mockCtrl.Finish() - scopeMock.EXPECT().SubscriptionID().Return("subID") - scopeMock.EXPECT().BaseURI().Return("https://localhost/") - scopeMock.EXPECT().Authorizer().Return(nil) + scopeMock.EXPECT().SubscriptionID().Return("subID").AnyTimes() + scopeMock.EXPECT().BaseURI().Return("https://localhost/").AnyTimes() + scopeMock.EXPECT().Authorizer().Return(nil).AnyTimes() service := NewService(scopeMock) service.Client = clientMock diff --git a/azure/services/virtualmachines/client.go b/azure/services/virtualmachines/client.go index b50bf6ef9b7d..ccc20869c005 100644 --- a/azure/services/virtualmachines/client.go +++ b/azure/services/virtualmachines/client.go @@ -18,7 +18,10 @@ package virtualmachines import ( "context" + "encoding/base64" "encoding/json" + "fmt" + "time" "github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2021-11-01/compute" "github.com/Azure/go-autorest/autorest" @@ -31,11 +34,35 @@ import ( "sigs.k8s.io/cluster-api-provider-azure/util/tele" ) -// AzureClient contains the Azure go-sdk Client. -type AzureClient struct { - virtualmachines compute.VirtualMachinesClient +type ( + // AzureClient contains the Azure go-sdk Client. + AzureClient struct { + virtualmachines compute.VirtualMachinesClient + } + + // Client provides operations on Azure virtual machine resources. 
+ Client interface { + Get(context.Context, azure.ResourceSpecGetter) (interface{}, error) + GetByID(context.Context, string) (compute.VirtualMachine, error) + CreateOrUpdateAsync(ctx context.Context, spec azure.ResourceSpecGetter, parameters interface{}) (result interface{}, future azureautorest.FutureAPI, err error) + DeleteAsync(ctx context.Context, spec azure.ResourceSpecGetter) (future azureautorest.FutureAPI, err error) + IsDone(ctx context.Context, future azureautorest.FutureAPI) (isDone bool, err error) + Result(ctx context.Context, future azureautorest.FutureAPI, futureType string) (result interface{}, err error) + GetResultIfDone(ctx context.Context, future *infrav1.Future) (compute.VirtualMachine, error) + } +) + +type genericVMFuture interface { + DoneWithContext(ctx context.Context, sender autorest.Sender) (done bool, err error) + Result(client compute.VirtualMachinesClient) (vm compute.VirtualMachine, err error) } +type deleteFutureAdapter struct { + compute.VirtualMachinesDeleteFuture +} + +var _ Client = &AzureClient{} + // NewClient creates a new VM client from subscription ID. func NewClient(auth azure.Authorizer) *AzureClient { c := newVirtualMachinesClient(auth.SubscriptionID(), auth.BaseURI(), auth.Authorizer()) @@ -57,6 +84,21 @@ func (ac *AzureClient) Get(ctx context.Context, spec azure.ResourceSpecGetter) ( return ac.virtualmachines.Get(ctx, spec.ResourceGroupName(), spec.ResourceName(), "") } +// GetByID retrieves information about the model or instance view of a virtual machine. 
+func (ac *AzureClient) GetByID(ctx context.Context, resourceID string) (compute.VirtualMachine, error) { + ctx, log, done := tele.StartSpanWithLogger(ctx, "virtualmachines.AzureClient.GetByID") + defer done() + + parsed, err := azureautorest.ParseResourceID(resourceID) + if err != nil { + return compute.VirtualMachine{}, errors.Wrap(err, fmt.Sprintf("failed parsing the VM resource id %q", resourceID)) + } + + log.V(4).Info("parsed VM resourceID", "parsed", parsed) + + return ac.virtualmachines.Get(ctx, parsed.ResourceGroup, parsed.ResourceName, "") +} + // CreateOrUpdateAsync creates or updates a virtual machine asynchronously. // It sends a PUT request to Azure and if accepted without error, the func will return a Future which can be used to track the ongoing // progress of the operation. @@ -169,3 +211,51 @@ func (ac *AzureClient) Result(ctx context.Context, future azureautorest.FutureAP return nil, errors.Errorf("unknown future type %q", futureType) } } + +// GetResultIfDone fetches the result of a long-running operation future if it is done. 
+func (ac *AzureClient) GetResultIfDone(ctx context.Context, future *infrav1.Future) (compute.VirtualMachine, error) { + ctx, _, spanDone := tele.StartSpanWithLogger(ctx, "virtualmachines.AzureClient.GetResultIfDone") + defer spanDone() + + var genericFuture genericVMFuture + futureData, err := base64.URLEncoding.DecodeString(future.Data) + if err != nil { + return compute.VirtualMachine{}, errors.Wrapf(err, "failed to base64 decode future data") + } + + switch future.Type { + case infrav1.DeleteFuture: + var future compute.VirtualMachinesDeleteFuture + if err := json.Unmarshal(futureData, &future); err != nil { + return compute.VirtualMachine{}, errors.Wrap(err, "failed to unmarshal future data") + } + + genericFuture = &deleteFutureAdapter{ + VirtualMachinesDeleteFuture: future, + } + default: + return compute.VirtualMachine{}, errors.Errorf("unknown future type %q", future.Type) + } + + done, err := genericFuture.DoneWithContext(ctx, ac.virtualmachines) + if err != nil { + return compute.VirtualMachine{}, errors.Wrapf(err, "failed checking if the operation was complete") + } + + if !done { + return compute.VirtualMachine{}, azure.WithTransientError(azure.NewOperationNotDoneError(future), 15*time.Second) + } + + vm, err := genericFuture.Result(ac.virtualmachines) + if err != nil { + return vm, errors.Wrapf(err, "failed fetching the result of operation for vm") + } + + return vm, nil +} + +// Result wraps result of a delete so it can be treated generically, when only the success or error is important. 
+func (da *deleteFutureAdapter) Result(client compute.VirtualMachinesClient) (compute.VirtualMachine, error) { + _, err := da.VirtualMachinesDeleteFuture.Result(client) + return compute.VirtualMachine{}, err +} diff --git a/azure/services/virtualmachines/mock_virtualmachines/client_mock.go b/azure/services/virtualmachines/mock_virtualmachines/client_mock.go index 62350954dba5..d1efe0ca1235 100644 --- a/azure/services/virtualmachines/mock_virtualmachines/client_mock.go +++ b/azure/services/virtualmachines/mock_virtualmachines/client_mock.go @@ -19,3 +19,197 @@ limitations under the License. // Package mock_virtualmachines is a generated GoMock package. package mock_virtualmachines + +import ( + context "context" + reflect "reflect" + + compute "github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2021-11-01/compute" + autorest "github.com/Azure/go-autorest/autorest" + azure "github.com/Azure/go-autorest/autorest/azure" + gomock "github.com/golang/mock/gomock" + v1beta1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1" + azure0 "sigs.k8s.io/cluster-api-provider-azure/azure" +) + +// MockClient is a mock of Client interface. +type MockClient struct { + ctrl *gomock.Controller + recorder *MockClientMockRecorder +} + +// MockClientMockRecorder is the mock recorder for MockClient. +type MockClientMockRecorder struct { + mock *MockClient +} + +// NewMockClient creates a new mock instance. +func NewMockClient(ctrl *gomock.Controller) *MockClient { + mock := &MockClient{ctrl: ctrl} + mock.recorder = &MockClientMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use. +func (m *MockClient) EXPECT() *MockClientMockRecorder { + return m.recorder +} + +// CreateOrUpdateAsync mocks base method. 
+func (m *MockClient) CreateOrUpdateAsync(ctx context.Context, spec azure0.ResourceSpecGetter, parameters interface{}) (interface{}, azure.FutureAPI, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "CreateOrUpdateAsync", ctx, spec, parameters) + ret0, _ := ret[0].(interface{}) + ret1, _ := ret[1].(azure.FutureAPI) + ret2, _ := ret[2].(error) + return ret0, ret1, ret2 +} + +// CreateOrUpdateAsync indicates an expected call of CreateOrUpdateAsync. +func (mr *MockClientMockRecorder) CreateOrUpdateAsync(ctx, spec, parameters interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CreateOrUpdateAsync", reflect.TypeOf((*MockClient)(nil).CreateOrUpdateAsync), ctx, spec, parameters) +} + +// DeleteAsync mocks base method. +func (m *MockClient) DeleteAsync(ctx context.Context, spec azure0.ResourceSpecGetter) (azure.FutureAPI, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "DeleteAsync", ctx, spec) + ret0, _ := ret[0].(azure.FutureAPI) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// DeleteAsync indicates an expected call of DeleteAsync. +func (mr *MockClientMockRecorder) DeleteAsync(ctx, spec interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DeleteAsync", reflect.TypeOf((*MockClient)(nil).DeleteAsync), ctx, spec) +} + +// Get mocks base method. +func (m *MockClient) Get(arg0 context.Context, arg1 azure0.ResourceSpecGetter) (interface{}, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "Get", arg0, arg1) + ret0, _ := ret[0].(interface{}) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// Get indicates an expected call of Get. +func (mr *MockClientMockRecorder) Get(arg0, arg1 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Get", reflect.TypeOf((*MockClient)(nil).Get), arg0, arg1) +} + +// GetByID mocks base method. 
+func (m *MockClient) GetByID(arg0 context.Context, arg1 string) (compute.VirtualMachine, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetByID", arg0, arg1) + ret0, _ := ret[0].(compute.VirtualMachine) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// GetByID indicates an expected call of GetByID. +func (mr *MockClientMockRecorder) GetByID(arg0, arg1 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetByID", reflect.TypeOf((*MockClient)(nil).GetByID), arg0, arg1) +} + +// GetResultIfDone mocks base method. +func (m *MockClient) GetResultIfDone(ctx context.Context, future *v1beta1.Future) (compute.VirtualMachine, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "GetResultIfDone", ctx, future) + ret0, _ := ret[0].(compute.VirtualMachine) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// GetResultIfDone indicates an expected call of GetResultIfDone. +func (mr *MockClientMockRecorder) GetResultIfDone(ctx, future interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "GetResultIfDone", reflect.TypeOf((*MockClient)(nil).GetResultIfDone), ctx, future) +} + +// IsDone mocks base method. +func (m *MockClient) IsDone(ctx context.Context, future azure.FutureAPI) (bool, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "IsDone", ctx, future) + ret0, _ := ret[0].(bool) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// IsDone indicates an expected call of IsDone. +func (mr *MockClientMockRecorder) IsDone(ctx, future interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "IsDone", reflect.TypeOf((*MockClient)(nil).IsDone), ctx, future) +} + +// Result mocks base method. 
+func (m *MockClient) Result(ctx context.Context, future azure.FutureAPI, futureType string) (interface{}, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "Result", ctx, future, futureType) + ret0, _ := ret[0].(interface{}) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// Result indicates an expected call of Result. +func (mr *MockClientMockRecorder) Result(ctx, future, futureType interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Result", reflect.TypeOf((*MockClient)(nil).Result), ctx, future, futureType) +} + +// MockgenericVMFuture is a mock of genericVMFuture interface. +type MockgenericVMFuture struct { + ctrl *gomock.Controller + recorder *MockgenericVMFutureMockRecorder +} + +// MockgenericVMFutureMockRecorder is the mock recorder for MockgenericVMFuture. +type MockgenericVMFutureMockRecorder struct { + mock *MockgenericVMFuture +} + +// NewMockgenericVMFuture creates a new mock instance. +func NewMockgenericVMFuture(ctrl *gomock.Controller) *MockgenericVMFuture { + mock := &MockgenericVMFuture{ctrl: ctrl} + mock.recorder = &MockgenericVMFutureMockRecorder{mock} + return mock +} + +// EXPECT returns an object that allows the caller to indicate expected use. +func (m *MockgenericVMFuture) EXPECT() *MockgenericVMFutureMockRecorder { + return m.recorder +} + +// DoneWithContext mocks base method. +func (m *MockgenericVMFuture) DoneWithContext(ctx context.Context, sender autorest.Sender) (bool, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "DoneWithContext", ctx, sender) + ret0, _ := ret[0].(bool) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// DoneWithContext indicates an expected call of DoneWithContext. 
+func (mr *MockgenericVMFutureMockRecorder) DoneWithContext(ctx, sender interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DoneWithContext", reflect.TypeOf((*MockgenericVMFuture)(nil).DoneWithContext), ctx, sender) +} + +// Result mocks base method. +func (m *MockgenericVMFuture) Result(client compute.VirtualMachinesClient) (compute.VirtualMachine, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "Result", client) + ret0, _ := ret[0].(compute.VirtualMachine) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// Result indicates an expected call of Result. +func (mr *MockgenericVMFutureMockRecorder) Result(client interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Result", reflect.TypeOf((*MockgenericVMFuture)(nil).Result), client) +} diff --git a/azure/types.go b/azure/types.go index c1dc9b2f05b2..0de3aef2c1a7 100644 --- a/azure/types.go +++ b/azure/types.go @@ -18,6 +18,7 @@ package azure import ( "reflect" + "strings" "github.com/google/go-cmp/cmp" infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1" @@ -67,6 +68,7 @@ type ScaleSetSpec struct { FailureDomains []string VMExtensions []infrav1.VMExtension NetworkInterfaces []infrav1.NetworkInterface + OrchestrationMode infrav1.OrchestrationModeType } // TagsSpec defines the specification for a set of tags. @@ -137,9 +139,24 @@ func (vmss VMSS) InstancesByProviderID() map[string]VMSSVM { // ProviderID returns the K8s provider ID for the VMSS instance. func (vm VMSSVM) ProviderID() string { + if vm.IsFlex() { + // ProviderID for Flex scaleset VMs looks like this: + // azure:///subscriptions//resourceGroups/my-cluster/providers/Microsoft.Compute/virtualMachines/my-cluster_1234abcd + splitOnSlash := strings.Split(vm.ID, "/") + elems := splitOnSlash[:len(splitOnSlash)-4] + elems = append(elems, splitOnSlash[len(splitOnSlash)-2:]...) 
+ return ProviderIDPrefix + strings.Join(elems, "/") + } + // ProviderID for Uniform scaleset VMs looks like this: + // azure:///subscriptions//resourceGroups/my-cluster/providers/Microsoft.Compute/virtualMachineScaleSets/my-cluster-mp-0/virtualMachines/0 return ProviderIDPrefix + vm.ID } +// IsFlex returns true if the VMSS instance is a Flex VM. +func (vm VMSSVM) IsFlex() bool { + return vm.InstanceID == "" +} + // HasLatestModelAppliedToAll returns true if all VMSS instance have the latest model applied. func (vmss VMSS) HasLatestModelAppliedToAll() bool { for _, instance := range vmss.Instances { diff --git a/azure/types_test.go b/azure/types_test.go index 5c9a0904a85c..d72d3b2878e3 100644 --- a/azure/types_test.go +++ b/azure/types_test.go @@ -163,3 +163,35 @@ func getDefaultVMSSForModelTesting() VMSS { }, } } + +func TestIsFlex(t *testing.T) { + cases := []struct { + Name string + VM VMSSVM + IsFlex bool + }{ + { + Name: "default empty VMSSVM", + VM: VMSSVM{}, + IsFlex: true, + }, + { + Name: "VMSSVM with an instance ID", + VM: VMSSVM{InstanceID: "instance-id"}, + IsFlex: false, + }, + { + Name: "VMSSVM with empty instance ID", + VM: VMSSVM{InstanceID: ""}, + IsFlex: true, + }, + } + + for _, c := range cases { + c := c + t.Run(c.Name, func(t *testing.T) { + g := NewWithT(t) + g.Expect(c.VM.IsFlex()).To(Equal(c.IsFlex)) + }) + } +} diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_azuremachinepoolmachines.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_azuremachinepoolmachines.yaml index f7e842e5e2db..eab638aadbd0 100644 --- a/config/crd/bases/infrastructure.cluster.x-k8s.io_azuremachinepoolmachines.yaml +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_azuremachinepoolmachines.yaml @@ -299,7 +299,6 @@ spec: Scale Set type: string required: - - instanceID - providerID type: object status: @@ -461,8 +460,6 @@ spec: version: description: Version defines the Kubernetes version for the VM Instance type: string - required: - - 
latestModelApplied type: object type: object served: true diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_azuremachinepools.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_azuremachinepools.yaml index 9a559e5de54b..7098135d1ddf 100644 --- a/config/crd/bases/infrastructure.cluster.x-k8s.io_azuremachinepools.yaml +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_azuremachinepools.yaml @@ -1380,6 +1380,14 @@ spec: meaning that the node can be drained without any time limitations. NOTE: NodeDrainTimeout is different from `kubectl drain --timeout`' type: string + orchestrationMode: + default: Uniform + description: OrchestrationMode specifies the orchestration mode for + the Virtual Machine Scale Set + enum: + - Flexible + - Uniform + type: string providerID: description: ProviderID is the identification ID of the Virtual Machine Scale Set diff --git a/controllers/helpers.go b/controllers/helpers.go index b83e00527f77..50514f868b5a 100644 --- a/controllers/helpers.go +++ b/controllers/helpers.go @@ -38,11 +38,13 @@ import ( "sigs.k8s.io/cluster-api-provider-azure/azure/scope" "sigs.k8s.io/cluster-api-provider-azure/azure/services/groups" infrav1exp "sigs.k8s.io/cluster-api-provider-azure/exp/api/v1beta1" + "sigs.k8s.io/cluster-api-provider-azure/feature" "sigs.k8s.io/cluster-api-provider-azure/pkg/coalescing" "sigs.k8s.io/cluster-api-provider-azure/util/reconciler" "sigs.k8s.io/cluster-api-provider-azure/util/tele" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" + capifeature "sigs.k8s.io/cluster-api/feature" "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/cluster-api/util/conditions" "sigs.k8s.io/cluster-api/util/patch" @@ -198,6 +200,16 @@ func GetCloudProviderSecret(d azure.ClusterScoper, namespace, name string, owner controlPlaneConfig, workerNodeConfig = newCloudProviderConfig(d) } + // Enable VMSS Flexible nodes if MachinePools are enabled + if feature.Gates.Enabled(capifeature.MachinePool) { + 
if controlPlaneConfig != nil && controlPlaneConfig.VMType == "vmss" { + controlPlaneConfig.EnableVmssFlexNodes = true + } + if workerNodeConfig != nil && workerNodeConfig.VMType == "vmss" { + workerNodeConfig.EnableVmssFlexNodes = true + } + } + controlPlaneData, err := json.MarshalIndent(controlPlaneConfig, "", " ") if err != nil { return nil, errors.Wrap(err, "failed control plane json marshal") @@ -315,6 +327,7 @@ type CloudProviderConfig struct { MaximumLoadBalancerRuleCount int `json:"maximumLoadBalancerRuleCount"` UseManagedIdentityExtension bool `json:"useManagedIdentityExtension"` UseInstanceMetadata bool `json:"useInstanceMetadata"` + EnableVmssFlexNodes bool `json:"enableVmssFlexNodes,omitempty"` UserAssignedIdentityID string `json:"userAssignedIdentityID,omitempty"` CloudProviderRateLimitConfig BackOffConfig diff --git a/controllers/helpers_test.go b/controllers/helpers_test.go index 17e924a3906e..9bad5aacb06a 100644 --- a/controllers/helpers_test.go +++ b/controllers/helpers_test.go @@ -35,10 +35,13 @@ import ( "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" clientgoscheme "k8s.io/client-go/kubernetes/scheme" + utilfeature "k8s.io/component-base/featuregate/testing" infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1" "sigs.k8s.io/cluster-api-provider-azure/azure/scope" "sigs.k8s.io/cluster-api-provider-azure/internal/test/mock_log" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/cluster-api/feature" + capifeature "sigs.k8s.io/cluster-api/feature" "sigs.k8s.io/controller-runtime/pkg/client/fake" ) @@ -97,6 +100,7 @@ func TestGetCloudProviderConfig(t *testing.T) { azureCluster *infrav1.AzureCluster identityType infrav1.VMIdentity identityID string + machinePoolFeature bool expectedControlPlaneConfig string expectedWorkerNodeConfig string }{ @@ -143,6 +147,14 @@ func TestGetCloudProviderConfig(t *testing.T) { expectedControlPlaneConfig: backOffCloudConfig, expectedWorkerNodeConfig: backOffCloudConfig, }, + "with 
machinepools": { + cluster: cluster, + azureCluster: azureCluster, + identityType: infrav1.VMIdentityNone, + machinePoolFeature: true, + expectedControlPlaneConfig: vmssCloudConfig, + expectedWorkerNodeConfig: vmssCloudConfig, + }, } os.Setenv(auth.ClientID, "fooClient") @@ -151,6 +163,9 @@ func TestGetCloudProviderConfig(t *testing.T) { for name, tc := range cases { t.Run(name, func(t *testing.T) { + if tc.machinePoolFeature { + defer utilfeature.SetFeatureGateDuringTest(t, feature.Gates, capifeature.MachinePool, true)() + } initObjects := []runtime.Object{tc.cluster, tc.azureCluster} fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithRuntimeObjects(initObjects...).Build() @@ -676,6 +691,27 @@ const ( "cloudProviderBackoffExponent": 1.2000000000000002, "cloudProviderBackoffDuration": 60, "cloudProviderBackoffJitter": 1.2000000000000002 +}` + vmssCloudConfig = `{ + "cloud": "AzurePublicCloud", + "tenantId": "fooTenant", + "subscriptionId": "baz", + "aadClientId": "fooClient", + "aadClientSecret": "fooSecret", + "resourceGroup": "bar", + "securityGroupName": "foo-node-nsg", + "securityGroupResourceGroup": "bar", + "location": "bar", + "vmType": "vmss", + "vnetName": "foo-vnet", + "vnetResourceGroup": "bar", + "subnetName": "foo-node-subnet", + "routeTableName": "foo-node-routetable", + "loadBalancerSku": "Standard", + "maximumLoadBalancerRuleCount": 250, + "useManagedIdentityExtension": false, + "useInstanceMetadata": true, + "enableVmssFlexNodes": true }` ) diff --git a/docs/book/src/topics/machinepools.md b/docs/book/src/topics/machinepools.md index fc7aa3359e70..8dfcd5d83d4b 100644 --- a/docs/book/src/topics/machinepools.md +++ b/docs/book/src/topics/machinepools.md @@ -33,10 +33,27 @@ ## AzureMachinePool Cluster API Provider Azure (CAPZ) has experimental support for `MachinePool` through the infrastructure -type `AzureMachinePool` and `AzureMachinePoolMachine`. 
An `AzureMachinePool` corresponds to an -[Azure Virtual Machine Scale Set](https://docs.microsoft.com/en-us/azure/virtual-machine-scale-sets/overview), -which provides the cloud provider specific resource for orchestrating a group of Virtual Machines. The -`AzureMachinePoolMachine` corresponds to a virtual machine instance within the Virtual Machine Scale Set. +types `AzureMachinePool` and `AzureMachinePoolMachine`. An `AzureMachinePool` corresponds to a +[Virtual Machine Scale Set](https://docs.microsoft.com/en-us/azure/virtual-machine-scale-sets/overview) (VMSS), +which provides the cloud provider-specific resource for orchestrating a group of Virtual Machines. The +`AzureMachinePoolMachine` corresponds to a virtual machine instance within the VMSS. + +### Orchestration Modes + +Azure Virtual Machine Scale Sets support two orchestration modes: `Uniform` and `Flexible`. CAPZ defaults to `Uniform` mode. See [VMSS Orchestration modes in Azure](https://learn.microsoft.com/azure/virtual-machine-scale-sets/virtual-machine-scale-sets-orchestration-modes) for more information. + +To use `Flexible` mode requires Kubernetes v1.26.0 or later with a workload cluster template like CAPZ's "external-cloud-provider-machinepool" flavor. Ensure that `orchestrationMode` on the `AzureMachinePool` spec is set: + +```yaml +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: AzureMachinePool +metadata: + name: capz-mp-0 +spec: + orchestrationMode: Flexible +``` + +Then, after applying the template to start provisioning, install the [cloud-provider-azure Helm chart](https://github.com/kubernetes-sigs/cloud-provider-azure/tree/master/helm/cloud-provider-azure#readme) to the workload cluster. 
### Safe Rolling Upgrades and Delete Policy `AzureMachinePools` provides the ability to safely deploy new versions of Kubernetes, or more generally, changes to the diff --git a/exp/api/v1alpha3/azuremachinepool_conversion.go b/exp/api/v1alpha3/azuremachinepool_conversion.go index e3c5c4e1f3cb..ad8d8f663b66 100644 --- a/exp/api/v1alpha3/azuremachinepool_conversion.go +++ b/exp/api/v1alpha3/azuremachinepool_conversion.go @@ -105,6 +105,9 @@ func (src *AzureMachinePool) ConvertTo(dstRaw conversion.Hub) error { dst.Spec.Template.Diagnostics = restored.Spec.Template.Diagnostics } + // Restore orchestration mode + dst.Spec.OrchestrationMode = restored.Spec.OrchestrationMode + return nil } diff --git a/exp/api/v1alpha3/zz_generated.conversion.go b/exp/api/v1alpha3/zz_generated.conversion.go index 49578edbf067..12f2cbb91589 100644 --- a/exp/api/v1alpha3/zz_generated.conversion.go +++ b/exp/api/v1alpha3/zz_generated.conversion.go @@ -523,6 +523,7 @@ func autoConvert_v1beta1_AzureMachinePoolSpec_To_v1alpha3_AzureMachinePoolSpec(i out.RoleAssignmentName = in.RoleAssignmentName // WARNING: in.Strategy requires manual conversion: does not exist in peer-type // WARNING: in.NodeDrainTimeout requires manual conversion: does not exist in peer-type + // WARNING: in.OrchestrationMode requires manual conversion: does not exist in peer-type return nil } diff --git a/exp/api/v1alpha4/azuremachinepool_conversion.go b/exp/api/v1alpha4/azuremachinepool_conversion.go index c68047ad3b90..163a4ec1f524 100644 --- a/exp/api/v1alpha4/azuremachinepool_conversion.go +++ b/exp/api/v1alpha4/azuremachinepool_conversion.go @@ -20,7 +20,7 @@ import ( unsafe "unsafe" "k8s.io/apimachinery/pkg/api/resource" - convert "k8s.io/apimachinery/pkg/conversion" + apiconversion "k8s.io/apimachinery/pkg/conversion" infrav1alpha4 "sigs.k8s.io/cluster-api-provider-azure/api/v1alpha4" infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1" infrav1exp "sigs.k8s.io/cluster-api-provider-azure/exp/api/v1beta1" @@ 
// Convert_v1beta1_AzureMachinePoolSpec_To_v1alpha4_AzureMachinePoolSpec converts a v1beta1 AzureMachinePool.Spec to a v1alpha4 AzureMachinePool.Spec.
// Fields absent from v1alpha4 (e.g. OrchestrationMode) are intentionally dropped
// by the autogenerated conversion; ConvertTo restores them from annotations on round-trip.
func Convert_v1beta1_AzureMachinePoolSpec_To_v1alpha4_AzureMachinePoolSpec(in *infrav1exp.AzureMachinePoolSpec, out *AzureMachinePoolSpec, s apiconversion.Scope) error {
	return autoConvert_v1beta1_AzureMachinePoolSpec_To_v1alpha4_AzureMachinePoolSpec(in, out, s)
}

// Convert_v1beta1_AzureMachinePoolMachineTemplate_To_v1alpha4_AzureMachinePoolMachineTemplate converts an Azure Machine Pool Machine Template from v1beta1 to v1alpha4.
func Convert_v1beta1_AzureMachinePoolMachineTemplate_To_v1alpha4_AzureMachinePoolMachineTemplate(in *infrav1exp.AzureMachinePoolMachineTemplate, out *AzureMachinePoolMachineTemplate, s apiconversion.Scope) error {
	return autoConvert_v1beta1_AzureMachinePoolMachineTemplate_To_v1alpha4_AzureMachinePoolMachineTemplate(in, out, s)
}

// Convert_v1beta1_SpotVMOptions_To_v1alpha4_SpotVMOptions converts a SpotVMOptions from v1beta1 to v1alpha4.
// MaxPrice has an identical memory layout in both versions, so the pointer is
// reinterpreted rather than the quantity deep-copied.
func Convert_v1beta1_SpotVMOptions_To_v1alpha4_SpotVMOptions(in *infrav1.SpotVMOptions, out *infrav1alpha4.SpotVMOptions, s apiconversion.Scope) error {
	out.MaxPrice = (*resource.Quantity)(unsafe.Pointer(in.MaxPrice))
	return nil
}

// Convert_v1alpha4_SpotVMOptions_To_v1beta1_SpotVMOptions converts a SpotVMOptions from v1alpha4 to v1beta1.
func Convert_v1alpha4_SpotVMOptions_To_v1beta1_SpotVMOptions(in *infrav1alpha4.SpotVMOptions, out *infrav1.SpotVMOptions, s apiconversion.Scope) error {
	out.MaxPrice = (*resource.Quantity)(unsafe.Pointer(in.MaxPrice))
	return nil
}

// ConvertFrom converts from the Hub version (v1beta1) to this version.
func (dst *AzureMachinePoolMachine) ConvertFrom(srcRaw conversion.Hub) error {
	src := srcRaw.(*infrav1exp.AzureMachinePoolMachine)

	if err := Convert_v1beta1_AzureMachinePoolMachine_To_v1alpha4_AzureMachinePoolMachine(src, dst, nil); err != nil {
		return err
	}

	// Preserve Hub data on down-conversion, so fields unknown to v1alpha4
	// survive a round-trip through this version.
	return utilconversion.MarshalData(src, dst)
}
{ + return err + } if err := s.AddConversionFunc((*v1beta1.AzureManagedControlPlaneSpec)(nil), (*AzureManagedControlPlaneSpec)(nil), func(a, b interface{}, scope conversion.Scope) error { return Convert_v1beta1_AzureManagedControlPlaneSpec_To_v1alpha4_AzureManagedControlPlaneSpec(a.(*v1beta1.AzureManagedControlPlaneSpec), b.(*AzureManagedControlPlaneSpec), scope) }); err != nil { @@ -785,14 +785,10 @@ func autoConvert_v1beta1_AzureMachinePoolSpec_To_v1alpha4_AzureMachinePoolSpec(i return err } out.NodeDrainTimeout = (*metav1.Duration)(unsafe.Pointer(in.NodeDrainTimeout)) + // WARNING: in.OrchestrationMode requires manual conversion: does not exist in peer-type return nil } -// Convert_v1beta1_AzureMachinePoolSpec_To_v1alpha4_AzureMachinePoolSpec is an autogenerated conversion function. -func Convert_v1beta1_AzureMachinePoolSpec_To_v1alpha4_AzureMachinePoolSpec(in *v1beta1.AzureMachinePoolSpec, out *AzureMachinePoolSpec, s conversion.Scope) error { - return autoConvert_v1beta1_AzureMachinePoolSpec_To_v1alpha4_AzureMachinePoolSpec(in, out, s) -} - func autoConvert_v1alpha4_AzureMachinePoolStatus_To_v1beta1_AzureMachinePoolStatus(in *AzureMachinePoolStatus, out *v1beta1.AzureMachinePoolStatus, s conversion.Scope) error { out.Ready = in.Ready out.Replicas = in.Replicas diff --git a/exp/api/v1beta1/azuremachinepool_test.go b/exp/api/v1beta1/azuremachinepool_test.go index 6bbdfb7a7c0c..4b727778568c 100644 --- a/exp/api/v1beta1/azuremachinepool_test.go +++ b/exp/api/v1beta1/azuremachinepool_test.go @@ -218,7 +218,7 @@ func TestAzureMachinePool_Validate(t *testing.T) { defer utilfeature.SetFeatureGateDuringTest(t, feature.Gates, capifeature.MachinePool, true)() g := gomega.NewGomegaWithT(t) amp := c.Factory(g) - actualErr := amp.Validate(nil) + actualErr := amp.Validate(nil, nil) c.Expect(g, actualErr) }) } diff --git a/exp/api/v1beta1/azuremachinepool_types.go b/exp/api/v1beta1/azuremachinepool_types.go index 3fb52789aab3..3ed3e787041a 100644 --- 
a/exp/api/v1beta1/azuremachinepool_types.go +++ b/exp/api/v1beta1/azuremachinepool_types.go @@ -157,6 +157,10 @@ type ( // NOTE: NodeDrainTimeout is different from `kubectl drain --timeout` // +optional NodeDrainTimeout *metav1.Duration `json:"nodeDrainTimeout,omitempty"` + + // OrchestrationMode specifies the orchestration mode for the Virtual Machine Scale Set + // +kubebuilder:default=Uniform + OrchestrationMode infrav1.OrchestrationModeType `json:"orchestrationMode,omitempty"` } // AzureMachinePoolDeploymentStrategyType is the type of deployment strategy employed to rollout a new version of diff --git a/exp/api/v1beta1/azuremachinepool_webhook.go b/exp/api/v1beta1/azuremachinepool_webhook.go index 526dbd5fb45a..52b637b8453b 100644 --- a/exp/api/v1beta1/azuremachinepool_webhook.go +++ b/exp/api/v1beta1/azuremachinepool_webhook.go @@ -17,19 +17,23 @@ limitations under the License. package v1beta1 import ( - "errors" + "context" "fmt" "reflect" + "github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2021-11-01/compute" + "github.com/blang/semver" + "github.com/pkg/errors" "k8s.io/apimachinery/pkg/runtime" kerrors "k8s.io/apimachinery/pkg/util/errors" "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/apimachinery/pkg/util/validation/field" infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1" "sigs.k8s.io/cluster-api-provider-azure/feature" + expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" capifeature "sigs.k8s.io/cluster-api/feature" ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/webhook" + "sigs.k8s.io/controller-runtime/pkg/client" ) // SetupWebhookWithManager sets up and registers the webhook with the manager. 
@@ -41,10 +45,8 @@ func (amp *AzureMachinePool) SetupWebhookWithManager(mgr ctrl.Manager) error { // +kubebuilder:webhook:path=/mutate-infrastructure-cluster-x-k8s-io-v1beta1-azuremachinepool,mutating=true,failurePolicy=fail,groups=infrastructure.cluster.x-k8s.io,resources=azuremachinepools,verbs=create;update,versions=v1beta1,name=default.azuremachinepool.infrastructure.cluster.x-k8s.io,sideEffects=None,admissionReviewVersions=v1;v1beta1 -var _ webhook.Defaulter = &AzureMachinePool{} - // Default implements webhook.Defaulter so a webhook will be registered for the type. -func (amp *AzureMachinePool) Default() { +func (amp *AzureMachinePool) Default(client client.Client) { if err := amp.SetDefaultSSHPublicKey(); err != nil { ctrl.Log.WithName("AzureMachinePoolLogger").Error(err, "SetDefaultSshPublicKey failed") } @@ -55,10 +57,8 @@ func (amp *AzureMachinePool) Default() { // +kubebuilder:webhook:verbs=create;update,path=/validate-infrastructure-cluster-x-k8s-io-v1beta1-azuremachinepool,mutating=false,failurePolicy=fail,groups=infrastructure.cluster.x-k8s.io,resources=azuremachinepools,versions=v1beta1,name=validation.azuremachinepool.infrastructure.cluster.x-k8s.io,sideEffects=None,admissionReviewVersions=v1;v1beta1 -var _ webhook.Validator = &AzureMachinePool{} - // ValidateCreate implements webhook.Validator so a webhook will be registered for the type. -func (amp *AzureMachinePool) ValidateCreate() error { +func (amp *AzureMachinePool) ValidateCreate(client client.Client) error { // NOTE: AzureMachinePool is behind MachinePool feature gate flag; the web hook // must prevent creating new objects in case the feature flag is disabled. 
if !feature.Gates.Enabled(capifeature.MachinePool) { @@ -67,27 +67,28 @@ func (amp *AzureMachinePool) ValidateCreate() error { "can be set only if the MachinePool feature flag is enabled", ) } - return amp.Validate(nil) + return amp.Validate(nil, client) } // ValidateUpdate implements webhook.Validator so a webhook will be registered for the type. -func (amp *AzureMachinePool) ValidateUpdate(old runtime.Object) error { - return amp.Validate(old) +func (amp *AzureMachinePool) ValidateUpdate(old runtime.Object, client client.Client) error { + return amp.Validate(old, client) } // ValidateDelete implements webhook.Validator so a webhook will be registered for the type. -func (amp *AzureMachinePool) ValidateDelete() error { +func (amp *AzureMachinePool) ValidateDelete(client.Client) error { return nil } // Validate the Azure Machine Pool and return an aggregate error. -func (amp *AzureMachinePool) Validate(old runtime.Object) error { +func (amp *AzureMachinePool) Validate(old runtime.Object, client client.Client) error { validators := []func() error{ amp.ValidateImage, amp.ValidateTerminateNotificationTimeout, amp.ValidateSSHKey, amp.ValidateUserAssignedIdentity, amp.ValidateDiagnostics, + amp.ValidateOrchestrationMode(client), amp.ValidateStrategy(), amp.ValidateSystemAssignedIdentity(old), amp.ValidateNetwork, @@ -242,3 +243,36 @@ func (amp *AzureMachinePool) ValidateDiagnostics() error { return nil } + +// ValidateOrchestrationMode validates requirements for the VMSS orchestration mode. +func (amp *AzureMachinePool) ValidateOrchestrationMode(c client.Client) func() error { + return func() error { + // Only Flexible orchestration mode requires validation. 
+ if amp.Spec.OrchestrationMode == infrav1.OrchestrationModeType(compute.OrchestrationModeFlexible) { + // Find the owner MachinePool + ownerMachinePool := &expv1.MachinePool{} + key := client.ObjectKey{ + Namespace: amp.Namespace, + Name: amp.Name, + } + ctx := context.Background() + if err := c.Get(ctx, key, ownerMachinePool); err != nil { + return errors.Wrap(err, "failed to get owner MachinePool") + } + + // Kubernetes must be >= 1.26.0 for cloud-provider-azure Helm chart support. + if ownerMachinePool.Spec.Template.Spec.Version == nil { + return errors.New("could not find Kubernetes version in MachinePool") + } + k8sVersion, err := semver.ParseTolerant(*ownerMachinePool.Spec.Template.Spec.Version) + if err != nil { + return errors.Wrap(err, "failed to parse Kubernetes version") + } + if k8sVersion.LT(semver.MustParse("1.26.0")) { + return errors.New(fmt.Sprintf("specified Kubernetes version %s must be >= 1.26.0 for Flexible orchestration mode", k8sVersion)) + } + } + + return nil + } +} diff --git a/exp/api/v1beta1/azuremachinepool_webhook_test.go b/exp/api/v1beta1/azuremachinepool_webhook_test.go index b6acac14b3c8..90e0a7a0474d 100644 --- a/exp/api/v1beta1/azuremachinepool_webhook_test.go +++ b/exp/api/v1beta1/azuremachinepool_webhook_test.go @@ -17,11 +17,13 @@ limitations under the License. package v1beta1 import ( + "context" "crypto/rand" "crypto/rsa" "encoding/base64" "testing" + "github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2021-11-01/compute" "github.com/Azure/go-autorest/autorest/to" guuid "github.com/google/uuid" . 
"github.com/onsi/gomega" @@ -31,7 +33,9 @@ import ( utilfeature "k8s.io/component-base/featuregate/testing" infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1" "sigs.k8s.io/cluster-api-provider-azure/feature" + expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" capifeature "sigs.k8s.io/cluster-api/feature" + "sigs.k8s.io/controller-runtime/pkg/client" ) var ( @@ -50,6 +54,7 @@ func TestAzureMachinePool_ValidateCreate(t *testing.T) { tests := []struct { name string amp *AzureMachinePool + version string wantErr bool }{ { @@ -184,10 +189,24 @@ func TestAzureMachinePool_ValidateCreate(t *testing.T) { amp: createMachinePoolWithNetworkConfig("", []infrav1.NetworkInterface{{SubnetName: "testSubnet"}}), wantErr: false, }, + { + name: "azuremachinepool with Flexible orchestration mode", + amp: createMachinePoolWithOrchestrationMode(compute.OrchestrationModeFlexible), + version: "v1.26.0", + wantErr: false, + }, + { + name: "azuremachinepool with Flexible orchestration mode and invalid Kubernetes version", + amp: createMachinePoolWithOrchestrationMode(compute.OrchestrationModeFlexible), + version: "v1.25.6", + wantErr: true, + }, } + for _, tc := range tests { + client := mockClient{Version: tc.version} t.Run(tc.name, func(t *testing.T) { - err := tc.amp.ValidateCreate() + err := tc.amp.ValidateCreate(client) if tc.wantErr { g.Expect(err).To(HaveOccurred()) } else { @@ -197,6 +216,16 @@ func TestAzureMachinePool_ValidateCreate(t *testing.T) { } } +type mockClient struct { + client.Client + Version string +} + +func (m mockClient) Get(ctx context.Context, key client.ObjectKey, obj client.Object, opts ...client.GetOption) error { + obj.(*expv1.MachinePool).Spec.Template.Spec.Version = &m.Version + return nil +} + func TestAzureMachinePool_ValidateUpdate(t *testing.T) { // NOTE: AzureMachinePool is behind MachinePool feature gate flag; the web hook // must prevent creating new objects in case the feature flag is disabled. 
@@ -284,7 +313,7 @@ func TestAzureMachinePool_ValidateUpdate(t *testing.T) { } for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { - err := tc.amp.ValidateUpdate(tc.oldAMP) + err := tc.amp.ValidateUpdate(tc.oldAMP, nil) if tc.wantErr { g.Expect(err).To(HaveOccurred()) } else { @@ -320,18 +349,18 @@ func TestAzureMachinePool_Default(t *testing.T) { RoleAssignmentName: "", }}} - roleAssignmentExistTest.amp.Default() + roleAssignmentExistTest.amp.Default(nil) g.Expect(roleAssignmentExistTest.amp.Spec.RoleAssignmentName).To(Equal(existingRoleAssignmentName)) - roleAssignmentEmptyTest.amp.Default() + roleAssignmentEmptyTest.amp.Default(nil) g.Expect(roleAssignmentEmptyTest.amp.Spec.RoleAssignmentName).To(Not(BeEmpty())) _, err := guuid.Parse(roleAssignmentEmptyTest.amp.Spec.RoleAssignmentName) g.Expect(err).To(Not(HaveOccurred())) - publicKeyExistTest.amp.Default() + publicKeyExistTest.amp.Default(nil) g.Expect(publicKeyExistTest.amp.Spec.Template.SSHPublicKey).To(Equal(existingPublicKey)) - publicKeyNotExistTest.amp.Default() + publicKeyNotExistTest.amp.Default(nil) g.Expect(publicKeyNotExistTest.amp.Spec.Template.SSHPublicKey).NotTo(BeEmpty()) } @@ -474,6 +503,14 @@ func createMachinePoolWithStrategy(strategy AzureMachinePoolDeploymentStrategy) } } +func createMachinePoolWithOrchestrationMode(mode compute.OrchestrationMode) *AzureMachinePool { + return &AzureMachinePool{ + Spec: AzureMachinePoolSpec{ + OrchestrationMode: infrav1.OrchestrationModeType(mode), + }, + } +} + func TestAzureMachinePool_ValidateCreateFailure(t *testing.T) { g := NewWithT(t) @@ -496,7 +533,7 @@ func TestAzureMachinePool_ValidateCreateFailure(t *testing.T) { for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { defer tc.deferFunc() - err := tc.amp.ValidateCreate() + err := tc.amp.ValidateCreate(nil) g.Expect(err).To(HaveOccurred()) }) } diff --git a/exp/api/v1beta1/azuremachinepoolmachine_types.go b/exp/api/v1beta1/azuremachinepoolmachine_types.go index 
b240c44b6e04..5648dd8bc3df 100644 --- a/exp/api/v1beta1/azuremachinepoolmachine_types.go +++ b/exp/api/v1beta1/azuremachinepoolmachine_types.go @@ -37,7 +37,8 @@ type ( ProviderID string `json:"providerID"` // InstanceID is the identification of the Machine Instance within the VMSS - InstanceID string `json:"instanceID"` + // +optional + InstanceID string `json:"instanceID,omitempty"` } // AzureMachinePoolMachineStatus defines the observed state of AzureMachinePoolMachine. @@ -90,7 +91,8 @@ type ( // LatestModelApplied indicates the instance is running the most up-to-date VMSS model. A VMSS model describes // the image version the VM is running. If the instance is not running the latest model, it means the instance // may not be running the version of Kubernetes the Machine Pool has specified and needs to be updated. - LatestModelApplied bool `json:"latestModelApplied"` + // +optional + LatestModelApplied bool `json:"latestModelApplied,omitempty"` // Ready is true when the provider resource is ready. 
// +optional diff --git a/main.go b/main.go index fd04641c345c..e58d558be243 100644 --- a/main.go +++ b/main.go @@ -504,12 +504,6 @@ func registerWebhooks(mgr manager.Manager) { setupLog.Error(err, "unable to create webhook", "webhook", "AzureClusterIdentity") os.Exit(1) } - // NOTE: AzureMachinePool is behind MachinePool feature gate flag; the webhook - // is going to prevent creating or updating new objects in case the feature flag is disabled - if err := (&infrav1exp.AzureMachinePool{}).SetupWebhookWithManager(mgr); err != nil { - setupLog.Error(err, "unable to create webhook", "webhook", "AzureMachinePool") - os.Exit(1) - } if err := (&infrav1exp.AzureMachinePoolMachine{}).SetupWebhookWithManager(mgr); err != nil { setupLog.Error(err, "unable to create webhook", "webhook", "AzureMachinePoolMachine") @@ -524,6 +518,12 @@ func registerWebhooks(mgr manager.Manager) { } hookServer := mgr.GetWebhookServer() + hookServer.Register("/mutate-infrastructure-cluster-x-k8s-io-v1beta1-azuremachinepool", webhookutils.NewMutatingWebhook( + &infrav1exp.AzureMachinePool{}, mgr.GetClient(), + )) + hookServer.Register("/validate-infrastructure-cluster-x-k8s-io-v1beta1-azuremachinepool", webhookutils.NewValidatingWebhook( + &infrav1exp.AzureMachinePool{}, mgr.GetClient(), + )) hookServer.Register("/mutate-infrastructure-cluster-x-k8s-io-v1beta1-azuremanagedmachinepool", webhookutils.NewMutatingWebhook( &infrav1exp.AzureManagedMachinePool{}, mgr.GetClient(), )) diff --git a/templates/cluster-template-external-cloud-provider-machinepool.yaml b/templates/cluster-template-external-cloud-provider-machinepool.yaml new file mode 100644 index 000000000000..e9db35bbe151 --- /dev/null +++ b/templates/cluster-template-external-cloud-provider-machinepool.yaml @@ -0,0 +1,233 @@ +apiVersion: cluster.x-k8s.io/v1beta1 +kind: Cluster +metadata: + name: ${CLUSTER_NAME} + namespace: default +spec: + clusterNetwork: + pods: + cidrBlocks: + - 192.168.0.0/16 + controlPlaneRef: + apiVersion: 
controlplane.cluster.x-k8s.io/v1beta1 + kind: KubeadmControlPlane + name: ${CLUSTER_NAME}-control-plane + infrastructureRef: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + kind: AzureCluster + name: ${CLUSTER_NAME} +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: AzureCluster +metadata: + name: ${CLUSTER_NAME} + namespace: default +spec: + identityRef: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + kind: AzureClusterIdentity + name: ${CLUSTER_IDENTITY_NAME} + location: ${AZURE_LOCATION} + networkSpec: + subnets: + - name: control-plane-subnet + role: control-plane + - name: node-subnet + natGateway: + name: node-natgateway + role: node + vnet: + name: ${AZURE_VNET_NAME:=${CLUSTER_NAME}-vnet} + resourceGroup: ${AZURE_RESOURCE_GROUP:=${CLUSTER_NAME}} + subscriptionID: ${AZURE_SUBSCRIPTION_ID} +--- +apiVersion: controlplane.cluster.x-k8s.io/v1beta1 +kind: KubeadmControlPlane +metadata: + name: ${CLUSTER_NAME}-control-plane + namespace: default +spec: + kubeadmConfigSpec: + clusterConfiguration: + apiServer: + extraArgs: + cloud-config: /etc/kubernetes/azure.json + cloud-provider: external + extraVolumes: + - hostPath: /etc/kubernetes/azure.json + mountPath: /etc/kubernetes/azure.json + name: cloud-config + readOnly: true + timeoutForControlPlane: 20m + controllerManager: + extraArgs: + allocate-node-cidrs: "false" + cloud-config: /etc/kubernetes/azure.json + cloud-provider: external + cluster-name: ${CLUSTER_NAME} + external-cloud-volume-plugin: azure + feature-gates: CSIMigrationAzureDisk=true + extraVolumes: + - hostPath: /etc/kubernetes/azure.json + mountPath: /etc/kubernetes/azure.json + name: cloud-config + readOnly: true + etcd: + local: + dataDir: /var/lib/etcddisk/etcd + extraArgs: + quota-backend-bytes: "8589934592" + diskSetup: + filesystems: + - device: /dev/disk/azure/scsi1/lun0 + extraOpts: + - -E + - lazy_itable_init=1,lazy_journal_init=1 + filesystem: ext4 + label: etcd_disk + - device: ephemeral0.1 + filesystem: ext4 + 
label: ephemeral0 + replaceFS: ntfs + partitions: + - device: /dev/disk/azure/scsi1/lun0 + layout: true + overwrite: false + tableType: gpt + files: + - contentFrom: + secret: + key: control-plane-azure.json + name: ${CLUSTER_NAME}-control-plane-azure-json + owner: root:root + path: /etc/kubernetes/azure.json + permissions: "0644" + initConfiguration: + nodeRegistration: + kubeletExtraArgs: + azure-container-registry-config: /etc/kubernetes/azure.json + cloud-config: /etc/kubernetes/azure.json + cloud-provider: external + feature-gates: CSIMigrationAzureDisk=true + name: '{{ ds.meta_data["local_hostname"] }}' + joinConfiguration: + nodeRegistration: + kubeletExtraArgs: + azure-container-registry-config: /etc/kubernetes/azure.json + cloud-config: /etc/kubernetes/azure.json + cloud-provider: external + feature-gates: CSIMigrationAzureDisk=true + name: '{{ ds.meta_data["local_hostname"] }}' + mounts: + - - LABEL=etcd_disk + - /var/lib/etcddisk + postKubeadmCommands: [] + preKubeadmCommands: [] + machineTemplate: + infrastructureRef: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + kind: AzureMachineTemplate + name: ${CLUSTER_NAME}-control-plane + replicas: ${CONTROL_PLANE_MACHINE_COUNT} + version: ${KUBERNETES_VERSION} +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: AzureMachineTemplate +metadata: + name: ${CLUSTER_NAME}-control-plane + namespace: default +spec: + template: + spec: + dataDisks: + - diskSizeGB: 256 + lun: 0 + nameSuffix: etcddisk + osDisk: + diskSizeGB: 128 + osType: Linux + sshPublicKey: ${AZURE_SSH_PUBLIC_KEY_B64:=""} + vmSize: ${AZURE_CONTROL_PLANE_MACHINE_TYPE} +--- +apiVersion: cluster.x-k8s.io/v1beta1 +kind: MachinePool +metadata: + name: ${CLUSTER_NAME}-mp-0 + namespace: default +spec: + clusterName: ${CLUSTER_NAME} + replicas: ${WORKER_MACHINE_COUNT} + template: + spec: + bootstrap: + configRef: + apiVersion: bootstrap.cluster.x-k8s.io/v1beta1 + kind: KubeadmConfig + name: ${CLUSTER_NAME}-mp-0 + clusterName: 
${CLUSTER_NAME} + infrastructureRef: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + kind: AzureMachinePool + name: ${CLUSTER_NAME}-mp-0 + version: ${KUBERNETES_VERSION} +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: AzureMachinePool +metadata: + name: ${CLUSTER_NAME}-mp-0 + namespace: default +spec: + location: ${AZURE_LOCATION} + orchestrationMode: Flexible + strategy: + rollingUpdate: + maxSurge: 0% + maxUnavailable: 0 + type: RollingUpdate + template: + osDisk: + diskSizeGB: 30 + managedDisk: + storageAccountType: Premium_LRS + osType: Linux + sshPublicKey: ${AZURE_SSH_PUBLIC_KEY_B64:=""} + vmSize: ${AZURE_NODE_MACHINE_TYPE} +--- +apiVersion: bootstrap.cluster.x-k8s.io/v1beta1 +kind: KubeadmConfig +metadata: + name: ${CLUSTER_NAME}-mp-0 + namespace: default +spec: + files: + - contentFrom: + secret: + key: worker-node-azure.json + name: ${CLUSTER_NAME}-mp-0-azure-json + owner: root:root + path: /etc/kubernetes/azure.json + permissions: "0644" + joinConfiguration: + nodeRegistration: + kubeletExtraArgs: + azure-container-registry-config: /etc/kubernetes/azure.json + cloud-config: /etc/kubernetes/azure.json + cloud-provider: external + feature-gates: CSIMigrationAzureDisk=true + name: '{{ ds.meta_data["local_hostname"] }}' +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: AzureClusterIdentity +metadata: + labels: + clusterctl.cluster.x-k8s.io/move-hierarchy: "true" + name: ${CLUSTER_IDENTITY_NAME} + namespace: default +spec: + allowedNamespaces: {} + clientID: ${AZURE_CLIENT_ID} + clientSecret: + name: ${AZURE_CLUSTER_IDENTITY_SECRET_NAME} + namespace: ${AZURE_CLUSTER_IDENTITY_SECRET_NAMESPACE} + tenantID: ${AZURE_TENANT_ID} + type: ServicePrincipal diff --git a/templates/flavors/external-cloud-provider-machinepool/kustomization.yaml b/templates/flavors/external-cloud-provider-machinepool/kustomization.yaml new file mode 100644 index 000000000000..a83b7479daf9 --- /dev/null +++ 
b/templates/flavors/external-cloud-provider-machinepool/kustomization.yaml @@ -0,0 +1,7 @@ +namespace: default +resources: + - ../machinepool + +patchesStrategicMerge: + - patches/external-cloud-provider.yaml + - patches/vmss-flex.yaml diff --git a/templates/flavors/external-cloud-provider-machinepool/machine-pool-deployment.yaml b/templates/flavors/external-cloud-provider-machinepool/machine-pool-deployment.yaml new file mode 100644 index 000000000000..a29f6fe9cc8f --- /dev/null +++ b/templates/flavors/external-cloud-provider-machinepool/machine-pool-deployment.yaml @@ -0,0 +1,62 @@ +--- +apiVersion: cluster.x-k8s.io/v1beta1 +kind: MachinePool +metadata: + name: "${CLUSTER_NAME}-mp-0" +spec: + clusterName: "${CLUSTER_NAME}" + replicas: ${WORKER_MACHINE_COUNT} + template: + spec: + clusterName: "${CLUSTER_NAME}" + version: "${KUBERNETES_VERSION}" + bootstrap: + configRef: + name: "${CLUSTER_NAME}-mp-0" + apiVersion: bootstrap.cluster.x-k8s.io/v1beta1 + kind: KubeadmConfig + infrastructureRef: + name: "${CLUSTER_NAME}-mp-0" + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + kind: AzureMachinePool +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: AzureMachinePool +metadata: + name: "${CLUSTER_NAME}-mp-0" +spec: + location: ${AZURE_LOCATION} + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 0% + maxUnavailable: 0 + template: + vmSize: ${AZURE_NODE_MACHINE_TYPE} + osDisk: + osType: "Linux" + diskSizeGB: 30 + managedDisk: + storageAccountType: "Premium_LRS" + sshPublicKey: ${AZURE_SSH_PUBLIC_KEY_B64:=""} +--- +apiVersion: bootstrap.cluster.x-k8s.io/v1beta1 +kind: KubeadmConfig +metadata: + name: "${CLUSTER_NAME}-mp-0" +spec: + joinConfiguration: + nodeRegistration: + name: '{{ ds.meta_data["local_hostname"] }}' + kubeletExtraArgs: + cloud-provider: azure + cloud-config: /etc/kubernetes/azure.json + azure-container-registry-config: /etc/kubernetes/azure.json + files: + - contentFrom: + secret: + name: ${CLUSTER_NAME}-mp-0-azure-json + 
key: worker-node-azure.json + owner: root:root + path: /etc/kubernetes/azure.json + permissions: "0644" diff --git a/templates/flavors/external-cloud-provider-machinepool/patches/external-cloud-provider.yaml b/templates/flavors/external-cloud-provider-machinepool/patches/external-cloud-provider.yaml new file mode 100644 index 000000000000..03ea70e29b46 --- /dev/null +++ b/templates/flavors/external-cloud-provider-machinepool/patches/external-cloud-provider.yaml @@ -0,0 +1,41 @@ +--- +kind: KubeadmControlPlane +apiVersion: controlplane.cluster.x-k8s.io/v1beta1 +metadata: + name: "${CLUSTER_NAME}-control-plane" +spec: + kubeadmConfigSpec: + initConfiguration: + nodeRegistration: + kubeletExtraArgs: + cloud-provider: external + feature-gates: CSIMigrationAzureDisk=true + azure-container-registry-config: /etc/kubernetes/azure.json + joinConfiguration: + nodeRegistration: + kubeletExtraArgs: + cloud-provider: external + feature-gates: CSIMigrationAzureDisk=true + azure-container-registry-config: /etc/kubernetes/azure.json + clusterConfiguration: + apiServer: + timeoutForControlPlane: 20m + extraArgs: + cloud-provider: external + controllerManager: + extraArgs: + cloud-provider: external + external-cloud-volume-plugin: azure + feature-gates: CSIMigrationAzureDisk=true + version: "${KUBERNETES_VERSION}" +--- +apiVersion: bootstrap.cluster.x-k8s.io/v1beta1 +kind: KubeadmConfig +metadata: + name: ${CLUSTER_NAME}-mp-0 +spec: + joinConfiguration: + nodeRegistration: + kubeletExtraArgs: + feature-gates: CSIMigrationAzureDisk=true + cloud-provider: external diff --git a/templates/flavors/external-cloud-provider-machinepool/patches/vmss-flex.yaml b/templates/flavors/external-cloud-provider-machinepool/patches/vmss-flex.yaml new file mode 100644 index 000000000000..c424f6427a23 --- /dev/null +++ b/templates/flavors/external-cloud-provider-machinepool/patches/vmss-flex.yaml @@ -0,0 +1,13 @@ +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: AzureMachinePool 
+metadata: + name: "${CLUSTER_NAME}-mp-0" +spec: + orchestrationMode: Flexible + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 0% + maxUnavailable: 0 + deletePolicy: diff --git a/templates/test/ci/cluster-template-prow-external-cloud-provider-vmss-flex.yaml b/templates/test/ci/cluster-template-prow-external-cloud-provider-vmss-flex.yaml new file mode 100644 index 000000000000..fc8bd817d09f --- /dev/null +++ b/templates/test/ci/cluster-template-prow-external-cloud-provider-vmss-flex.yaml @@ -0,0 +1,245 @@ +apiVersion: cluster.x-k8s.io/v1beta1 +kind: Cluster +metadata: + name: ${CLUSTER_NAME} + namespace: default +spec: + clusterNetwork: + pods: + cidrBlocks: + - 192.168.0.0/16 + controlPlaneRef: + apiVersion: controlplane.cluster.x-k8s.io/v1beta1 + kind: KubeadmControlPlane + name: ${CLUSTER_NAME}-control-plane + infrastructureRef: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + kind: AzureCluster + name: ${CLUSTER_NAME} +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: AzureCluster +metadata: + name: ${CLUSTER_NAME} + namespace: default +spec: + additionalTags: + buildProvenance: ${BUILD_PROVENANCE} + creationTimestamp: ${TIMESTAMP} + jobName: ${JOB_NAME} + identityRef: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + kind: AzureClusterIdentity + name: ${CLUSTER_IDENTITY_NAME} + location: ${AZURE_LOCATION} + networkSpec: + subnets: + - name: control-plane-subnet + role: control-plane + - name: node-subnet + natGateway: + name: node-natgateway + role: node + vnet: + name: ${AZURE_VNET_NAME:=${CLUSTER_NAME}-vnet} + resourceGroup: ${AZURE_RESOURCE_GROUP:=${CLUSTER_NAME}} + subscriptionID: ${AZURE_SUBSCRIPTION_ID} +--- +apiVersion: controlplane.cluster.x-k8s.io/v1beta1 +kind: KubeadmControlPlane +metadata: + name: ${CLUSTER_NAME}-control-plane + namespace: default +spec: + kubeadmConfigSpec: + clusterConfiguration: + apiServer: + extraArgs: + cloud-config: /etc/kubernetes/azure.json + cloud-provider: external + 
feature-gates: MixedProtocolLBService=true + extraVolumes: + - hostPath: /etc/kubernetes/azure.json + mountPath: /etc/kubernetes/azure.json + name: cloud-config + readOnly: true + timeoutForControlPlane: 20m + controllerManager: + extraArgs: + allocate-node-cidrs: "false" + cloud-config: /etc/kubernetes/azure.json + cloud-provider: external + cluster-name: ${CLUSTER_NAME} + external-cloud-volume-plugin: azure + feature-gates: CSIMigrationAzureDisk=true + v: "4" + extraVolumes: + - hostPath: /etc/kubernetes/azure.json + mountPath: /etc/kubernetes/azure.json + name: cloud-config + readOnly: true + etcd: + local: + dataDir: /var/lib/etcddisk/etcd + extraArgs: + quota-backend-bytes: "8589934592" + diskSetup: + filesystems: + - device: /dev/disk/azure/scsi1/lun0 + extraOpts: + - -E + - lazy_itable_init=1,lazy_journal_init=1 + filesystem: ext4 + label: etcd_disk + - device: ephemeral0.1 + filesystem: ext4 + label: ephemeral0 + replaceFS: ntfs + partitions: + - device: /dev/disk/azure/scsi1/lun0 + layout: true + overwrite: false + tableType: gpt + files: + - contentFrom: + secret: + key: control-plane-azure.json + name: ${CLUSTER_NAME}-control-plane-azure-json + owner: root:root + path: /etc/kubernetes/azure.json + permissions: "0644" + initConfiguration: + nodeRegistration: + kubeletExtraArgs: + azure-container-registry-config: /etc/kubernetes/azure.json + cloud-config: /etc/kubernetes/azure.json + cloud-provider: external + feature-gates: CSIMigrationAzureDisk=true + name: '{{ ds.meta_data["local_hostname"] }}' + joinConfiguration: + nodeRegistration: + kubeletExtraArgs: + azure-container-registry-config: /etc/kubernetes/azure.json + cloud-config: /etc/kubernetes/azure.json + cloud-provider: external + feature-gates: CSIMigrationAzureDisk=true + name: '{{ ds.meta_data["local_hostname"] }}' + mounts: + - - LABEL=etcd_disk + - /var/lib/etcddisk + postKubeadmCommands: [] + preKubeadmCommands: [] + machineTemplate: + infrastructureRef: + apiVersion: 
infrastructure.cluster.x-k8s.io/v1beta1 + kind: AzureMachineTemplate + name: ${CLUSTER_NAME}-control-plane + replicas: ${CONTROL_PLANE_MACHINE_COUNT} + version: ${KUBERNETES_VERSION} +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: AzureMachineTemplate +metadata: + name: ${CLUSTER_NAME}-control-plane + namespace: default +spec: + template: + spec: + dataDisks: + - diskSizeGB: 256 + lun: 0 + nameSuffix: etcddisk + identity: UserAssigned + osDisk: + diskSizeGB: 128 + osType: Linux + sshPublicKey: ${AZURE_SSH_PUBLIC_KEY_B64:=""} + userAssignedIdentities: + - providerID: /subscriptions/${AZURE_SUBSCRIPTION_ID}/resourceGroups/${CI_RG:=capz-ci}/providers/Microsoft.ManagedIdentity/userAssignedIdentities/${USER_IDENTITY:=cloud-provider-user-identity} + vmSize: ${AZURE_CONTROL_PLANE_MACHINE_TYPE} +--- +apiVersion: cluster.x-k8s.io/v1beta1 +kind: MachinePool +metadata: + name: ${CLUSTER_NAME}-mp-0 + namespace: default +spec: + clusterName: ${CLUSTER_NAME} + replicas: ${WORKER_MACHINE_COUNT} + template: + spec: + bootstrap: + configRef: + apiVersion: bootstrap.cluster.x-k8s.io/v1beta1 + kind: KubeadmConfig + name: ${CLUSTER_NAME}-mp-0 + clusterName: ${CLUSTER_NAME} + infrastructureRef: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + kind: AzureMachinePool + name: ${CLUSTER_NAME}-mp-0 + version: ${KUBERNETES_VERSION} +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: AzureMachinePool +metadata: + name: ${CLUSTER_NAME}-mp-0 + namespace: default +spec: + identity: UserAssigned + location: ${AZURE_LOCATION} + orchestrationMode: Flexible + strategy: + rollingUpdate: + maxSurge: 0% + maxUnavailable: 0 + type: RollingUpdate + template: + osDisk: + diskSizeGB: 30 + managedDisk: + storageAccountType: Premium_LRS + osType: Linux + sshPublicKey: ${AZURE_SSH_PUBLIC_KEY_B64:=""} + vmSize: ${AZURE_NODE_MACHINE_TYPE} + userAssignedIdentities: + - providerID: 
/subscriptions/${AZURE_SUBSCRIPTION_ID}/resourceGroups/${CI_RG:=capz-ci}/providers/Microsoft.ManagedIdentity/userAssignedIdentities/${USER_IDENTITY:=cloud-provider-user-identity} +--- +apiVersion: bootstrap.cluster.x-k8s.io/v1beta1 +kind: KubeadmConfig +metadata: + name: ${CLUSTER_NAME}-mp-0 + namespace: default +spec: + files: + - contentFrom: + secret: + key: worker-node-azure.json + name: ${CLUSTER_NAME}-mp-0-azure-json + owner: root:root + path: /etc/kubernetes/azure.json + permissions: "0644" + joinConfiguration: + nodeRegistration: + kubeletExtraArgs: + azure-container-registry-config: /etc/kubernetes/azure.json + cloud-config: /etc/kubernetes/azure.json + cloud-provider: external + feature-gates: CSIMigrationAzureDisk=true + name: '{{ ds.meta_data["local_hostname"] }}' +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: AzureClusterIdentity +metadata: + labels: + clusterctl.cluster.x-k8s.io/move-hierarchy: "true" + name: ${CLUSTER_IDENTITY_NAME} + namespace: default +spec: + allowedNamespaces: {} + clientID: ${AZURE_CLIENT_ID} + clientSecret: + name: ${AZURE_CLUSTER_IDENTITY_SECRET_NAME} + namespace: ${AZURE_CLUSTER_IDENTITY_SECRET_NAMESPACE} + tenantID: ${AZURE_TENANT_ID} + type: ServicePrincipal diff --git a/templates/test/ci/patches/uami-mp-0.yaml b/templates/test/ci/patches/uami-mp-0.yaml new file mode 100644 index 000000000000..035888c7959a --- /dev/null +++ b/templates/test/ci/patches/uami-mp-0.yaml @@ -0,0 +1,9 @@ +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: AzureMachinePool +metadata: + name: ${CLUSTER_NAME}-mp-0 + namespace: default +spec: + identity: UserAssigned + userAssignedIdentities: + - providerID: /subscriptions/${AZURE_SUBSCRIPTION_ID}/resourceGroups/${CI_RG:=capz-ci}/providers/Microsoft.ManagedIdentity/userAssignedIdentities/${USER_IDENTITY:=cloud-provider-user-identity} diff --git a/templates/test/ci/prow-external-cloud-provider-vmss-flex/kustomization.yaml 
b/templates/test/ci/prow-external-cloud-provider-vmss-flex/kustomization.yaml new file mode 100644 index 000000000000..959ef488b6aa --- /dev/null +++ b/templates/test/ci/prow-external-cloud-provider-vmss-flex/kustomization.yaml @@ -0,0 +1,11 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: default +resources: + - ../../../flavors/external-cloud-provider-machinepool +patchesStrategicMerge: + - ../patches/tags.yaml + - ../patches/controller-manager.yaml + - ../patches/apiserver.yaml + - ../patches/uami-mp-0.yaml + - ../patches/uami-control-plane.yaml diff --git a/test/e2e/azure_machinepools.go b/test/e2e/azure_machinepools.go new file mode 100644 index 000000000000..73f15dabbfcd --- /dev/null +++ b/test/e2e/azure_machinepools.go @@ -0,0 +1,80 @@ +//go:build e2e +// +build e2e + +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package e2e + +import ( + "context" + + "github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2021-11-01/compute" + . 
"github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" + infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1" + infrav1exp "sigs.k8s.io/cluster-api-provider-azure/exp/api/v1beta1" + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/cluster-api/test/framework" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +const ( + AzureMachinePoolsSpecName = "azure-machinepools" + regexpFlexibleVM = `^azure:\/\/\/subscriptions\/[0-9a-fA-F]{8}-([0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12}\/resourceGroups\/.+\/providers\/Microsoft.Compute\/virtualMachines\/.+$` + regexpUniformInstance = `^azure:\/\/\/subscriptions\/[0-9a-fA-F]{8}-([0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12}\/resourceGroups\/.+\/providers\/Microsoft.Compute\/virtualMachineScaleSets\/.+\/virtualMachines\/\d+$` +) + +// AzureMachinePoolsSpecInput is the input for AzureMachinePoolsSpec. +type ( + AzureMachinePoolsSpecInput struct { + BootstrapClusterProxy framework.ClusterProxy + Namespace *corev1.Namespace + ClusterName string + } +) + +// AzureMachinePoolsSpec tests that the expected machinepool resources exist. +func AzureMachinePoolsSpec(ctx context.Context, inputGetter func() AzureMachinePoolsSpecInput) { + input := inputGetter() + Expect(input.BootstrapClusterProxy).NotTo(BeNil(), "Invalid argument. input.BootstrapClusterProxy can't be nil when calling %s spec", AzureMachinePoolsSpecName) + Expect(input.Namespace).NotTo(BeNil(), "Invalid argument. input.Namespace can't be nil when calling %s spec", AzureMachinePoolsSpecName) + Expect(input.ClusterName).NotTo(BeEmpty(), "Invalid argument. 
input.ClusterName can't be empty when calling %s spec", AzureMachinePoolsSpecName) + + var ( + bootstrapClusterProxy = input.BootstrapClusterProxy + workloadClusterProxy = bootstrapClusterProxy.GetWorkloadCluster(ctx, input.Namespace.Name, input.ClusterName) + labels = map[string]string{clusterv1.ClusterLabelName: workloadClusterProxy.GetName()} + ) + + Expect(workloadClusterProxy).NotTo(BeNil()) + + Byf("listing AzureMachinePools for cluster %s in namespace %s", input.ClusterName, input.Namespace.Name) + ampList := &infrav1exp.AzureMachinePoolList{} + Expect(bootstrapClusterProxy.GetClient().List(ctx, ampList, client.InNamespace(input.Namespace.Name), client.MatchingLabels(labels))).To(Succeed()) + for _, amp := range ampList.Items { + Byf("checking AzureMachinePool %s in %s orchestration mode", amp.Name, amp.Spec.OrchestrationMode) + Expect(amp.Status.Replicas).To(BeNumerically("==", len(amp.Spec.ProviderIDList))) + for _, providerID := range amp.Spec.ProviderIDList { + switch amp.Spec.OrchestrationMode { + case infrav1.OrchestrationModeType(compute.OrchestrationModeFlexible): + Expect(providerID).To(MatchRegexp(regexpFlexibleVM)) + default: + Expect(providerID).To(MatchRegexp(regexpUniformInstance)) + } + } + } +} diff --git a/test/e2e/azure_test.go b/test/e2e/azure_test.go index 4a39a2bb2ada..a749a1bc5d69 100644 --- a/test/e2e/azure_test.go +++ b/test/e2e/azure_test.go @@ -491,6 +491,70 @@ var _ = Describe("Workload cluster creation", func() { }) }) + // ci-e2e.sh and Prow CI skip this test by default. To include this test, set `GINKGO_SKIP=""`. + // This spec expects a user-assigned identity named "cloud-provider-user-identity" in a "capz-ci" + // resource group. Override these defaults by setting the USER_IDENTITY and CI_RG environment variables. 
+ Context("Creating a cluster that uses the external cloud provider and machinepools [OPTIONAL]", func() { + It("with 1 control plane node and 1 machinepool", func() { + By("using user-assigned identity") + clusterName = getClusterName(clusterNamePrefix, "flex") + clusterctl.ApplyClusterTemplateAndWait(ctx, clusterctl.ApplyClusterTemplateAndWaitInput{ + ClusterProxy: bootstrapClusterProxy, + ConfigCluster: clusterctl.ConfigClusterInput{ + LogFolder: filepath.Join(artifactFolder, "clusters", bootstrapClusterProxy.GetName()), + ClusterctlConfigPath: clusterctlConfigPath, + KubeconfigPath: bootstrapClusterProxy.GetKubeconfigPath(), + InfrastructureProvider: clusterctl.DefaultInfrastructureProvider, + Flavor: "external-cloud-provider-vmss-flex", + Namespace: namespace.Name, + ClusterName: clusterName, + KubernetesVersion: "v1.26.0", + ControlPlaneMachineCount: pointer.Int64Ptr(1), + WorkerMachineCount: pointer.Int64Ptr(1), + }, + WaitForClusterIntervals: e2eConfig.GetIntervals(specName, "wait-cluster"), + WaitForControlPlaneIntervals: e2eConfig.GetIntervals(specName, "wait-control-plane"), + WaitForMachinePools: e2eConfig.GetIntervals(specName, "wait-machine-pool-nodes"), + ControlPlaneWaiters: clusterctl.ControlPlaneWaiters{ + WaitForControlPlaneInitialized: EnsureControlPlaneInitialized, + }, + }, result) + + By("Verifying machinepool resources", func() { + AzureMachinePoolsSpec(ctx, func() AzureMachinePoolsSpecInput { + return AzureMachinePoolsSpecInput{ + BootstrapClusterProxy: bootstrapClusterProxy, + Namespace: namespace, + ClusterName: clusterName, + } + }) + }) + + By("Verifying expected VM extensions are present on the node", func() { + AzureVMExtensionsSpec(ctx, func() AzureVMExtensionsSpecInput { + return AzureVMExtensionsSpecInput{ + BootstrapClusterProxy: bootstrapClusterProxy, + Namespace: namespace, + ClusterName: clusterName, + } + }) + }) + + By("Creating an accessible load balancer", func() { + AzureLBSpec(ctx, func() AzureLBSpecInput { + return 
AzureLBSpecInput{ + BootstrapClusterProxy: bootstrapClusterProxy, + Namespace: namespace, + ClusterName: clusterName, + SkipCleanup: skipCleanup, + } + }) + }) + + By("PASSED!") + }) + }) + // ci-e2e.sh and Prow CI skip this test by default. To include this test, set `GINKGO_SKIP=""`. // This spec expects a user-assigned identity named "cloud-provider-user-identity" in a "capz-ci" // resource group. Override these defaults by setting the USER_IDENTITY and CI_RG environment variables. diff --git a/test/e2e/config/azure-dev.yaml b/test/e2e/config/azure-dev.yaml index 2e1a124f0cf7..d594e0a19b3a 100644 --- a/test/e2e/config/azure-dev.yaml +++ b/test/e2e/config/azure-dev.yaml @@ -126,6 +126,8 @@ providers: targetName: "cluster-template-conformance-presubmit-artifacts-windows-containerd.yaml" - sourcePath: "${PWD}/templates/test/ci/cluster-template-prow-external-cloud-provider.yaml" targetName: "cluster-template-external-cloud-provider.yaml" + - sourcePath: "${PWD}/templates/test/ci/cluster-template-prow-external-cloud-provider-vmss-flex.yaml" + targetName: "cluster-template-external-cloud-provider-vmss-flex.yaml" - sourcePath: "${PWD}/templates/test/ci/cluster-template-prow-azurediskcsi-migration-off.yaml" targetName: "cluster-template-azurediskcsi-migration-off.yaml" - sourcePath: "${PWD}/templates/test/ci/cluster-template-prow-external-azurediskcsi-driver.yaml"