From 8e350f065fff4f4f53647659dee915da9aa4e820 Mon Sep 17 00:00:00 2001 From: Jont828 Date: Tue, 7 Nov 2023 20:25:19 -0500 Subject: [PATCH] Add implementation for MachinePool Machines --- azure/scope/machinepool.go | 105 ++++-- azure/scope/machinepool_test.go | 154 +++++++- azure/scope/machinepoolmachine.go | 193 ++-------- azure/scope/machinepoolmachine_test.go | 160 ++++----- .../machinepool_deployment_strategy.go | 20 ++ .../machinepool_deployment_strategy_test.go | 66 +++- ...re.cluster.x-k8s.io_azuremachinepools.yaml | 10 +- config/rbac/role.yaml | 1 + exp/api/v1beta1/azuremachinepool_types.go | 10 +- .../v1beta1/azuremachinepoolmachine_types.go | 3 + exp/api/v1beta1/zz_generated.deepcopy.go | 10 +- .../azuremachinepool_controller.go | 40 ++- .../azuremachinepoolmachine_controller.go | 111 ++++-- ...azuremachinepoolmachine_controller_test.go | 47 ++- go.mod | 6 - go.sum | 11 - test/e2e/azure_machinepool_drain.go | 332 ------------------ test/e2e/azure_test.go | 11 - 18 files changed, 542 insertions(+), 748 deletions(-) delete mode 100644 test/e2e/azure_machinepool_drain.go diff --git a/azure/scope/machinepool.go b/azure/scope/machinepool.go index 3e85e089523..f7de52c8c3b 100644 --- a/azure/scope/machinepool.go +++ b/azure/scope/machinepool.go @@ -28,6 +28,7 @@ import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" + "k8s.io/klog/v2" "k8s.io/utils/ptr" infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1" "sigs.k8s.io/cluster-api-provider-azure/azure" @@ -43,8 +44,10 @@ import ( clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" capierrors "sigs.k8s.io/cluster-api/errors" expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" + "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/cluster-api/util/annotations" "sigs.k8s.io/cluster-api/util/conditions" + "sigs.k8s.io/cluster-api/util/labels/format" "sigs.k8s.io/cluster-api/util/patch" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" @@ -234,6 +237,18 @@ func (m *MachinePoolScope) Name() string { return m.AzureMachinePool.Name } +// SetInfrastructureMachineKind sets the infrastructure machine kind in the status if it is not set already, returning +// `true` if the status was updated. This supports MachinePool Machines. +func (m *MachinePoolScope) SetInfrastructureMachineKind() bool { + if m.AzureMachinePool.Status.InfrastructureMachineKind != infrav1exp.AzureMachinePoolMachineKind { + m.AzureMachinePool.Status.InfrastructureMachineKind = infrav1exp.AzureMachinePoolMachineKind + + return true + } + + return false +} + // ProviderID returns the AzureMachinePool ID by parsing Spec.ProviderID. 
func (m *MachinePoolScope) ProviderID() string { resourceID, err := azureutil.ParseResourceID(m.AzureMachinePool.Spec.ProviderID) @@ -326,7 +341,7 @@ func (m *MachinePoolScope) updateReplicasAndProviderIDs(ctx context.Context) err ctx, _, done := tele.StartSpanWithLogger(ctx, "scope.MachinePoolScope.UpdateInstanceStatuses") defer done() - machines, err := m.getMachinePoolMachines(ctx) + machines, err := m.GetMachinePoolMachines(ctx) if err != nil { return errors.Wrap(err, "failed to get machine pool machines") } @@ -345,14 +360,21 @@ func (m *MachinePoolScope) updateReplicasAndProviderIDs(ctx context.Context) err return nil } -func (m *MachinePoolScope) getMachinePoolMachines(ctx context.Context) ([]infrav1exp.AzureMachinePoolMachine, error) { - ctx, _, done := tele.StartSpanWithLogger(ctx, "scope.MachinePoolScope.getMachinePoolMachines") - defer done() - - labels := map[string]string{ +func (m *MachinePoolScope) getMachinePoolMachineLabels() map[string]string { + return map[string]string{ clusterv1.ClusterNameLabel: m.ClusterName(), infrav1exp.MachinePoolNameLabel: m.AzureMachinePool.Name, + clusterv1.MachinePoolNameLabel: format.MustFormatValue(m.MachinePool.Name), + m.ClusterName(): string(infrav1.ResourceLifecycleOwned), } +} + +// GetMachinePoolMachines returns the list of AzureMachinePoolMachines associated with this AzureMachinePool. +func (m *MachinePoolScope) GetMachinePoolMachines(ctx context.Context) ([]infrav1exp.AzureMachinePoolMachine, error) { + ctx, _, done := tele.StartSpanWithLogger(ctx, "scope.MachinePoolScope.getMachinePoolMachines") + defer done() + + labels := m.getMachinePoolMachineLabels() ampml := &infrav1exp.AzureMachinePoolMachineList{} if err := m.client.List(ctx, ampml, client.InNamespace(m.AzureMachinePool.Namespace), client.MatchingLabels(labels)); err != nil { return nil, errors.Wrap(err, "failed to list AzureMachinePoolMachines") @@ -366,21 +388,16 @@ func (m *MachinePoolScope) applyAzureMachinePoolMachines(ctx context.Context) er defer done() if m.vmssState == nil { - log.Info("vmssState is nil") return nil } - labels := map[string]string{ - clusterv1.ClusterNameLabel: m.ClusterName(), - infrav1exp.MachinePoolNameLabel: m.AzureMachinePool.Name, - } - ampml := &infrav1exp.AzureMachinePoolMachineList{} - if err := m.client.List(ctx, ampml, client.InNamespace(m.AzureMachinePool.Namespace), client.MatchingLabels(labels)); err != nil { - return errors.Wrap(err, "failed to list AzureMachinePoolMachines") + ampms, err := m.GetMachinePoolMachines(ctx) + if err != nil { + return err } - existingMachinesByProviderID := make(map[string]infrav1exp.AzureMachinePoolMachine, len(ampml.Items)) - for _, machine := range ampml.Items { + existingMachinesByProviderID := make(map[string]infrav1exp.AzureMachinePoolMachine, len(ampms)) + for _, machine := range ampms { existingMachinesByProviderID[machine.Spec.ProviderID] = machine } @@ -397,14 +414,14 @@ func (m *MachinePoolScope) applyAzureMachinePoolMachines(ctx context.Context) er } deleted := false - // delete machines that no longer exist in Azure - for key, machine := range existingMachinesByProviderID { - machine := machine + // Delete MachinePool Machines for instances that no longer exist in Azure, i.e. 
deleted out-of-band + for key, ampm := range existingMachinesByProviderID { + ampm := ampm if _, ok := azureMachinesByProviderID[key]; !ok { deleted = true log.V(4).Info("deleting AzureMachinePoolMachine because it no longer exists in the VMSS", "providerID", key) delete(existingMachinesByProviderID, key) - if err := m.client.Delete(ctx, &machine); err != nil { + if err := m.DeleteMachine(ctx, ampm); err != nil { return errors.Wrap(err, "failed deleting AzureMachinePoolMachine no longer existing in Azure") } } @@ -436,16 +453,17 @@ func (m *MachinePoolScope) applyAzureMachinePoolMachines(ctx context.Context) er return nil } - // select machines to delete to lower the replica count + // Select Machines to delete to lower the replica count toDelete, err := deleteSelector.SelectMachinesToDelete(ctx, m.DesiredReplicas(), existingMachinesByProviderID) if err != nil { return errors.Wrap(err, "failed selecting AzureMachinePoolMachine(s) to delete") } - for _, machine := range toDelete { - machine := machine - log.Info("deleting selected AzureMachinePoolMachine", "providerID", machine.Spec.ProviderID) - if err := m.client.Delete(ctx, &machine); err != nil { + // Delete MachinePool Machines as a part of scaling down + for i := range toDelete { + ampm := toDelete[i] + log.Info("deleting selected AzureMachinePoolMachine", "providerID", ampm.Spec.ProviderID) + if err := m.DeleteMachine(ctx, ampm); err != nil { return errors.Wrap(err, "failed deleting AzureMachinePoolMachine to reduce replica count") } } @@ -477,11 +495,7 @@ func (m *MachinePoolScope) createMachine(ctx context.Context, machine azure.VMSS UID: m.AzureMachinePool.UID, }, }, - Labels: map[string]string{ - m.ClusterName(): string(infrav1.ResourceLifecycleOwned), - clusterv1.ClusterNameLabel: m.ClusterName(), - infrav1exp.MachinePoolNameLabel: m.AzureMachinePool.Name, - }, + Annotations: map[string]string{}, }, Spec: infrav1exp.AzureMachinePoolMachineSpec{ ProviderID: machine.ProviderID(), @@ -489,6 +503,9 @@ func (m *MachinePoolScope) createMachine(ctx context.Context, machine azure.VMSS }, } + labels := m.getMachinePoolMachineLabels() + ampm.Labels = labels + controllerutil.AddFinalizer(&m, infrav1exp.AzureMachinePoolMachineFinalizer) conditions.MarkFalse(&m, infrav1.VMRunningCondition, string(infrav1.Creating), clusterv1.ConditionSeverityInfo, "") if err := m.client.Create(ctx, &m); err != nil { @@ -498,6 +515,34 @@ func (m *MachinePoolScope) createMachine(ctx context.Context, machine azure.VMSS return nil } +// DeleteMachine deletes an AzureMachinePoolMachine by fetching its owner Machine and deleting it. This ensures that the node cordon/drain happens before deleting the infrastructure. +func (m *MachinePoolScope) DeleteMachine(ctx context.Context, ampm infrav1exp.AzureMachinePoolMachine) error { + ctx, log, done := tele.StartSpanWithLogger(ctx, "scope.MachinePoolScope.DeleteMachine") + defer done() + + machine, err := util.GetOwnerMachine(ctx, m.client, ampm.ObjectMeta) + if err != nil { + return errors.Wrapf(err, "error getting owner Machine for AzureMachinePoolMachine %s/%s", ampm.Namespace, ampm.Name) + } + if machine == nil { + log.V(2).Info("No owner Machine exists for AzureMachinePoolMachine", ampm, klog.KObj(&m)) + // If the AzureMachinePoolMachine does not have an owner Machine, do not attempt to delete the AzureMachinePoolMachine as the MachinePool controller will create the + // Machine and we want to let it catch up. 
If we are too hasty to delete, that introduces a race condition where the AzureMachinePoolMachine could be deleted + // just as the Machine comes online. + + // In the case where the MachinePool is being deleted and the Machine will never come online, the AzureMachinePoolMachine will be deleted via its ownerRef to the + // AzureMachinePool, so that is covered as well. + + return nil + } + + if err := m.client.Delete(ctx, machine); err != nil { + return errors.Wrapf(err, "failed to delete Machine %s for AzureMachinePoolMachine %s in MachinePool %s", machine.Name, ampm.Name, m.MachinePool.Name) + } + + return nil +} + // SetLongRunningOperationState will set the future on the AzureMachinePool status to allow the resource to continue // in the next reconciliation. func (m *MachinePoolScope) SetLongRunningOperationState(future *infrav1.Future) { diff --git a/azure/scope/machinepool_test.go b/azure/scope/machinepool_test.go index 86e33696350..1e1c1245243 100644 --- a/azure/scope/machinepool_test.go +++ b/azure/scope/machinepool_test.go @@ -633,6 +633,7 @@ func TestMachinePoolScope_updateReplicasAndProviderIDs(t *testing.T) { scheme := runtime.NewScheme() _ = clusterv1.AddToScheme(scheme) _ = infrav1exp.AddToScheme(scheme) + _ = expv1.AddToScheme(scheme) cases := []struct { Name string @@ -705,6 +706,12 @@ func TestMachinePoolScope_updateReplicasAndProviderIDs(t *testing.T) { InfrastructureReady: true, }, } + mp = &expv1.MachinePool{ + ObjectMeta: metav1.ObjectMeta{ + Name: "mp1", + Namespace: "default", + }, + } amp = &infrav1exp.AzureMachinePool{ ObjectMeta: metav1.ObjectMeta{ Name: "amp1", @@ -728,6 +735,7 @@ func TestMachinePoolScope_updateReplicasAndProviderIDs(t *testing.T) { Cluster: cluster, }, AzureMachinePool: amp, + MachinePool: mp, } err := s.updateReplicasAndProviderIDs(context.TODO()) c.Verify(g, s.AzureMachinePool, err) @@ -1210,6 +1218,8 @@ func getReadyAzureMachinePoolMachines(count int32) []infrav1exp.AzureMachinePool Labels: map[string]string{ clusterv1.ClusterNameLabel: "cluster1", infrav1exp.MachinePoolNameLabel: "amp1", + clusterv1.MachinePoolNameLabel: "mp1", + "cluster1": string(infrav1.ResourceLifecycleOwned), }, }, Spec: infrav1exp.AzureMachinePoolMachineSpec{ @@ -1225,6 +1235,111 @@ func getReadyAzureMachinePoolMachines(count int32) []infrav1exp.AzureMachinePool return machines } +func getAzureMachinePoolMachine(index int) infrav1exp.AzureMachinePoolMachine { + return infrav1exp.AzureMachinePoolMachine{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("ampm%d", index), + Namespace: "default", + OwnerReferences: []metav1.OwnerReference{ + { + Name: "amp", + Kind: "AzureMachinePool", + APIVersion: infrav1exp.GroupVersion.String(), + }, + }, + Labels: map[string]string{ + clusterv1.ClusterNameLabel: "cluster1", + infrav1exp.MachinePoolNameLabel: "amp1", + clusterv1.MachinePoolNameLabel: "mp1", + "cluster1": string(infrav1.ResourceLifecycleOwned), + }, + }, + Spec: infrav1exp.AzureMachinePoolMachineSpec{ + ProviderID: fmt.Sprintf("azure:///subscriptions/123/resourceGroups/my-rg/providers/Microsoft.Compute/virtualMachineScaleSets/my-vmss/virtualMachines/%d", index), + }, + Status: infrav1exp.AzureMachinePoolMachineStatus{ + Ready: true, + ProvisioningState: ptr.To(infrav1.Succeeded), + }, + } +} + +func getAzureMachinePoolMachineWithOwnerMachine(index int) (clusterv1.Machine, infrav1exp.AzureMachinePoolMachine) { + ampm := getAzureMachinePoolMachine(index) + machine := clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("mpm%d", index), + Namespace: 
"default", + OwnerReferences: []metav1.OwnerReference{ + { + Name: "mp", + Kind: "MachinePool", + APIVersion: expv1.GroupVersion.String(), + }, + }, + Labels: map[string]string{ + clusterv1.ClusterNameLabel: "cluster1", + clusterv1.MachinePoolNameLabel: "mp1", + }, + }, + Spec: clusterv1.MachineSpec{ + ProviderID: &m.Spec.ProviderID, + InfrastructureRef: corev1.ObjectReference{ + Kind: "AzureMachinePoolMachine", + Name: ampm.Name, + Namespace: ampm.Namespace, + }, + }, + } + + ampm.OwnerReferences = append(ampm.OwnerReferences, metav1.OwnerReference{ + Name: machine.Name, + Kind: "Machine", + APIVersion: clusterv1.GroupVersion.String(), + }) + + return machine, ampm +} + +func TestMachinePoolScope_SetInfrastructureMachineKind(t *testing.T) { + testcases := []struct { + name string + azureMachinePool infrav1exp.AzureMachinePool + updated bool + }{ + { + name: "should set infrastructure machine kind", + azureMachinePool: infrav1exp.AzureMachinePool{ + Status: infrav1exp.AzureMachinePoolStatus{}, + }, + updated: true, + }, + { + name: "already set infrastructure machine kind", + azureMachinePool: infrav1exp.AzureMachinePool{ + Status: infrav1exp.AzureMachinePoolStatus{ + InfrastructureMachineKind: infrav1exp.AzureMachinePoolMachineKind, + }, + }, + updated: false, + }, + } + + for _, tt := range testcases { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + + machinePoolScope := &MachinePoolScope{ + AzureMachinePool: &tt.azureMachinePool, + } + + got := machinePoolScope.SetInfrastructureMachineKind() + g.Expect(machinePoolScope.AzureMachinePool.Status.InfrastructureMachineKind).To(Equal(infrav1exp.AzureMachinePoolMachineKind)) + g.Expect(got).To(Equal(tt.updated)) + }) + } +} + func TestMachinePoolScope_applyAzureMachinePoolMachines(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -1243,24 +1358,25 @@ func TestMachinePoolScope_applyAzureMachinePoolMachines(t *testing.T) { mp.Annotations = map[string]string{clusterv1.ReplicasManagedByAnnotation: "cluster-autoscaler"} mp.Spec.Replicas = ptr.To[int32](1) - for _, machine := range getReadyAzureMachinePoolMachines(2) { - obj := machine - cb.WithObjects(&obj) - } + mpm1, ampm1 := getAzureMachinePoolMachineWithOwnerMachine(1) + mpm2, ampm2 := getAzureMachinePoolMachineWithOwnerMachine(2) + objects := []client.Object{} + objects = append(objects, &mpm1, &m1, &mpm2, &m2) + cb.WithObjects(objects...) 
vmssState.Instances = []azure.VMSSVM{ { - ID: "foo/ampm0", - Name: "ampm0", + ID: "/subscriptions/123/resourceGroups/my-rg/providers/Microsoft.Compute/virtualMachineScaleSets/my-vmss/virtualMachines/1", + Name: "ampm1", }, { - ID: "foo/ampm1", - Name: "ampm1", + ID: "/subscriptions/123/resourceGroups/my-rg/providers/Microsoft.Compute/virtualMachineScaleSets/my-vmss/virtualMachines/2", + Name: "ampm2", }, } }, Verify: func(g *WithT, amp *infrav1exp.AzureMachinePool, c client.Client, err error) { g.Expect(err).NotTo(HaveOccurred()) - list := infrav1exp.AzureMachinePoolMachineList{} + list := clusterv1.MachineList{} g.Expect(c.List(ctx, &list)).NotTo(HaveOccurred()) g.Expect(len(list.Items)).Should(Equal(2)) }, @@ -1270,24 +1386,26 @@ func TestMachinePoolScope_applyAzureMachinePoolMachines(t *testing.T) { Setup: func(mp *expv1.MachinePool, amp *infrav1exp.AzureMachinePool, vmssState *azure.VMSS, cb *fake.ClientBuilder) { mp.Spec.Replicas = ptr.To[int32](1) - for _, machine := range getReadyAzureMachinePoolMachines(2) { - obj := machine - cb.WithObjects(&obj) - } + mpm1, ampm1 := getAzureMachinePoolMachineWithOwnerMachine(1) + mpm2, ampm2 := getAzureMachinePoolMachineWithOwnerMachine(2) + objects := []client.Object{} + objects = append(objects, &mpm1, &m1, &mpm2, &m2) + cb.WithObjects(objects...) + vmssState.Instances = []azure.VMSSVM{ { - ID: "foo/ampm0", - Name: "ampm0", + ID: "/subscriptions/123/resourceGroups/my-rg/providers/Microsoft.Compute/virtualMachineScaleSets/my-vmss/virtualMachines/1", + Name: "ampm1", }, { - ID: "foo/ampm1", - Name: "ampm1", + ID: "/subscriptions/123/resourceGroups/my-rg/providers/Microsoft.Compute/virtualMachineScaleSets/my-vmss/virtualMachines/2", + Name: "ampm2", }, } }, Verify: func(g *WithT, amp *infrav1exp.AzureMachinePool, c client.Client, err error) { g.Expect(err).NotTo(HaveOccurred()) - list := infrav1exp.AzureMachinePoolMachineList{} + list := clusterv1.MachineList{} g.Expect(c.List(ctx, &list)).NotTo(HaveOccurred()) g.Expect(len(list.Items)).Should(Equal(1)) }, diff --git a/azure/scope/machinepoolmachine.go b/azure/scope/machinepoolmachine.go index 5183303b643..80661e9361e 100644 --- a/azure/scope/machinepoolmachine.go +++ b/azure/scope/machinepoolmachine.go @@ -18,17 +18,12 @@ package scope import ( "context" - "fmt" "reflect" "strings" - "time" "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/client-go/kubernetes" - "k8s.io/klog/v2" - kubedrain "k8s.io/kubectl/pkg/drain" "k8s.io/utils/ptr" infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1" "sigs.k8s.io/cluster-api-provider-azure/azure" @@ -71,6 +66,7 @@ type ( Client client.Client ClusterScope azure.ClusterScoper MachinePool *expv1.MachinePool + Machine *clusterv1.Machine // workloadNodeGetter is only used for testing purposes and provides a way for mocking requests to the workload cluster workloadNodeGetter nodeGetter @@ -82,6 +78,7 @@ type ( AzureMachinePoolMachine *infrav1exp.AzureMachinePoolMachine AzureMachinePool *infrav1exp.AzureMachinePool MachinePool *expv1.MachinePool + Machine *clusterv1.Machine MachinePoolScope *MachinePoolScope client client.Client patchHelper *patch.Helper @@ -115,6 +112,10 @@ func NewMachinePoolMachineScope(params MachinePoolMachineScopeParams) (*MachineP return nil, errors.New("azure machine pool machine is required when creating a MachinePoolScope") } + if params.Machine == nil { + return nil, errors.New("machine is required when creating a MachinePoolScope") + } + if params.workloadNodeGetter == 
nil { params.workloadNodeGetter = newWorkloadClusterProxy( params.Client, @@ -145,6 +146,7 @@ func NewMachinePoolMachineScope(params MachinePoolMachineScopeParams) (*MachineP AzureMachinePoolMachine: params.AzureMachinePoolMachine, ClusterScoper: params.ClusterScope, MachinePool: params.MachinePool, + Machine: params.Machine, MachinePoolScope: mpScope, client: params.Client, patchHelper: helper, @@ -283,6 +285,19 @@ func (s *MachinePoolMachineScope) ProviderID() string { return s.AzureMachinePoolMachine.Spec.ProviderID } +// updateDeleteMachineAnnotation sets the clusterv1.DeleteMachineAnnotation on the AzureMachinePoolMachine if it exists on the owner Machine. +func (s *MachinePoolMachineScope) updateDeleteMachineAnnotation() { + if s.Machine.Annotations != nil { + if _, ok := s.Machine.Annotations[clusterv1.DeleteMachineAnnotation]; ok { + if s.AzureMachinePoolMachine.Annotations == nil { + s.AzureMachinePoolMachine.Annotations = map[string]string{} + } + + s.AzureMachinePoolMachine.Annotations[clusterv1.DeleteMachineAnnotation] = "true" + } + } +} + // PatchObject persists the MachinePoolMachine spec and status. func (s *MachinePoolMachineScope) PatchObject(ctx context.Context) error { conditions.SetSummary(s.AzureMachinePoolMachine) @@ -293,7 +308,6 @@ func (s *MachinePoolMachineScope) PatchObject(ctx context.Context) error { patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{ clusterv1.ReadyCondition, clusterv1.MachineNodeHealthyCondition, - clusterv1.DrainingSucceededCondition, }}) } @@ -305,6 +319,8 @@ func (s *MachinePoolMachineScope) Close(ctx context.Context) error { ) defer done() + s.updateDeleteMachineAnnotation() + return s.PatchObject(ctx) } @@ -396,160 +412,6 @@ func (s *MachinePoolMachineScope) UpdateInstanceStatus(ctx context.Context) erro return nil } -// CordonAndDrain will cordon and drain the Kubernetes node associated with this AzureMachinePoolMachine. -func (s *MachinePoolMachineScope) CordonAndDrain(ctx context.Context) error { - ctx, log, done := tele.StartSpanWithLogger( - ctx, - "scope.MachinePoolMachineScope.CordonAndDrain", - ) - defer done() - - // See if we can fetch a node using either the providerID or the nodeRef - node, found, err := s.GetNode(ctx) - if err != nil { - if apierrors.IsNotFound(err) { - return nil - } - // failed due to an unexpected error - return errors.Wrap(err, "failed to get node") - } else if !found { - // node was not found due to not finding a nodes with the ProviderID - return nil - } - - // Drain node before deletion and issue a patch in order to make this operation visible to the users. - if s.isNodeDrainAllowed() { - patchHelper, err := patch.NewHelper(s.AzureMachinePoolMachine, s.client) - if err != nil { - return errors.Wrap(err, "failed to build a patchHelper when draining node") - } - - log.V(4).Info("Draining node", "node", node.Name) - // The DrainingSucceededCondition never exists before the node is drained for the first time, - // so its transition time can be used to record the first time draining. - // This `if` condition prevents the transition time to be changed more than once. 
- if conditions.Get(s.AzureMachinePoolMachine, clusterv1.DrainingSucceededCondition) == nil { - conditions.MarkFalse(s.AzureMachinePoolMachine, clusterv1.DrainingSucceededCondition, clusterv1.DrainingReason, clusterv1.ConditionSeverityInfo, "Draining the node before deletion") - } - - if err := patchHelper.Patch(ctx, s.AzureMachinePoolMachine); err != nil { - return errors.Wrap(err, "failed to patch AzureMachinePoolMachine") - } - - if err := s.drainNode(ctx, node); err != nil { - // Check for condition existence. If the condition exists, it may have a different severity or message, which - // would cause the last transition time to be updated. The last transition time is used to determine how - // long to wait to timeout the node drain operation. If we were to keep updating the last transition time, - // a drain operation may never timeout. - if conditions.Get(s.AzureMachinePoolMachine, clusterv1.DrainingSucceededCondition) == nil { - conditions.MarkFalse(s.AzureMachinePoolMachine, clusterv1.DrainingSucceededCondition, clusterv1.DrainingFailedReason, clusterv1.ConditionSeverityWarning, err.Error()) - } - return err - } - - conditions.MarkTrue(s.AzureMachinePoolMachine, clusterv1.DrainingSucceededCondition) - } - - return nil -} - -func (s *MachinePoolMachineScope) drainNode(ctx context.Context, node *corev1.Node) error { - ctx, log, done := tele.StartSpanWithLogger( - ctx, - "scope.MachinePoolMachineScope.drainNode", - ) - defer done() - - restConfig, err := remote.RESTConfig(ctx, MachinePoolMachineScopeName, s.client, client.ObjectKey{ - Name: s.ClusterName(), - Namespace: s.AzureMachinePoolMachine.Namespace, - }) - - if err != nil { - log.Error(err, "Error creating a remote client while deleting Machine, won't retry") - return nil - } - - kubeClient, err := kubernetes.NewForConfig(restConfig) - if err != nil { - log.Error(err, "Error creating a remote client while deleting Machine, won't retry") - return nil - } - - drainer := &kubedrain.Helper{ - Client: kubeClient, - Ctx: ctx, - Force: true, - IgnoreAllDaemonSets: true, - DeleteEmptyDirData: true, - GracePeriodSeconds: -1, - // If a pod is not evicted in 20 seconds, retry the eviction next time the - // machine gets reconciled again (to allow other machines to be reconciled). - Timeout: 20 * time.Second, - OnPodDeletedOrEvicted: func(pod *corev1.Pod, usingEviction bool) { - verbStr := "Deleted" - if usingEviction { - verbStr = "Evicted" - } - log.V(4).Info(fmt.Sprintf("%s pod from Node", verbStr), - "pod", fmt.Sprintf("%s/%s", pod.Name, pod.Namespace)) - }, - Out: writer{klog.Info}, - ErrOut: writer{klog.Error}, - } - - if noderefutil.IsNodeUnreachable(node) { - // When the node is unreachable and some pods are not evicted for as long as this timeout, we ignore them. - drainer.SkipWaitForDeleteTimeoutSeconds = 60 * 5 // 5 minutes - } - - if err := kubedrain.RunCordonOrUncordon(drainer, node, true); err != nil { - // Machine will be re-reconciled after a cordon failure. - return azure.WithTransientError(errors.Errorf("unable to cordon node %s: %v", node.Name, err), 20*time.Second) - } - - if err := kubedrain.RunNodeDrain(drainer, node.Name); err != nil { - // Machine will be re-reconciled after a drain failure. - return azure.WithTransientError(errors.Wrap(err, "Drain failed, retry in 20s"), 20*time.Second) - } - - log.V(4).Info("Drain successful") - return nil -} - -// isNodeDrainAllowed checks to see the node is excluded from draining or if the NodeDrainTimeout has expired. 
-func (s *MachinePoolMachineScope) isNodeDrainAllowed() bool { - if _, exists := s.AzureMachinePoolMachine.ObjectMeta.Annotations[clusterv1.ExcludeNodeDrainingAnnotation]; exists { - return false - } - - if s.nodeDrainTimeoutExceeded() { - return false - } - - return true -} - -// nodeDrainTimeoutExceeded will check to see if the AzureMachinePool's NodeDrainTimeout is exceeded for the -// AzureMachinePoolMachine. -func (s *MachinePoolMachineScope) nodeDrainTimeoutExceeded() bool { - // if the NodeDrainTineout type is not set by user - pool := s.AzureMachinePool - if pool == nil || pool.Spec.NodeDrainTimeout == nil || pool.Spec.NodeDrainTimeout.Seconds() <= 0 { - return false - } - - // if the draining succeeded condition does not exist - if conditions.Get(s.AzureMachinePoolMachine, clusterv1.DrainingSucceededCondition) == nil { - return false - } - - now := time.Now() - firstTimeDrain := conditions.GetLastTransitionTime(s.AzureMachinePoolMachine, clusterv1.DrainingSucceededCondition) - diff := now.Sub(firstTimeDrain.Time) - return diff.Seconds() >= s.AzureMachinePool.Spec.NodeDrainTimeout.Seconds() -} - func (s *MachinePoolMachineScope) hasLatestModelApplied(ctx context.Context) (bool, error) { ctx, _, done := tele.StartSpanWithLogger( ctx, @@ -694,14 +556,3 @@ func getWorkloadClient(ctx context.Context, c client.Client, cluster client.Obje return remote.NewClusterClient(ctx, MachinePoolMachineScopeName, c, cluster) } - -// writer implements io.Writer interface as a pass-through for klog. -type writer struct { - logFunc func(args ...interface{}) -} - -// Write passes string(p) into writer's logFunc and always returns len(p). -func (w writer) Write(p []byte) (n int, err error) { - w.logFunc(string(p)) - return len(p), nil -} diff --git a/azure/scope/machinepoolmachine_test.go b/azure/scope/machinepoolmachine_test.go index ea942292044..8ba0e4f4ed6 100644 --- a/azure/scope/machinepoolmachine_test.go +++ b/azure/scope/machinepoolmachine_test.go @@ -69,6 +69,7 @@ func TestNewMachinePoolMachineScope(t *testing.T) { }, MachinePool: new(expv1.MachinePool), AzureMachinePool: new(infrav1exp.AzureMachinePool), + Machine: new(clusterv1.Machine), AzureMachinePoolMachine: new(infrav1exp.AzureMachinePoolMachine), }, }, @@ -78,6 +79,7 @@ func TestNewMachinePoolMachineScope(t *testing.T) { ClusterScope: new(ClusterScope), MachinePool: new(expv1.MachinePool), AzureMachinePool: new(infrav1exp.AzureMachinePool), + Machine: new(clusterv1.Machine), AzureMachinePoolMachine: new(infrav1exp.AzureMachinePoolMachine), }, Err: "client is required when creating a MachinePoolScope", @@ -88,6 +90,7 @@ func TestNewMachinePoolMachineScope(t *testing.T) { Client: fake.NewClientBuilder().WithScheme(scheme).Build(), MachinePool: new(expv1.MachinePool), AzureMachinePool: new(infrav1exp.AzureMachinePool), + Machine: new(clusterv1.Machine), AzureMachinePoolMachine: new(infrav1exp.AzureMachinePoolMachine), }, Err: "cluster scope is required when creating a MachinePoolScope", @@ -98,6 +101,7 @@ func TestNewMachinePoolMachineScope(t *testing.T) { Client: fake.NewClientBuilder().WithScheme(scheme).Build(), ClusterScope: new(ClusterScope), AzureMachinePool: new(infrav1exp.AzureMachinePool), + Machine: new(clusterv1.Machine), AzureMachinePoolMachine: new(infrav1exp.AzureMachinePoolMachine), }, Err: "machine pool is required when creating a MachinePoolScope", @@ -108,6 +112,7 @@ func TestNewMachinePoolMachineScope(t *testing.T) { Client: fake.NewClientBuilder().WithScheme(scheme).Build(), ClusterScope: new(ClusterScope), MachinePool: 
new(expv1.MachinePool), + Machine: new(clusterv1.Machine), AzureMachinePoolMachine: new(infrav1exp.AzureMachinePoolMachine), }, Err: "azure machine pool is required when creating a MachinePoolScope", @@ -118,10 +123,22 @@ func TestNewMachinePoolMachineScope(t *testing.T) { Client: fake.NewClientBuilder().WithScheme(scheme).Build(), ClusterScope: new(ClusterScope), MachinePool: new(expv1.MachinePool), + Machine: new(clusterv1.Machine), AzureMachinePool: new(infrav1exp.AzureMachinePool), }, Err: "azure machine pool machine is required when creating a MachinePoolScope", }, + { + Name: "no MachinePool Machine", + Input: MachinePoolMachineScopeParams{ + Client: fake.NewClientBuilder().WithScheme(scheme).Build(), + ClusterScope: new(ClusterScope), + MachinePool: new(expv1.MachinePool), + AzureMachinePool: new(infrav1exp.AzureMachinePool), + AzureMachinePoolMachine: new(infrav1exp.AzureMachinePoolMachine), + }, + Err: "machine is required when creating a MachinePoolScope", + }, } for _, c := range cases { @@ -255,6 +272,55 @@ func TestMachinePoolMachineScope_ScaleSetVMSpecs(t *testing.T) { }) } } +func TestMachineScope_updateDeleteMachineAnnotation(t *testing.T) { + cases := []struct { + name string + machine clusterv1.Machine + ampm infrav1exp.AzureMachinePoolMachine + }{ + { + name: "add annotation to ampm", + machine: clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + clusterv1.DeleteMachineAnnotation: "true", + }, + }, + }, + ampm: infrav1exp.AzureMachinePoolMachine{}, + }, + { + name: "do not add annotation to ampm when machine annotations are nil", + machine: clusterv1.Machine{}, + ampm: infrav1exp.AzureMachinePoolMachine{}, + }, + { + name: "do not add annotation to ampm", + machine: clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{}, + }, + }, + ampm: infrav1exp.AzureMachinePoolMachine{}, + }, + } + + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + g := NewWithT(t) + + machineScope := &MachinePoolMachineScope{ + Machine: &c.machine, + AzureMachinePoolMachine: &c.ampm, + } + + machineScope.updateDeleteMachineAnnotation() + _, machineHasAnnotation := machineScope.Machine.Annotations[clusterv1.DeleteMachineAnnotation] + _, ampmHasAnnotation := machineScope.AzureMachinePoolMachine.Annotations[clusterv1.DeleteMachineAnnotation] + g.Expect(machineHasAnnotation).To(Equal(ampmHasAnnotation)) + }) + } +} func TestMachineScope_UpdateNodeStatus(t *testing.T) { scheme := runtime.NewScheme() @@ -364,6 +430,7 @@ func TestMachineScope_UpdateNodeStatus(t *testing.T) { }, }, AzureMachinePool: new(infrav1exp.AzureMachinePool), + Machine: new(clusterv1.Machine), } ) @@ -395,99 +462,6 @@ func TestMachineScope_UpdateNodeStatus(t *testing.T) { } } -func TestMachinePoolMachineScope_CordonAndDrain(t *testing.T) { - scheme := runtime.NewScheme() - _ = expv1.AddToScheme(scheme) - _ = infrav1exp.AddToScheme(scheme) - - var ( - clusterScope = ClusterScope{ - Cluster: &clusterv1.Cluster{ - ObjectMeta: metav1.ObjectMeta{ - Name: "cluster-foo", - }, - }, - } - ) - - cases := []struct { - Name string - Setup func(mockNodeGetter *mock_scope.MocknodeGetter, ampm *infrav1exp.AzureMachinePoolMachine) *infrav1exp.AzureMachinePoolMachine - Err string - }{ - { - Name: "should skip cordon and drain if the node does not exist with provider ID", - Setup: func(mockNodeGetter *mock_scope.MocknodeGetter, ampm *infrav1exp.AzureMachinePoolMachine) *infrav1exp.AzureMachinePoolMachine { - 
mockNodeGetter.EXPECT().GetNodeByProviderID(gomock2.AContext(), FakeProviderID).Return(nil, nil) - return ampm - }, - }, - { - Name: "should skip cordon and drain if the node does not exist with node reference", - Setup: func(mockNodeGetter *mock_scope.MocknodeGetter, ampm *infrav1exp.AzureMachinePoolMachine) *infrav1exp.AzureMachinePoolMachine { - nodeRef := corev1.ObjectReference{ - Name: "node1", - } - ampm.Status.NodeRef = &nodeRef - mockNodeGetter.EXPECT().GetNodeByObjectReference(gomock2.AContext(), nodeRef).Return(nil, nil) - return ampm - }, - }, - { - Name: "if GetNodeByProviderID fails with an error, an error will be returned", - Setup: func(mockNodeGetter *mock_scope.MocknodeGetter, ampm *infrav1exp.AzureMachinePoolMachine) *infrav1exp.AzureMachinePoolMachine { - mockNodeGetter.EXPECT().GetNodeByProviderID(gomock2.AContext(), FakeProviderID).Return(nil, errors.New("boom")) - return ampm - }, - Err: "failed to get node: failed to get node by providerID: boom", - }, - } - - for _, c := range cases { - t.Run(c.Name, func(t *testing.T) { - var ( - controller = gomock.NewController(t) - mockClient = mock_scope.NewMocknodeGetter(controller) - g = NewWithT(t) - params = MachinePoolMachineScopeParams{ - Client: fake.NewClientBuilder().WithScheme(scheme).Build(), - ClusterScope: &clusterScope, - MachinePool: &expv1.MachinePool{ - Spec: expv1.MachinePoolSpec{ - Template: clusterv1.MachineTemplateSpec{ - Spec: clusterv1.MachineSpec{ - Version: ptr.To("v1.19.11"), - }, - }, - }, - }, - AzureMachinePool: new(infrav1exp.AzureMachinePool), - } - ) - - defer controller.Finish() - - ampm := c.Setup(mockClient, &infrav1exp.AzureMachinePoolMachine{ - Spec: infrav1exp.AzureMachinePoolMachineSpec{ - ProviderID: FakeProviderID, - }, - }) - params.AzureMachinePoolMachine = ampm - s, err := NewMachinePoolMachineScope(params) - g.Expect(err).NotTo(HaveOccurred()) - g.Expect(s).NotTo(BeNil()) - s.workloadNodeGetter = mockClient - - err = s.CordonAndDrain(context.TODO()) - if c.Err == "" { - g.Expect(err).To(Succeed()) - } else { - g.Expect(err).To(MatchError(c.Err)) - } - }) - } -} - func getReadyNode() *corev1.Node { return &corev1.Node{ ObjectMeta: metav1.ObjectMeta{ diff --git a/azure/scope/strategies/machinepool_deployments/machinepool_deployment_strategy.go b/azure/scope/strategies/machinepool_deployments/machinepool_deployment_strategy.go index 607d9bab9e1..2daaac8a464 100644 --- a/azure/scope/strategies/machinepool_deployments/machinepool_deployment_strategy.go +++ b/azure/scope/strategies/machinepool_deployments/machinepool_deployment_strategy.go @@ -27,6 +27,7 @@ import ( infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1" infrav1exp "sigs.k8s.io/cluster-api-provider-azure/exp/api/v1beta1" "sigs.k8s.io/cluster-api-provider-azure/util/tele" + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" ctrl "sigs.k8s.io/controller-runtime" ) @@ -142,6 +143,13 @@ func (rollingUpdateStrategy rollingUpdateStrategy) SelectMachinesToDelete(ctx co }() ) + // Order AzureMachinePoolMachines with the clusterv1.DeleteMachineAnnotation to the front so that they have delete priority. + // This allows MachinePool Machines to work with the autoscaler.
+ failedMachines = orderByDeleteMachineAnnotation(failedMachines) + deletingMachines = orderByDeleteMachineAnnotation(deletingMachines) + readyMachines = orderByDeleteMachineAnnotation(readyMachines) + machinesWithoutLatestModel = orderByDeleteMachineAnnotation(machinesWithoutLatestModel) + log.Info("selecting machines to delete", "readyMachines", len(readyMachines), "desiredReplicaCount", desiredReplicaCount, @@ -296,6 +304,18 @@ func orderRandom(machines []infrav1exp.AzureMachinePoolMachine) []infrav1exp.Azu return machines } +// orderByDeleteMachineAnnotation will sort AzureMachinePoolMachines with the clusterv1.DeleteMachineAnnotation to the front of the list. +// It will otherwise preserve the existing order of the list so that it respects the existing delete priority. +func orderByDeleteMachineAnnotation(machines []infrav1exp.AzureMachinePoolMachine) []infrav1exp.AzureMachinePoolMachine { + sort.SliceStable(machines, func(i, j int) bool { + _, iHasAnnotation := machines[i].Annotations[clusterv1.DeleteMachineAnnotation] + + return iHasAnnotation + }) + + return machines +} + func getProviderIDs(machines []infrav1exp.AzureMachinePoolMachine) []string { ids := make([]string, len(machines)) for i, machine := range machines { diff --git a/azure/scope/strategies/machinepool_deployments/machinepool_deployment_strategy_test.go b/azure/scope/strategies/machinepool_deployments/machinepool_deployment_strategy_test.go index 4395547c24a..d3528fb91e7 100644 --- a/azure/scope/strategies/machinepool_deployments/machinepool_deployment_strategy_test.go +++ b/azure/scope/strategies/machinepool_deployments/machinepool_deployment_strategy_test.go @@ -28,6 +28,7 @@ import ( infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1" infrav1exp "sigs.k8s.io/cluster-api-provider-azure/exp/api/v1beta1" "sigs.k8s.io/cluster-api-provider-azure/internal/test/matchers/gomega" + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" ) func TestMachinePoolRollingUpdateStrategy_Type(t *testing.T) { @@ -237,6 +238,21 @@ func TestMachinePoolRollingUpdateStrategy_SelectMachinesToDelete(t *testing.T) { makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(1 * time.Hour))}), }), }, + { + name: "if over-provisioned and has delete machine annotation, select those machines first and then by oldest", + strategy: makeRollingUpdateStrategy(infrav1exp.MachineRollingUpdateDeployment{DeletePolicy: infrav1exp.OldestDeletePolicyType}), + desiredReplicas: 2, + input: map[string]infrav1exp.AzureMachinePoolMachine{ + "foo": makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(4 * time.Hour))}), + "bin": makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(3 * time.Hour)), HasDeleteMachineAnnotation: true}), + "baz": makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(2 * time.Hour))}), + "bar": makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(1 * time.Hour))}), + }, + want: gomega.DiffEq([]infrav1exp.AzureMachinePoolMachine{ + makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(3 * time.Hour)), HasDeleteMachineAnnotation: true}), + makeAMPM(ampmOptions{Ready: true, LatestModel: true,
ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(1 * time.Hour))}), + }), + }, { name: "if over-provisioned, select machines ordered by creation date", strategy: makeRollingUpdateStrategy(infrav1exp.MachineRollingUpdateDeployment{DeletePolicy: infrav1exp.OldestDeletePolicyType}), desiredReplicas: 2, @@ -252,6 +268,21 @@ func TestMachinePoolRollingUpdateStrategy_SelectMachinesToDelete(t *testing.T) { makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(2 * time.Hour))}), }), }, + { + name: "if over-provisioned and has delete machine annotation, prioritize those machines first over creation date", + strategy: makeRollingUpdateStrategy(infrav1exp.MachineRollingUpdateDeployment{DeletePolicy: infrav1exp.OldestDeletePolicyType}), + desiredReplicas: 2, + input: map[string]infrav1exp.AzureMachinePoolMachine{ + "foo": makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(4 * time.Hour))}), + "bin": makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(3 * time.Hour)), HasDeleteMachineAnnotation: true}), + "baz": makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(2 * time.Hour))}), + "bar": makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(1 * time.Hour))}), + }, + want: gomega.DiffEq([]infrav1exp.AzureMachinePoolMachine{ + makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(3 * time.Hour)), HasDeleteMachineAnnotation: true}), + makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(1 * time.Hour))}), + }), + }, { name: "if over-provisioned, select machines ordered by newest first", strategy: makeRollingUpdateStrategy(infrav1exp.MachineRollingUpdateDeployment{DeletePolicy: infrav1exp.NewestDeletePolicyType}), desiredReplicas: 2, @@ -267,6 +298,21 @@ func TestMachinePoolRollingUpdateStrategy_SelectMachinesToDelete(t *testing.T) { makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(3 * time.Hour))}), }), }, + { + name: "if over-provisioned and has delete machine annotation, select those machines first followed by newest", + strategy: makeRollingUpdateStrategy(infrav1exp.MachineRollingUpdateDeployment{DeletePolicy: infrav1exp.NewestDeletePolicyType}), + desiredReplicas: 2, + input: map[string]infrav1exp.AzureMachinePoolMachine{ + "foo": makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(4 * time.Hour))}), + "bin": makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(3 * time.Hour))}), + "baz": makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(2 * time.Hour))}), + "bar": makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(1 * time.Hour)), HasDeleteMachineAnnotation: true}), + }, + want: gomega.DiffEq([]infrav1exp.AzureMachinePoolMachine{ + makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime:
metav1.NewTime(baseTime.Add(1 * time.Hour)), HasDeleteMachineAnnotation: true}), + makeAMPM(ampmOptions{Ready: true, LatestModel: true, ProvisioningState: succeeded, CreationTime: metav1.NewTime(baseTime.Add(4 * time.Hour))}), + }), + }, { name: "if over-provisioned but with an equivalent number marked for deletion, nothing to do; this is the case where Azure has not yet caught up to capz", strategy: makeRollingUpdateStrategy(infrav1exp.MachineRollingUpdateDeployment{DeletePolicy: infrav1exp.OldestDeletePolicyType}), @@ -403,18 +449,20 @@ func makeRollingUpdateStrategy(rolling infrav1exp.MachineRollingUpdateDeployment } type ampmOptions struct { - Ready bool - LatestModel bool - ProvisioningState infrav1.ProvisioningState - CreationTime metav1.Time - DeletionTime *metav1.Time + Ready bool + LatestModel bool + ProvisioningState infrav1.ProvisioningState + CreationTime metav1.Time + DeletionTime *metav1.Time + HasDeleteMachineAnnotation bool } func makeAMPM(opts ampmOptions) infrav1exp.AzureMachinePoolMachine { - return infrav1exp.AzureMachinePoolMachine{ + ampm := infrav1exp.AzureMachinePoolMachine{ ObjectMeta: metav1.ObjectMeta{ CreationTimestamp: opts.CreationTime, DeletionTimestamp: opts.DeletionTime, + Annotations: map[string]string{}, }, Status: infrav1exp.AzureMachinePoolMachineStatus{ Ready: opts.Ready, @@ -422,4 +470,10 @@ func makeAMPM(opts ampmOptions) infrav1exp.AzureMachinePoolMachine { ProvisioningState: &opts.ProvisioningState, }, } + + if opts.HasDeleteMachineAnnotation { + ampm.Annotations[clusterv1.DeleteMachineAnnotation] = "true" + } + + return ampm } diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_azuremachinepools.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_azuremachinepools.yaml index 8621e9e4d49..bf8a3baa9fc 100644 --- a/config/crd/bases/infrastructure.cluster.x-k8s.io_azuremachinepools.yaml +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_azuremachinepools.yaml @@ -100,12 +100,6 @@ spec: location: description: Location is the Azure region location e.g. westus2 type: string - nodeDrainTimeout: - description: 'NodeDrainTimeout is the total amount of time that the - controller will spend on draining a node. The default value is 0, - meaning that the node can be drained without any time limitations. - NOTE: NodeDrainTimeout is different from `kubectl drain --timeout`' - type: string orchestrationMode: default: Uniform description: OrchestrationMode specifies the orchestration mode for @@ -1046,6 +1040,10 @@ spec: - version type: object type: object + infrastructureMachineKind: + description: InfrastructureMachineKind is the kind of the infrastructure + resources behind MachinePool Machines. 
+ type: string instances: description: Instances is the VM instance status for each VM in the VMSS diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 3cf9633c4c2..ac300067ed7 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -88,6 +88,7 @@ rules: - machines - machines/status verbs: + - delete - get - list - watch diff --git a/exp/api/v1beta1/azuremachinepool_types.go b/exp/api/v1beta1/azuremachinepool_types.go index 7b2a325bb9c..98dc73606cc 100644 --- a/exp/api/v1beta1/azuremachinepool_types.go +++ b/exp/api/v1beta1/azuremachinepool_types.go @@ -157,12 +157,6 @@ type ( // +kubebuilder:default={type: "RollingUpdate", rollingUpdate: {maxSurge: 1, maxUnavailable: 0, deletePolicy: Oldest}} Strategy AzureMachinePoolDeploymentStrategy `json:"strategy,omitempty"` - // NodeDrainTimeout is the total amount of time that the controller will spend on draining a node. - // The default value is 0, meaning that the node can be drained without any time limitations. - // NOTE: NodeDrainTimeout is different from `kubectl drain --timeout` - // +optional - NodeDrainTimeout *metav1.Duration `json:"nodeDrainTimeout,omitempty"` - // OrchestrationMode specifies the orchestration mode for the Virtual Machine Scale Set // +kubebuilder:default=Uniform OrchestrationMode infrav1.OrchestrationModeType `json:"orchestrationMode,omitempty"` @@ -308,6 +302,10 @@ type ( // next reconciliation loop. // +optional LongRunningOperationStates infrav1.Futures `json:"longRunningOperationStates,omitempty"` + + // InfrastructureMachineKind is the kind of the infrastructure resources behind MachinePool Machines. + // +optional + InfrastructureMachineKind string `json:"infrastructureMachineKind,omitempty"` } // AzureMachinePoolInstanceStatus provides status information for each instance in the VMSS. diff --git a/exp/api/v1beta1/azuremachinepoolmachine_types.go b/exp/api/v1beta1/azuremachinepoolmachine_types.go index 5648dd8bc3d..907bf7a81ad 100644 --- a/exp/api/v1beta1/azuremachinepoolmachine_types.go +++ b/exp/api/v1beta1/azuremachinepoolmachine_types.go @@ -27,6 +27,9 @@ import ( const ( // AzureMachinePoolMachineFinalizer is used to ensure deletion of dependencies (nodes, infra). AzureMachinePoolMachineFinalizer = "azuremachinepoolmachine.infrastructure.cluster.x-k8s.io" + + // AzureMachinePoolMachineKind indicates the kind of an AzureMachinePoolMachine. + AzureMachinePoolMachineKind = "AzureMachinePoolMachine" ) type ( diff --git a/exp/api/v1beta1/zz_generated.deepcopy.go b/exp/api/v1beta1/zz_generated.deepcopy.go index 0e53ab98298..f41a3c57fb1 100644 --- a/exp/api/v1beta1/zz_generated.deepcopy.go +++ b/exp/api/v1beta1/zz_generated.deepcopy.go @@ -22,8 +22,7 @@ limitations under the License. 
package v1beta1 import ( - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/util/intstr" apiv1beta1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1" @@ -209,7 +208,7 @@ func (in *AzureMachinePoolMachineStatus) DeepCopyInto(out *AzureMachinePoolMachi *out = *in if in.NodeRef != nil { in, out := &in.NodeRef, &out.NodeRef - *out = new(corev1.ObjectReference) + *out = new(v1.ObjectReference) **out = **in } if in.ProvisioningState != nil { @@ -345,11 +344,6 @@ func (in *AzureMachinePoolSpec) DeepCopyInto(out *AzureMachinePoolSpec) { copy(*out, *in) } in.Strategy.DeepCopyInto(&out.Strategy) - if in.NodeDrainTimeout != nil { - in, out := &in.NodeDrainTimeout, &out.NodeDrainTimeout - *out = new(v1.Duration) - **out = **in - } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AzureMachinePoolSpec. diff --git a/exp/controllers/azuremachinepool_controller.go b/exp/controllers/azuremachinepool_controller.go index 4bd132afc6c..41dd1fa6aff 100644 --- a/exp/controllers/azuremachinepool_controller.go +++ b/exp/controllers/azuremachinepool_controller.go @@ -24,6 +24,7 @@ import ( corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime" + kerrors "k8s.io/apimachinery/pkg/util/errors" "k8s.io/client-go/tools/record" infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1" "sigs.k8s.io/cluster-api-provider-azure/azure" @@ -262,14 +263,18 @@ func (ampr *AzureMachinePoolReconciler) reconcileNormal(ctx context.Context, mac defer done() log.Info("Reconciling AzureMachinePool") + // If the AzureMachine is in an error state, return early. if machinePoolScope.AzureMachinePool.Status.FailureReason != nil || machinePoolScope.AzureMachinePool.Status.FailureMessage != nil { log.Info("Error state detected, skipping reconciliation") return reconcile.Result{}, nil } - // If the AzureMachine doesn't have our finalizer, add it. - if controllerutil.AddFinalizer(machinePoolScope.AzureMachinePool, expv1.MachinePoolFinalizer) { + // Add the finalizer and the InfrastructureMachineKind if it is not already set, and patch if either changed. + + needsPatch := controllerutil.AddFinalizer(machinePoolScope.AzureMachinePool, expv1.MachinePoolFinalizer) + needsPatch = machinePoolScope.SetInfrastructureMachineKind() || needsPatch + if needsPatch { // Register the finalizer immediately to avoid orphaning Azure resources on delete if err := machinePoolScope.PatchObject(ctx); err != nil { return reconcile.Result{}, err @@ -385,6 +390,37 @@ func (ampr *AzureMachinePoolReconciler) reconcileDelete(ctx context.Context, mac } } + // Block deletion until all AzureMachinePoolMachines are finished deleting. 
+ ampms, err := machinePoolScope.GetMachinePoolMachines(ctx) + if err != nil { + return reconcile.Result{}, errors.Wrapf(err, "error finding AzureMachinePoolMachines while deleting AzureMachinePool %s/%s", machinePoolScope.AzureMachinePool.Namespace, machinePoolScope.Name()) + } + + if len(ampms) > 0 { + log.Info("AzureMachinePool still has dependent AzureMachinePoolMachines, deleting them first and requeueing", "count", len(ampms)) + + var errs []error + + for _, ampm := range ampms { + if !ampm.GetDeletionTimestamp().IsZero() { + // Don't handle deleted child + continue + } + + if err := machinePoolScope.DeleteMachine(ctx, ampm); err != nil { + err = errors.Wrapf(err, "error deleting AzureMachinePool %s/%s: failed to delete %s %s", machinePoolScope.AzureMachinePool.Namespace, machinePoolScope.AzureMachinePool.Name, ampm.Namespace, ampm.Name) + log.Error(err, "Error deleting AzureMachinePoolMachine", "namespace", ampm.Namespace, "name", ampm.Name) + errs = append(errs, err) + } + } + + if len(errs) > 0 { + return ctrl.Result{}, kerrors.NewAggregate(errs) + } + + return reconcile.Result{}, nil + } + // Delete succeeded, remove finalizer log.V(4).Info("removing finalizer for AzureMachinePool") controllerutil.RemoveFinalizer(machinePoolScope.AzureMachinePool, expv1.MachinePoolFinalizer) diff --git a/exp/controllers/azuremachinepoolmachine_controller.go b/exp/controllers/azuremachinepoolmachine_controller.go index 39cca8a31b9..8fab7f3a795 100644 --- a/exp/controllers/azuremachinepoolmachine_controller.go +++ b/exp/controllers/azuremachinepoolmachine_controller.go @@ -35,6 +35,7 @@ import ( "sigs.k8s.io/cluster-api-provider-azure/pkg/coalescing" "sigs.k8s.io/cluster-api-provider-azure/util/reconciler" "sigs.k8s.io/cluster-api-provider-azure/util/tele" + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" capierrors "sigs.k8s.io/cluster-api/errors" "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/cluster-api/util/annotations" @@ -110,6 +111,15 @@ func (ampmr *AzureMachinePoolMachineController) SetupWithManager(ctx context.Con return errors.Wrapf(err, "failed adding a watch for AzureMachinePool model changes") } + // Add a watch on CAPI Machines for MachinePool Machines + if err := c.Watch( + source.Kind(mgr.GetCache(), &clusterv1.Machine{}), + handler.EnqueueRequestsFromMapFunc(util.MachineToInfrastructureMapFunc(infrav1exp.GroupVersion.WithKind("AzureMachinePoolMachine"))), + predicates.ResourceNotPausedAndHasFilterLabel(log, ampmr.WatchFilterValue), + ); err != nil { + return errors.Wrapf(err, "failed adding a watch for Machine model changes") + } + return nil } @@ -118,6 +128,7 @@ func (ampmr *AzureMachinePoolMachineController) SetupWithManager(ctx context.Con // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azuremachinepoolmachines,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=azuremachinepoolmachines/status,verbs=get;update;patch // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinepools;machinepools/status,verbs=get +// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machines;machines/status,verbs=get;list;watch;delete // +kubebuilder:rbac:groups="",resources=events,verbs=get;list;watch;create;update;patch // +kubebuilder:rbac:groups="",resources=secrets;,verbs=get;list;watch // +kubebuilder:rbac:groups=core,resources=nodes,verbs=get;list;watch @@ -138,29 +149,55 @@ func (ampmr *AzureMachinePoolMachineController) Reconcile(ctx context.Context, r ctx, cancel := context.WithTimeout(ctx,
reconciler.DefaultedLoopTimeout(ampmr.ReconcileTimeout)) defer cancel() - machine := &infrav1exp.AzureMachinePoolMachine{} - err := ampmr.Get(ctx, req.NamespacedName, machine) + azureMachine := &infrav1exp.AzureMachinePoolMachine{} + err := ampmr.Get(ctx, req.NamespacedName, azureMachine) if err != nil { if apierrors.IsNotFound(err) { return reconcile.Result{}, nil } return reconcile.Result{}, err } + logger.V(2).Info("Fetching cluster for AzureMachinePoolMachine", "ampm", azureMachine.Name) + + // Fetch the Cluster. + cluster, err := util.GetClusterFromMetadata(ctx, ampmr.Client, azureMachine.ObjectMeta) + if err != nil { + logger.Info("AzureMachinePoolMachine is missing cluster label or cluster does not exist") + return reconcile.Result{}, nil + } + + logger = logger.WithValues("cluster", cluster.Name) + + // Return early if the object or Cluster is paused. + if annotations.IsPaused(cluster, azureMachine) { + logger.Info("AzureMachinePoolMachine or linked Cluster is marked as paused. Won't reconcile") + return ctrl.Result{}, nil + } + + clusterScope, err := infracontroller.GetClusterScoper(ctx, logger, ampmr.Client, cluster) + if err != nil { + return reconcile.Result{}, errors.Wrapf(err, "failed to create cluster scope for cluster %s/%s", cluster.Namespace, cluster.Name) + } + logger.V(2).Info("Fetching AzureMachinePool with object meta", "meta", azureMachine.ObjectMeta) // Fetch the owning AzureMachinePool (VMSS) - azureMachinePool, err := infracontroller.GetOwnerAzureMachinePool(ctx, ampmr.Client, machine.ObjectMeta) + azureMachinePool, err := infracontroller.GetOwnerAzureMachinePool(ctx, ampmr.Client, azureMachine.ObjectMeta) if err != nil { if apierrors.IsNotFound(err) { - controllerutil.RemoveFinalizer(machine, infrav1exp.AzureMachinePoolMachineFinalizer) - return reconcile.Result{}, ampmr.Client.Update(ctx, machine) + logger.Info("AzureMachinePool not found error missing, removing finalizer", "azureMachinePoolMachine", azureMachine.Name) + controllerutil.RemoveFinalizer(azureMachine, infrav1exp.AzureMachinePoolMachineFinalizer) + return reconcile.Result{}, ampmr.Client.Update(ctx, azureMachine) } return reconcile.Result{}, err } - - if azureMachinePool != nil { - logger = logger.WithValues("azureMachinePool", azureMachinePool.Name) + if azureMachinePool == nil { + logger.Info("AzureMachinePool not found error missing, removing finalizer", "azureMachinePoolMachine", azureMachine.Name) + controllerutil.RemoveFinalizer(azureMachine, infrav1exp.AzureMachinePoolMachineFinalizer) + return reconcile.Result{}, ampmr.Client.Update(ctx, azureMachine) } + logger = logger.WithValues("azureMachinePool", azureMachinePool.Name) + // Fetch the CAPI MachinePool. machinePool, err := infracontroller.GetOwnerMachinePool(ctx, ampmr.Client, azureMachinePool.ObjectMeta) if err != nil && !apierrors.IsNotFound(err) { @@ -171,24 +208,17 @@ func (ampmr *AzureMachinePoolMachineController) Reconcile(ctx context.Context, r logger = logger.WithValues("machinePool", machinePool.Name) } - // Fetch the Cluster. - cluster, err := util.GetClusterFromMetadata(ctx, ampmr.Client, machinePool.ObjectMeta) - if err != nil { - logger.Info("MachinePool is missing cluster label or cluster does not exist") - return reconcile.Result{}, nil - } - - logger = logger.WithValues("cluster", cluster.Name) - - // Return early if the object or Cluster is paused. - if annotations.IsPaused(cluster, machine) { - logger.Info("AzureMachinePoolMachine or linked Cluster is marked as paused. 
Won't reconcile") - return ctrl.Result{}, nil + // Fetch the CAPI Machine. + machine, err := util.GetOwnerMachine(ctx, ampmr.Client, azureMachine.ObjectMeta) + if err != nil && !apierrors.IsNotFound(err) { + return reconcile.Result{}, err } - clusterScope, err := infracontroller.GetClusterScoper(ctx, logger, ampmr.Client, cluster) - if err != nil { - return reconcile.Result{}, errors.Wrapf(err, "failed to create cluster scope for cluster %s/%s", cluster.Namespace, cluster.Name) + if machine != nil { + logger = logger.WithValues("machine", machine.Name) + } else { + logger.Info("Waiting for Machine Controller to set OwnerRef on AzureMachinePoolMachine") + return reconcile.Result{}, nil } // Create the machine pool scope @@ -196,7 +226,8 @@ func (ampmr *AzureMachinePoolMachineController) Reconcile(ctx context.Context, r Client: ampmr.Client, MachinePool: machinePool, AzureMachinePool: azureMachinePool, - AzureMachinePoolMachine: machine, + AzureMachinePoolMachine: azureMachine, + Machine: machine, ClusterScope: clusterScope, }) if err != nil { @@ -211,8 +242,8 @@ func (ampmr *AzureMachinePoolMachineController) Reconcile(ctx context.Context, r }() // Handle deleted machine pools machine - if !machine.ObjectMeta.DeletionTimestamp.IsZero() { - return ampmr.reconcileDelete(ctx, machineScope) + if !azureMachine.ObjectMeta.DeletionTimestamp.IsZero() { + return ampmr.reconcileDelete(ctx, machineScope, clusterScope) } if !cluster.Status.InfrastructureReady { @@ -289,22 +320,29 @@ func (ampmr *AzureMachinePoolMachineController) reconcileNormal(ctx context.Cont return reconcile.Result{}, nil } -func (ampmr *AzureMachinePoolMachineController) reconcileDelete(ctx context.Context, machineScope *scope.MachinePoolMachineScope) (_ reconcile.Result, reterr error) { +func (ampmr *AzureMachinePoolMachineController) reconcileDelete(ctx context.Context, machineScope *scope.MachinePoolMachineScope, clusterScope infracontroller.ClusterScoper) (_ reconcile.Result, reterr error) { ctx, log, done := tele.StartSpanWithLogger(ctx, "controllers.AzureMachinePoolMachineController.reconcileDelete") defer done() - log.Info("Handling deleted AzureMachinePoolMachine") + if !infracontroller.ShouldDeleteIndividualResources(ctx, clusterScope) { + log.Info("Skipping VMSS VM deletion as the whole resource group is being deleted") + + controllerutil.RemoveFinalizer(machineScope.AzureMachinePoolMachine, infrav1exp.AzureMachinePoolMachineFinalizer) + return reconcile.Result{}, nil + } + + if !machineScope.AzureMachinePool.ObjectMeta.DeletionTimestamp.IsZero() { + log.Info("Skipping VMSS VM deletion as VMSS delete will delete individual instances") - if machineScope.AzureMachinePool == nil || !machineScope.AzureMachinePool.ObjectMeta.DeletionTimestamp.IsZero() { - // deleting the entire VMSS, so just remove finalizer and VMSS delete remove the underlying infrastructure. 
controllerutil.RemoveFinalizer(machineScope.AzureMachinePoolMachine, infrav1exp.AzureMachinePoolMachineFinalizer) return reconcile.Result{}, nil } + log.Info("Deleting AzureMachinePoolMachine") + // deleting a single machine - // 1) drain the node (TODO: @devigned) - // 2) after drained, delete the infrastructure - // 3) remove finalizer + // 1) delete the infrastructure, node drain already done by owner Machine + // 2) remove finalizer ampms, err := ampmr.reconcilerFactory(machineScope) if err != nil { @@ -379,11 +417,6 @@ func (r *azureMachinePoolMachineReconciler) Delete(ctx context.Context) error { } }() - // cordon and drain stuff - if err := r.Scope.CordonAndDrain(ctx); err != nil { - return errors.Wrap(err, "failed to cordon and drain the scalesetVMs") - } - if err := r.scalesetVMsService.Delete(ctx); err != nil { return errors.Wrap(err, "failed to reconcile scalesetVMs") } diff --git a/exp/controllers/azuremachinepoolmachine_controller_test.go b/exp/controllers/azuremachinepoolmachine_controller_test.go index 7f4c4d176bc..bc536d64dec 100644 --- a/exp/controllers/azuremachinepoolmachine_controller_test.go +++ b/exp/controllers/azuremachinepoolmachine_controller_test.go @@ -38,6 +38,7 @@ import ( clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" ) @@ -50,9 +51,9 @@ func TestAzureMachinePoolMachineReconciler_Reconcile(t *testing.T) { { Name: "should successfully reconcile", Setup: func(cb *fake.ClientBuilder, reconciler *mock_azure.MockReconcilerMockRecorder) { - cluster, azCluster, mp, amp, ampm := getAReadyMachinePoolMachineCluster() + objects := getReadyMachinePoolMachineClusterObjects(false) reconciler.Reconcile(gomock2.AContext()).Return(nil) - cb.WithObjects(cluster, azCluster, mp, amp, ampm) + cb.WithObjects(objects...) }, Verify: func(g *WithT, result ctrl.Result, err error) { g.Expect(err).NotTo(HaveOccurred()) @@ -61,12 +62,9 @@ func TestAzureMachinePoolMachineReconciler_Reconcile(t *testing.T) { { Name: "should successfully delete", Setup: func(cb *fake.ClientBuilder, reconciler *mock_azure.MockReconcilerMockRecorder) { - cluster, azCluster, mp, amp, ampm := getAReadyMachinePoolMachineCluster() - ampm.DeletionTimestamp = &metav1.Time{ - Time: time.Now(), - } + objects := getReadyMachinePoolMachineClusterObjects(true) reconciler.Delete(gomock2.AContext()).Return(nil) - cb.WithObjects(cluster, azCluster, mp, amp, ampm) + cb.WithObjects(objects...) 
}, Verify: func(g *WithT, result ctrl.Result, err error) { g.Expect(err).NotTo(HaveOccurred()) @@ -118,7 +116,7 @@ func TestAzureMachinePoolMachineReconciler_Reconcile(t *testing.T) { } } -func getAReadyMachinePoolMachineCluster() (*clusterv1.Cluster, *infrav1.AzureCluster, *expv1.MachinePool, *infrav1exp.AzureMachinePool, *infrav1exp.AzureMachinePoolMachine) { +func getReadyMachinePoolMachineClusterObjects(ampmIsDeleting bool) []client.Object { azCluster := &infrav1.AzureCluster{ TypeMeta: metav1.TypeMeta{ Kind: "AzureCluster", @@ -184,20 +182,51 @@ func getAReadyMachinePoolMachineCluster() (*clusterv1.Cluster, *infrav1.AzureClu }, } + ma := &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ma1", + Namespace: "default", + OwnerReferences: []metav1.OwnerReference{ + { + Name: mp.Name, + Kind: "MachinePool", + APIVersion: expv1.GroupVersion.String(), + }, + }, + Labels: map[string]string{ + "cluster.x-k8s.io/cluster-name": cluster.Name, + }, + }, + } + ampm := &infrav1exp.AzureMachinePoolMachine{ ObjectMeta: metav1.ObjectMeta{ Name: "ampm1", Namespace: "default", Finalizers: []string{"test"}, + Labels: map[string]string{ + clusterv1.ClusterNameLabel: cluster.Name, + }, OwnerReferences: []metav1.OwnerReference{ { Name: amp.Name, Kind: "AzureMachinePool", APIVersion: infrav1exp.GroupVersion.String(), }, + { + Name: ma.Name, + Kind: "Machine", + APIVersion: clusterv1.GroupVersion.String(), + }, }, }, } - return cluster, azCluster, mp, amp, ampm + if ampmIsDeleting { + ampm.DeletionTimestamp = &metav1.Time{ + Time: time.Now(), + } + } + + return []client.Object{cluster, azCluster, mp, amp, ma, ampm} } diff --git a/go.mod b/go.mod index f0013cf9d18..34349aad6da 100644 --- a/go.mod +++ b/go.mod @@ -60,7 +60,6 @@ require ( require ( github.com/Azure/azure-sdk-for-go/sdk/internal v1.3.0 // indirect - github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 // indirect github.com/Azure/go-autorest v14.2.0+incompatible // indirect github.com/Azure/go-autorest/autorest/adal v0.9.23 // indirect github.com/Azure/go-autorest/autorest/azure/cli v0.4.5 // indirect @@ -85,7 +84,6 @@ require ( github.com/blang/semver/v4 v4.0.0 // indirect github.com/cenkalti/backoff/v4 v4.2.1 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect - github.com/chai2010/gettext-go v1.0.2 // indirect github.com/coredns/caddy v1.1.0 // indirect github.com/coredns/corefile-migration v1.0.21 // indirect github.com/davecgh/go-spew v1.1.1 // indirect @@ -98,7 +96,6 @@ require ( github.com/emicklei/go-restful/v3 v3.10.2 // indirect github.com/evanphx/json-patch v5.6.0+incompatible // indirect github.com/evanphx/json-patch/v5 v5.6.0 // indirect - github.com/exponent-io/jsonpath v0.0.0-20151013193312-d6023ce2651d // indirect github.com/fatih/camelcase v1.0.0 // indirect github.com/fsnotify/fsnotify v1.6.0 // indirect github.com/go-errors/errors v1.4.2 // indirect @@ -142,11 +139,9 @@ require ( github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect github.com/mitchellh/copystructure v1.2.0 // indirect github.com/mitchellh/go-homedir v1.1.0 // indirect - github.com/mitchellh/go-wordwrap v1.0.0 // indirect github.com/mitchellh/mapstructure v1.5.0 // indirect github.com/mitchellh/reflectwalk v1.0.2 // indirect github.com/moby/spdystream v0.2.0 // indirect - github.com/moby/term v0.0.0-20221205130635-1aeaba878587 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/monochromegane/go-gitignore 
v0.0.0-20200626010858-205db1a8cc00 // indirect @@ -160,7 +155,6 @@ require ( github.com/prometheus/client_model v0.4.1-0.20230718164431-9a2bf3000d16 // indirect github.com/prometheus/common v0.44.0 // indirect github.com/prometheus/procfs v0.11.1 // indirect - github.com/russross/blackfriday/v2 v2.1.0 // indirect github.com/shopspring/decimal v1.3.1 // indirect github.com/spf13/afero v1.9.5 // indirect github.com/spf13/cast v1.5.1 // indirect diff --git a/go.sum b/go.sum index 9624c36ca69..21c9460eb27 100644 --- a/go.sum +++ b/go.sum @@ -78,7 +78,6 @@ github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/subscription/armsubscripti github.com/Azure/azure-service-operator/v2 v2.3.0 h1:kG0eY4bSrGOdRYTy28PbdpYLuyX6k8uv64vRx/oN4c0= github.com/Azure/azure-service-operator/v2 v2.3.0/go.mod h1:q0DFanVTyiTyRqeMu+s16zokLNXsLkCzoD4QcMmdAi0= github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 h1:UQHMgLO+TxOElx5B5HZ4hJQsoJ/PvUvKRhJHDQXO8P8= -github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= github.com/Azure/go-autorest v14.2.0+incompatible h1:V5VMDjClD3GiElqLWO7mz2MxNAK/vTfRHdAubSIPRgs= github.com/Azure/go-autorest v14.2.0+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24= github.com/Azure/go-autorest/autorest v0.11.24/go.mod h1:G6kyRlFnTuSbEYkQGawPfsCswgme4iYf6rfSKUDzbCc= @@ -158,8 +157,6 @@ github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/chai2010/gettext-go v1.0.2 h1:1Lwwip6Q2QGsAdl/ZKPCwTe9fe0CjlUbqj5bFNSjIRk= -github.com/chai2010/gettext-go v1.0.2/go.mod h1:y+wnP2cHYaVj19NZhYKAwEMH2CI1gNHeQQ+5AjwawxA= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= @@ -181,7 +178,6 @@ github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsr github.com/cpuguy83/go-md2man/v2 v2.0.1/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= -github.com/creack/pty v1.1.18 h1:n56/Zwd5o6whRC5PMGretI4IdRLlmBXYNjScPaBgsbY= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -214,8 +210,6 @@ github.com/evanphx/json-patch v5.6.0+incompatible h1:jBYDEEiFBPxA0v50tFdvOzQQTCv github.com/evanphx/json-patch v5.6.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= github.com/evanphx/json-patch/v5 v5.6.0 h1:b91NhWfaz02IuVxO9faSllyAtNXHMPkC5J8sJCLunww= github.com/evanphx/json-patch/v5 v5.6.0/go.mod h1:G79N1coSVB93tBe7j6PhzjmR3/2VvlbKOFpnXhI9Bw4= -github.com/exponent-io/jsonpath v0.0.0-20151013193312-d6023ce2651d h1:105gxyaGwCFad8crR9dcMQWvV9Hvulu6hwUh4tWPJnM= -github.com/exponent-io/jsonpath v0.0.0-20151013193312-d6023ce2651d/go.mod 
h1:ZZMPRZwes7CROmyNKgQzC3XPs6L/G2EJLHddWejkmf4= github.com/fatih/camelcase v1.0.0 h1:hxNvNX/xYBp0ovncs8WyWZrOrpBNub/JfaMvbURyft8= github.com/fatih/camelcase v1.0.0/go.mod h1:yN2Sb0lFhZJUdVvtELVWefmrXpuZESvPmqwoZc+/fpc= github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= @@ -466,8 +460,6 @@ github.com/mitchellh/go-homedir v1.0.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrk github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/mitchellh/go-testing-interface v1.0.0/go.mod h1:kRemZodwjscx+RGhAo8eIhFbs2+BFgRtFPeD/KE+zxI= -github.com/mitchellh/go-wordwrap v1.0.0 h1:6GlHJ/LTGMrIJbwgdqdl2eEH8o+Exx/0m8ir9Gns0u4= -github.com/mitchellh/go-wordwrap v1.0.0/go.mod h1:ZXFpozHsX6DPmq2I0TCekCxypsnAUbP2oI0UX1GXzOo= github.com/mitchellh/gox v0.4.0/go.mod h1:Sd9lOJ0+aimLBi73mGofS1ycjY8lL3uZM3JPS42BGNg= github.com/mitchellh/iochan v1.0.0/go.mod h1:JwYml1nuB7xOzsp52dPpHFffvOCDupsG0QubkSMEySY= github.com/mitchellh/mapstructure v0.0.0-20160808181253-ca63d7c062ee/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= @@ -480,7 +472,6 @@ github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx github.com/moby/spdystream v0.2.0 h1:cjW1zVyyoiM0T7b6UoySUFqzXMoqRckQtXwGPiBhOM8= github.com/moby/spdystream v0.2.0/go.mod h1:f7i0iNDQJ059oMTcWxx8MA/zKFIuD/lY+0GqbN2Wy8c= github.com/moby/term v0.0.0-20221205130635-1aeaba878587 h1:HfkjXDfhgVaN5rmueG8cL8KKeFNecRCXFhaJ2qZ5SKA= -github.com/moby/term v0.0.0-20221205130635-1aeaba878587/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= @@ -547,7 +538,6 @@ github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6L github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= -github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= @@ -822,7 +812,6 @@ golang.org/x/sys v0.0.0-20210423185535-09eb48e85fd7/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210510120138-977fb7262007/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210616045830-e2b7044e8c71/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod 
h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= diff --git a/test/e2e/azure_machinepool_drain.go b/test/e2e/azure_machinepool_drain.go deleted file mode 100644 index 8c301cc09b4..00000000000 --- a/test/e2e/azure_machinepool_drain.go +++ /dev/null @@ -1,332 +0,0 @@ -//go:build e2e -// +build e2e - -/* -Copyright 2021 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package e2e - -import ( - "context" - "fmt" - "time" - - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" - "github.com/pkg/errors" - v1 "k8s.io/api/apps/v1" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime/schema" - "k8s.io/client-go/kubernetes" - "sigs.k8s.io/cluster-api-provider-azure/azure" - infrav1exp "sigs.k8s.io/cluster-api-provider-azure/exp/api/v1beta1" - deployments "sigs.k8s.io/cluster-api-provider-azure/test/e2e/kubernetes/deployment" - "sigs.k8s.io/cluster-api-provider-azure/test/e2e/kubernetes/node" - "sigs.k8s.io/cluster-api-provider-azure/test/e2e/kubernetes/windows" - clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" - expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" - "sigs.k8s.io/cluster-api/test/framework" - "sigs.k8s.io/cluster-api/util" - "sigs.k8s.io/cluster-api/util/conditions" - "sigs.k8s.io/cluster-api/util/patch" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" -) - -const ( - AzureMachinePoolDrainSpecName = "azure-mp-drain" - waitforResourceOperationTimeout = 30 * time.Second - azureMachinePoolMachineTestFinalizer = "azuremachinepoolmachine.infrastructure.cluster.x-k8s.io/test" -) - -// AzureMachinePoolDrainSpecInput is the input for AzureMachinePoolDrainSpec. -type ( - AzureMachinePoolDrainSpecInput struct { - BootstrapClusterProxy framework.ClusterProxy - Namespace *corev1.Namespace - ClusterName string - SkipCleanup bool - IPFamilies []corev1.IPFamily - } - - deployCustomizerOption func(builder *deployments.Builder, service *corev1.Service) -) - -// AzureMachinePoolDrainSpec implements a test that verifies Azure AzureMachinePool cordon and drain by creating a load -// balanced service in a MachinePool with 1+ nodes, verifies the workload is running on each of the nodes, then reduces -// the replica count -1 watching to ensure the workload is gracefully terminated and migrated to another node in the -// machine pool prior to deleting the Azure infrastructure. -func AzureMachinePoolDrainSpec(ctx context.Context, inputGetter func() AzureMachinePoolDrainSpecInput) { - input := inputGetter() - Expect(input.BootstrapClusterProxy).NotTo(BeNil(), "Invalid argument. input.BootstrapClusterProxy can't be nil when calling %s spec", AzureMachinePoolDrainSpecName) - Expect(input.Namespace).NotTo(BeNil(), "Invalid argument. 
input.Namespace can't be nil when calling %s spec", AzureMachinePoolDrainSpecName) - - var ( - bootstrapClusterProxy = input.BootstrapClusterProxy - workloadClusterProxy = input.BootstrapClusterProxy.GetWorkloadCluster(ctx, input.Namespace.Name, input.ClusterName) - clientset = workloadClusterProxy.GetClientSet() - labels = map[string]string{clusterv1.ClusterNameLabel: workloadClusterProxy.GetName()} - ) - - Expect(workloadClusterProxy).NotTo(BeNil()) - Expect(clientset).NotTo(BeNil()) - - By(fmt.Sprintf("listing AzureMachinePools in the cluster in namespace %s", input.Namespace.Name)) - ampList := &infrav1exp.AzureMachinePoolList{} - Expect(bootstrapClusterProxy.GetClient().List(ctx, ampList, client.InNamespace(input.Namespace.Name), client.MatchingLabels(labels))).To(Succeed()) - for _, amp := range ampList.Items { - testMachinePoolCordonAndDrain(ctx, bootstrapClusterProxy, workloadClusterProxy, amp) - } -} - -func testMachinePoolCordonAndDrain(ctx context.Context, mgmtClusterProxy, workloadClusterProxy framework.ClusterProxy, amp infrav1exp.AzureMachinePool) { - var ( - isWindows = amp.Spec.Template.OSDisk.OSType == azure.WindowsOS - clientset = workloadClusterProxy.GetClientSet() - owningMachinePool = func() *expv1.MachinePool { - mp, err := getOwnerMachinePool(ctx, mgmtClusterProxy.GetClient(), amp.ObjectMeta) - Expect(err).NotTo(HaveOccurred()) - return mp - }() - - machinePoolReplicas = func() int32 { - Expect(owningMachinePool.Spec.Replicas).NotTo(BeNil(), "owning machine pool replicas must not be nil") - Expect(*owningMachinePool.Spec.Replicas).To(BeNumerically(">=", 2), "owning machine pool replicas must be greater than or equal to 2") - return *owningMachinePool.Spec.Replicas - }() - - deploymentReplicas = func() int32 { - return machinePoolReplicas * 2 - }() - - customizers = []deployCustomizerOption{ - func(builder *deployments.Builder, service *corev1.Service) { - antiAffinity := corev1.PodAntiAffinity{ - PreferredDuringSchedulingIgnoredDuringExecution: []corev1.WeightedPodAffinityTerm{ - { - Weight: 90, - PodAffinityTerm: corev1.PodAffinityTerm{ - TopologyKey: corev1.LabelHostname, - LabelSelector: &metav1.LabelSelector{ - MatchExpressions: []metav1.LabelSelectorRequirement{ - { - Key: "app", - Operator: metav1.LabelSelectorOpIn, - Values: []string{ - builder.GetName(), - }, - }, - }, - }, - }, - }, - }, - } - builder.AddMachinePoolSelectors(owningMachinePool.Name). - SetReplicas(deploymentReplicas). - AddPodAntiAffinity(antiAffinity) - }, - } - ) - - By("labeling the machine pool nodes with machine pool type and name") - ampmls, err := getAzureMachinePoolMachines(ctx, mgmtClusterProxy, workloadClusterProxy, amp) - Expect(err).NotTo(HaveOccurred()) - labelNodesWithMachinePoolName(ctx, workloadClusterProxy.GetClient(), amp.Name, ampmls) - - By("adding the AzureMachinePoolMachine finalizer to the machine pool machines") - for i := range ampmls { - controllerutil.AddFinalizer(&mls[i], azureMachinePoolMachineTestFinalizer) - } - - By(fmt.Sprintf("deploying a publicly exposed HTTP service with pod anti-affinity on machine pool: %s/%s", amp.Namespace, amp.Name)) - _, _, _, cleanup := deployHTTPService(ctx, clientset, isWindows, customizers...) 
- defer cleanup() - - var decreasedReplicas int32 - By(fmt.Sprintf("decreasing the replica count by 1 on the machine pool: %s/%s", amp.Namespace, amp.Name)) - Eventually(func() error { - helper, err := patch.NewHelper(owningMachinePool, mgmtClusterProxy.GetClient()) - if err != nil { - LogWarning(err.Error()) - return err - } - - decreasedReplicas = *owningMachinePool.Spec.Replicas - int32(1) - owningMachinePool.Spec.Replicas = &decreasedReplicas - return helper.Patch(ctx, owningMachinePool) - }, 3*time.Minute, 3*time.Second).Should(Succeed()) - - By(fmt.Sprintf("checking for a machine to start draining for machine pool: %s/%s", amp.Namespace, amp.Name)) - Eventually(func() error { - ampmls, err := getAzureMachinePoolMachines(ctx, mgmtClusterProxy, workloadClusterProxy, amp) - if err != nil { - LogWarning(errors.Wrap(err, "failed to list the azure machine pool machines").Error()) - return err - } - - for i := range ampmls { - if conditions.IsTrue(&mls[i], clusterv1.DrainingSucceededCondition) { - controllerutil.RemoveFinalizer(&mls[i], azureMachinePoolMachineTestFinalizer) - return nil // started draining the node prior to delete - } - } - - return errors.New("no machine has started to drain") - }, 10*time.Minute, 3*time.Second).Should(Succeed()) - - By(fmt.Sprintf("waiting for the machine pool to scale down to %d replicas: %s/%s", decreasedReplicas, amp.Namespace, amp.Name)) - Eventually(func() (int32, error) { - mp, err := getOwnerMachinePool(ctx, mgmtClusterProxy.GetClient(), amp.ObjectMeta) - if err != nil { - LogWarning(err.Error()) - return 0, err - } - - return int32(len(mp.Spec.ProviderIDList)), nil - }, 10*time.Minute, 3*time.Second).Should(Equal(decreasedReplicas)) -} - -func labelNodesWithMachinePoolName(ctx context.Context, workloadClient client.Client, mpName string, ampms []infrav1exp.AzureMachinePoolMachine) { - for _, ampm := range ampms { - n := &corev1.Node{} - Eventually(func(g Gomega) { - err := workloadClient.Get(ctx, client.ObjectKey{ - Name: ampm.Status.NodeRef.Name, - Namespace: ampm.Status.NodeRef.Namespace, - }, n) - if err != nil { - LogWarning(err.Error()) - } - g.Expect(err).NotTo(HaveOccurred()) - n.Labels[clusterv1.OwnerKindAnnotation] = "MachinePool" - n.Labels[clusterv1.OwnerNameAnnotation] = mpName - err = workloadClient.Update(ctx, n) - if err != nil { - LogWarning(err.Error()) - } - g.Expect(err).NotTo(HaveOccurred()) - }, waitforResourceOperationTimeout, 3*time.Second).Should(Succeed()) - } -} - -func getAzureMachinePoolMachines(ctx context.Context, mgmtClusterProxy, workloadClusterProxy framework.ClusterProxy, amp infrav1exp.AzureMachinePool) ([]infrav1exp.AzureMachinePoolMachine, error) { - labels := map[string]string{ - clusterv1.ClusterNameLabel: workloadClusterProxy.GetName(), - infrav1exp.MachinePoolNameLabel: amp.Name, - } - ampml := &infrav1exp.AzureMachinePoolMachineList{} - if err := mgmtClusterProxy.GetClient().List(ctx, ampml, client.InNamespace(amp.Namespace), client.MatchingLabels(labels)); err != nil { - return ampml.Items, errors.Wrap(err, "failed to list the azure machine pool machines") - } - - return ampml.Items, nil -} - -// getOwnerMachinePool returns the name of MachinePool object owning the current resource. 
-func getOwnerMachinePool(ctx context.Context, c client.Client, obj metav1.ObjectMeta) (*expv1.MachinePool, error) { - for _, ref := range obj.OwnerReferences { - gv, err := schema.ParseGroupVersion(ref.APIVersion) - if err != nil { - return nil, err - } - - if ref.Kind == "MachinePool" && gv.Group == expv1.GroupVersion.Group { - mp := &expv1.MachinePool{} - Eventually(func(g Gomega) { - err := c.Get(ctx, client.ObjectKey{ - Name: ref.Name, - Namespace: obj.Namespace, - }, mp) - if err != nil { - LogWarning(err.Error()) - } - g.Expect(err).NotTo(HaveOccurred()) - }, waitforResourceOperationTimeout, 3*time.Second).Should(Succeed()) - return mp, err - } - } - - return nil, fmt.Errorf("failed to find owner machine pool for obj %+v", obj) -} - -// deployHTTPService creates a publicly exposed http service for Linux or Windows -func deployHTTPService(ctx context.Context, clientset *kubernetes.Clientset, isWindows bool, opts ...deployCustomizerOption) (*deployments.Builder, *v1.Deployment, *corev1.Service, func()) { - var ( - deploymentName = func() string { - if isWindows { - return "web-windows" + util.RandomString(6) - } - - return "web" + util.RandomString(6) - }() - webDeploymentBuilder = deployments.Create("httpd", deploymentName, corev1.NamespaceDefault) - servicesClient = clientset.CoreV1().Services(corev1.NamespaceDefault) - ports = []corev1.ServicePort{ - { - Name: "http", - Port: 80, - Protocol: corev1.ProtocolTCP, - }, - { - Name: "https", - Port: 443, - Protocol: corev1.ProtocolTCP, - }, - } - ) - - webDeploymentBuilder.AddContainerPort("http", "http", 80, corev1.ProtocolTCP) - - if isWindows { - windowsVersion, err := node.GetWindowsVersion(ctx, clientset) - Expect(err).NotTo(HaveOccurred()) - iisImage := windows.GetWindowsImage(windows.Httpd, windowsVersion) - webDeploymentBuilder.SetImage(deploymentName, iisImage) - webDeploymentBuilder.AddWindowsSelectors() - } - - elbService := webDeploymentBuilder.CreateServiceResourceSpec(ports, deployments.ExternalLoadbalancer, nil) - - for _, opt := range opts { - opt(webDeploymentBuilder, elbService) - } - - Log("creating deployment and service") - deployment, err := webDeploymentBuilder.Deploy(ctx, clientset) - Expect(err).NotTo(HaveOccurred()) - deployInput := WaitForDeploymentsAvailableInput{ - Getter: deploymentsClientAdapter{client: webDeploymentBuilder.Client(clientset)}, - Deployment: deployment, - Clientset: clientset, - } - WaitForDeploymentsAvailable(ctx, deployInput, e2eConfig.GetIntervals(AzureMachinePoolDrainSpecName, "wait-deployment")...) - - service, err := servicesClient.Create(ctx, elbService, metav1.CreateOptions{}) - Expect(err).NotTo(HaveOccurred()) - elbSvcInput := WaitForServiceAvailableInput{ - Getter: servicesClientAdapter{client: servicesClient}, - Service: elbService, - Clientset: clientset, - } - WaitForServiceAvailable(ctx, elbSvcInput, e2eConfig.GetIntervals(AzureMachinePoolDrainSpecName, "wait-service")...) 
- - return webDeploymentBuilder, deployment, service, func() { - Expect(servicesClient.Delete(ctx, elbService.Name, metav1.DeleteOptions{})).To(Succeed()) - Expect(webDeploymentBuilder.Client(clientset).Delete(ctx, deployment.Name, metav1.DeleteOptions{})).To(Succeed()) - } -} diff --git a/test/e2e/azure_test.go b/test/e2e/azure_test.go index 02cbab7c7e8..c47996ed31b 100644 --- a/test/e2e/azure_test.go +++ b/test/e2e/azure_test.go @@ -525,17 +525,6 @@ var _ = Describe("Workload cluster creation", func() { }) }) - By("Cordon and draining a node", func() { - AzureMachinePoolDrainSpec(ctx, func() AzureMachinePoolDrainSpecInput { - return AzureMachinePoolDrainSpecInput{ - BootstrapClusterProxy: bootstrapClusterProxy, - Namespace: namespace, - ClusterName: clusterName, - SkipCleanup: skipCleanup, - } - }) - }) - By("PASSED!") }) })
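
Note on the AzureMachinePool delete path above: the controller now deletes its dependent AzureMachinePoolMachines (skipping any that are already being deleted) and only removes the MachinePool finalizer once none remain. The following is a minimal sketch of that ordering; the scope interface and child type are hypothetical stand-ins for illustration, not the real CAPZ MachinePoolScope or AzureMachinePoolMachine APIs.

package sketch

import (
	"context"

	kerrors "k8s.io/apimachinery/pkg/util/errors"
	ctrl "sigs.k8s.io/controller-runtime"
)

// childMachine is a hypothetical stand-in for an AzureMachinePoolMachine.
type childMachine struct {
	Name     string
	Deleting bool // true once a deletion timestamp is set
}

// poolScope is a hypothetical stand-in for the machine pool scope used above.
type poolScope interface {
	GetMachinePoolMachines(ctx context.Context) ([]childMachine, error)
	DeleteMachine(ctx context.Context, m childMachine) error
	RemoveFinalizer()
}

// reconcileDeleteSketch removes dependent machines before releasing the pool's finalizer.
func reconcileDeleteSketch(ctx context.Context, s poolScope) (ctrl.Result, error) {
	children, err := s.GetMachinePoolMachines(ctx)
	if err != nil {
		return ctrl.Result{}, err
	}

	if len(children) > 0 {
		var errs []error
		for _, c := range children {
			if c.Deleting {
				continue // child is already going away; nothing to do
			}
			if err := s.DeleteMachine(ctx, c); err != nil {
				errs = append(errs, err)
			}
		}
		// Keep the finalizer; the controller is re-triggered as the children disappear.
		// NewAggregate returns nil when errs is empty.
		return ctrl.Result{}, kerrors.NewAggregate(errs)
	}

	// No children left, so it is safe to release the finalizer.
	s.RemoveFinalizer()
	return ctrl.Result{}, nil
}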
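
Note on the reworked reconcileDelete in azuremachinepoolmachine_controller.go above: an individual VMSS instance is only deleted when neither the resource group nor the owning AzureMachinePool is already being deleted, and cordon/drain is no longer performed here because the owning CAPI Machine drains the node before its infrastructure is removed. A condensed sketch of that decision order follows; the interface below is a hypothetical stand-in for the real scope and scale set VM service, not a CAPZ API.

package sketch

import (
	"context"

	ctrl "sigs.k8s.io/controller-runtime"
)

// instanceDeleteScope is a hypothetical stand-in for the pieces of the machine pool
// machine scope and cluster scope consulted during deletion.
type instanceDeleteScope interface {
	WholeResourceGroupBeingDeleted(ctx context.Context) bool // stand-in for !ShouldDeleteIndividualResources
	PoolBeingDeleted() bool                                  // stand-in for the AzureMachinePool deletion timestamp check
	DeleteInstance(ctx context.Context) error                // stand-in for the scale set VM delete
	RemoveFinalizer()
}

func reconcileDeleteInstanceSketch(ctx context.Context, s instanceDeleteScope) (ctrl.Result, error) {
	// 1. The whole resource group is being deleted: no need to delete a single VM.
	if s.WholeResourceGroupBeingDeleted(ctx) {
		s.RemoveFinalizer()
		return ctrl.Result{}, nil
	}

	// 2. The VMSS (AzureMachinePool) itself is being deleted: the scale set delete
	// removes its instances, so skip the per-VM call.
	if s.PoolBeingDeleted() {
		s.RemoveFinalizer()
		return ctrl.Result{}, nil
	}

	// 3. Delete just this instance; node drain has already been handled by the
	// owning CAPI Machine.
	if err := s.DeleteInstance(ctx); err != nil {
		return ctrl.Result{}, err
	}

	s.RemoveFinalizer()
	return ctrl.Result{}, nil
}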