Skip to content

Commit

Permalink
dont proactively decrement azure cache for unregistered nodes
Browse files Browse the repository at this point in the history
  • Loading branch information
marwanad committed Jun 10, 2021
1 parent c70efcf commit 756a3e1
Show file tree
Hide file tree
Showing 2 changed files with 89 additions and 5 deletions.
17 changes: 12 additions & 5 deletions cluster-autoscaler/cloudprovider/azure/azure_scale_set.go
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,7 @@ func (scaleSet *ScaleSet) Belongs(node *apiv1.Node) (bool, error) {
}

// DeleteInstances deletes the given instances. All instances must be controlled by the same ASG.
func (scaleSet *ScaleSet) DeleteInstances(instances []*azureRef) error {
func (scaleSet *ScaleSet) DeleteInstances(instances []*azureRef, hasUnregisteredNodes bool) error {
if len(instances) == 0 {
return nil
}
Expand Down Expand Up @@ -405,9 +405,12 @@ func (scaleSet *ScaleSet) DeleteInstances(instances []*azureRef) error {

// Proactively decrement scale set size so that we don't
// go below minimum node count if cache data is stale
scaleSet.sizeMutex.Lock()
scaleSet.curSize -= int64(len(instanceIDs))
scaleSet.sizeMutex.Unlock()
// only do it for non-unregistered nodes
if !hasUnregisteredNodes {
scaleSet.sizeMutex.Lock()
scaleSet.curSize -= int64(len(instanceIDs))
scaleSet.sizeMutex.Unlock()
}

// Proactively set the status of the instances to be deleted in cache
for _, instance := range instancesToDelete {
Expand All @@ -432,6 +435,7 @@ func (scaleSet *ScaleSet) DeleteNodes(nodes []*apiv1.Node) error {
}

refs := make([]*azureRef, 0, len(nodes))
hasUnregisteredNodes := false
for _, node := range nodes {
belongs, err := scaleSet.Belongs(node)
if err != nil {
Expand All @@ -442,13 +446,16 @@ func (scaleSet *ScaleSet) DeleteNodes(nodes []*apiv1.Node) error {
return fmt.Errorf("%s belongs to a different asg than %s", node.Name, scaleSet.Id())
}

if node.Annotations[cloudprovider.FakeNodeReasonAnnotation] == cloudprovider.FakeNodeUnregistered {
hasUnregisteredNodes = true
}
ref := &azureRef{
Name: node.Spec.ProviderID,
}
refs = append(refs, ref)
}

return scaleSet.DeleteInstances(refs)
return scaleSet.DeleteInstances(refs, hasUnregisteredNodes)
}

// Id returns ScaleSet id.
Expand Down
77 changes: 77 additions & 0 deletions cluster-autoscaler/cloudprovider/azure/azure_scale_set_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package azure

import (
"fmt"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"net/http"
"testing"
"time"
Expand Down Expand Up @@ -346,6 +347,82 @@ func TestDeleteNodes(t *testing.T) {
assert.Equal(t, instance2.Status.State, cloudprovider.InstanceDeleting)
}

func TestDeleteNodeUnregistered(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()

manager := newTestAzureManager(t)
vmssName := "test-asg"
var vmssCapacity int64 = 2

expectedScaleSets := []compute.VirtualMachineScaleSet{
{
Name: &vmssName,
Sku: &compute.Sku{
Capacity: &vmssCapacity,
},
},
}
expectedVMSSVMs := newTestVMSSVMList(2)

mockVMSSClient := mockvmssclient.NewMockInterface(ctrl)
mockVMSSClient.EXPECT().List(gomock.Any(), manager.config.ResourceGroup).Return(expectedScaleSets, nil).Times(2)
mockVMSSClient.EXPECT().DeleteInstancesAsync(gomock.Any(), manager.config.ResourceGroup, gomock.Any(), gomock.Any()).Return(nil, nil)
mockVMSSClient.EXPECT().WaitForAsyncOperationResult(gomock.Any(), gomock.Any()).Return(&http.Response{StatusCode: http.StatusOK}, nil).AnyTimes()
manager.azClient.virtualMachineScaleSetsClient = mockVMSSClient
mockVMSSVMClient := mockvmssvmclient.NewMockInterface(ctrl)
mockVMSSVMClient.EXPECT().List(gomock.Any(), manager.config.ResourceGroup, "test-asg", gomock.Any()).Return(expectedVMSSVMs, nil).AnyTimes()
manager.azClient.virtualMachineScaleSetVMsClient = mockVMSSVMClient
err := manager.forceRefresh()
assert.NoError(t, err)

resourceLimiter := cloudprovider.NewResourceLimiter(
map[string]int64{cloudprovider.ResourceNameCores: 1, cloudprovider.ResourceNameMemory: 10000000},
map[string]int64{cloudprovider.ResourceNameCores: 10, cloudprovider.ResourceNameMemory: 100000000})
provider, err := BuildAzureCloudProvider(manager, resourceLimiter)
assert.NoError(t, err)

registered := manager.RegisterNodeGroup(
newTestScaleSet(manager, "test-asg"))
manager.explicitlyConfigured["test-asg"] = true
assert.True(t, registered)
err = manager.forceRefresh()
assert.NoError(t, err)

scaleSet, ok := provider.NodeGroups()[0].(*ScaleSet)
assert.True(t, ok)

targetSize, err := scaleSet.TargetSize()
assert.NoError(t, err)
assert.Equal(t, 2, targetSize)

// annotate node with unregistered annotation
annotations := make(map[string]string)
annotations[cloudprovider.FakeNodeReasonAnnotation] = cloudprovider.FakeNodeUnregistered
nodesToDelete := []*apiv1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Annotations: annotations,
},
Spec: apiv1.NodeSpec{
ProviderID: "azure://" + fmt.Sprintf(fakeVirtualMachineScaleSetVMID, 0),
},
},
}
err = scaleSet.DeleteNodes(nodesToDelete)
assert.NoError(t, err)

// Ensure the the cached size has NOT been proactively decremented
targetSize, err = scaleSet.TargetSize()
assert.NoError(t, err)
assert.Equal(t, 2, targetSize)

// Ensure that the status for the instances is Deleting
instance0, found := scaleSet.getInstanceByProviderID("azure://" + fmt.Sprintf(fakeVirtualMachineScaleSetVMID, 0))
assert.True(t, found, true)
assert.Equal(t, instance0.Status.State, cloudprovider.InstanceDeleting)
}

func TestDeleteNoConflictRequest(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()
Expand Down

0 comments on commit 756a3e1

Please sign in to comment.