From 8bdd6de3fdd6d3121122a38afaa044b24cb6db19 Mon Sep 17 00:00:00 2001
From: Marwan Ahmed <marwanad@microsoft.com>
Date: Sun, 6 Jun 2021 13:16:14 -0700
Subject: [PATCH 1/4] annotate fakeNodes so that cloudprovider implementations
 can identify them if needed

---
 cluster-autoscaler/clusterstate/clusterstate.go | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/cluster-autoscaler/clusterstate/clusterstate.go b/cluster-autoscaler/clusterstate/clusterstate.go
index 08126c2a9d74..7f20266140e5 100644
--- a/cluster-autoscaler/clusterstate/clusterstate.go
+++ b/cluster-autoscaler/clusterstate/clusterstate.go
@@ -51,6 +51,14 @@ const (
 
 	// NodeGroupBackoffResetTimeout is the time after last failed scale-up when the backoff duration is reset.
 	NodeGroupBackoffResetTimeout = 3 * time.Hour
+
+	// FakeNodeReasonAnnotation is an annotation added to the fake placeholder nodes CA has created
+	// Note that this don't map to real nodes in k8s and are merely used for error handling
+	FakeNodeReasonAnnotation = "k8s.io/cluster-autoscaler/fake-node-reason"
+	// FakeNodeUnregistered represents a node that is identified by CA as unregistered
+	FakeNodeUnregistered = "unregistered"
+	// FakeNodeCreateError represents a node that is identified by CA as a created node with errors
+	FakeNodeCreateError = "create-error"
 )
 
 // ScaleUpRequest contains information about the requested node group scale up.
@@ -944,7 +952,7 @@ func getNotRegisteredNodes(allNodes []*apiv1.Node, cloudProviderNodeInstances ma
 		for _, instance := range instances {
 			if !registered.Has(instance.Id) {
 				notRegistered = append(notRegistered, UnregisteredNode{
-					Node:              fakeNode(instance),
+					Node:              fakeNode(instance, FakeNodeUnregistered),
 					UnregisteredSince: time,
 				})
 			}
@@ -1091,7 +1099,7 @@ func (csr *ClusterStateRegistry) GetCreatedNodesWithErrors() []*apiv1.Node {
 		_, _, instancesByErrorCode := csr.buildInstanceToErrorCodeMappings(nodeGroupInstances)
 		for _, instances := range instancesByErrorCode {
 			for _, instance := range instances {
-				nodesWithCreateErrors = append(nodesWithCreateErrors, fakeNode(instance))
+				nodesWithCreateErrors = append(nodesWithCreateErrors, fakeNode(instance, FakeNodeCreateError))
 			}
 		}
 	}
@@ -1108,10 +1116,13 @@ func (csr *ClusterStateRegistry) InvalidateNodeInstancesCacheEntry(nodeGroup clo
 	csr.cloudProviderNodeInstancesCache.InvalidateCacheEntry(nodeGroup)
 }
 
-func fakeNode(instance cloudprovider.Instance) *apiv1.Node {
+func fakeNode(instance cloudprovider.Instance, reason string) *apiv1.Node {
 	return &apiv1.Node{
 		ObjectMeta: metav1.ObjectMeta{
 			Name: instance.Id,
+			Annotations: map[string]string{
+				FakeNodeReasonAnnotation: reason,
+			},
 		},
 		Spec: apiv1.NodeSpec{
 			ProviderID: instance.Id,

From bfb50d8a63e5266bb66f8e5e137e84fb763e7215 Mon Sep 17 00:00:00 2001
From: Marwan Ahmed <marwanad@microsoft.com>
Date: Tue, 8 Jun 2021 10:56:35 -0700
Subject: [PATCH 2/4] move annotations to cloudprovider package

---
 cluster-autoscaler/cloudprovider/cloud_provider.go | 10 ++++++++++
 cluster-autoscaler/clusterstate/clusterstate.go    | 14 +++-----------
 2 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/cluster-autoscaler/cloudprovider/cloud_provider.go b/cluster-autoscaler/cloudprovider/cloud_provider.go
index 23a132f0aeb1..2543212a5b3c 100644
--- a/cluster-autoscaler/cloudprovider/cloud_provider.go
+++ b/cluster-autoscaler/cloudprovider/cloud_provider.go
@@ -240,6 +240,16 @@ func (c InstanceErrorClass) String() string {
 	}
 }
 
+const (
+	// FakeNodeReasonAnnotation is an annotation added to the fake placeholder nodes CA has created
+	// Note that this don't map to real nodes in k8s and are merely used for error handling
+	FakeNodeReasonAnnotation = "k8s.io/cluster-autoscaler/fake-node-reason"
+	// FakeNodeUnregistered represents a node that is identified by CA as unregistered
+	FakeNodeUnregistered = "unregistered"
+	// FakeNodeCreateError represents a node that is identified by CA as a created node with errors
+	FakeNodeCreateError = "create-error"
+)
+
 // PricingModel contains information about the node price and how it changes in time.
 type PricingModel interface {
 	// NodePrice returns a price of running the given node for a given period of time.
diff --git a/cluster-autoscaler/clusterstate/clusterstate.go b/cluster-autoscaler/clusterstate/clusterstate.go
index 7f20266140e5..381a6024dc9b 100644
--- a/cluster-autoscaler/clusterstate/clusterstate.go
+++ b/cluster-autoscaler/clusterstate/clusterstate.go
@@ -51,14 +51,6 @@ const (
 
 	// NodeGroupBackoffResetTimeout is the time after last failed scale-up when the backoff duration is reset.
 	NodeGroupBackoffResetTimeout = 3 * time.Hour
-
-	// FakeNodeReasonAnnotation is an annotation added to the fake placeholder nodes CA has created
-	// Note that this don't map to real nodes in k8s and are merely used for error handling
-	FakeNodeReasonAnnotation = "k8s.io/cluster-autoscaler/fake-node-reason"
-	// FakeNodeUnregistered represents a node that is identified by CA as unregistered
-	FakeNodeUnregistered = "unregistered"
-	// FakeNodeCreateError represents a node that is identified by CA as a created node with errors
-	FakeNodeCreateError = "create-error"
 )
 
 // ScaleUpRequest contains information about the requested node group scale up.
@@ -952,7 +944,7 @@ func getNotRegisteredNodes(allNodes []*apiv1.Node, cloudProviderNodeInstances ma
 		for _, instance := range instances {
 			if !registered.Has(instance.Id) {
 				notRegistered = append(notRegistered, UnregisteredNode{
-					Node:              fakeNode(instance, FakeNodeUnregistered),
+					Node:              fakeNode(instance, cloudprovider.FakeNodeUnregistered),
 					UnregisteredSince: time,
 				})
 			}
@@ -1099,7 +1091,7 @@ func (csr *ClusterStateRegistry) GetCreatedNodesWithErrors() []*apiv1.Node {
 		_, _, instancesByErrorCode := csr.buildInstanceToErrorCodeMappings(nodeGroupInstances)
 		for _, instances := range instancesByErrorCode {
 			for _, instance := range instances {
-				nodesWithCreateErrors = append(nodesWithCreateErrors, fakeNode(instance, FakeNodeCreateError))
+				nodesWithCreateErrors = append(nodesWithCreateErrors, fakeNode(instance, cloudprovider.FakeNodeCreateError))
 			}
 		}
 	}
@@ -1121,7 +1113,7 @@ func fakeNode(instance cloudprovider.Instance, reason string) *apiv1.Node {
 		ObjectMeta: metav1.ObjectMeta{
 			Name: instance.Id,
 			Annotations: map[string]string{
-				FakeNodeReasonAnnotation: reason,
+				cloudprovider.FakeNodeReasonAnnotation: reason,
 			},
 		},
 		Spec: apiv1.NodeSpec{

From ead14c83d15b95d71d73fe09402f1bd9b1d3b14e Mon Sep 17 00:00:00 2001
From: Marwan Ahmed <marwanad@microsoft.com>
Date: Thu, 30 Sep 2021 16:29:46 -0700
Subject: [PATCH 3/4] fix 1.19 test

---
 .../azure/azure_scale_set_test.go             | 26 +++++++------------
 1 file changed, 9 insertions(+), 17 deletions(-)

diff --git a/cluster-autoscaler/cloudprovider/azure/azure_scale_set_test.go b/cluster-autoscaler/cloudprovider/azure/azure_scale_set_test.go
index 376a00af46f3..5a8bce7832fd 100644
--- a/cluster-autoscaler/cloudprovider/azure/azure_scale_set_test.go
+++ b/cluster-autoscaler/cloudprovider/azure/azure_scale_set_test.go
@@ -337,7 +337,7 @@ func TestDeleteNodeUnregistered(t *testing.T) {
 
 	manager := newTestAzureManager(t)
 	vmssName := "test-asg"
-	var vmssCapacity int64 = 2
+	var vmssCapacity int64 = 3
 
 	expectedScaleSets := []compute.VirtualMachineScaleSet{
 		{
@@ -347,18 +347,17 @@ func TestDeleteNodeUnregistered(t *testing.T) {
 			},
 		},
 	}
-	expectedVMSSVMs := newTestVMSSVMList(2)
+	expectedVMSSVMs := newTestVMSSVMList(3)
 
 	mockVMSSClient := mockvmssclient.NewMockInterface(ctrl)
-	mockVMSSClient.EXPECT().List(gomock.Any(), manager.config.ResourceGroup).Return(expectedScaleSets, nil).Times(2)
+	mockVMSSClient.EXPECT().List(gomock.Any(), manager.config.ResourceGroup).Return(expectedScaleSets, nil).AnyTimes()
 	mockVMSSClient.EXPECT().DeleteInstancesAsync(gomock.Any(), manager.config.ResourceGroup, gomock.Any(), gomock.Any()).Return(nil, nil)
 	mockVMSSClient.EXPECT().WaitForAsyncOperationResult(gomock.Any(), gomock.Any()).Return(&http.Response{StatusCode: http.StatusOK}, nil).AnyTimes()
 	manager.azClient.virtualMachineScaleSetsClient = mockVMSSClient
 	mockVMSSVMClient := mockvmssvmclient.NewMockInterface(ctrl)
 	mockVMSSVMClient.EXPECT().List(gomock.Any(), manager.config.ResourceGroup, "test-asg", gomock.Any()).Return(expectedVMSSVMs, nil).AnyTimes()
 	manager.azClient.virtualMachineScaleSetVMsClient = mockVMSSVMClient
-	err := manager.forceRefresh()
-	assert.NoError(t, err)
+	manager.regenerateCache()
 
 	resourceLimiter := cloudprovider.NewResourceLimiter(
 		map[string]int64{cloudprovider.ResourceNameCores: 1, cloudprovider.ResourceNameMemory: 10000000},
@@ -366,23 +365,21 @@ func TestDeleteNodeUnregistered(t *testing.T) {
 	provider, err := BuildAzureCloudProvider(manager, resourceLimiter)
 	assert.NoError(t, err)
 
-	registered := manager.RegisterNodeGroup(
-		newTestScaleSet(manager, "test-asg"))
-	manager.explicitlyConfigured["test-asg"] = true
+	registered := manager.RegisterAsg(newTestScaleSet(manager, "test-asg"))
 	assert.True(t, registered)
-	err = manager.forceRefresh()
-	assert.NoError(t, err)
+	manager.regenerateCache()
 
 	scaleSet, ok := provider.NodeGroups()[0].(*ScaleSet)
 	assert.True(t, ok)
 
 	targetSize, err := scaleSet.TargetSize()
 	assert.NoError(t, err)
-	assert.Equal(t, 2, targetSize)
+	assert.Equal(t, 3, targetSize)
 
 	// annotate node with unregistered annotation
 	annotations := make(map[string]string)
 	annotations[cloudprovider.FakeNodeReasonAnnotation] = cloudprovider.FakeNodeUnregistered
+	// Perform the delete operation
 	nodesToDelete := []*apiv1.Node{
 		{
 			ObjectMeta: metav1.ObjectMeta{
@@ -399,12 +396,7 @@ func TestDeleteNodeUnregistered(t *testing.T) {
 	// Ensure the the cached size has NOT been proactively decremented
 	targetSize, err = scaleSet.TargetSize()
 	assert.NoError(t, err)
-	assert.Equal(t, 2, targetSize)
-
-	// Ensure that the status for the instances is Deleting
-	instance0, found := scaleSet.getInstanceByProviderID("azure://" + fmt.Sprintf(fakeVirtualMachineScaleSetVMID, 0))
-	assert.True(t, found, true)
-	assert.Equal(t, instance0.Status.State, cloudprovider.InstanceDeleting)
+	assert.Equal(t, 3, targetSize)
 }
 
 func TestDeleteNoConflictRequest(t *testing.T) {

From 968774059c0ae1375c04f3098f0814d1fa2fa4f6 Mon Sep 17 00:00:00 2001
From: Marwan Ahmed <marwanad@microsoft.com>
Date: Thu, 30 Sep 2021 17:22:54 -0700
Subject: [PATCH 4/4] remove flaky test that's removed in master

---
 .../cloudprovider/azure/azure_config_test.go              | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/cluster-autoscaler/cloudprovider/azure/azure_config_test.go b/cluster-autoscaler/cloudprovider/azure/azure_config_test.go
index fbeb90bc21b6..7a04465ff7a2 100644
--- a/cluster-autoscaler/cloudprovider/azure/azure_config_test.go
+++ b/cluster-autoscaler/cloudprovider/azure/azure_config_test.go
@@ -235,11 +235,3 @@ func TestOverrideDefaultRateLimitConfig(t *testing.T) {
 	newconfig = overrideDefaultRateLimitConfig(&defaultConfigWithRateLimits.RateLimitConfig, &falseCloudProviderRateLimit.RateLimitConfig)
 	assert.Equal(t, &falseCloudProviderRateLimit.RateLimitConfig, newconfig)
 }
-
-func TestGetSubscriptionIdFromInstanceMetadata(t *testing.T) {
-	// metadataURL in azure_manager.go is not available for our tests, expect fail.
-	result, err := getSubscriptionIdFromInstanceMetadata()
-	expected := ""
-	assert.NotNil(t, err.Error())
-	assert.Equal(t, expected, result, "Verify return result failed, expected: %v, actual: %v", expected, result)
-}