From 8bdd6de3fdd6d3121122a38afaa044b24cb6db19 Mon Sep 17 00:00:00 2001 From: Marwan Ahmed Date: Sun, 6 Jun 2021 13:16:14 -0700 Subject: [PATCH 1/4] annotate fakeNodes so that cloudprovider implementations can identify them if needed --- cluster-autoscaler/clusterstate/clusterstate.go | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/cluster-autoscaler/clusterstate/clusterstate.go b/cluster-autoscaler/clusterstate/clusterstate.go index 08126c2a9d74..7f20266140e5 100644 --- a/cluster-autoscaler/clusterstate/clusterstate.go +++ b/cluster-autoscaler/clusterstate/clusterstate.go @@ -51,6 +51,14 @@ const ( // NodeGroupBackoffResetTimeout is the time after last failed scale-up when the backoff duration is reset. NodeGroupBackoffResetTimeout = 3 * time.Hour + + // FakeNodeReasonAnnotation is an annotation added to the fake placeholder nodes CA has created + // Note that this don't map to real nodes in k8s and are merely used for error handling + FakeNodeReasonAnnotation = "k8s.io/cluster-autoscaler/fake-node-reason" + // FakeNodeUnregistered represents a node that is identified by CA as unregistered + FakeNodeUnregistered = "unregistered" + // FakeNodeCreateError represents a node that is identified by CA as a created node with errors + FakeNodeCreateError = "create-error" ) // ScaleUpRequest contains information about the requested node group scale up. @@ -944,7 +952,7 @@ func getNotRegisteredNodes(allNodes []*apiv1.Node, cloudProviderNodeInstances ma for _, instance := range instances { if !registered.Has(instance.Id) { notRegistered = append(notRegistered, UnregisteredNode{ - Node: fakeNode(instance), + Node: fakeNode(instance, FakeNodeUnregistered), UnregisteredSince: time, }) } @@ -1091,7 +1099,7 @@ func (csr *ClusterStateRegistry) GetCreatedNodesWithErrors() []*apiv1.Node { _, _, instancesByErrorCode := csr.buildInstanceToErrorCodeMappings(nodeGroupInstances) for _, instances := range instancesByErrorCode { for _, instance := range instances { - nodesWithCreateErrors = append(nodesWithCreateErrors, fakeNode(instance)) + nodesWithCreateErrors = append(nodesWithCreateErrors, fakeNode(instance, FakeNodeCreateError)) } } } @@ -1108,10 +1116,13 @@ func (csr *ClusterStateRegistry) InvalidateNodeInstancesCacheEntry(nodeGroup clo csr.cloudProviderNodeInstancesCache.InvalidateCacheEntry(nodeGroup) } -func fakeNode(instance cloudprovider.Instance) *apiv1.Node { +func fakeNode(instance cloudprovider.Instance, reason string) *apiv1.Node { return &apiv1.Node{ ObjectMeta: metav1.ObjectMeta{ Name: instance.Id, + Annotations: map[string]string{ + FakeNodeReasonAnnotation: reason, + }, }, Spec: apiv1.NodeSpec{ ProviderID: instance.Id, From bfb50d8a63e5266bb66f8e5e137e84fb763e7215 Mon Sep 17 00:00:00 2001 From: Marwan Ahmed Date: Tue, 8 Jun 2021 10:56:35 -0700 Subject: [PATCH 2/4] move annotations to cloudprovider package --- cluster-autoscaler/cloudprovider/cloud_provider.go | 10 ++++++++++ cluster-autoscaler/clusterstate/clusterstate.go | 14 +++----------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/cluster-autoscaler/cloudprovider/cloud_provider.go b/cluster-autoscaler/cloudprovider/cloud_provider.go index 23a132f0aeb1..2543212a5b3c 100644 --- a/cluster-autoscaler/cloudprovider/cloud_provider.go +++ b/cluster-autoscaler/cloudprovider/cloud_provider.go @@ -240,6 +240,16 @@ func (c InstanceErrorClass) String() string { } } +const ( + // FakeNodeReasonAnnotation is an annotation added to the fake placeholder nodes CA has created + // Note that this don't map to real nodes in k8s and are merely used for error handling + FakeNodeReasonAnnotation = "k8s.io/cluster-autoscaler/fake-node-reason" + // FakeNodeUnregistered represents a node that is identified by CA as unregistered + FakeNodeUnregistered = "unregistered" + // FakeNodeCreateError represents a node that is identified by CA as a created node with errors + FakeNodeCreateError = "create-error" +) + // PricingModel contains information about the node price and how it changes in time. type PricingModel interface { // NodePrice returns a price of running the given node for a given period of time. diff --git a/cluster-autoscaler/clusterstate/clusterstate.go b/cluster-autoscaler/clusterstate/clusterstate.go index 7f20266140e5..381a6024dc9b 100644 --- a/cluster-autoscaler/clusterstate/clusterstate.go +++ b/cluster-autoscaler/clusterstate/clusterstate.go @@ -51,14 +51,6 @@ const ( // NodeGroupBackoffResetTimeout is the time after last failed scale-up when the backoff duration is reset. NodeGroupBackoffResetTimeout = 3 * time.Hour - - // FakeNodeReasonAnnotation is an annotation added to the fake placeholder nodes CA has created - // Note that this don't map to real nodes in k8s and are merely used for error handling - FakeNodeReasonAnnotation = "k8s.io/cluster-autoscaler/fake-node-reason" - // FakeNodeUnregistered represents a node that is identified by CA as unregistered - FakeNodeUnregistered = "unregistered" - // FakeNodeCreateError represents a node that is identified by CA as a created node with errors - FakeNodeCreateError = "create-error" ) // ScaleUpRequest contains information about the requested node group scale up. @@ -952,7 +944,7 @@ func getNotRegisteredNodes(allNodes []*apiv1.Node, cloudProviderNodeInstances ma for _, instance := range instances { if !registered.Has(instance.Id) { notRegistered = append(notRegistered, UnregisteredNode{ - Node: fakeNode(instance, FakeNodeUnregistered), + Node: fakeNode(instance, cloudprovider.FakeNodeUnregistered), UnregisteredSince: time, }) } @@ -1099,7 +1091,7 @@ func (csr *ClusterStateRegistry) GetCreatedNodesWithErrors() []*apiv1.Node { _, _, instancesByErrorCode := csr.buildInstanceToErrorCodeMappings(nodeGroupInstances) for _, instances := range instancesByErrorCode { for _, instance := range instances { - nodesWithCreateErrors = append(nodesWithCreateErrors, fakeNode(instance, FakeNodeCreateError)) + nodesWithCreateErrors = append(nodesWithCreateErrors, fakeNode(instance, cloudprovider.FakeNodeCreateError)) } } } @@ -1121,7 +1113,7 @@ func fakeNode(instance cloudprovider.Instance, reason string) *apiv1.Node { ObjectMeta: metav1.ObjectMeta{ Name: instance.Id, Annotations: map[string]string{ - FakeNodeReasonAnnotation: reason, + cloudprovider.FakeNodeReasonAnnotation: reason, }, }, Spec: apiv1.NodeSpec{ From ead14c83d15b95d71d73fe09402f1bd9b1d3b14e Mon Sep 17 00:00:00 2001 From: Marwan Ahmed Date: Thu, 30 Sep 2021 16:29:46 -0700 Subject: [PATCH 3/4] fix 1.19 test --- .../azure/azure_scale_set_test.go | 26 +++++++------------ 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/cluster-autoscaler/cloudprovider/azure/azure_scale_set_test.go b/cluster-autoscaler/cloudprovider/azure/azure_scale_set_test.go index 376a00af46f3..5a8bce7832fd 100644 --- a/cluster-autoscaler/cloudprovider/azure/azure_scale_set_test.go +++ b/cluster-autoscaler/cloudprovider/azure/azure_scale_set_test.go @@ -337,7 +337,7 @@ func TestDeleteNodeUnregistered(t *testing.T) { manager := newTestAzureManager(t) vmssName := "test-asg" - var vmssCapacity int64 = 2 + var vmssCapacity int64 = 3 expectedScaleSets := []compute.VirtualMachineScaleSet{ { @@ -347,18 +347,17 @@ func TestDeleteNodeUnregistered(t *testing.T) { }, }, } - expectedVMSSVMs := newTestVMSSVMList(2) + expectedVMSSVMs := newTestVMSSVMList(3) mockVMSSClient := mockvmssclient.NewMockInterface(ctrl) - mockVMSSClient.EXPECT().List(gomock.Any(), manager.config.ResourceGroup).Return(expectedScaleSets, nil).Times(2) + mockVMSSClient.EXPECT().List(gomock.Any(), manager.config.ResourceGroup).Return(expectedScaleSets, nil).AnyTimes() mockVMSSClient.EXPECT().DeleteInstancesAsync(gomock.Any(), manager.config.ResourceGroup, gomock.Any(), gomock.Any()).Return(nil, nil) mockVMSSClient.EXPECT().WaitForAsyncOperationResult(gomock.Any(), gomock.Any()).Return(&http.Response{StatusCode: http.StatusOK}, nil).AnyTimes() manager.azClient.virtualMachineScaleSetsClient = mockVMSSClient mockVMSSVMClient := mockvmssvmclient.NewMockInterface(ctrl) mockVMSSVMClient.EXPECT().List(gomock.Any(), manager.config.ResourceGroup, "test-asg", gomock.Any()).Return(expectedVMSSVMs, nil).AnyTimes() manager.azClient.virtualMachineScaleSetVMsClient = mockVMSSVMClient - err := manager.forceRefresh() - assert.NoError(t, err) + manager.regenerateCache() resourceLimiter := cloudprovider.NewResourceLimiter( map[string]int64{cloudprovider.ResourceNameCores: 1, cloudprovider.ResourceNameMemory: 10000000}, @@ -366,23 +365,21 @@ func TestDeleteNodeUnregistered(t *testing.T) { provider, err := BuildAzureCloudProvider(manager, resourceLimiter) assert.NoError(t, err) - registered := manager.RegisterNodeGroup( - newTestScaleSet(manager, "test-asg")) - manager.explicitlyConfigured["test-asg"] = true + registered := manager.RegisterAsg(newTestScaleSet(manager, "test-asg")) assert.True(t, registered) - err = manager.forceRefresh() - assert.NoError(t, err) + manager.regenerateCache() scaleSet, ok := provider.NodeGroups()[0].(*ScaleSet) assert.True(t, ok) targetSize, err := scaleSet.TargetSize() assert.NoError(t, err) - assert.Equal(t, 2, targetSize) + assert.Equal(t, 3, targetSize) // annotate node with unregistered annotation annotations := make(map[string]string) annotations[cloudprovider.FakeNodeReasonAnnotation] = cloudprovider.FakeNodeUnregistered + // Perform the delete operation nodesToDelete := []*apiv1.Node{ { ObjectMeta: metav1.ObjectMeta{ @@ -399,12 +396,7 @@ func TestDeleteNodeUnregistered(t *testing.T) { // Ensure the the cached size has NOT been proactively decremented targetSize, err = scaleSet.TargetSize() assert.NoError(t, err) - assert.Equal(t, 2, targetSize) - - // Ensure that the status for the instances is Deleting - instance0, found := scaleSet.getInstanceByProviderID("azure://" + fmt.Sprintf(fakeVirtualMachineScaleSetVMID, 0)) - assert.True(t, found, true) - assert.Equal(t, instance0.Status.State, cloudprovider.InstanceDeleting) + assert.Equal(t, 3, targetSize) } func TestDeleteNoConflictRequest(t *testing.T) { From 968774059c0ae1375c04f3098f0814d1fa2fa4f6 Mon Sep 17 00:00:00 2001 From: Marwan Ahmed Date: Thu, 30 Sep 2021 17:22:54 -0700 Subject: [PATCH 4/4] remove flaky test that's removed in master --- .../cloudprovider/azure/azure_config_test.go | 8 -------- 1 file changed, 8 deletions(-) diff --git a/cluster-autoscaler/cloudprovider/azure/azure_config_test.go b/cluster-autoscaler/cloudprovider/azure/azure_config_test.go index fbeb90bc21b6..7a04465ff7a2 100644 --- a/cluster-autoscaler/cloudprovider/azure/azure_config_test.go +++ b/cluster-autoscaler/cloudprovider/azure/azure_config_test.go @@ -235,11 +235,3 @@ func TestOverrideDefaultRateLimitConfig(t *testing.T) { newconfig = overrideDefaultRateLimitConfig(&defaultConfigWithRateLimits.RateLimitConfig, &falseCloudProviderRateLimit.RateLimitConfig) assert.Equal(t, &falseCloudProviderRateLimit.RateLimitConfig, newconfig) } - -func TestGetSubscriptionIdFromInstanceMetadata(t *testing.T) { - // metadataURL in azure_manager.go is not available for our tests, expect fail. - result, err := getSubscriptionIdFromInstanceMetadata() - expected := "" - assert.NotNil(t, err.Error()) - assert.Equal(t, expected, result, "Verify return result failed, expected: %v, actual: %v", expected, result) -}