diff --git a/cluster-autoscaler/cloudprovider/azure/azure_agent_pool.go b/cluster-autoscaler/cloudprovider/azure/azure_agent_pool.go index 7b8c92a6158b..6c9a7336016b 100644 --- a/cluster-autoscaler/cloudprovider/azure/azure_agent_pool.go +++ b/cluster-autoscaler/cloudprovider/azure/azure_agent_pool.go @@ -20,6 +20,7 @@ import ( "fmt" "math/rand" "sort" + "strings" "sync" "time" @@ -132,7 +133,7 @@ func (as *AgentPool) GetVMIndexes() ([]int, map[int]string, error) { } indexes = append(indexes, index) - indexToVM[index] = "azure://" + *instance.ID + indexToVM[index] = "azure://" + strings.ToLower(*instance.ID) } sortedIndexes := sort.IntSlice(indexes) @@ -242,7 +243,7 @@ func (as *AgentPool) GetVirtualMachines() (instances []compute.VirtualMachine, e tags := instance.Tags vmPoolName := tags["poolName"] - if vmPoolName == nil || *vmPoolName != as.Id() { + if vmPoolName == nil || !strings.EqualFold(*vmPoolName, as.Id()) { continue } @@ -292,7 +293,7 @@ func (as *AgentPool) Belongs(node *apiv1.Node) (bool, error) { if targetAsg == nil { return false, fmt.Errorf("%s doesn't belong to a known agent pool", node.Name) } - if targetAsg.Id() != as.Id() { + if !strings.EqualFold(targetAsg.Id(), as.Id()) { return false, nil } return true, nil @@ -315,7 +316,7 @@ func (as *AgentPool) DeleteInstances(instances []*azureRef) error { return err } - if asg != commonAsg { + if !strings.EqualFold(asg.Id(), commonAsg.Id()) { return fmt.Errorf("cannot delete instance (%s) which don't belong to the same node pool (%q)", instance.GetKey(), commonAsg) } } @@ -398,7 +399,7 @@ func (as *AgentPool) Nodes() ([]cloudprovider.Instance, error) { } // To keep consistent with providerID from kubernetes cloud provider, do not convert ID to lower case. - name := "azure://" + *instance.ID + name := "azure://" + strings.ToLower(*instance.ID) nodes = append(nodes, cloudprovider.Instance{Id: name}) } diff --git a/cluster-autoscaler/cloudprovider/azure/azure_cache.go b/cluster-autoscaler/cloudprovider/azure/azure_cache.go index 345155ef335a..6f4fa56bb9ba 100644 --- a/cluster-autoscaler/cloudprovider/azure/azure_cache.go +++ b/cluster-autoscaler/cloudprovider/azure/azure_cache.go @@ -20,6 +20,7 @@ import ( "fmt" "reflect" "regexp" + "strings" "sync" "time" @@ -63,7 +64,7 @@ func (m *asgCache) Register(asg cloudprovider.NodeGroup) bool { defer m.mutex.Unlock() for i := range m.registeredAsgs { - if existing := m.registeredAsgs[i]; existing.Id() == asg.Id() { + if existing := m.registeredAsgs[i]; strings.EqualFold(existing.Id(), asg.Id()) { if reflect.DeepEqual(existing, asg) { return false } @@ -94,7 +95,7 @@ func (m *asgCache) Unregister(asg cloudprovider.NodeGroup) bool { updated := make([]cloudprovider.NodeGroup, 0, len(m.registeredAsgs)) changed := false for _, existing := range m.registeredAsgs { - if existing.Id() == asg.Id() { + if strings.EqualFold(existing.Id(), asg.Id()) { klog.V(1).Infof("Unregistered ASG %s", asg.Id()) changed = true continue @@ -117,7 +118,8 @@ func (m *asgCache) FindForInstance(instance *azureRef, vmType string) (cloudprov m.mutex.Lock() defer m.mutex.Unlock() - if m.notInRegisteredAsg[*instance] { + inst := azureRef{Name: strings.ToLower(instance.Name)} + if m.notInRegisteredAsg[inst] { // We already know we don't own this instance. Return early and avoid // additional calls. return nil, nil @@ -125,34 +127,37 @@ func (m *asgCache) FindForInstance(instance *azureRef, vmType string) (cloudprov if vmType == vmTypeVMSS { // Omit virtual machines not managed by vmss. - if ok := virtualMachineRE.Match([]byte(instance.Name)); ok { + if ok := virtualMachineRE.Match([]byte(inst.Name)); ok { klog.V(3).Infof("Instance %q is not managed by vmss, omit it in autoscaler", instance.Name) - m.notInRegisteredAsg[*instance] = true + m.notInRegisteredAsg[inst] = true return nil, nil } } if vmType == vmTypeStandard { // Omit virtual machines with providerID not in Azure resource ID format. - if ok := virtualMachineRE.Match([]byte(instance.Name)); !ok { + if ok := virtualMachineRE.Match([]byte(inst.Name)); !ok { klog.V(3).Infof("Instance %q is not in Azure resource ID format, omit it in autoscaler", instance.Name) - m.notInRegisteredAsg[*instance] = true + m.notInRegisteredAsg[inst] = true return nil, nil } } - if asg, found := m.instanceToAsg[*instance]; found { + // Look up caches for the instance. + if asg := m.getInstanceFromCache(inst.Name); asg != nil { return asg, nil } + // Not found, regenerate the cache and try again. if err := m.regenerate(); err != nil { - return nil, fmt.Errorf("error while looking for ASG for instance %+v, error: %v", *instance, err) + return nil, fmt.Errorf("error while looking for ASG for instance %q, error: %v", instance.Name, err) } - if config, found := m.instanceToAsg[*instance]; found { - return config, nil + if asg := m.getInstanceFromCache(inst.Name); asg != nil { + return asg, nil } - m.notInRegisteredAsg[*instance] = true + // Add the instance to notInRegisteredAsg since it's unknown from Azure. + m.notInRegisteredAsg[inst] = true return nil, nil } @@ -179,3 +184,15 @@ func (m *asgCache) regenerate() error { m.instanceToAsg = newCache return nil } + +// Get node group from cache. nil would be return if not found. +// Should be call with lock protected. +func (m *asgCache) getInstanceFromCache(providerID string) cloudprovider.NodeGroup { + for instanceID, asg := range m.instanceToAsg { + if strings.EqualFold(instanceID.GetKey(), providerID) { + return asg + } + } + + return nil +} diff --git a/cluster-autoscaler/cloudprovider/azure/azure_cloud_provider.go b/cluster-autoscaler/cloudprovider/azure/azure_cloud_provider.go index 2a112ecd61fc..15dd6aa57357 100644 --- a/cluster-autoscaler/cloudprovider/azure/azure_cloud_provider.go +++ b/cluster-autoscaler/cloudprovider/azure/azure_cloud_provider.go @@ -19,6 +19,7 @@ package azure import ( "io" "os" + "strings" "k8s.io/klog" @@ -110,6 +111,11 @@ func (azure *AzureCloudProvider) Refresh() error { return azure.azureManager.Refresh() } +// GetInstanceID gets the instance ID for the specified node. +func (azure *AzureCloudProvider) GetInstanceID(node *apiv1.Node) string { + return strings.ToLower(node.Spec.ProviderID) +} + // azureRef contains a reference to some entity in Azure world. type azureRef struct { Name string diff --git a/cluster-autoscaler/cloudprovider/azure/azure_container_service_pool.go b/cluster-autoscaler/cloudprovider/azure/azure_container_service_pool.go index 96c52e9147ee..cb382baa453b 100644 --- a/cluster-autoscaler/cloudprovider/azure/azure_container_service_pool.go +++ b/cluster-autoscaler/cloudprovider/azure/azure_container_service_pool.go @@ -78,7 +78,7 @@ func (agentPool *ContainerServiceAgentPool) GetAKSAgentPool(agentProfiles *[]con for _, value := range *agentProfiles { profileName := *value.Name klog.V(5).Infof("AKS AgentPool profile name: %s", profileName) - if profileName == (agentPool.azureRef.Name) { + if strings.EqualFold(profileName, agentPool.azureRef.Name) { return &value } } @@ -92,7 +92,7 @@ func (agentPool *ContainerServiceAgentPool) GetACSAgentPool(agentProfiles *[]con for _, value := range *agentProfiles { profileName := *value.Name klog.V(5).Infof("ACS AgentPool profile name: %s", profileName) - if profileName == (agentPool.azureRef.Name) { + if strings.EqualFold(profileName, agentPool.azureRef.Name) { return &value } } @@ -105,7 +105,7 @@ func (agentPool *ContainerServiceAgentPool) GetACSAgentPool(agentProfiles *[]con profileName := *value.Name poolName := agentPool.azureRef.Name + "pool0" klog.V(5).Infof("Workaround match check - ACS AgentPool Profile: %s <=> Poolname: %s", profileName, poolName) - if profileName == poolName { + if strings.EqualFold(profileName, poolName) { return &value } } @@ -270,7 +270,7 @@ func (agentPool *ContainerServiceAgentPool) SetNodeCount(count int) (err error) func (agentPool *ContainerServiceAgentPool) GetProviderID(name string) string { //TODO: come with a generic way to make it work with provider id formats // in different version of k8s. - return "azure://" + name + return "azure://" + strings.ToLower(name) } //GetName extracts the name of the node (a format which underlying cloud service understands) @@ -285,7 +285,7 @@ func (agentPool *ContainerServiceAgentPool) GetName(providerID string) (string, return "", err } for _, vm := range vms { - if strings.Compare(*vm.ID, providerID) == 0 { + if strings.EqualFold(*vm.ID, providerID) { return *vm.Name, nil } } @@ -398,7 +398,7 @@ func (agentPool *ContainerServiceAgentPool) IsContainerServiceNode(tags map[stri poolName := tags["poolName"] if poolName != nil { klog.V(5).Infof("Matching agentPool name: %s with tag name: %s", agentPool.azureRef.Name, *poolName) - if *poolName == agentPool.azureRef.Name { + if strings.EqualFold(*poolName, agentPool.azureRef.Name) { return true } } diff --git a/cluster-autoscaler/cloudprovider/azure/azure_fakes.go b/cluster-autoscaler/cloudprovider/azure/azure_fakes.go index dd1f532bb88c..395f01a4c0b4 100644 --- a/cluster-autoscaler/cloudprovider/azure/azure_fakes.go +++ b/cluster-autoscaler/cloudprovider/azure/azure_fakes.go @@ -30,7 +30,7 @@ import ( ) const ( - fakeVirtualMachineScaleSetVMID = "/subscriptions/test-subscription-id/resourceGroups/test-asg/providers/Microsoft.Compute/virtualMachineScaleSets/agents/virtualMachines/0" + fakeVirtualMachineScaleSetVMID = "/subscriptions/test-subscription-id/resourcegroups/test-asg/providers/microsoft.compute/virtualmachinescalesets/agents/virtualmachines/0" ) // VirtualMachineScaleSetsClientMock mocks for VirtualMachineScaleSetsClient. diff --git a/cluster-autoscaler/cloudprovider/azure/azure_scale_set.go b/cluster-autoscaler/cloudprovider/azure/azure_scale_set.go index b3a760836957..72b555afba33 100644 --- a/cluster-autoscaler/cloudprovider/azure/azure_scale_set.go +++ b/cluster-autoscaler/cloudprovider/azure/azure_scale_set.go @@ -48,8 +48,6 @@ type ScaleSet struct { mutex sync.Mutex lastRefresh time.Time curSize int64 - // virtualMachines holds a list of vmss instances (instanceID -> resourceID). - virtualMachines map[string]string } // NewScaleSet creates a new NewScaleSet. @@ -58,11 +56,10 @@ func NewScaleSet(spec *dynamic.NodeGroupSpec, az *AzureManager) (*ScaleSet, erro azureRef: azureRef{ Name: spec.Name, }, - minSize: spec.MinSize, - maxSize: spec.MaxSize, - manager: az, - curSize: -1, - virtualMachines: make(map[string]string), + minSize: spec.MinSize, + maxSize: spec.MaxSize, + manager: az, + curSize: -1, } return scaleSet, nil @@ -196,55 +193,24 @@ func (scaleSet *ScaleSet) IncreaseSize(delta int) error { // GetScaleSetVms returns list of nodes for the given scale set. // Note that the list results is not used directly because their resource ID format // is not consistent with Get results. -// TODO(feiskyer): use list results directly after the issue fixed in Azure VMSS API. func (scaleSet *ScaleSet) GetScaleSetVms() ([]string, error) { ctx, cancel := getContextWithCancel() defer cancel() resourceGroup := scaleSet.manager.config.ResourceGroup - result, err := scaleSet.manager.azClient.virtualMachineScaleSetVMsClient.List(ctx, resourceGroup, scaleSet.Name, "", "", "") + vmList, err := scaleSet.manager.azClient.virtualMachineScaleSetVMsClient.List(ctx, resourceGroup, scaleSet.Name, "", "", "") if err != nil { klog.Errorf("VirtualMachineScaleSetVMsClient.List failed for %s: %v", scaleSet.Name, err) return nil, err } - instanceIDs := make([]string, 0) - for _, vm := range result { - instanceIDs = append(instanceIDs, *vm.InstanceID) - } - allVMs := make([]string, 0) - for _, instanceID := range instanceIDs { - // Get from cache first. - if v, ok := scaleSet.virtualMachines[instanceID]; ok { - allVMs = append(allVMs, v) - continue - } - - // Not in cache, get from Azure API. - getCtx, getCancel := getContextWithCancel() - defer getCancel() - vm, err := scaleSet.manager.azClient.virtualMachineScaleSetVMsClient.Get(getCtx, resourceGroup, scaleSet.Name, instanceID) - if err != nil { - exists, realErr := checkResourceExistsFromError(err) - if realErr != nil { - klog.Errorf("Failed to get VirtualMachineScaleSetVM by (%s,%s), error: %v", scaleSet.Name, instanceID, err) - return nil, realErr - } - - if !exists { - klog.Warningf("Couldn't find VirtualMachineScaleSetVM by (%s,%s), assuming it has been removed", scaleSet.Name, instanceID) - continue - } - } - + for _, vm := range vmList { // The resource ID is empty string, which indicates the instance may be in deleting state. if len(*vm.ID) == 0 { continue } - // Save into cache. - scaleSet.virtualMachines[instanceID] = *vm.ID allVMs = append(allVMs, *vm.ID) } @@ -294,7 +260,7 @@ func (scaleSet *ScaleSet) Belongs(node *apiv1.Node) (bool, error) { if targetAsg == nil { return false, fmt.Errorf("%s doesn't belong to a known scale set", node.Name) } - if targetAsg.Id() != scaleSet.Id() { + if !strings.EqualFold(targetAsg.Id(), scaleSet.Id()) { return false, nil } return true, nil @@ -320,7 +286,7 @@ func (scaleSet *ScaleSet) DeleteInstances(instances []*azureRef) error { return err } - if asg != commonAsg { + if !strings.EqualFold(asg.Id(), commonAsg.Id()) { return fmt.Errorf("cannot delete instance (%s) which don't belong to the same Scale Set (%q)", instance.Name, commonAsg) } @@ -490,7 +456,7 @@ func (scaleSet *ScaleSet) Nodes() ([]cloudprovider.Instance, error) { instances := make([]cloudprovider.Instance, 0, len(vms)) for i := range vms { - name := "azure://" + vms[i] + name := "azure://" + strings.ToLower(vms[i]) instances = append(instances, cloudprovider.Instance{Id: name}) } diff --git a/cluster-autoscaler/cloudprovider/azure/azure_scale_set_test.go b/cluster-autoscaler/cloudprovider/azure/azure_scale_set_test.go index f6f9b6c8ca07..d9d9451ea44a 100644 --- a/cluster-autoscaler/cloudprovider/azure/azure_scale_set_test.go +++ b/cluster-autoscaler/cloudprovider/azure/azure_scale_set_test.go @@ -33,10 +33,9 @@ func newTestScaleSet(manager *AzureManager, name string) *ScaleSet { azureRef: azureRef{ Name: name, }, - manager: manager, - minSize: 1, - maxSize: 5, - virtualMachines: make(map[string]string), + manager: manager, + minSize: 1, + maxSize: 5, } } @@ -92,7 +91,7 @@ func TestBelongs(t *testing.T) { invalidNode := &apiv1.Node{ Spec: apiv1.NodeSpec{ - ProviderID: "azure:///subscriptions/test-subscrition-id/resourceGroups/invalid-asg/providers/Microsoft.Compute/virtualMachineScaleSets/agents/virtualMachines/0", + ProviderID: "azure:///subscriptions/test-subscrition-id/resourcegroups/invalid-asg/providers/microsoft.compute/virtualmachinescalesets/agents/virtualmachines/0", }, } _, err := scaleSet.Belongs(invalidNode) @@ -183,10 +182,6 @@ func TestScaleSetNodes(t *testing.T) { ss, ok := group.(*ScaleSet) assert.True(t, ok) assert.NotNil(t, ss) - assert.Equal(t, ss.virtualMachines, map[string]string{ - "0": fakeVirtualMachineScaleSetVMID, - }) - instances, err := group.Nodes() assert.NoError(t, err) assert.Equal(t, len(instances), 1)