diff --git a/cluster-autoscaler/core/static_autoscaler.go b/cluster-autoscaler/core/static_autoscaler.go index 68b870786d02..ac3103973931 100644 --- a/cluster-autoscaler/core/static_autoscaler.go +++ b/cluster-autoscaler/core/static_autoscaler.go @@ -955,6 +955,7 @@ func (a *StaticAutoscaler) obtainNodeLists(cp cloudprovider.CloudProvider) ([]*a klog.Errorf("Failed to list ready nodes: %v", err) return nil, nil, caerrors.ToAutoscalerError(caerrors.ApiCallError, err) } + a.reportTaintsCount(allNodes) // Handle GPU case - allocatable GPU may be equal to 0 up to 15 minutes after // node registers as ready. See https://github.com/kubernetes/kubernetes/issues/54959 @@ -978,6 +979,13 @@ func (a *StaticAutoscaler) updateClusterState(allNodes []*apiv1.Node, nodeInfosF return nil } +func (a *StaticAutoscaler) reportTaintsCount(nodes []*apiv1.Node) { + foundTaints := taints.CountNodeTaints(nodes, a.taintConfig) + for taintType, count := range foundTaints { + metrics.ObserveNodeTaintsCount(taintType, float64(count)) + } +} + func allPodsAreNew(pods []*apiv1.Pod, currentTime time.Time) bool { if core_utils.GetOldestCreateTime(pods).Add(unschedulablePodTimeBuffer).After(currentTime) { return true diff --git a/cluster-autoscaler/metrics/metrics.go b/cluster-autoscaler/metrics/metrics.go index 8f4e0d869ddd..44939c46e17d 100644 --- a/cluster-autoscaler/metrics/metrics.go +++ b/cluster-autoscaler/metrics/metrics.go @@ -373,6 +373,15 @@ var ( Help: "Number of node groups deleted by Node Autoprovisioning.", }, ) + + nodeTaintsCount = k8smetrics.NewGaugeVec( + &k8smetrics.GaugeOpts{ + Namespace: caNamespace, + Name: "node_taints_count", + Help: "Number of taints per type used in the cluster.", + }, + []string{"type"}, + ) ) // RegisterAll registers all metrics. 
@@ -407,6 +416,7 @@ func RegisterAll(emitPerNodeGroupMetrics bool) { legacyregistry.MustRegister(nodeGroupCreationCount) legacyregistry.MustRegister(nodeGroupDeletionCount) legacyregistry.MustRegister(pendingNodeDeletions) + legacyregistry.MustRegister(nodeTaintsCount) if emitPerNodeGroupMetrics { legacyregistry.MustRegister(nodesGroupMinNodes) @@ -615,3 +625,8 @@ func RegisterSkippedScaleUpMemory() { func ObservePendingNodeDeletions(value int) { pendingNodeDeletions.Set(float64(value)) } + +// ObserveNodeTaintsCount records the node taints count of given type. +func ObserveNodeTaintsCount(taintType string, count float64) { + nodeTaintsCount.WithLabelValues(taintType).Set(count) +} diff --git a/cluster-autoscaler/proposals/metrics.md b/cluster-autoscaler/proposals/metrics.md index 28855b5e3598..a3d954beed0e 100644 --- a/cluster-autoscaler/proposals/metrics.md +++ b/cluster-autoscaler/proposals/metrics.md @@ -21,17 +21,18 @@ All the metrics are prefixed with `cluster_autoscaler_`. ### Cluster state -| Metric name | Metric type | Labels | Description | -| ----------- | ----------- | ------ | ----------- | -| cluster_safe_to_autoscale | Gauge | | Whether or not cluster is healthy enough for autoscaling. 1 if it is, 0 otherwise. | -| nodes_count | Gauge | `state`=<node-state> | Number of nodes in cluster. | -| unschedulable_pods_count | Gauge | | Number of unschedulable ("Pending") pods in the cluster. | -| node_groups_count | Gauge | `node_group_type`=<node-group-type> | Number of node groups managed by CA. | -| max_nodes_count | Gauge | | Maximum number of nodes in all node groups. | -| cluster_cpu_current_cores | Gauge | | | Current number of cores in the cluster, minus deleting nodes. | -| cpu_limits_cores | Gauge | `direction`=<`minimum` or `maximum`> | Minimum and maximum number of cores in the cluster. | -| cluster_memory_current_bytes | Gauge | | Current number of bytes of memory in the cluster, minus deleting nodes. 
| -| memory_limits_bytes | Gauge | `direction`=<`minimum` or `maximum`> | Minimum and maximum number of bytes of memory in cluster. | +| Metric name | Metric type | Labels | Description | +| ----------- | ----------- |--------------------------------------------|------------------------------------------------------------------------------------| +| cluster_safe_to_autoscale | Gauge | | Whether or not cluster is healthy enough for autoscaling. 1 if it is, 0 otherwise. | +| nodes_count | Gauge | `state`=<node-state> | Number of nodes in cluster. | +| unschedulable_pods_count | Gauge | | Number of unschedulable ("Pending") pods in the cluster. | +| node_groups_count | Gauge | `node_group_type`=<node-group-type> | Number of node groups managed by CA. | +| max_nodes_count | Gauge | | Maximum number of nodes in all node groups. | +| cluster_cpu_current_cores | Gauge | | Current number of cores in the cluster, minus deleting nodes. | +| cpu_limits_cores | Gauge | `direction`=<`minimum` or `maximum`> | Minimum and maximum number of cores in the cluster. | +| cluster_memory_current_bytes | Gauge | | Current number of bytes of memory in the cluster, minus deleting nodes. | +| memory_limits_bytes | Gauge | `direction`=<`minimum` or `maximum`> | Minimum and maximum number of bytes of memory in cluster. | +| node_taints_count | Gauge | `type`=<taint-type> | Number of taints per type in cluster. | * `cluster_safe_to_autoscale` indicates whether cluster is healthy enough for autoscaling. CA stops all operations if significant number of nodes are unready (by default 33% as of CA 0.5.4). * `nodes_count` records the total number of nodes, labeled by node state. 
Possible diff --git a/cluster-autoscaler/utils/taints/taints.go b/cluster-autoscaler/utils/taints/taints.go index c3a09db60e12..8e06a44fbcc8 100644 --- a/cluster-autoscaler/utils/taints/taints.go +++ b/cluster-autoscaler/utils/taints/taints.go @@ -54,6 +54,15 @@ const ( // AWS: Indicates that a node has volumes stuck in attaching state and hence it is not fit for scheduling more pods awsNodeWithImpairedVolumesTaint = "NodeWithImpairedVolumes" + + // statusNodeTaintReportedType is the value used when reporting node taint count defined as status taint in given taintConfig. + statusNodeTaintReportedType = "status-taint" + + // startupNodeTaintReportedType is the value used when reporting node taint count defined as startup taint in given taintConfig. + startupNodeTaintReportedType = "startup-taint" + + // unlistedNodeTaintReportedType is the value used when reporting node taint count in case taint key is other than defined in explicitlyReportedNodeTaints and taintConfig. + unlistedNodeTaintReportedType = "other" ) // TaintKeySet is a set of taint key @@ -108,6 +117,23 @@ var ( // Mutable only in unit tests maxRetryDeadline time.Duration = 5 * time.Second conflictRetryInterval time.Duration = 750 * time.Millisecond + + explicitlyReportedNodeTaints = TaintKeySet{ + apiv1.TaintNodeNotReady: true, + apiv1.TaintNodeUnreachable: true, + apiv1.TaintNodeUnschedulable: true, + apiv1.TaintNodeMemoryPressure: true, + apiv1.TaintNodeDiskPressure: true, + apiv1.TaintNodeNetworkUnavailable: true, + apiv1.TaintNodePIDPressure: true, + apiv1.TaintNodeOutOfService: true, + cloudproviderapi.TaintExternalCloudProvider: true, + cloudproviderapi.TaintNodeShutdown: true, + gkeNodeTerminationHandlerTaint: true, + awsNodeWithImpairedVolumesTaint: true, + ToBeDeletedTaint: true, + DeletionCandidateTaint: true, + } ) // getKeyShortName converts taint key to short name for logging @@ -416,3 +442,50 @@ func FilterOutNodesWithStartupTaints(taintConfig TaintConfig, allNodes, readyNod } return 
newAllNodes, newReadyNodes } + +// CountNodeTaints counts used node taints. +func CountNodeTaints(nodes []*apiv1.Node, taintConfig TaintConfig) map[string]int { + foundTaintsCount := make(map[string]int) + for _, node := range nodes { + for _, taint := range node.Spec.Taints { + key := getTaintTypeToReport(taint.Key, taintConfig) + if _, ok := foundTaintsCount[key]; ok { + foundTaintsCount[key] += 1 + } else { + foundTaintsCount[key] = 1 + } + } + } + return foundTaintsCount +} + +func getTaintTypeToReport(key string, taintConfig TaintConfig) string { + // Track deprecated taints. + if strings.HasPrefix(key, IgnoreTaintPrefix) { + return IgnoreTaintPrefix + } + + if _, ok := explicitlyReportedNodeTaints[key]; ok { + return key + } + + if _, ok := taintConfig.StartupTaints[key]; ok { + return startupNodeTaintReportedType + } + for _, pref := range taintConfig.StartupTaintPrefixes { + if strings.HasPrefix(key, pref) { + return startupNodeTaintReportedType + } + } + + if _, ok := taintConfig.StatusTaints[key]; ok { + return statusNodeTaintReportedType + } + for _, pref := range taintConfig.StatusTaintPrefixes { + if strings.HasPrefix(key, pref) { + return statusNodeTaintReportedType + } + } + + return unlistedNodeTaintReportedType +} diff --git a/cluster-autoscaler/utils/taints/taints_test.go b/cluster-autoscaler/utils/taints/taints_test.go index fff009c81792..141728607709 100644 --- a/cluster-autoscaler/utils/taints/taints_test.go +++ b/cluster-autoscaler/utils/taints/taints_test.go @@ -572,3 +572,104 @@ func TestSanitizeTaints(t *testing.T) { assert.Equal(t, newTaints[0].Key, StatusTaintPrefix+"some-taint") assert.Equal(t, newTaints[1].Key, "test-taint") } + +func TestCountNodeTaints(t *testing.T) { + node := &apiv1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node-count-node-taints", + CreationTimestamp: metav1.NewTime(time.Now()), + }, + Spec: apiv1.NodeSpec{ + Taints: []apiv1.Taint{ + { + Key: IgnoreTaintPrefix + "another-taint", + Value: "myValue", + Effect: 
apiv1.TaintEffectNoSchedule, + }, + { + Key: StatusTaintPrefix + "some-taint", + Value: "myValue", + Effect: apiv1.TaintEffectNoSchedule, + }, + { + Key: StartupTaintPrefix + "some-taint", + Value: "myValue", + Effect: apiv1.TaintEffectNoSchedule, + }, + { + Key: "test-taint", + Value: "test2", + Effect: apiv1.TaintEffectNoSchedule, + }, + { + Key: ToBeDeletedTaint, + Value: "1", + Effect: apiv1.TaintEffectNoSchedule, + }, + { + Key: "ignore-me", + Value: "1", + Effect: apiv1.TaintEffectNoSchedule, + }, + { + Key: "status-me", + Value: "1", + Effect: apiv1.TaintEffectNoSchedule, + }, + { + Key: "node.kubernetes.io/memory-pressure", + Value: "1", + Effect: apiv1.TaintEffectNoSchedule, + }, + { + Key: "ignore-taint.cluster-autoscaler.kubernetes.io/to-be-ignored", + Value: "myValue2", + Effect: apiv1.TaintEffectNoSchedule, + }, + }, + }, + Status: apiv1.NodeStatus{ + Conditions: []apiv1.NodeCondition{}, + }, + } + node2 := &apiv1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "node-count-node-taints", + CreationTimestamp: metav1.NewTime(time.Now()), + }, + Spec: apiv1.NodeSpec{ + Taints: []apiv1.Taint{ + { + Key: StatusTaintPrefix + "some-taint", + Value: "myValue", + Effect: apiv1.TaintEffectNoSchedule, + }, + { + Key: "node.kubernetes.io/unschedulable", + Value: "1", + Effect: apiv1.TaintEffectNoSchedule, + }, + }, + }, + Status: apiv1.NodeStatus{ + Conditions: []apiv1.NodeCondition{}, + }, + } + taintConfig := TaintConfig{ + StartupTaints: map[string]bool{"ignore-me": true}, + StatusTaints: map[string]bool{"status-me": true}, + StartupTaintPrefixes: []string{IgnoreTaintPrefix, StartupTaintPrefix}, + StatusTaintPrefixes: []string{StatusTaintPrefix}, + } + want := map[string]int{ + "ignore-taint.cluster-autoscaler.kubernetes.io/": 2, + "ToBeDeletedByClusterAutoscaler": 1, + "node.kubernetes.io/memory-pressure": 1, + "node.kubernetes.io/unschedulable": 1, + "other": 1, + "startup-taint": 2, + "status-taint": 3, + } + got := CountNodeTaints([]*apiv1.Node{node, node2}, 
taintConfig) + assert.Equal(t, want, got) +}