Skip to content

Commit

Permalink
add cores and memory limits count metrics
Browse files Browse the repository at this point in the history
This change adds metrics for `cores_limits_count` and
`memory_limits_count`, each of which has one `direction` label with two
possible values (`minimum` and `maximum`). These metrics are set once during
initialization of the cluster autoscaler.

This change also adds the `max_nodes_count` metric to the metrics
proposal doc, as it was previously not recorded there.
  • Loading branch information
elmiko committed Mar 30, 2021
1 parent 6dcda9d commit dcbf12c
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 0 deletions.
2 changes: 2 additions & 0 deletions cluster-autoscaler/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,8 @@ func buildAutoscaler() (core.Autoscaler, error) {
// These metrics should be published only once.
metrics.UpdateNapEnabled(autoscalingOptions.NodeAutoprovisioningEnabled)
metrics.UpdateMaxNodesCount(autoscalingOptions.MaxNodesTotal)
metrics.UpdateCoresLimitsCount(autoscalingOptions.MinCoresTotal, autoscalingOptions.MaxCoresTotal)
metrics.UpdateMemoryLimitsCount(autoscalingOptions.MinMemoryTotal, autoscalingOptions.MaxMemoryTotal)

// Create autoscaler.
return core.NewAutoscaler(opts)
Expand Down
30 changes: 30 additions & 0 deletions cluster-autoscaler/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,22 @@ var (
},
)

// coresLimitsCount is a gauge vector exposing the configured cores limits of
// the cluster. It carries a single "direction" label; UpdateCoresLimitsCount
// populates it with the label values "minimum" and "maximum".
coresLimitsCount = k8smetrics.NewGaugeVec(
&k8smetrics.GaugeOpts{
Namespace: caNamespace,
Name: "cores_limits_count",
Help: "Minimum and maximum number of cores in the cluster.",
}, []string{"direction"},
)

// memoryLimitsCount is a gauge vector exposing the configured memory limits of
// the cluster, in bytes. It carries a single "direction" label;
// UpdateMemoryLimitsCount populates it with the label values "minimum" and
// "maximum".
memoryLimitsCount = k8smetrics.NewGaugeVec(
&k8smetrics.GaugeOpts{
Namespace: caNamespace,
Name: "memory_limits_count",
Help: "Minimum and maximum number of bytes of memory in cluster.",
}, []string{"direction"},
)

/**** Metrics related to autoscaler execution ****/
lastActivity = k8smetrics.NewGaugeVec(
&k8smetrics.GaugeOpts{
Expand Down Expand Up @@ -288,6 +304,8 @@ func RegisterAll() {
legacyregistry.MustRegister(nodeGroupsCount)
legacyregistry.MustRegister(unschedulablePodsCount)
legacyregistry.MustRegister(maxNodesCount)
legacyregistry.MustRegister(coresLimitsCount)
legacyregistry.MustRegister(memoryLimitsCount)
legacyregistry.MustRegister(lastActivity)
legacyregistry.MustRegister(functionDuration)
legacyregistry.MustRegister(functionDurationSummary)
Expand Down Expand Up @@ -364,6 +382,18 @@ func UpdateMaxNodesCount(nodesCount int) {
maxNodesCount.Set(float64(nodesCount))
}

// UpdateCoresLimitsCount publishes the cluster's cores limits on the
// coresLimitsCount gauge: minCoresCount is stored under the "minimum" label
// value and maxCoresCount under the "maximum" label value.
func UpdateCoresLimitsCount(minCoresCount int64, maxCoresCount int64) {
	// Gauges hold float64 samples, so convert the integer limits up front.
	lowerBound := float64(minCoresCount)
	upperBound := float64(maxCoresCount)

	coresLimitsCount.WithLabelValues("minimum").Set(lowerBound)
	coresLimitsCount.WithLabelValues("maximum").Set(upperBound)
}

// UpdateMemoryLimitsCount publishes the cluster's memory limits (in bytes) on
// the memoryLimitsCount gauge: minMemoryCount is stored under the "minimum"
// label value and maxMemoryCount under the "maximum" label value.
func UpdateMemoryLimitsCount(minMemoryCount int64, maxMemoryCount int64) {
	// Gauges hold float64 samples, so convert the integer limits up front.
	lowerBound := float64(minMemoryCount)
	upperBound := float64(maxMemoryCount)

	memoryLimitsCount.WithLabelValues("minimum").Set(lowerBound)
	memoryLimitsCount.WithLabelValues("maximum").Set(upperBound)
}

// RegisterError records any errors preventing Cluster Autoscaler from working.
// No more than one error should be recorded per loop.
func RegisterError(err errors.AutoscalerError) {
Expand Down
3 changes: 3 additions & 0 deletions cluster-autoscaler/proposals/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ All the metrics are prefixed with `cluster_autoscaler_`.
| nodes_count | Gauge | `state`=<node-state> | Number of nodes in cluster. |
| unschedulable_pods_count | Gauge | | Number of unschedulable ("Pending") pods in the cluster. |
| node_groups_count | Gauge | `node_group_type`=<node-group-type> | Number of node groups managed by CA. |
| max_nodes_count | Gauge | | Maximum number of nodes in all node groups. |
| cores_limits_count | Gauge | `direction`=<`minimum` or `maximum`> | Minimum and maximum number of cores in the cluster. |
| memory_limits_count | Gauge | `direction`=<`minimum` or `maximum`> | Minimum and maximum number of bytes of memory in cluster. |

* `cluster_safe_to_autoscale` indicates whether cluster is healthy enough for autoscaling. CA stops all operations if significant number of nodes are unready (by default 33% as of CA 0.5.4).
* `nodes_count` records the total number of nodes, labeled by node state. Possible
Expand Down

0 comments on commit dcbf12c

Please sign in to comment.