Skip to content

Commit

Permalink
Merge pull request #365 from grafana/fix-scaling-dashboard-for-multi-…
Browse files Browse the repository at this point in the history
…zone-deployments

Fix scaling dashboard to work on multi-zone ingesters
  • Loading branch information
pracucci authored Jul 28, 2021
2 parents 8c2d6c0 + 28561cb commit dccf32a
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 16 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
* [BUGFIX] Fixed `CortexIngesterHasNotShippedBlocks` alert false positive in case an ingester instance had ingested samples in the past, then no traffic was received for a long period and then it started receiving samples again. #308
* [BUGFIX] Alertmanager: fixed `--alertmanager.cluster.peers` CLI flag passed to alertmanager when HA is enabled. #329
* [BUGFIX] Fixed `CortexInconsistentRuntimeConfig` metric. #335
* [BUGFIX] Fixed scaling dashboard to work correctly when a Cortex service deployment spans multiple zones (the per-zone deployment or statefulset name is expected to end with the `-zone-[a-z]` suffix). #365

## 1.9.0 / 2021-05-18

Expand Down
74 changes: 58 additions & 16 deletions cortex-mixin/recording_rules.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -69,12 +69,20 @@ local utils = import 'mixin-utils/utils.libsonnet';
rules: [
{
// Convenience rule to get the number of replicas for both a deployment and a statefulset.
// Multi-zone deployments are grouped together by stripping the optional "-zone-X" suffix from the name.
record: 'cluster_namespace_deployment:actual_replicas:count',
expr: |||
sum by (cluster, namespace, deployment) (kube_deployment_spec_replicas)
or
sum by (cluster, namespace, deployment) (
label_replace(kube_statefulset_replicas, "deployment", "$1", "statefulset", "(.*)")
label_replace(
kube_deployment_spec_replicas,
# The question mark in "(.*?)" is used to make it non-greedy, otherwise it
# always matches everything and the (optional) zone is not removed.
"deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?"
)
)
or
sum by (cluster, namespace, deployment) (
label_replace(kube_statefulset_replicas, "deployment", "$1", "statefulset", "(.*?)(?:-zone-[a-z])?")
)
|||,
},
Expand Down Expand Up @@ -188,7 +196,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
expr: |||
ceil(
(sum by (cluster, namespace) (
cortex_ingester_tsdb_storage_blocks_bytes{job=~".+/ingester"}
cortex_ingester_tsdb_storage_blocks_bytes{job=~".+/ingester.*"}
) / 4)
/
avg by (cluster, namespace) (
Expand All @@ -199,18 +207,25 @@ local utils = import 'mixin-utils/utils.libsonnet';
},
{
// Convenience rule to get the CPU utilization for both a deployment and a statefulset.
// Multi-zone deployments are grouped together by stripping the optional "-zone-X" suffix from the name.
record: 'cluster_namespace_deployment:container_cpu_usage_seconds_total:sum_rate',
expr: |||
sum by (cluster, namespace, deployment) (
label_replace(
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate,
"deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
label_replace(
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_rate,
"deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
),
# The question mark in "(.*?)" is used to make it non-greedy, otherwise it
# always matches everything and the (optional) zone is not removed.
"deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?"
)
)
|||,
},
{
// Convenience rule to get the CPU request for both a deployment and a statefulset.
// Multi-zone deployments are grouped together by stripping the optional "-zone-X" suffix from the name.
record: 'cluster_namespace_deployment:kube_pod_container_resource_requests_cpu_cores:sum',
expr: |||
# This recording rule is made compatible with the breaking changes introduced in kube-state-metrics v2
Expand All @@ -223,8 +238,13 @@ local utils = import 'mixin-utils/utils.libsonnet';
(
sum by (cluster, namespace, deployment) (
label_replace(
kube_pod_container_resource_requests_cpu_cores,
"deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
label_replace(
kube_pod_container_resource_requests_cpu_cores,
"deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
),
# The question mark in "(.*?)" is used to make it non-greedy, otherwise it
# always matches everything and the (optional) zone is not removed.
"deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?"
)
)
)
Expand All @@ -234,8 +254,13 @@ local utils = import 'mixin-utils/utils.libsonnet';
(
sum by (cluster, namespace, deployment) (
label_replace(
kube_pod_container_resource_requests{resource="cpu"},
"deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
label_replace(
kube_pod_container_resource_requests{resource="cpu"},
"deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
),
# The question mark in "(.*?)" is used to make it non-greedy, otherwise it
# always matches everything and the (optional) zone is not removed.
"deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?"
)
)
)
Expand All @@ -261,18 +286,25 @@ local utils = import 'mixin-utils/utils.libsonnet';
},
{
// Convenience rule to get the Memory utilization for both a deployment and a statefulset.
// Multi-zone deployments are grouped together by stripping the optional "-zone-X" suffix from the name.
record: 'cluster_namespace_deployment:container_memory_usage_bytes:sum',
expr: |||
sum by (cluster, namespace, deployment) (
label_replace(
container_memory_usage_bytes,
"deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
label_replace(
container_memory_usage_bytes,
"deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
),
# The question mark in "(.*?)" is used to make it non-greedy, otherwise it
# always matches everything and the (optional) zone is not removed.
"deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?"
)
)
|||,
},
{
// Convenience rule to get the Memory request for both a deployment and a statefulset.
// Multi-zone deployments are grouped together by stripping the optional "-zone-X" suffix from the name.
record: 'cluster_namespace_deployment:kube_pod_container_resource_requests_memory_bytes:sum',
expr: |||
# This recording rule is made compatible with the breaking changes introduced in kube-state-metrics v2
Expand All @@ -285,8 +317,13 @@ local utils = import 'mixin-utils/utils.libsonnet';
(
sum by (cluster, namespace, deployment) (
label_replace(
kube_pod_container_resource_requests_memory_bytes,
"deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
label_replace(
kube_pod_container_resource_requests_memory_bytes,
"deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
),
# The question mark in "(.*?)" is used to make it non-greedy, otherwise it
# always matches everything and the (optional) zone is not removed.
"deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?"
)
)
)
Expand All @@ -296,8 +333,13 @@ local utils = import 'mixin-utils/utils.libsonnet';
(
sum by (cluster, namespace, deployment) (
label_replace(
kube_pod_container_resource_requests{resource="memory"},
"deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
label_replace(
kube_pod_container_resource_requests{resource="memory"},
"deployment", "$1", "pod", "(.*)-(?:([0-9]+)|([a-z0-9]+)-([a-z0-9]+))"
),
# The question mark in "(.*?)" is used to make it non-greedy, otherwise it
# always matches everything and the (optional) zone is not removed.
"deployment", "$1", "deployment", "(.*?)(?:-zone-[a-z])?"
)
)
)
Expand Down

0 comments on commit dccf32a

Please sign in to comment.