From 6d4346bf8916920e7867f2360c5fed578735d708 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stephan=20He=C3=9Felmann?= Date: Mon, 13 Nov 2023 23:23:59 +0100 Subject: [PATCH 01/13] fix: regenerate dashboards --- .../rhacs-cluster-overview-configmap.yaml | 8 +++--- .../rhacs-cluster-overview-dashboard.yaml | 8 +++--- ...cluster-resource-adjustment-configmap.yaml | 26 +++++++++---------- ...cluster-resource-adjustment-dashboard.yaml | 26 +++++++++---------- 4 files changed, 34 insertions(+), 34 deletions(-) diff --git a/resources/grafana/generated/dashboards/rhacs-cluster-overview-configmap.yaml b/resources/grafana/generated/dashboards/rhacs-cluster-overview-configmap.yaml index 6fff80ac..2c47daab 100644 --- a/resources/grafana/generated/dashboards/rhacs-cluster-overview-configmap.yaml +++ b/resources/grafana/generated/dashboards/rhacs-cluster-overview-configmap.yaml @@ -739,7 +739,7 @@ data: "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "availability_zone:strictly_worker_nodes:cpu_limit_ratio", + "expr": "availability_zone:acscs_worker_nodes:cpu_limit_ratio", "interval": "", "legendFormat": "Limit / {{availability_zone}}", "range": true, @@ -751,7 +751,7 @@ data: "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "availability_zone:strictly_worker_nodes:cpu_request_ratio", + "expr": "availability_zone:acscs_worker_nodes:cpu_request_ratio", "hide": false, "interval": "", "legendFormat": "Request / {{availability_zone}}", @@ -848,7 +848,7 @@ data: "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "availability_zone:strictly_worker_nodes:memory_limit_ratio", + "expr": "availability_zone:acscs_worker_nodes:memory_limit_ratio", "interval": "", "legendFormat": "Limit / {{availability_zone}}", "range": true, @@ -860,7 +860,7 @@ data: "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "availability_zone:strictly_worker_nodes:memory_request_ratio", + "expr": "availability_zone:acscs_worker_nodes:memory_request_ratio", "hide": 
false, "interval": "", "legendFormat": "Request / {{availability_zone}}", diff --git a/resources/grafana/generated/dashboards/rhacs-cluster-overview-dashboard.yaml b/resources/grafana/generated/dashboards/rhacs-cluster-overview-dashboard.yaml index 43bf9568..e6c0915f 100644 --- a/resources/grafana/generated/dashboards/rhacs-cluster-overview-dashboard.yaml +++ b/resources/grafana/generated/dashboards/rhacs-cluster-overview-dashboard.yaml @@ -739,7 +739,7 @@ spec: "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "availability_zone:strictly_worker_nodes:cpu_limit_ratio", + "expr": "availability_zone:acscs_worker_nodes:cpu_limit_ratio", "interval": "", "legendFormat": "Limit / {{availability_zone}}", "range": true, @@ -751,7 +751,7 @@ spec: "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "availability_zone:strictly_worker_nodes:cpu_request_ratio", + "expr": "availability_zone:acscs_worker_nodes:cpu_request_ratio", "hide": false, "interval": "", "legendFormat": "Request / {{availability_zone}}", @@ -848,7 +848,7 @@ spec: "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "availability_zone:strictly_worker_nodes:memory_limit_ratio", + "expr": "availability_zone:acscs_worker_nodes:memory_limit_ratio", "interval": "", "legendFormat": "Limit / {{availability_zone}}", "range": true, @@ -860,7 +860,7 @@ spec: "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "availability_zone:strictly_worker_nodes:memory_request_ratio", + "expr": "availability_zone:acscs_worker_nodes:memory_request_ratio", "hide": false, "interval": "", "legendFormat": "Request / {{availability_zone}}", diff --git a/resources/grafana/generated/dashboards/rhacs-cluster-resource-adjustment-configmap.yaml b/resources/grafana/generated/dashboards/rhacs-cluster-resource-adjustment-configmap.yaml index 72a9623d..4a545acb 100644 --- a/resources/grafana/generated/dashboards/rhacs-cluster-resource-adjustment-configmap.yaml +++ 
b/resources/grafana/generated/dashboards/rhacs-cluster-resource-adjustment-configmap.yaml @@ -921,7 +921,7 @@ data: "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "sum(strictly_worker_nodes)", + "expr": "sum(acscs_worker_nodes)", "legendFormat": "__auto", "range": true, "refId": "A" @@ -988,7 +988,7 @@ data: }, "editorMode": "code", "exemplar": false, - "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() strictly_worker_nodes) / sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() strictly_worker_nodes) / 1024 / 1024 / 1024", + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() acscs_worker_nodes) / sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() acscs_worker_nodes) / 1024 / 1024 / 1024", "instant": false, "legendFormat": "__auto", "range": true, @@ -1055,7 +1055,7 @@ data: "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "sum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() strictly_worker_nodes) / sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() strictly_worker_nodes) / 1024 / 1024 / 1024", + "expr": "sum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() acscs_worker_nodes) / sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() acscs_worker_nodes) / 1024 / 1024 / 1024", "legendFormat": "__auto", "range": true, "refId": "A" @@ -1160,7 +1160,7 @@ data: }, "editorMode": "code", "exemplar": false, - "expr": "sum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() strictly_worker_nodes)\n", + "expr": "sum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() acscs_worker_nodes)\n", "format": 
"time_series", "hide": false, "instant": false, @@ -1232,7 +1232,7 @@ data: }, "editorMode": "code", "exemplar": false, - "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() strictly_worker_nodes)", + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() acscs_worker_nodes)", "format": "time_series", "hide": false, "instant": false, @@ -1304,7 +1304,7 @@ data: }, "editorMode": "code", "exemplar": false, - "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() strictly_worker_nodes) / \nsum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() strictly_worker_nodes)\n ", + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() acscs_worker_nodes) / \nsum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() acscs_worker_nodes)\n ", "format": "time_series", "hide": false, "instant": false, @@ -1436,7 +1436,7 @@ data: }, "editorMode": "code", "exemplar": false, - "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() strictly_worker_nodes) / \nsum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() strictly_worker_nodes)\n ", + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() acscs_worker_nodes) / \nsum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() acscs_worker_nodes)\n ", "format": "time_series", "hide": false, "instant": false, @@ -1452,7 +1452,7 @@ data: }, "editorMode": "code", "exemplar": false, - "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() strictly_worker_nodes) \n 
", + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() acscs_worker_nodes) \n ", "format": "time_series", "hide": false, "instant": false, @@ -1548,7 +1548,7 @@ data: }, "editorMode": "code", "exemplar": false, - "expr": "sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() strictly_worker_nodes)\n", + "expr": "sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() acscs_worker_nodes)\n", "format": "time_series", "hide": false, "instant": false, @@ -1620,7 +1620,7 @@ data: }, "editorMode": "code", "exemplar": false, - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() strictly_worker_nodes)", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() acscs_worker_nodes)", "format": "time_series", "hide": false, "instant": false, @@ -1692,7 +1692,7 @@ data: }, "editorMode": "code", "exemplar": false, - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() strictly_worker_nodes) / sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() strictly_worker_nodes)\n", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() acscs_worker_nodes) / sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() acscs_worker_nodes)\n", "format": "time_series", "hide": false, "instant": false, @@ -1824,7 +1824,7 @@ data: }, "editorMode": "code", "exemplar": false, - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() strictly_worker_nodes) / sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() strictly_worker_nodes)\n", + "expr": 
"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() acscs_worker_nodes) / sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() acscs_worker_nodes)\n", "format": "time_series", "hide": false, "instant": false, @@ -1839,7 +1839,7 @@ data: "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() strictly_worker_nodes)\n", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() acscs_worker_nodes)\n", "hide": false, "legendFormat": "cpu absolute", "range": true, diff --git a/resources/grafana/generated/dashboards/rhacs-cluster-resource-adjustment-dashboard.yaml b/resources/grafana/generated/dashboards/rhacs-cluster-resource-adjustment-dashboard.yaml index 804e731c..48584e77 100644 --- a/resources/grafana/generated/dashboards/rhacs-cluster-resource-adjustment-dashboard.yaml +++ b/resources/grafana/generated/dashboards/rhacs-cluster-resource-adjustment-dashboard.yaml @@ -921,7 +921,7 @@ spec: "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "sum(strictly_worker_nodes)", + "expr": "sum(acscs_worker_nodes)", "legendFormat": "__auto", "range": true, "refId": "A" @@ -988,7 +988,7 @@ spec: }, "editorMode": "code", "exemplar": false, - "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() strictly_worker_nodes) / sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() strictly_worker_nodes) / 1024 / 1024 / 1024", + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() acscs_worker_nodes) / sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() acscs_worker_nodes) / 1024 / 1024 / 1024", "instant": 
false, "legendFormat": "__auto", "range": true, @@ -1055,7 +1055,7 @@ spec: "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "sum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() strictly_worker_nodes) / sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() strictly_worker_nodes) / 1024 / 1024 / 1024", + "expr": "sum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() acscs_worker_nodes) / sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() acscs_worker_nodes) / 1024 / 1024 / 1024", "legendFormat": "__auto", "range": true, "refId": "A" @@ -1160,7 +1160,7 @@ spec: }, "editorMode": "code", "exemplar": false, - "expr": "sum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() strictly_worker_nodes)\n", + "expr": "sum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() acscs_worker_nodes)\n", "format": "time_series", "hide": false, "instant": false, @@ -1232,7 +1232,7 @@ spec: }, "editorMode": "code", "exemplar": false, - "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() strictly_worker_nodes)", + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() acscs_worker_nodes)", "format": "time_series", "hide": false, "instant": false, @@ -1304,7 +1304,7 @@ spec: }, "editorMode": "code", "exemplar": false, - "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() strictly_worker_nodes) / \nsum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() strictly_worker_nodes)\n ", + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() acscs_worker_nodes) / 
\nsum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() acscs_worker_nodes)\n ", "format": "time_series", "hide": false, "instant": false, @@ -1436,7 +1436,7 @@ spec: }, "editorMode": "code", "exemplar": false, - "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() strictly_worker_nodes) / \nsum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() strictly_worker_nodes)\n ", + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() acscs_worker_nodes) / \nsum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() acscs_worker_nodes)\n ", "format": "time_series", "hide": false, "instant": false, @@ -1452,7 +1452,7 @@ spec: }, "editorMode": "code", "exemplar": false, - "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() strictly_worker_nodes) \n ", + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() acscs_worker_nodes) \n ", "format": "time_series", "hide": false, "instant": false, @@ -1548,7 +1548,7 @@ spec: }, "editorMode": "code", "exemplar": false, - "expr": "sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() strictly_worker_nodes)\n", + "expr": "sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() acscs_worker_nodes)\n", "format": "time_series", "hide": false, "instant": false, @@ -1620,7 +1620,7 @@ spec: }, "editorMode": "code", "exemplar": false, - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() strictly_worker_nodes)", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() acscs_worker_nodes)", "format": 
"time_series", "hide": false, "instant": false, @@ -1692,7 +1692,7 @@ spec: }, "editorMode": "code", "exemplar": false, - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() strictly_worker_nodes) / sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() strictly_worker_nodes)\n", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() acscs_worker_nodes) / sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() acscs_worker_nodes)\n", "format": "time_series", "hide": false, "instant": false, @@ -1824,7 +1824,7 @@ spec: }, "editorMode": "code", "exemplar": false, - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() strictly_worker_nodes) / sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() strictly_worker_nodes)\n", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() acscs_worker_nodes) / sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() acscs_worker_nodes)\n", "format": "time_series", "hide": false, "instant": false, @@ -1839,7 +1839,7 @@ spec: "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() strictly_worker_nodes)\n", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() acscs_worker_nodes)\n", "hide": false, "legendFormat": "cpu absolute", "range": true, From d37893e1ddc4157d0e9c0c4ca1bf62c9bea5da14 Mon Sep 17 00:00:00 2001 From: Stephan Hesselmann Date: Tue, 14 Nov 2023 14:59:23 +0100 Subject: [PATCH 02/13] feat: add `make generate` to pre-commit hook (#166) --- .pre-commit-config.yaml | 5 +++++ scripts/run-make-generate.sh 
| 19 +++++++++++++++++++ 2 files changed, 24 insertions(+) create mode 100755 scripts/run-make-generate.sh diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 17029307..73a15da0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -12,6 +12,11 @@ repos: - id: end-of-file-fixer - repo: local hooks: + - id: make-generate + name: make generate + entry: ./scripts/run-make-generate.sh + language: system + pass_filenames: false - id: grafana-lint name: grafana lint entry: ./scripts/lint-grafana.sh diff --git a/scripts/run-make-generate.sh b/scripts/run-make-generate.sh new file mode 100755 index 00000000..15629235 --- /dev/null +++ b/scripts/run-make-generate.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +set -eu + +if ! [ -x "$(command -v jb)" ]; then + go install -a github.com/jsonnet-bundler/jsonnet-bundler/cmd/jb@latest +fi + +if ! [ -x "$(command -v jsonnet)" ]; then + go install github.com/google/go-jsonnet/cmd/jsonnet@latest +fi + +if ! [ -x "$(command -v yq)" ]; then + go install github.com/mikefarah/yq/v4@latest +fi + +SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]:-$0}")" &>/dev/null && pwd 2>/dev/null)" + +make -C "$SCRIPT_DIR"/.. generate From d50aa6ab3da247a4fb9fc0d9a34ae10279aa2857 Mon Sep 17 00:00:00 2001 From: Ludovic Cleroux Date: Thu, 16 Nov 2023 15:36:29 +0100 Subject: [PATCH 03/13] ROX-20792: Add alert for operator OOM --- resources/prometheus/prometheus-rules.yaml | 31 +++++++++++++++++ .../RHACSOperatorMemoryUtilizationHigh.yaml | 34 +++++++++++++++++++ 2 files changed, 65 insertions(+) create mode 100644 resources/prometheus/unit_tests/RHACSOperatorMemoryUtilizationHigh.yaml diff --git a/resources/prometheus/prometheus-rules.yaml b/resources/prometheus/prometheus-rules.yaml index 974b65c4..126804e7 100644 --- a/resources/prometheus/prometheus-rules.yaml +++ b/resources/prometheus/prometheus-rules.yaml @@ -158,6 +158,37 @@ spec: summary: "Fleetshard synchronizer manages `{{ $value }}` centrals." 
description: "Fleetshard synchronizer manages `{{ $value }}` centrals. The number of Centrals should always be larger than zero in a working system. If it drops to or below zero, fleetshard synchronizer is assumed to be in a failed state." sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-managed-service-runbooks/blob/master/sops/dp-007-fleetshard-sync-reconciliation-error.md" + + - name: rhacs-operator + rules: + - expr: | + sum (kube_pod_info{namespace="rhacs"} + * on (pod, namespace) group_left() kube_pod_labels{namespace="rhacs", label_app="rhacs-operator"} + * on (pod, namespace) group_left(workload) namespace_workload_pod:kube_pod_owner:relabel) by (namespace, workload, pod) + record: rhacs_operator:namespace:workload:pod + - expr: | + rhacs_operator:namespace:workload:pod + * on (pod, namespace) group_left() sum(container_memory_max_usage_bytes{container!=""}) by (pod, namespace) + record: rhacs_operator:namespace:workload:pod:max_memory_usage_bytes + - expr: | + rhacs_operator:namespace:workload:pod + * on (pod, namespace) group_left() sum(container_spec_memory_limit_bytes{container!=""}) by (pod, namespace) + record: rhacs_operator:namespace:workload:pod:memory_limit_bytes + - expr: | + sum(rhacs_operator:namespace:workload:pod:max_memory_usage_bytes / rhacs_operator:namespace:workload:pod:memory_limit_bytes) + by (namespace, workload) + record: rhacs_operator:namespace:workload:max_memory_usage_ratio + - alert: RHACSOperatorMemoryUtilizationHigh + expr: | + rhacs_operator:namespace:workload:max_memory_usage_ratio > 0.6 + for: 5m + labels: + severity: warning + annotations: + summary: RHACS Operator '{{ $labels.workload }}' is reaching its memory limit. + description: The RHACS operator '{{ $labels.workload }}' reached {{ $value | humanizePercentage }} of its memory limit and is at risk of being OOM killed. 
+ sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-managed-service-runbooks/blob/master/sops/dp-011-rhacs-operator-unavailable.md" + - name: rhacs-aws-quota rules: - alert: RHACSCentralDBClustersUtilizationHigh diff --git a/resources/prometheus/unit_tests/RHACSOperatorMemoryUtilizationHigh.yaml b/resources/prometheus/unit_tests/RHACSOperatorMemoryUtilizationHigh.yaml new file mode 100644 index 00000000..3933dc4e --- /dev/null +++ b/resources/prometheus/unit_tests/RHACSOperatorMemoryUtilizationHigh.yaml @@ -0,0 +1,34 @@ +rule_files: + - /tmp/prometheus-rules-test.yaml + +evaluation_interval: 1m + +tests: + - interval: 1m + input_series: + - series: kube_pod_info{namespace="rhacs", pod="operator-pod"} + values: "1+0x20" + - series: kube_pod_labels{namespace="rhacs", pod="operator-pod", label_app="rhacs-operator"} + values: "1+0x20" + - series: namespace_workload_pod:kube_pod_owner:relabel{namespace="rhacs", pod="operator-pod", workload="operator-workload"} + values: "1+0x20" + - series: container_memory_max_usage_bytes{namespace="rhacs", pod="operator-pod", container="operator-container"} + values: "50+0x10 70+0x10" + - series: container_spec_memory_limit_bytes{namespace="rhacs", pod="operator-pod", container="operator-container"} + values: "100+0x20" + alert_rule_test: + - eval_time: 1m + alertname: RHACSOperatorMemoryUtilizationHigh + exp_alerts: [] + - eval_time: 16m + alertname: RHACSOperatorMemoryUtilizationHigh + exp_alerts: + - exp_labels: + alertname: RHACSOperatorMemoryUtilizationHigh + severity: warning + namespace: rhacs + workload: operator-workload + exp_annotations: + description: "The RHACS operator 'operator-workload' reached 70% of its memory limit and is at risk of being OOM killed." + summary: "RHACS Operator 'operator-workload' is reaching its memory limit." 
+ sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-managed-service-runbooks/blob/master/sops/dp-011-rhacs-operator-unavailable.md" From 89f95eee59ab9057c21596e0cbe30208902bb311 Mon Sep 17 00:00:00 2001 From: Ludovic Cleroux Date: Thu, 16 Nov 2023 15:41:09 +0100 Subject: [PATCH 04/13] ROX-20792: Add alert for operator OOM --- resources/prometheus/prometheus-rules.yaml | 2 +- .../unit_tests/RHACSOperatorMemoryUtilizationHigh.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/prometheus/prometheus-rules.yaml b/resources/prometheus/prometheus-rules.yaml index 126804e7..3e09833d 100644 --- a/resources/prometheus/prometheus-rules.yaml +++ b/resources/prometheus/prometheus-rules.yaml @@ -187,7 +187,7 @@ spec: annotations: summary: RHACS Operator '{{ $labels.workload }}' is reaching its memory limit. description: The RHACS operator '{{ $labels.workload }}' reached {{ $value | humanizePercentage }} of its memory limit and is at risk of being OOM killed. - sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-managed-service-runbooks/blob/master/sops/dp-011-rhacs-operator-unavailable.md" + sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-managed-service-runbooks/blob/master/sops/dp-037-operator-memory-high.md" - name: rhacs-aws-quota rules: diff --git a/resources/prometheus/unit_tests/RHACSOperatorMemoryUtilizationHigh.yaml b/resources/prometheus/unit_tests/RHACSOperatorMemoryUtilizationHigh.yaml index 3933dc4e..42d1fe46 100644 --- a/resources/prometheus/unit_tests/RHACSOperatorMemoryUtilizationHigh.yaml +++ b/resources/prometheus/unit_tests/RHACSOperatorMemoryUtilizationHigh.yaml @@ -31,4 +31,4 @@ tests: exp_annotations: description: "The RHACS operator 'operator-workload' reached 70% of its memory limit and is at risk of being OOM killed." summary: "RHACS Operator 'operator-workload' is reaching its memory limit." 
- sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-managed-service-runbooks/blob/master/sops/dp-011-rhacs-operator-unavailable.md" + sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-managed-service-runbooks/blob/master/sops/dp-037-operator-memory-high.md" From 51891a20bec8e89f0ec914c1cdab6a6d33ba7c96 Mon Sep 17 00:00:00 2001 From: Ludovic Cleroux Date: Thu, 16 Nov 2023 15:59:24 +0100 Subject: [PATCH 05/13] ROX-20792: Add critical alert for operator OOM --- resources/prometheus/prometheus-rules.yaml | 11 ++++++++ .../RHACSOperatorMemoryUtilizationHigh.yaml | 28 +++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/resources/prometheus/prometheus-rules.yaml b/resources/prometheus/prometheus-rules.yaml index 3e09833d..4b2be4b0 100644 --- a/resources/prometheus/prometheus-rules.yaml +++ b/resources/prometheus/prometheus-rules.yaml @@ -188,6 +188,17 @@ spec: summary: RHACS Operator '{{ $labels.workload }}' is reaching its memory limit. description: The RHACS operator '{{ $labels.workload }}' reached {{ $value | humanizePercentage }} of its memory limit and is at risk of being OOM killed. sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-managed-service-runbooks/blob/master/sops/dp-037-operator-memory-high.md" + - alert: RHACSOperatorMemoryUtilizationCritical + expr: | + rhacs_operator:namespace:workload:max_memory_usage_ratio > 0.9 + for: 5m + labels: + severity: critical + annotations: + summary: RHACS Operator '{{ $labels.workload }}' is critically close to its memory limit. + description: The RHACS operator '{{ $labels.workload }}' reached {{ $value | humanizePercentage }} of its memory limit and is at high risk of being OOM killed. 
+ sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-managed-service-runbooks/blob/master/sops/dp-037-operator-memory-high.md" + - name: rhacs-aws-quota rules: diff --git a/resources/prometheus/unit_tests/RHACSOperatorMemoryUtilizationHigh.yaml b/resources/prometheus/unit_tests/RHACSOperatorMemoryUtilizationHigh.yaml index 42d1fe46..8a01d044 100644 --- a/resources/prometheus/unit_tests/RHACSOperatorMemoryUtilizationHigh.yaml +++ b/resources/prometheus/unit_tests/RHACSOperatorMemoryUtilizationHigh.yaml @@ -32,3 +32,31 @@ tests: description: "The RHACS operator 'operator-workload' reached 70% of its memory limit and is at risk of being OOM killed." summary: "RHACS Operator 'operator-workload' is reaching its memory limit." sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-managed-service-runbooks/blob/master/sops/dp-037-operator-memory-high.md" + - interval: 1m + input_series: + - series: kube_pod_info{namespace="rhacs", pod="operator-pod"} + values: "1+0x20" + - series: kube_pod_labels{namespace="rhacs", pod="operator-pod", label_app="rhacs-operator"} + values: "1+0x20" + - series: namespace_workload_pod:kube_pod_owner:relabel{namespace="rhacs", pod="operator-pod", workload="operator-workload"} + values: "1+0x20" + - series: container_memory_max_usage_bytes{namespace="rhacs", pod="operator-pod", container="operator-container"} + values: "50+0x10 95+0x10" + - series: container_spec_memory_limit_bytes{namespace="rhacs", pod="operator-pod", container="operator-container"} + values: "100+0x20" + alert_rule_test: + - eval_time: 1m + alertname: RHACSOperatorMemoryUtilizationCritical + exp_alerts: [] + - eval_time: 17m + alertname: RHACSOperatorMemoryUtilizationCritical + exp_alerts: + - exp_labels: + alertname: RHACSOperatorMemoryUtilizationCritical + severity: critical + namespace: rhacs + workload: operator-workload + exp_annotations: + description: "The RHACS operator 'operator-workload' reached 95% of its memory limit and is at high risk of being OOM killed." 
+ summary: "RHACS Operator 'operator-workload' is critically close to its memory limit." + sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-managed-service-runbooks/blob/master/sops/dp-037-operator-memory-high.md" From 80b5395c965dbd4d505fe693e47cfbf2c4bcb806 Mon Sep 17 00:00:00 2001 From: Ludovic Cleroux Date: Thu, 16 Nov 2023 16:52:12 +0100 Subject: [PATCH 06/13] ROX-20792: Changed the alert to be by container --- resources/prometheus/prometheus-rules.yaml | 40 ++++++++++--------- .../RHACSOperatorMemoryUtilizationHigh.yaml | 32 +++++++-------- 2 files changed, 36 insertions(+), 36 deletions(-) diff --git a/resources/prometheus/prometheus-rules.yaml b/resources/prometheus/prometheus-rules.yaml index 4b2be4b0..f93b475f 100644 --- a/resources/prometheus/prometheus-rules.yaml +++ b/resources/prometheus/prometheus-rules.yaml @@ -162,41 +162,43 @@ spec: - name: rhacs-operator rules: - expr: | - sum (kube_pod_info{namespace="rhacs"} - * on (pod, namespace) group_left() kube_pod_labels{namespace="rhacs", label_app="rhacs-operator"} - * on (pod, namespace) group_left(workload) namespace_workload_pod:kube_pod_owner:relabel) by (namespace, workload, pod) - record: rhacs_operator:namespace:workload:pod + sum (namespace_workload_pod:kube_pod_owner:relabel{namespace="rhacs"} + * on (pod, namespace) group_left() kube_pod_labels{namespace="rhacs", label_app="rhacs-operator"}) + by (pod, namespace, workload) + record: rhacs_operator:namespace:workload:pod:container - expr: | - rhacs_operator:namespace:workload:pod - * on (pod, namespace) group_left() sum(container_memory_max_usage_bytes{container!=""}) by (pod, namespace) - record: rhacs_operator:namespace:workload:pod:max_memory_usage_bytes + sum(container_memory_max_usage_bytes{namespace="rhacs",container!~"POD|"}) by (container, pod, namespace) + * on (namespace, pod) group_left(workload) rhacs_operator:namespace:workload:pod:container + record: rhacs_operator:namespace:workload:pod:container:max_memory_usage_bytes - 
expr: | - rhacs_operator:namespace:workload:pod - * on (pod, namespace) group_left() sum(container_spec_memory_limit_bytes{container!=""}) by (pod, namespace) - record: rhacs_operator:namespace:workload:pod:memory_limit_bytes + sum(container_spec_memory_limit_bytes{namespace="rhacs",container!~"POD|"}) by (container, pod, namespace) + * on (namespace, pod) group_left(workload) rhacs_operator:namespace:workload:pod:container + record: rhacs_operator:namespace:workload:pod:container:memory_limit_bytes - expr: | - sum(rhacs_operator:namespace:workload:pod:max_memory_usage_bytes / rhacs_operator:namespace:workload:pod:memory_limit_bytes) - by (namespace, workload) - record: rhacs_operator:namespace:workload:max_memory_usage_ratio + max( + rhacs_operator:namespace:workload:pod:container:max_memory_usage_bytes + / rhacs_operator:namespace:workload:pod:container:memory_limit_bytes) + by (namespace, workload, container) + record: rhacs_operator:namespace:workload:container:max_memory_usage_ratio - alert: RHACSOperatorMemoryUtilizationHigh expr: | - rhacs_operator:namespace:workload:max_memory_usage_ratio > 0.6 + rhacs_operator:namespace:workload:container:max_memory_usage_ratio > 0.6 for: 5m labels: severity: warning annotations: - summary: RHACS Operator '{{ $labels.workload }}' is reaching its memory limit. - description: The RHACS operator '{{ $labels.workload }}' reached {{ $value | humanizePercentage }} of its memory limit and is at risk of being OOM killed. + summary: The container '{{ $labels.container }}' in operator '{{ $labels.workload }}' is reaching its memory limit. + description: The container '{{ $labels.container }}' in operator '{{ $labels.workload }}' reached {{ $value | humanizePercentage }} of its memory limit and is at risk of being OOM killed. 
sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-managed-service-runbooks/blob/master/sops/dp-037-operator-memory-high.md" - alert: RHACSOperatorMemoryUtilizationCritical expr: | - rhacs_operator:namespace:workload:max_memory_usage_ratio > 0.9 + rhacs_operator:namespace:workload:container:max_memory_usage_ratio > 0.9 for: 5m labels: severity: critical annotations: - summary: RHACS Operator '{{ $labels.workload }}' is critically close to its memory limit. - description: The RHACS operator '{{ $labels.workload }}' reached {{ $value | humanizePercentage }} of its memory limit and is at high risk of being OOM killed. + summary: The container '{{ $labels.container }}' in operator '{{ $labels.workload }}' is critically reaching its memory limit. + description: The container '{{ $labels.container }}' in operator '{{ $labels.workload }}' reached {{ $value | humanizePercentage }} of its memory limit and is at high risk of being OOM killed. sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-managed-service-runbooks/blob/master/sops/dp-037-operator-memory-high.md" diff --git a/resources/prometheus/unit_tests/RHACSOperatorMemoryUtilizationHigh.yaml b/resources/prometheus/unit_tests/RHACSOperatorMemoryUtilizationHigh.yaml index 8a01d044..70dd0fd2 100644 --- a/resources/prometheus/unit_tests/RHACSOperatorMemoryUtilizationHigh.yaml +++ b/resources/prometheus/unit_tests/RHACSOperatorMemoryUtilizationHigh.yaml @@ -6,15 +6,13 @@ evaluation_interval: 1m tests: - interval: 1m input_series: - - series: kube_pod_info{namespace="rhacs", pod="operator-pod"} + - series: namespace_workload_pod:kube_pod_owner:relabel{namespace="rhacs",workload="operator-workload",pod="operator-pod"} values: "1+0x20" - - series: kube_pod_labels{namespace="rhacs", pod="operator-pod", label_app="rhacs-operator"} + - series: kube_pod_labels{namespace="rhacs",label_app="rhacs-operator",pod="operator-pod"} values: "1+0x20" - - series: namespace_workload_pod:kube_pod_owner:relabel{namespace="rhacs", 
pod="operator-pod", workload="operator-workload"} - values: "1+0x20" - - series: container_memory_max_usage_bytes{namespace="rhacs", pod="operator-pod", container="operator-container"} + - series: container_memory_max_usage_bytes{namespace="rhacs", pod="operator-pod",container="manager"} values: "50+0x10 70+0x10" - - series: container_spec_memory_limit_bytes{namespace="rhacs", pod="operator-pod", container="operator-container"} + - series: container_spec_memory_limit_bytes{namespace="rhacs",pod="operator-pod",container="manager"} values: "100+0x20" alert_rule_test: - eval_time: 1m @@ -28,21 +26,20 @@ tests: severity: warning namespace: rhacs workload: operator-workload + container: manager exp_annotations: - description: "The RHACS operator 'operator-workload' reached 70% of its memory limit and is at risk of being OOM killed." - summary: "RHACS Operator 'operator-workload' is reaching its memory limit." + description: The container 'manager' in operator 'operator-workload' reached 70% of its memory limit and is at risk of being OOM killed. + summary: "The container 'manager' in operator 'operator-workload' is reaching its memory limit." 
sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-managed-service-runbooks/blob/master/sops/dp-037-operator-memory-high.md" - interval: 1m input_series: - - series: kube_pod_info{namespace="rhacs", pod="operator-pod"} - values: "1+0x20" - - series: kube_pod_labels{namespace="rhacs", pod="operator-pod", label_app="rhacs-operator"} + - series: namespace_workload_pod:kube_pod_owner:relabel{namespace="rhacs",workload="operator-workload",pod="operator-pod"} values: "1+0x20" - - series: namespace_workload_pod:kube_pod_owner:relabel{namespace="rhacs", pod="operator-pod", workload="operator-workload"} + - series: kube_pod_labels{namespace="rhacs",label_app="rhacs-operator",pod="operator-pod"} values: "1+0x20" - - series: container_memory_max_usage_bytes{namespace="rhacs", pod="operator-pod", container="operator-container"} - values: "50+0x10 95+0x10" - - series: container_spec_memory_limit_bytes{namespace="rhacs", pod="operator-pod", container="operator-container"} + - series: container_memory_max_usage_bytes{namespace="rhacs", pod="operator-pod",container="manager"} + values: "50+0x10 91+0x10" + - series: container_spec_memory_limit_bytes{namespace="rhacs",pod="operator-pod",container="manager"} values: "100+0x20" alert_rule_test: - eval_time: 1m @@ -56,7 +53,8 @@ tests: severity: critical namespace: rhacs workload: operator-workload + container: manager exp_annotations: - description: "The RHACS operator 'operator-workload' reached 95% of its memory limit and is at high risk of being OOM killed." - summary: "RHACS Operator 'operator-workload' is critically close to its memory limit." + description: The container 'manager' in operator 'operator-workload' reached 91% of its memory limit and is at high risk of being OOM killed. + summary: The container 'manager' in operator 'operator-workload' is critically reaching its memory limit. 
sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-managed-service-runbooks/blob/master/sops/dp-037-operator-memory-high.md" From d478dd6012e200e6fafcad23b1638e4d75400cb7 Mon Sep 17 00:00:00 2001 From: Ludovic Cleroux Date: Fri, 1 Dec 2023 16:17:52 +0100 Subject: [PATCH 07/13] ROX-21228: Capacity planning dashboard --- ...cluster-resource-adjustment-configmap.yaml | 4805 ++++++++++++++--- ...cluster-resource-adjustment-dashboard.yaml | 4805 ++++++++++++++--- .../rhacs-cluster-resource-adjustment.json | 4805 ++++++++++++++--- 3 files changed, 11724 insertions(+), 2691 deletions(-) diff --git a/resources/grafana/generated/dashboards/rhacs-cluster-resource-adjustment-configmap.yaml b/resources/grafana/generated/dashboards/rhacs-cluster-resource-adjustment-configmap.yaml index 4a545acb..cb0e1ba5 100644 --- a/resources/grafana/generated/dashboards/rhacs-cluster-resource-adjustment-configmap.yaml +++ b/resources/grafana/generated/dashboards/rhacs-cluster-resource-adjustment-configmap.yaml @@ -35,7 +35,7 @@ data: "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 25, + "id": 23, "links": [], "liveNow": false, "panels": [ @@ -47,11 +47,2025 @@ data: "x": 0, "y": 0 }, - "id": 2, + "id": 72, "panels": [], - "title": "Question 1: How big are tenant containers?", + "title": "Overview", "type": "row" }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 3, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 76, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "

Overview

", + "mode": "html" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 3, + "w": 9, + "x": 2, + "y": 4 + }, + "id": 48, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "

acscs-worker

", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 3, + "w": 9, + "x": 13, + "y": 4 + }, + "id": 78, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "

acscs-infra

", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "nodes" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 9, + "x": 2, + "y": 7 + }, + "id": 50, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(acscs_worker_nodes)", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "nodes" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 9, + "x": 13, + "y": 7 + }, + "id": 60, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(kube_node_role{role=\"acscs-infra\"})", + "legendFormat": "__auto", + "range": true, + "refId": 
"A" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 3, + "w": 20, + "x": 2, + "y": 9 + }, + "id": 92, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "

Memory

", + "mode": "html" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 2, + "y": 12 + }, + "id": 100, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "

used

", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 4, + "y": 12 + }, + "id": 9, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "/^memory$/", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() acscs_worker_nodes)", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "memory", + "range": false, + "refId": "memory" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "fixed" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "dark-red", + "value": null + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 6, + "y": 12 + }, + "id": 15, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "/^memory$/", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + 
"datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() acscs_worker_nodes) / \nsum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() acscs_worker_nodes)\n ", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "memory", + "range": false, + "refId": "memory" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 4, + "w": 1, + "x": 8, + "y": 12 + }, + "id": 96, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "\n \n
\n of\n
", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 2, + "x": 9, + "y": 12 + }, + "id": 51, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^memory$/", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() acscs_worker_nodes)\n", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "memory", + "range": false, + "refId": "memory" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 13, + "y": 12 + }, + "id": 102, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "

used

", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 15, + "y": 12 + }, + "id": 66, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^memory$/", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() kube_node_role{role=\"acscs-infra\"})", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "memory", + "range": false, + "refId": "memory" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "dark-red", + "value": null + }, + { + "color": "dark-orange", + "value": 40 + }, + { + "color": "yellow", + "value": 50 + }, + { + "color": "dark-green", + "value": 60 + }, + { + "color": "dark-yellow", + "value": 70 + }, + { + "color": "dark-orange", + "value": 80 + }, + { + "color": "dark-red", + "value": 90 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 17, 
+ "y": 12 + }, + "id": 109, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "/^memory$/", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() kube_node_role{role=\"acscs-infra\"}) / \nsum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() kube_node_role{role=\"acscs-infra\"})\n ", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "memory", + "range": false, + "refId": "memory" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 4, + "w": 1, + "x": 19, + "y": 12 + }, + "id": 97, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "\n \n
\n of\n
", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 2, + "x": 20, + "y": 12 + }, + "id": 65, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^memory$/", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() kube_node_role{role=\"acscs-infra\"})\n", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "memory", + "range": false, + "refId": "memory" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 2, + "y": 14 + }, + "id": 101, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "

requested

", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 4, + "y": 14 + }, + "id": 104, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^memory$/", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",container!=\"\"} * on (node) group_left() kube_node_role{role=\"acscs-worker\"})", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "memory", + "range": false, + "refId": "memory" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "dark-red", + "value": null + }, + { + "color": "dark-orange", + "value": 40 + }, + { + "color": "yellow", + "value": 50 + }, + { + "color": "dark-green", + "value": 60 + }, + { + "color": "dark-yellow", + "value": 70 + }, + { + "color": "dark-orange", + "value": 80 + }, + { + "color": "dark-red", + "value": 90 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + 
"x": 6, + "y": 14 + }, + "id": 108, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "/^memory$/", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",container!=\"\"} * on (node) group_left() kube_node_role{role=\"acscs-worker\"}) / \nsum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() acscs_worker_nodes)\n ", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "memory", + "range": false, + "refId": "memory" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 13, + "y": 14 + }, + "id": 103, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "

requested

", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 15, + "y": 14 + }, + "id": 106, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^memory$/", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",container!=\"\"} * on (node) group_left() kube_node_role{role=\"acscs-infra\"})", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "memory", + "range": false, + "refId": "memory" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "dark-red", + "value": null + }, + { + "color": "dark-orange", + "value": 40 + }, + { + "color": "yellow", + "value": 50 + }, + { + "color": "dark-green", + "value": 60 + }, + { + "color": "dark-yellow", + "value": 70 + }, + { + "color": "dark-orange", + "value": 80 + }, + { + "color": "dark-red", + "value": 90 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + 
"x": 17, + "y": 14 + }, + "id": 110, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "/^memory$/", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",container!=\"\"} * on (node) group_left() kube_node_role{role=\"acscs-infra\"}) / \nsum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() kube_node_role{role=\"acscs-infra\"})\n ", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "memory", + "range": false, + "refId": "memory" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "memory %" + }, + "properties": [ + 
{ + "id": "custom.scaleDistribution", + "value": { + "log": 2, + "type": "log" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "memory abs" + }, + "properties": [ + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "unit", + "value": "decbytes" + } + ] + } + ] + }, + "gridPos": { + "h": 4, + "w": 9, + "x": 2, + "y": 16 + }, + "id": 63, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() acscs_worker_nodes) / \nsum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() acscs_worker_nodes)\n ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "memory %", + "range": true, + "refId": "memory %" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() acscs_worker_nodes) \n ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "memory abs", + "range": true, + "refId": "memory absolute" + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, 
+ "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "memory %" + }, + "properties": [ + { + "id": "custom.scaleDistribution", + "value": { + "log": 2, + "type": "log" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "memory abs" + }, + "properties": [ + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "unit", + "value": "decbytes" + } + ] + } + ] + }, + "gridPos": { + "h": 4, + "w": 9, + "x": 13, + "y": 16 + }, + "id": 64, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() kube_node_role{role=\"acscs-infra\"}) / \nsum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() kube_node_role{role=\"acscs-infra\"})\n ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "memory %", + "range": true, + "refId": "memory %" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + 
"expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() kube_node_role{role=\"acscs-infra\"}) \n ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "memory abs", + "range": true, + "refId": "memory absolute" + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 3, + "w": 20, + "x": 2, + "y": 20 + }, + "id": 93, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "

CPU

", + "mode": "html" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 2, + "y": 23 + }, + "id": 111, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "

used

", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "cores" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 4, + "y": 23 + }, + "id": 10, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^cpu$/", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() acscs_worker_nodes)", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "cpu", + "range": false, + "refId": "cpu" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 6, + "y": 23 + }, + "id": 16, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^cpu$/", + "values": false + }, + "textMode": "value" + }, + 
"pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() acscs_worker_nodes) / sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() acscs_worker_nodes)\n", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "cpu", + "range": false, + "refId": "cpu" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 4, + "w": 1, + "x": 8, + "y": 23 + }, + "id": 98, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "\n \n
\n of\n
", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "cores" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 2, + "x": 9, + "y": 23 + }, + "id": 3, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^cpu$/", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() acscs_worker_nodes)\n", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "cpu", + "range": false, + "refId": "cpu" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 13, + "y": 23 + }, + "id": 113, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "

used

", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "cores" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 15, + "y": 23 + }, + "id": 69, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^cpu$/", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() kube_node_role{role=\"acscs-infra\"})", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "cpu", + "range": false, + "refId": "cpu" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 17, + "y": 23 + }, + "id": 70, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^cpu$/", + "values": false + }, + 
"textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() kube_node_role{role=\"acscs-infra\"}) / sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() kube_node_role{role=\"acscs-infra\"})\n", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "cpu", + "range": false, + "refId": "cpu" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 4, + "w": 1, + "x": 19, + "y": 23 + }, + "id": 99, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "\n \n
\n of\n
", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "cores" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 2, + "x": 20, + "y": 23 + }, + "id": 68, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^cpu$/", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() kube_node_role{role=\"acscs-infra\"})\n", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "cpu", + "range": false, + "refId": "cpu" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 2, + "y": 25 + }, + "id": 112, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "

requested

", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, { "datasource": { "type": "prometheus", @@ -61,38 +2075,7 @@ data: "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "log" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "mode": "thresholds" }, "mappings": [], "thresholds": { @@ -108,82 +2091,290 @@ data: } ] }, - "unit": "cpu" + "unit": "core" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 1 + "h": 2, + "w": 2, + "x": 4, + "y": 25 }, - "id": 4, + "id": 114, "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "right", - "showLegend": true + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^memory$/", + "values": false }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "textMode": "value" }, - "repeat": "TenantContainers", - "repeatDirection": "v", + "pluginVersion": "10.2.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "editorMode": "builder", - "expr": "quantile(1, node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container=\"$TenantContainers\", namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\"})", - "legendFormat": "100%ile", - "range": true, - "refId": "A" + "editorMode": "code", + "exemplar": false, + "expr": 
"sum(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",container!=\"\"} * on (node) group_left() kube_node_role{role=\"acscs-worker\"})", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "memory", + "range": false, + "refId": "memory" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 6, + "y": 25 + }, + "id": 115, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^cpu$/", + "values": false }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "editorMode": "builder", - "expr": "quantile(0.95, node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container=\"$TenantContainers\", namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\"})", + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",container!=\"\"} * on (node) group_left() kube_node_role{role=\"acscs-worker\"}) / sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() acscs_worker_nodes)\n", + "format": "time_series", "hide": false, - "legendFormat": "95%ile", - "range": true, - "refId": "B" + "instant": true, + "interval": "", + "legendFormat": "cpu", + "range": false, + "refId": "cpu" + } + ], + "type": "stat" + }, + { + 
"datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 13, + "y": 25 + }, + "id": 116, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "

requested

", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "core" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 15, + "y": 25 + }, + "id": 117, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^memory$/", + "values": false }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "editorMode": "builder", - "expr": "quantile(0.5, node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container=\"$TenantContainers\", namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\"})", + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",container!=\"\"} * on (node) group_left() kube_node_role{role=\"acscs-infra\"})", + "format": "time_series", "hide": false, - "legendFormat": "50%ile", - "range": true, - "refId": "C" + "instant": true, + "interval": "", + "legendFormat": "memory", + "range": false, + "refId": "memory" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + 
"overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 17, + "y": 25 + }, + "id": 118, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^cpu$/", + "values": false }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "editorMode": "builder", - "expr": "quantile(0.1, node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container=\"$TenantContainers\", namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\"})", + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",container!=\"\"} * on (node) group_left() kube_node_role{role=\"acscs-infra\"}) / sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() kube_node_role{role=\"acscs-infra\"})\n", + "format": "time_series", "hide": false, - "legendFormat": "10%ile", - "range": true, - "refId": "D" + "instant": true, + "interval": "", + "legendFormat": "cpu", + "range": false, + "refId": "cpu" } ], - "title": "Tenant $TenantContainers Container CPU Usage", - "type": "timeseries" + "type": "stat" }, { "datasource": { @@ -197,6 +2388,7 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -210,12 +2402,12 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { - "log": 2, - "type": "log" + "type": "linear" }, "showPoints": "auto", "spanNulls": false, @@ -241,96 +2433,94 @@ data: } ] }, - "unit": "bytes" + "unit": "percentunit" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "cpu absolute" + }, + 
"properties": [ + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "unit", + "value": "cores" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cpu %" + }, + "properties": [ + { + "id": "custom.scaleDistribution", + "value": { + "log": 2, + "type": "log" + } + } + ] + } + ] }, "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 1 + "h": 4, + "w": 9, + "x": 2, + "y": 27 }, - "id": 5, + "id": 52, "options": { "legend": { "calcs": [], "displayMode": "list", - "placement": "right", - "showLegend": true + "placement": "bottom", + "showLegend": false }, "tooltip": { - "mode": "single", + "mode": "multi", "sort": "none" } }, - "repeat": "TenantContainers", - "repeatDirection": "v", + "pluginVersion": "9.4.7", "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "editorMode": "builder", - "expr": "quantile(1, container_memory_working_set_bytes{container=\"$TenantContainers\", namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\"})", - "legendFormat": "100%ile", - "range": true, - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "builder", - "expr": "quantile(0.95, container_memory_working_set_bytes{container=\"$TenantContainers\", namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\"})", - "hide": false, - "legendFormat": "95%ile", - "range": true, - "refId": "B" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "builder", - "expr": "quantile(0.5, container_memory_working_set_bytes{container=\"$TenantContainers\", namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\"})", + "editorMode": "code", + "exemplar": false, + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() 
acscs_worker_nodes) / sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() acscs_worker_nodes)\n", + "format": "time_series", "hide": false, - "legendFormat": "50%ile", + "instant": false, + "interval": "", + "legendFormat": "cpu %", "range": true, - "refId": "C" + "refId": "cpu %" }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "editorMode": "builder", - "expr": "quantile(0.1, container_memory_working_set_bytes{container=\"$TenantContainers\", namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\"})", + "editorMode": "code", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() acscs_worker_nodes)\n", "hide": false, - "legendFormat": "10%ile", + "legendFormat": "cpu absolute", "range": true, - "refId": "D" + "refId": "cpu absolute" } ], - "title": "Tenant $TenantContainers Container Memory Usage", "type": "timeseries" }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 33 - }, - "id": 17, - "panels": [], - "title": "Question 2: How big are tenant namespaces?", - "type": "row" - }, { "datasource": { "type": "prometheus", @@ -343,6 +2533,7 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -356,12 +2547,12 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { - "log": 2, - "type": "log" + "type": "linear" }, "showPoints": "auto", "spanNulls": false, @@ -386,134 +2577,224 @@ data: "value": 80 } ] - } + }, + "unit": "percentunit" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "cpu absolute" + }, + "properties": [ + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "unit", + "value": "cores" + } + ] + }, + { + 
"matcher": { + "id": "byName", + "options": "cpu %" + }, + "properties": [ + { + "id": "custom.scaleDistribution", + "value": { + "log": 2, + "type": "log" + } + } + ] + } + ] }, "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 34 + "h": 4, + "w": 9, + "x": 13, + "y": 27 }, - "id": 28, + "id": 71, "options": { "legend": { "calcs": [], "displayMode": "list", - "placement": "right", - "showLegend": true + "placement": "bottom", + "showLegend": false }, "tooltip": { - "mode": "single", + "mode": "multi", "sort": "none" } }, "pluginVersion": "9.4.7", - "repeatDirection": "v", "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "editorMode": "builder", + "editorMode": "code", "exemplar": false, - "expr": "quantile(1, sum by(namespace) (node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() kube_node_role{role=\"acscs-infra\"}) / sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() kube_node_role{role=\"acscs-infra\"})\n", "format": "time_series", - "legendFormat": "100%ile", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "cpu %", "range": true, - "refId": "A" + "refId": "cpu %" }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "editorMode": "builder", - "exemplar": false, - "expr": "quantile(.95, sum by(namespace) (node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", - "format": "time_series", + "editorMode": "code", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() 
kube_node_role{role=\"acscs-infra\"})\n", "hide": false, - "legendFormat": "95%ile", + "legendFormat": "cpu absolute", "range": true, - "refId": "B" + "refId": "cpu absolute" + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 3, + "w": 20, + "x": 2, + "y": 31 + }, + "id": 85, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "content": "", + "mode": "html" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "shades" }, - "editorMode": "builder", - "exemplar": false, - "expr": "quantile(0.50, sum by(namespace) (node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", - "format": "time_series", - "hide": false, - "legendFormat": "50%ile", - "range": true, - "refId": "C" + "decimals": 0, + "mappings": [], + "max": 8, + "min": 6, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "GB/core" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 7, + "x": 2, + "y": 34 + }, + "id": 21, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" - }, - "editorMode": "builder", - "exemplar": false, - "expr": "quantile(0.1, sum by(namespace) 
(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", - "format": "time_series", - "hide": false, - "legendFormat": "10%ile", - "range": true, - "refId": "D" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() acscs_worker_nodes) / sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() acscs_worker_nodes) / 1024 / 1024 / 1024", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" } ], - "title": "Tenant Namespace CPU Usage", - "transformations": [], - "type": "timeseries" + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 9, + "y": 34 + }, + "id": 79, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "How many GB per CPU do nodes have?\n\nThis corresponds to either memory-optimized, cpu-optimized or general purpose GB/CPU ratios.", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "description": "", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "log" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": 
"off" - } + "mode": "thresholds" }, + "decimals": 0, "mappings": [], "thresholds": { "mode": "absolute", @@ -528,147 +2809,60 @@ data: } ] }, - "unit": "bytes" + "unit": "GB/core" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 8, - "x": 8, + "h": 4, + "w": 7, + "x": 15, "y": 34 }, - "id": 29, + "id": 62, "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "right", - "showLegend": true + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "textMode": "value" }, - "pluginVersion": "9.4.7", - "repeatDirection": "v", + "pluginVersion": "10.2.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "editorMode": "builder", + "editorMode": "code", "exemplar": false, - "expr": "quantile(1, sum by(namespace) (container_memory_working_set_bytes{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", - "format": "time_series", - "legendFormat": "100%ile", - "range": true, + "expr": "sum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() kube_node_role{role=\"acscs-infra\"}) / sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() kube_node_role{role=\"acscs-infra\"}) / 1024 / 1024 / 1024", + "instant": true, + "legendFormat": "__auto", + "range": false, "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "builder", - "exemplar": false, - "expr": "quantile(0.95, sum by(namespace) (container_memory_working_set_bytes{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", - "format": "time_series", - "hide": false, - "legendFormat": "95%ile", - 
"range": true, - "refId": "B" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "builder", - "exemplar": false, - "expr": "quantile(0.5, sum by(namespace) (container_memory_working_set_bytes{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", - "format": "time_series", - "hide": false, - "legendFormat": "50%ile", - "range": true, - "refId": "C" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "builder", - "exemplar": false, - "expr": "quantile(0.1, sum by(namespace) (container_memory_working_set_bytes{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", - "format": "time_series", - "hide": false, - "legendFormat": "10%ile", - "range": true, - "refId": "D" } ], - "title": "Tenant Namespace Memory Usage", - "transformations": [], - "type": "timeseries" - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 42 - }, - "id": 31, - "panels": [], - "title": "Question 3: Which tenants are in the \"big head\" and need XL overrides?", - "type": "row" + "type": "stat" }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "description": "", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "log" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "mode": 
"thresholds" }, + "decimals": 0, "mappings": [], "thresholds": { "mode": "absolute", @@ -682,91 +2876,83 @@ data: "value": 80 } ] - } + }, + "unit": "GB/core" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 43 + "h": 4, + "w": 7, + "x": 2, + "y": 38 }, - "id": 46, + "id": 22, "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "right", - "showLegend": true + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "textMode": "value" }, - "pluginVersion": "9.4.7", - "repeatDirection": "v", + "pluginVersion": "10.2.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "editorMode": "builder", + "editorMode": "code", "exemplar": false, - "expr": "topk(5, sum by(namespace) (node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", - "format": "time_series", + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() acscs_worker_nodes) / sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() acscs_worker_nodes) / 1024 / 1024 / 1024", + "instant": true, "legendFormat": "__auto", - "range": true, + "range": false, "refId": "A" } ], - "title": "Tenant Namespace CPU Usage", - "transformations": [], - "type": "timeseries" + "type": "stat" }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - 
"drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "log" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "gridPos": { + "h": 4, + "w": 6, + "x": 9, + "y": 38 + }, + "id": 80, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "How many GB / CPU are actually used\n\nThis can help to choose a more appropriate node type, either memory-optimized, cpu-optimized or general-purpose", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" }, "mappings": [], "thresholds": { @@ -782,31 +2968,32 @@ data: } ] }, - "unit": "bytes" + "unit": "GB/core" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 43 + "h": 4, + "w": 7, + "x": 15, + "y": 38 }, - "id": 47, + "id": 61, "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "right", - "showLegend": true + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "textMode": "value" }, - "pluginVersion": "9.4.7", - "repeatDirection": "v", + "pluginVersion": "10.2.0", "targets": [ { "datasource": { @@ -815,30 +3002,38 @@ data: }, "editorMode": "code", "exemplar": false, - "expr": "topk(5, sum by(namespace) (avg_over_time(container_memory_working_set_bytes{namespace=~\"rhacs-.*\", 
namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}[6h])))", - "format": "time_series", + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() kube_node_role{role=\"acscs-infra\"}) / sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() kube_node_role{role=\"acscs-infra\"}) / 1024 / 1024 / 1024", "instant": true, "legendFormat": "__auto", - "range": true, + "range": false, "refId": "A" } ], - "title": "Tenant Namespace Memory Usage", - "transformations": [], - "type": "timeseries" + "type": "stat" }, { - "collapsed": true, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 51 + "h": 3, + "w": 20, + "x": 2, + "y": 42 }, - "id": 37, - "panels": [], - "title": "Question 4: What is our overall worker node CPU to Memory Profile? (Pick a worker node type)", - "type": "row" + "id": 125, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "", + "mode": "html" + }, + "pluginVersion": "10.2.0", + "type": "text" }, { "datasource": { @@ -847,21 +3042,21 @@ data: }, "gridPos": { "h": 3, - "w": 5, - "x": 0, - "y": 52 + "w": 20, + "x": 2, + "y": 45 }, - "id": 48, + "id": 128, "options": { "code": { "language": "plaintext", "showLineNumbers": false, "showMiniMap": false }, - "content": "# Worker Nodes Resources\n\nThis dashboard shows the resource consumption exclusively for **worker nodes**", + "content": "# Fine-Grained Adjustments\n\nFind workloads that are over or under-provisioned", "mode": "markdown" }, - "pluginVersion": "9.4.7", + "pluginVersion": "10.2.0", "type": "text" }, { @@ -874,6 +3069,13 @@ data: "color": { "mode": "thresholds" }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, "mappings": [], "thresholds": { 
"mode": "absolute", @@ -888,32 +3090,85 @@ data: } ] }, - "unit": "nodes" + "unit": "decbytes" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "CPU Usage / Request" + }, + "properties": [ + { + "id": "unit", + "value": "percentunit" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Memory Usage / Request" + }, + "properties": [ + { + "id": "unit", + "value": "percentunit" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "CPU Usage" + }, + "properties": [ + { + "id": "unit", + "value": "core" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "CPU Requests" + }, + "properties": [ + { + "id": "unit", + "value": "core" + } + ] + } + ] }, "gridPos": { - "h": 3, - "w": 2, - "x": 5, - "y": 52 + "h": 13, + "w": 10, + "x": 2, + "y": 48 }, - "id": 5, + "id": 127, "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], + "cellHeight": "sm", + "footer": { + "countRows": false, "fields": "", - "values": false + "reducer": [ + "sum" + ], + "show": false }, - "textMode": "auto" + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "Memory Usage / Request" + } + ] }, - "pluginVersion": "9.4.7", + "pluginVersion": "10.2.0", "targets": [ { "datasource": { @@ -921,14 +3176,137 @@ data: "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "sum(acscs_worker_nodes)", - "legendFormat": "__auto", - "range": true, + "exemplar": false, + "expr": "sum(container_memory_usage_bytes{container!=\"\",container!=\"POD\",job=\"kubelet\"} * on (node) group_left() kube_node_role{role=\"acscs-worker\"} * on (namespace, pod) group_left(workload) namespace_workload_pod:kube_pod_owner:relabel) by (workload)", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "{{workload}}", + "range": false, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": 
"PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",container!~\"|POD\"} * on (node) group_left() kube_node_role{role=\"acscs-worker\"} * on (namespace, pod) group_left(workload) namespace_workload_pod:kube_pod_owner:relabel) by (workload)", + "format": "table", + "instant": true, + "legendFormat": "{{workload}}", + "range": false, "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!~\"|POD\"} * on (node) group_left() kube_node_role{role=\"acscs-worker\"} * on (namespace, pod) group_left(workload) namespace_workload_pod:kube_pod_owner:relabel) by (workload)", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "{{workload}}", + "range": false, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",container!~\"|POD\"} * on (node) group_left() kube_node_role{role=\"acscs-worker\"} * on (namespace, pod) group_left(workload) namespace_workload_pod:kube_pod_owner:relabel) by (workload)", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "{{workload}}", + "range": false, + "refId": "D" } ], - "title": "Worker nodes", - "type": "stat" + "transformations": [ + { + "id": "joinByField", + "options": { + "byField": "workload", + "mode": "inner" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Time 1": true, + "Time 2": true, + "Time 3": true, + "Time 4": true + }, + "indexByName": {}, + "renameByName": { + "Time 1": "", + "Time 2": "", + "Value #A": "Memory Requests", + "Value #B": "Memory Usage", + "Value #C": "CPU Usage", + "Value 
#D": "CPU Requests" + } + } + }, + { + "id": "calculateField", + "options": { + "alias": "Memory Usage / Request", + "binary": { + "left": "Memory Usage", + "operator": "/", + "right": "Memory Requests" + }, + "mode": "binary", + "reduce": { + "reducer": "sum" + } + } + }, + { + "id": "calculateField", + "options": { + "alias": "CPU Usage / Request", + "binary": { + "left": "CPU Usage", + "operator": "/", + "right": "CPU Requests" + }, + "mode": "binary", + "reduce": { + "reducer": "sum" + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "CPU Requests": 5, + "CPU Usage": 4, + "CPU Usage / Request": 6, + "Memory Requests": 2, + "Memory Usage": 1, + "Memory Usage / Request": 3, + "workload": 0 + }, + "renameByName": {} + } + } + ], + "type": "table" }, { "datasource": { @@ -940,6 +3318,13 @@ data: "color": { "mode": "thresholds" }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, "mappings": [], "thresholds": { "mode": "absolute", @@ -954,33 +3339,101 @@ data: } ] }, - "unit": "GB/core" + "unit": "decbytes" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "CPU Usage / Request" + }, + "properties": [ + { + "id": "unit", + "value": "percentunit" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Memory Usage / Request" + }, + "properties": [ + { + "id": "unit", + "value": "percentunit" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "CPU Usage" + }, + "properties": [ + { + "id": "unit", + "value": "core" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "CPU Requests" + }, + "properties": [ + { + "id": "unit", + "value": "core" + } + ] + } + ] }, "gridPos": { - "h": 3, - "w": 3, - "x": 7, - "y": 52 + "h": 13, + "w": 10, + "x": 12, + "y": 48 }, - "id": 22, + "id": 129, "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - 
"calcs": [ - "lastNotNull" - ], + "cellHeight": "sm", + "footer": { + "countRows": false, "fields": "", - "values": false + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "Memory Usage" + } + ] + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(container_memory_usage_bytes{container!=\"\",container!=\"POD\",job=\"kubelet\"} * on (node) group_left() kube_node_role{role=\"acscs-infra\"} * on (namespace, pod) group_left(workload) namespace_workload_pod:kube_pod_owner:relabel) by (workload)", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "{{workload}}", + "range": false, + "refId": "B" }, - "textMode": "auto" - }, - "pluginVersion": "9.4.7", - "targets": [ { "datasource": { "type": "prometheus", @@ -988,93 +3441,159 @@ data: }, "editorMode": "code", "exemplar": false, - "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() acscs_worker_nodes) / sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() acscs_worker_nodes) / 1024 / 1024 / 1024", - "instant": false, - "legendFormat": "__auto", - "range": true, + "expr": "sum(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",container!=\"POD\",container!=\"\"} * on (node) group_left() kube_node_role{role=\"acscs-infra\"} * on (namespace, pod) group_left(workload) namespace_workload_pod:kube_pod_owner:relabel) by (workload)", + "format": "table", + "instant": true, + "legendFormat": "{{workload}}", + "range": false, "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": 
"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!=\"POD\",container!=\"\"} * on (node) group_left() kube_node_role{role=\"acscs-infra\"} * on (namespace, pod) group_left(workload) namespace_workload_pod:kube_pod_owner:relabel) by (workload)", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "{{workload}}", + "range": false, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",container!=\"POD\",container!=\"\"} * on (node) group_left() kube_node_role{role=\"acscs-infra\"} * on (namespace, pod) group_left(workload) namespace_workload_pod:kube_pod_owner:relabel) by (workload)", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "{{workload}}", + "range": false, + "refId": "D" } ], - "title": "Used GB/CPU ratio", - "type": "stat" + "transformations": [ + { + "id": "joinByField", + "options": { + "byField": "workload", + "mode": "inner" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Time 1": true, + "Time 2": true, + "Time 3": true, + "Time 4": true + }, + "indexByName": {}, + "renameByName": { + "Time 1": "", + "Time 2": "", + "Value #A": "Memory Requests", + "Value #B": "Memory Usage", + "Value #C": "CPU Usage", + "Value #D": "CPU Requests" + } + } + }, + { + "id": "calculateField", + "options": { + "alias": "Memory Usage / Request", + "binary": { + "left": "Memory Usage", + "operator": "/", + "right": "Memory Requests" + }, + "mode": "binary", + "reduce": { + "reducer": "sum" + } + } + }, + { + "id": "calculateField", + "options": { + "alias": "CPU Usage / Request", + "binary": { + "left": "CPU Usage", + "operator": "/", + "right": "CPU Requests" + }, + "mode": "binary", + "reduce": { + "reducer": "sum" + } + } + }, + { + "id": "organize", + "options": { + 
"excludeByName": {}, + "indexByName": { + "CPU Requests": 5, + "CPU Usage": 4, + "CPU Usage / Request": 6, + "Memory Requests": 2, + "Memory Usage": 1, + "Memory Usage / Request": 3, + "workload": 0 + }, + "renameByName": {} + } + } + ], + "type": "table" }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "GB/core" - }, - "overrides": [] - }, "gridPos": { "h": 3, - "w": 3, - "x": 10, - "y": 52 + "w": 20, + "x": 2, + "y": 61 }, - "id": 21, + "id": 126, "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false }, - "textMode": "auto" + "content": "", + "mode": "html" }, - "pluginVersion": "9.4.7", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "sum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() acscs_worker_nodes) / sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() acscs_worker_nodes) / 1024 / 1024 / 1024", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "Nodes GB/CPU ratio", - "type": "stat" + "pluginVersion": "10.2.0", + "type": "text" }, { - "collapsed": true, + "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 55 + "y": 64 }, - "id": 39, + "id": 2, "panels": [], - "title": "Question 5: What is our overall cluster worker node utilization? 
(Scale node count up/down)", + "repeat": "TenantContainers", + "repeatDirection": "h", + "title": "How big is ${TenantContainers}", "type": "row" }, { @@ -1083,22 +3602,22 @@ data: "uid": "PBFA97CFB590B2093" }, "gridPos": { - "h": 3, - "w": 5, - "x": 0, - "y": 56 + "h": 8, + "w": 2, + "x": 4, + "y": 65 }, - "id": 20, + "id": 136, "options": { "code": { "language": "plaintext", "showLineNumbers": false, "showMiniMap": false }, - "content": "# Worker Nodes Resources\n\nThis dashboard shows the resource consumption exclusively for **worker nodes**", - "mode": "markdown" + "content": "\n \n
\n average ${TenantContainers}\n
", + "mode": "html" }, - "pluginVersion": "9.4.7", + "pluginVersion": "10.2.0", "type": "text" }, { @@ -1106,144 +3625,48 @@ data: "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "decbytes" - }, - "overrides": [] - }, "gridPos": { - "h": 3, + "h": 4, "w": 2, - "x": 5, - "y": 56 + "x": 6, + "y": 65 }, - "id": 2, + "id": 165, "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "/^memory$/", - "values": false + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false }, - "textMode": "auto" + "content": "\n \n
\n memory\n
", + "mode": "markdown" }, - "pluginVersion": "9.4.7", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "exemplar": false, - "expr": "sum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() acscs_worker_nodes)\n", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "legendFormat": "memory", - "range": true, - "refId": "memory" - } - ], - "title": "Total Memory", - "type": "stat" + "pluginVersion": "10.2.0", + "type": "text" }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "decbytes" - }, - "overrides": [] - }, "gridPos": { - "h": 3, + "h": 2, "w": 2, - "x": 7, - "y": 56 + "x": 8, + "y": 65 }, - "id": 9, + "id": 166, "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "/^memory$/", - "values": false + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false }, - "textMode": "auto" + "content": "
used
", + "mode": "markdown" }, - "pluginVersion": "9.4.7", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "exemplar": false, - "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() acscs_worker_nodes)", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "legendFormat": "memory", - "range": true, - "refId": "memory" - } - ], - "title": "Used Memory", - "type": "stat" + "pluginVersion": "10.2.0", + "type": "text" }, { "datasource": { @@ -1270,17 +3693,17 @@ data: } ] }, - "unit": "percentunit" + "unit": "decbytes" }, "overrides": [] }, "gridPos": { - "h": 3, + "h": 2, "w": 2, - "x": 9, - "y": 56 + "x": 10, + "y": 65 }, - "id": 15, + "id": 149, "options": { "colorMode": "none", "graphMode": "none", @@ -1288,33 +3711,36 @@ data: "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "/^memory$/", "values": false }, - "textMode": "auto" + "textMode": "value" }, - "pluginVersion": "9.4.7", + "pluginVersion": "10.2.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "disableTextWrap": false, "editorMode": "code", "exemplar": false, - "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() acscs_worker_nodes) / \nsum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() acscs_worker_nodes)\n ", + "expr": "quantile(0.5, node_namespace_pod_container:container_memory_working_set_bytes{container=\"$TenantContainers\",namespace=~\"rhacs-.{20}\"})", "format": "time_series", + "fullMetaSearch": false, "hide": false, - "instant": false, + "includeNullMetadata": true, + "instant": true, "interval": "", "legendFormat": "memory", - "range": true, - "refId": "memory" + "range": false, + "refId": "memory", + "useBackend": false } ], - "title": "Used Memory", 
"type": "stat" }, { @@ -1329,6 +3755,7 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1342,11 +3769,13 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { - "type": "linear" + "log": 2, + "type": "log" }, "showPoints": "auto", "spanNulls": false, @@ -1372,62 +3801,29 @@ data: } ] }, - "unit": "percentunit" + "unit": "bytes" }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "memory %" - }, - "properties": [ - { - "id": "custom.scaleDistribution", - "value": { - "log": 2, - "type": "log" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "memory abs" - }, - "properties": [ - { - "id": "custom.axisPlacement", - "value": "right" - }, - { - "id": "unit", - "value": "decbytes" - } - ] - } - ] + "overrides": [] }, "gridPos": { - "h": 8, - "w": 11, - "x": 0, - "y": 59 + "h": 4, + "w": 8, + "x": 12, + "y": 65 }, - "id": 18, + "id": 5, "options": { "legend": { "calcs": [], "displayMode": "list", - "placement": "bottom", - "showLegend": false + "placement": "right", + "showLegend": true }, "tooltip": { - "mode": "single", - "sort": "none" + "mode": "multi", + "sort": "desc" } }, - "pluginVersion": "9.4.7", "targets": [ { "datasource": { @@ -1435,34 +3831,61 @@ data: "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "exemplar": false, - "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() acscs_worker_nodes) / \nsum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() acscs_worker_nodes)\n ", - "format": "time_series", + "expr": "quantile(1, container_memory_working_set_bytes{container=\"$TenantContainers\",namespace=~\"rhacs-.{20}\"})", + "legendFormat": "p100", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": 
"prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "quantile(0.95, container_memory_working_set_bytes{container=\"$TenantContainers\",namespace=~\"rhacs-.{20}\"})", + "fullMetaSearch": false, "hide": false, - "instant": false, - "interval": "", - "legendFormat": "memory %", + "includeNullMetadata": true, + "legendFormat": "p95", "range": true, - "refId": "memory %" + "refId": "B", + "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "disableTextWrap": false, "editorMode": "code", - "exemplar": false, - "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() acscs_worker_nodes) \n ", - "format": "time_series", + "expr": "quantile(0.5, container_memory_working_set_bytes{container=\"$TenantContainers\",namespace=~\"rhacs-.{20}\"})", + "fullMetaSearch": false, "hide": false, - "instant": false, - "interval": "", - "legendFormat": "memory abs", + "includeNullMetadata": true, + "legendFormat": "p50", "range": true, - "refId": "memory absolute" + "refId": "C", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "quantile(0.10, container_memory_working_set_bytes{container=\"$TenantContainers\",namespace=~\"rhacs-.{20}\"})", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "legendFormat": "p10", + "range": true, + "refId": "D", + "useBackend": false } ], - "title": "Used Memory", + "title": "Tenant $TenantContainers Container Memory Usage", "type": "timeseries" }, { @@ -1471,22 +3894,22 @@ data: "uid": "PBFA97CFB590B2093" }, "gridPos": { - "h": 3, - "w": 5, - "x": 0, + "h": 2, + "w": 2, + "x": 8, "y": 67 }, - "id": 49, + "id": 167, "options": { "code": { "language": "plaintext", "showLineNumbers": false, "showMiniMap": false }, - "content": "# Worker Nodes 
Resources\n\nThis dashboard shows the resource consumption exclusively for **worker nodes**", + "content": "
requested
", "mode": "markdown" }, - "pluginVersion": "9.4.7", + "pluginVersion": "10.2.0", "type": "text" }, { @@ -1514,17 +3937,17 @@ data: } ] }, - "unit": "cores" + "unit": "decbytes" }, "overrides": [] }, "gridPos": { - "h": 3, + "h": 2, "w": 2, - "x": 5, + "x": 10, "y": 67 }, - "id": 3, + "id": 168, "options": { "colorMode": "none", "graphMode": "none", @@ -1532,35 +3955,86 @@ data: "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], - "fields": "/^cpu$/", + "fields": "/^memory$/", "values": false }, - "textMode": "auto" + "textMode": "value" }, - "pluginVersion": "9.4.7", + "pluginVersion": "10.2.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "disableTextWrap": false, "editorMode": "code", "exemplar": false, - "expr": "sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() acscs_worker_nodes)\n", + "expr": "quantile(0.5, kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",container=\"$TenantContainers\",namespace=~\"rhacs-.{20}\"})", "format": "time_series", + "fullMetaSearch": false, "hide": false, - "instant": false, + "includeNullMetadata": true, + "instant": true, "interval": "", - "legendFormat": "cpu", - "range": true, - "refId": "cpu" + "legendFormat": "memory", + "range": false, + "refId": "memory", + "useBackend": false } ], - "title": "Total CPU", "type": "stat" }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 4, + "w": 2, + "x": 6, + "y": 69 + }, + "id": 196, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "\n \n
\n cpu\n
", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 8, + "y": 69 + }, + "id": 197, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "
used
", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, { "datasource": { "type": "prometheus", @@ -1586,17 +4060,17 @@ data: } ] }, - "unit": "cores" + "unit": "core" }, "overrides": [] }, "gridPos": { - "h": 3, + "h": 2, "w": 2, - "x": 7, - "y": 67 + "x": 10, + "y": 69 }, - "id": 10, + "id": 198, "options": { "colorMode": "none", "graphMode": "none", @@ -1604,34 +4078,198 @@ data: "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "/^cpu$/", "values": false }, - "textMode": "auto" - }, - "pluginVersion": "9.4.7", - "targets": [ + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "code", + "exemplar": false, + "expr": "quantile(0.5, node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container=\"$TenantContainers\",namespace=~\"rhacs-.{20}\"})", + "format": "time_series", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": true, + "interval": "", + "legendFormat": "cpu", + "range": false, + "refId": "memory", + "useBackend": false + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "log" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + 
"mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "cpu" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 8, + "x": 12, + "y": 69 + }, + "id": 4, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "quantile(1, node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container=\"$TenantContainers\",namespace=~\"rhacs-.{20}\"})", + "legendFormat": "p100", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "quantile(0.95, node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container=\"$TenantContainers\",namespace=~\"rhacs-.{20}\"})", + "hide": false, + "legendFormat": "p95", + "range": true, + "refId": "B" + }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "disableTextWrap": false, "editorMode": "code", - "exemplar": false, - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() acscs_worker_nodes)", - "format": "time_series", + "expr": "quantile(0.5, node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container=\"$TenantContainers\",namespace=~\"rhacs-.{20}\"})", + "fullMetaSearch": false, "hide": false, - "instant": false, - "interval": "", - "legendFormat": "cpu", + "includeNullMetadata": true, + "legendFormat": "p50", "range": true, - "refId": "cpu" + "refId": "C", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "quantile(0.1, 
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container=\"$TenantContainers\",namespace=~\"rhacs-.{20}\"})", + "hide": false, + "legendFormat": "p10", + "range": true, + "refId": "D" } ], - "title": "Used CPU", - "type": "stat" + "title": "Tenant $TenantContainers Container CPU Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 8, + "y": 71 + }, + "id": 235, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "
requested
", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" }, { "datasource": { @@ -1658,17 +4296,17 @@ data: } ] }, - "unit": "percentunit" + "unit": "core" }, "overrides": [] }, "gridPos": { - "h": 3, + "h": 2, "w": 2, - "x": 9, - "y": 67 + "x": 10, + "y": 71 }, - "id": 16, + "id": 236, "options": { "colorMode": "none", "graphMode": "none", @@ -1676,184 +4314,557 @@ data: "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "/^cpu$/", "values": false }, - "textMode": "auto" + "textMode": "value" }, - "pluginVersion": "9.4.7", + "pluginVersion": "10.2.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "disableTextWrap": false, "editorMode": "code", "exemplar": false, - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() acscs_worker_nodes) / sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() acscs_worker_nodes)\n", + "expr": "quantile(0.5, kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",container=\"$TenantContainers\",namespace=~\"rhacs-.{20}\"})", "format": "time_series", + "fullMetaSearch": false, "hide": false, - "instant": false, + "includeNullMetadata": true, + "instant": true, "interval": "", "legendFormat": "cpu", - "range": true, - "refId": "cpu" + "range": false, + "refId": "memory", + "useBackend": false } ], - "title": "Used CPU", "type": "stat" }, { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 100 }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "id": 17, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - 
"fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "log" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 4, + "y": 6 + }, + "id": 28, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right", + "showLegend": true }, - "thresholdsStyle": { - "mode": "off" + "tooltip": { + "mode": "single", + "sort": "none" } }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + "pluginVersion": "9.4.7", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "cpu absolute" + "editorMode": "builder", + "exemplar": false, + "expr": "quantile(1, sum by(namespace) 
(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", + "format": "time_series", + "legendFormat": "100%ile", + "range": true, + "refId": "A" }, - "properties": [ - { - "id": "custom.axisPlacement", - "value": "right" + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" }, - { - "id": "unit", - "value": "cores" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cpu %" + "editorMode": "builder", + "exemplar": false, + "expr": "quantile(.95, sum by(namespace) (node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", + "format": "time_series", + "hide": false, + "legendFormat": "95%ile", + "range": true, + "refId": "B" }, - "properties": [ - { - "id": "custom.scaleDistribution", - "value": { + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "builder", + "exemplar": false, + "expr": "quantile(0.50, sum by(namespace) (node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", + "format": "time_series", + "hide": false, + "legendFormat": "50%ile", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "builder", + "exemplar": false, + "expr": "quantile(0.1, sum by(namespace) (node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", + "format": "time_series", + "hide": false, + "legendFormat": "10%ile", + "range": true, + "refId": 
"D" + } + ], + "title": "Tenant Namespace CPU Usage", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { "log": 2, "type": "log" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } - } - ] - } - ] - }, + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 12, + "y": 6 + }, + "id": 29, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "9.4.7", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "builder", + "exemplar": false, + "expr": "quantile(1, sum by(namespace) (container_memory_working_set_bytes{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", + "format": "time_series", + "legendFormat": "100%ile", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "builder", + "exemplar": false, + "expr": 
"quantile(0.95, sum by(namespace) (container_memory_working_set_bytes{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", + "format": "time_series", + "hide": false, + "legendFormat": "95%ile", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "builder", + "exemplar": false, + "expr": "quantile(0.5, sum by(namespace) (container_memory_working_set_bytes{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", + "format": "time_series", + "hide": false, + "legendFormat": "50%ile", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "builder", + "exemplar": false, + "expr": "quantile(0.1, sum by(namespace) (container_memory_working_set_bytes{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", + "format": "time_series", + "hide": false, + "legendFormat": "10%ile", + "range": true, + "refId": "D" + } + ], + "title": "Tenant Namespace Memory Usage", + "transformations": [], + "type": "timeseries" + } + ], + "title": "Question 2: How big are tenant namespaces?", + "type": "row" + }, + { + "collapsed": true, "gridPos": { - "h": 8, - "w": 11, + "h": 1, + "w": 24, "x": 0, - "y": 70 + "y": 101 }, - "id": 17, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "9.4.7", - "targets": [ + "id": 31, + "panels": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "editorMode": "code", - "exemplar": false, - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() 
acscs_worker_nodes) / sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() acscs_worker_nodes)\n", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "legendFormat": "cpu %", - "range": true, - "refId": "cpu %" + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "log" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 4, + "y": 7 + }, + "id": 46, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "9.4.7", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "builder", + "exemplar": false, + "expr": "topk(5, sum by(namespace) (node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", + "format": "time_series", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Tenant Namespace CPU Usage", + 
"transformations": [], + "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "editorMode": "code", - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() acscs_worker_nodes)\n", - "hide": false, - "legendFormat": "cpu absolute", - "range": true, - "refId": "cpu absolute" + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "log" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 12, + "y": 7 + }, + "id": 47, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "9.4.7", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "topk(5, sum by(namespace) (avg_over_time(container_memory_working_set_bytes{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}[6h])))", + "format": "time_series", + "instant": true, + 
"legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Tenant Namespace Memory Usage", + "transformations": [], + "type": "timeseries" } ], - "title": "Used CPU", - "type": "timeseries" + "title": "Question 3: Which tenants are in the \"big head\" and need XL overrides?", + "type": "row" } ], "refresh": "", "revision": 1, "schemaVersion": 38, - "style": "dark", "tags": [], "templating": { "list": [ diff --git a/resources/grafana/generated/dashboards/rhacs-cluster-resource-adjustment-dashboard.yaml b/resources/grafana/generated/dashboards/rhacs-cluster-resource-adjustment-dashboard.yaml index 48584e77..bbbe165a 100644 --- a/resources/grafana/generated/dashboards/rhacs-cluster-resource-adjustment-dashboard.yaml +++ b/resources/grafana/generated/dashboards/rhacs-cluster-resource-adjustment-dashboard.yaml @@ -35,7 +35,7 @@ spec: "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 25, + "id": 23, "links": [], "liveNow": false, "panels": [ @@ -47,11 +47,2025 @@ spec: "x": 0, "y": 0 }, - "id": 2, + "id": 72, "panels": [], - "title": "Question 1: How big are tenant containers?", + "title": "Overview", "type": "row" }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 3, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 76, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "

Overview

", + "mode": "html" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 3, + "w": 9, + "x": 2, + "y": 4 + }, + "id": 48, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "

acscs-worker

", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 3, + "w": 9, + "x": 13, + "y": 4 + }, + "id": 78, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "

acscs-infra

", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "nodes" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 9, + "x": 2, + "y": 7 + }, + "id": 50, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(acscs_worker_nodes)", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "nodes" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 9, + "x": 13, + "y": 7 + }, + "id": 60, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(kube_node_role{role=\"acscs-infra\"})", + "legendFormat": "__auto", + "range": true, + "refId": 
"A" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 3, + "w": 20, + "x": 2, + "y": 9 + }, + "id": 92, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "

Memory

", + "mode": "html" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 2, + "y": 12 + }, + "id": 100, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "

used

", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 4, + "y": 12 + }, + "id": 9, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "/^memory$/", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() acscs_worker_nodes)", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "memory", + "range": false, + "refId": "memory" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "fixed" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "dark-red", + "value": null + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 6, + "y": 12 + }, + "id": 15, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "/^memory$/", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + 
"datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() acscs_worker_nodes) / \nsum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() acscs_worker_nodes)\n ", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "memory", + "range": false, + "refId": "memory" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 4, + "w": 1, + "x": 8, + "y": 12 + }, + "id": 96, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "\n \n
\n of\n
", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 2, + "x": 9, + "y": 12 + }, + "id": 51, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^memory$/", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() acscs_worker_nodes)\n", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "memory", + "range": false, + "refId": "memory" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 13, + "y": 12 + }, + "id": 102, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "

used

", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 15, + "y": 12 + }, + "id": 66, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^memory$/", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() kube_node_role{role=\"acscs-infra\"})", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "memory", + "range": false, + "refId": "memory" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "dark-red", + "value": null + }, + { + "color": "dark-orange", + "value": 40 + }, + { + "color": "yellow", + "value": 50 + }, + { + "color": "dark-green", + "value": 60 + }, + { + "color": "dark-yellow", + "value": 70 + }, + { + "color": "dark-orange", + "value": 80 + }, + { + "color": "dark-red", + "value": 90 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 17, 
+ "y": 12 + }, + "id": 109, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "/^memory$/", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() kube_node_role{role=\"acscs-infra\"}) / \nsum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() kube_node_role{role=\"acscs-infra\"})\n ", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "memory", + "range": false, + "refId": "memory" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 4, + "w": 1, + "x": 19, + "y": 12 + }, + "id": 97, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "\n \n
\n of\n
", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 2, + "x": 20, + "y": 12 + }, + "id": 65, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^memory$/", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() kube_node_role{role=\"acscs-infra\"})\n", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "memory", + "range": false, + "refId": "memory" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 2, + "y": 14 + }, + "id": 101, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "

requested

", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 4, + "y": 14 + }, + "id": 104, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^memory$/", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",container!=\"\"} * on (node) group_left() kube_node_role{role=\"acscs-worker\"})", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "memory", + "range": false, + "refId": "memory" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "dark-red", + "value": null + }, + { + "color": "dark-orange", + "value": 40 + }, + { + "color": "yellow", + "value": 50 + }, + { + "color": "dark-green", + "value": 60 + }, + { + "color": "dark-yellow", + "value": 70 + }, + { + "color": "dark-orange", + "value": 80 + }, + { + "color": "dark-red", + "value": 90 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + 
"x": 6, + "y": 14 + }, + "id": 108, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "/^memory$/", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",container!=\"\"} * on (node) group_left() kube_node_role{role=\"acscs-worker\"}) / \nsum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() acscs_worker_nodes)\n ", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "memory", + "range": false, + "refId": "memory" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 13, + "y": 14 + }, + "id": 103, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "

requested

", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 15, + "y": 14 + }, + "id": 106, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^memory$/", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",container!=\"\"} * on (node) group_left() kube_node_role{role=\"acscs-infra\"})", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "memory", + "range": false, + "refId": "memory" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "dark-red", + "value": null + }, + { + "color": "dark-orange", + "value": 40 + }, + { + "color": "yellow", + "value": 50 + }, + { + "color": "dark-green", + "value": 60 + }, + { + "color": "dark-yellow", + "value": 70 + }, + { + "color": "dark-orange", + "value": 80 + }, + { + "color": "dark-red", + "value": 90 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + 
"x": 17, + "y": 14 + }, + "id": 110, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "/^memory$/", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",container!=\"\"} * on (node) group_left() kube_node_role{role=\"acscs-infra\"}) / \nsum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() kube_node_role{role=\"acscs-infra\"})\n ", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "memory", + "range": false, + "refId": "memory" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "memory %" + }, + "properties": [ + 
{ + "id": "custom.scaleDistribution", + "value": { + "log": 2, + "type": "log" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "memory abs" + }, + "properties": [ + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "unit", + "value": "decbytes" + } + ] + } + ] + }, + "gridPos": { + "h": 4, + "w": 9, + "x": 2, + "y": 16 + }, + "id": 63, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() acscs_worker_nodes) / \nsum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() acscs_worker_nodes)\n ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "memory %", + "range": true, + "refId": "memory %" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() acscs_worker_nodes) \n ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "memory abs", + "range": true, + "refId": "memory absolute" + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, 
+ "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "memory %" + }, + "properties": [ + { + "id": "custom.scaleDistribution", + "value": { + "log": 2, + "type": "log" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "memory abs" + }, + "properties": [ + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "unit", + "value": "decbytes" + } + ] + } + ] + }, + "gridPos": { + "h": 4, + "w": 9, + "x": 13, + "y": 16 + }, + "id": 64, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() kube_node_role{role=\"acscs-infra\"}) / \nsum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() kube_node_role{role=\"acscs-infra\"})\n ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "memory %", + "range": true, + "refId": "memory %" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + 
"expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() kube_node_role{role=\"acscs-infra\"}) \n ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "memory abs", + "range": true, + "refId": "memory absolute" + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 3, + "w": 20, + "x": 2, + "y": 20 + }, + "id": 93, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "

CPU

", + "mode": "html" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 2, + "y": 23 + }, + "id": 111, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "

used

", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "cores" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 4, + "y": 23 + }, + "id": 10, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^cpu$/", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() acscs_worker_nodes)", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "cpu", + "range": false, + "refId": "cpu" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 6, + "y": 23 + }, + "id": 16, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^cpu$/", + "values": false + }, + "textMode": "value" + }, + 
"pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() acscs_worker_nodes) / sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() acscs_worker_nodes)\n", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "cpu", + "range": false, + "refId": "cpu" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 4, + "w": 1, + "x": 8, + "y": 23 + }, + "id": 98, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "\n \n
\n of\n
", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "cores" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 2, + "x": 9, + "y": 23 + }, + "id": 3, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^cpu$/", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() acscs_worker_nodes)\n", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "cpu", + "range": false, + "refId": "cpu" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 13, + "y": 23 + }, + "id": 113, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "

used

", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "cores" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 15, + "y": 23 + }, + "id": 69, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^cpu$/", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() kube_node_role{role=\"acscs-infra\"})", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "cpu", + "range": false, + "refId": "cpu" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 17, + "y": 23 + }, + "id": 70, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^cpu$/", + "values": false + }, + 
"textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() kube_node_role{role=\"acscs-infra\"}) / sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() kube_node_role{role=\"acscs-infra\"})\n", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "cpu", + "range": false, + "refId": "cpu" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 4, + "w": 1, + "x": 19, + "y": 23 + }, + "id": 99, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "\n \n
\n of\n
", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "cores" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 2, + "x": 20, + "y": 23 + }, + "id": 68, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^cpu$/", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() kube_node_role{role=\"acscs-infra\"})\n", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "cpu", + "range": false, + "refId": "cpu" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 2, + "y": 25 + }, + "id": 112, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "

requested

", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, { "datasource": { "type": "prometheus", @@ -61,38 +2075,7 @@ spec: "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "log" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "mode": "thresholds" }, "mappings": [], "thresholds": { @@ -108,82 +2091,290 @@ spec: } ] }, - "unit": "cpu" + "unit": "core" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 1 + "h": 2, + "w": 2, + "x": 4, + "y": 25 }, - "id": 4, + "id": 114, "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "right", - "showLegend": true + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^memory$/", + "values": false }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "textMode": "value" }, - "repeat": "TenantContainers", - "repeatDirection": "v", + "pluginVersion": "10.2.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "editorMode": "builder", - "expr": "quantile(1, node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container=\"$TenantContainers\", namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\"})", - "legendFormat": "100%ile", - "range": true, - "refId": "A" + "editorMode": "code", + "exemplar": false, + "expr": 
"sum(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",container!=\"\"} * on (node) group_left() kube_node_role{role=\"acscs-worker\"})", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "memory", + "range": false, + "refId": "memory" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 6, + "y": 25 + }, + "id": 115, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^cpu$/", + "values": false }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "editorMode": "builder", - "expr": "quantile(0.95, node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container=\"$TenantContainers\", namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\"})", + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",container!=\"\"} * on (node) group_left() kube_node_role{role=\"acscs-worker\"}) / sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() acscs_worker_nodes)\n", + "format": "time_series", "hide": false, - "legendFormat": "95%ile", - "range": true, - "refId": "B" + "instant": true, + "interval": "", + "legendFormat": "cpu", + "range": false, + "refId": "cpu" + } + ], + "type": "stat" + }, + { + 
"datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 13, + "y": 25 + }, + "id": 116, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "

requested

", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "core" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 15, + "y": 25 + }, + "id": 117, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^memory$/", + "values": false }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "editorMode": "builder", - "expr": "quantile(0.5, node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container=\"$TenantContainers\", namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\"})", + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",container!=\"\"} * on (node) group_left() kube_node_role{role=\"acscs-infra\"})", + "format": "time_series", "hide": false, - "legendFormat": "50%ile", - "range": true, - "refId": "C" + "instant": true, + "interval": "", + "legendFormat": "memory", + "range": false, + "refId": "memory" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + 
"overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 17, + "y": 25 + }, + "id": 118, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^cpu$/", + "values": false }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "editorMode": "builder", - "expr": "quantile(0.1, node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container=\"$TenantContainers\", namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\"})", + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",container!=\"\"} * on (node) group_left() kube_node_role{role=\"acscs-infra\"}) / sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() kube_node_role{role=\"acscs-infra\"})\n", + "format": "time_series", "hide": false, - "legendFormat": "10%ile", - "range": true, - "refId": "D" + "instant": true, + "interval": "", + "legendFormat": "cpu", + "range": false, + "refId": "cpu" } ], - "title": "Tenant $TenantContainers Container CPU Usage", - "type": "timeseries" + "type": "stat" }, { "datasource": { @@ -197,6 +2388,7 @@ spec: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -210,12 +2402,12 @@ spec: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { - "log": 2, - "type": "log" + "type": "linear" }, "showPoints": "auto", "spanNulls": false, @@ -241,96 +2433,94 @@ spec: } ] }, - "unit": "bytes" + "unit": "percentunit" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "cpu absolute" + }, + 
"properties": [ + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "unit", + "value": "cores" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cpu %" + }, + "properties": [ + { + "id": "custom.scaleDistribution", + "value": { + "log": 2, + "type": "log" + } + } + ] + } + ] }, "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 1 + "h": 4, + "w": 9, + "x": 2, + "y": 27 }, - "id": 5, + "id": 52, "options": { "legend": { "calcs": [], "displayMode": "list", - "placement": "right", - "showLegend": true + "placement": "bottom", + "showLegend": false }, "tooltip": { - "mode": "single", + "mode": "multi", "sort": "none" } }, - "repeat": "TenantContainers", - "repeatDirection": "v", + "pluginVersion": "9.4.7", "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "editorMode": "builder", - "expr": "quantile(1, container_memory_working_set_bytes{container=\"$TenantContainers\", namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\"})", - "legendFormat": "100%ile", - "range": true, - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "builder", - "expr": "quantile(0.95, container_memory_working_set_bytes{container=\"$TenantContainers\", namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\"})", - "hide": false, - "legendFormat": "95%ile", - "range": true, - "refId": "B" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "builder", - "expr": "quantile(0.5, container_memory_working_set_bytes{container=\"$TenantContainers\", namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\"})", + "editorMode": "code", + "exemplar": false, + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() 
acscs_worker_nodes) / sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() acscs_worker_nodes)\n", + "format": "time_series", "hide": false, - "legendFormat": "50%ile", + "instant": false, + "interval": "", + "legendFormat": "cpu %", "range": true, - "refId": "C" + "refId": "cpu %" }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "editorMode": "builder", - "expr": "quantile(0.1, container_memory_working_set_bytes{container=\"$TenantContainers\", namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\"})", + "editorMode": "code", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() acscs_worker_nodes)\n", "hide": false, - "legendFormat": "10%ile", + "legendFormat": "cpu absolute", "range": true, - "refId": "D" + "refId": "cpu absolute" } ], - "title": "Tenant $TenantContainers Container Memory Usage", "type": "timeseries" }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 33 - }, - "id": 17, - "panels": [], - "title": "Question 2: How big are tenant namespaces?", - "type": "row" - }, { "datasource": { "type": "prometheus", @@ -343,6 +2533,7 @@ spec: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -356,12 +2547,12 @@ spec: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { - "log": 2, - "type": "log" + "type": "linear" }, "showPoints": "auto", "spanNulls": false, @@ -386,134 +2577,224 @@ spec: "value": 80 } ] - } + }, + "unit": "percentunit" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "cpu absolute" + }, + "properties": [ + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "unit", + "value": "cores" + } + ] + }, + { + 
"matcher": { + "id": "byName", + "options": "cpu %" + }, + "properties": [ + { + "id": "custom.scaleDistribution", + "value": { + "log": 2, + "type": "log" + } + } + ] + } + ] }, "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 34 + "h": 4, + "w": 9, + "x": 13, + "y": 27 }, - "id": 28, + "id": 71, "options": { "legend": { "calcs": [], "displayMode": "list", - "placement": "right", - "showLegend": true + "placement": "bottom", + "showLegend": false }, "tooltip": { - "mode": "single", + "mode": "multi", "sort": "none" } }, "pluginVersion": "9.4.7", - "repeatDirection": "v", "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "editorMode": "builder", + "editorMode": "code", "exemplar": false, - "expr": "quantile(1, sum by(namespace) (node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() kube_node_role{role=\"acscs-infra\"}) / sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() kube_node_role{role=\"acscs-infra\"})\n", "format": "time_series", - "legendFormat": "100%ile", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "cpu %", "range": true, - "refId": "A" + "refId": "cpu %" }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "editorMode": "builder", - "exemplar": false, - "expr": "quantile(.95, sum by(namespace) (node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", - "format": "time_series", + "editorMode": "code", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() 
kube_node_role{role=\"acscs-infra\"})\n", "hide": false, - "legendFormat": "95%ile", + "legendFormat": "cpu absolute", "range": true, - "refId": "B" + "refId": "cpu absolute" + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 3, + "w": 20, + "x": 2, + "y": 31 + }, + "id": 85, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "content": "", + "mode": "html" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "shades" }, - "editorMode": "builder", - "exemplar": false, - "expr": "quantile(0.50, sum by(namespace) (node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", - "format": "time_series", - "hide": false, - "legendFormat": "50%ile", - "range": true, - "refId": "C" + "decimals": 0, + "mappings": [], + "max": 8, + "min": 6, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "GB/core" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 7, + "x": 2, + "y": 34 + }, + "id": 21, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" - }, - "editorMode": "builder", - "exemplar": false, - "expr": "quantile(0.1, sum by(namespace) 
(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", - "format": "time_series", - "hide": false, - "legendFormat": "10%ile", - "range": true, - "refId": "D" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() acscs_worker_nodes) / sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() acscs_worker_nodes) / 1024 / 1024 / 1024", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" } ], - "title": "Tenant Namespace CPU Usage", - "transformations": [], - "type": "timeseries" + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 9, + "y": 34 + }, + "id": 79, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "How many GB per CPU do nodes have?\n\nThis corresponds to either memory-optimized, cpu-optimized or general purpose GB/CPU ratios.", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "description": "", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "log" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": 
"off" - } + "mode": "thresholds" }, + "decimals": 0, "mappings": [], "thresholds": { "mode": "absolute", @@ -528,147 +2809,60 @@ spec: } ] }, - "unit": "bytes" + "unit": "GB/core" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 8, - "x": 8, + "h": 4, + "w": 7, + "x": 15, "y": 34 }, - "id": 29, + "id": 62, "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "right", - "showLegend": true + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "textMode": "value" }, - "pluginVersion": "9.4.7", - "repeatDirection": "v", + "pluginVersion": "10.2.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "editorMode": "builder", + "editorMode": "code", "exemplar": false, - "expr": "quantile(1, sum by(namespace) (container_memory_working_set_bytes{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", - "format": "time_series", - "legendFormat": "100%ile", - "range": true, + "expr": "sum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() kube_node_role{role=\"acscs-infra\"}) / sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() kube_node_role{role=\"acscs-infra\"}) / 1024 / 1024 / 1024", + "instant": true, + "legendFormat": "__auto", + "range": false, "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "builder", - "exemplar": false, - "expr": "quantile(0.95, sum by(namespace) (container_memory_working_set_bytes{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", - "format": "time_series", - "hide": false, - "legendFormat": "95%ile", - 
"range": true, - "refId": "B" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "builder", - "exemplar": false, - "expr": "quantile(0.5, sum by(namespace) (container_memory_working_set_bytes{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", - "format": "time_series", - "hide": false, - "legendFormat": "50%ile", - "range": true, - "refId": "C" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "builder", - "exemplar": false, - "expr": "quantile(0.1, sum by(namespace) (container_memory_working_set_bytes{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", - "format": "time_series", - "hide": false, - "legendFormat": "10%ile", - "range": true, - "refId": "D" } ], - "title": "Tenant Namespace Memory Usage", - "transformations": [], - "type": "timeseries" - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 42 - }, - "id": 31, - "panels": [], - "title": "Question 3: Which tenants are in the \"big head\" and need XL overrides?", - "type": "row" + "type": "stat" }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "description": "", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "log" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "mode": 
"thresholds" }, + "decimals": 0, "mappings": [], "thresholds": { "mode": "absolute", @@ -682,91 +2876,83 @@ spec: "value": 80 } ] - } + }, + "unit": "GB/core" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 43 + "h": 4, + "w": 7, + "x": 2, + "y": 38 }, - "id": 46, + "id": 22, "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "right", - "showLegend": true + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "textMode": "value" }, - "pluginVersion": "9.4.7", - "repeatDirection": "v", + "pluginVersion": "10.2.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "editorMode": "builder", + "editorMode": "code", "exemplar": false, - "expr": "topk(5, sum by(namespace) (node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", - "format": "time_series", + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() acscs_worker_nodes) / sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() acscs_worker_nodes) / 1024 / 1024 / 1024", + "instant": true, "legendFormat": "__auto", - "range": true, + "range": false, "refId": "A" } ], - "title": "Tenant Namespace CPU Usage", - "transformations": [], - "type": "timeseries" + "type": "stat" }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - 
"drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "log" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "gridPos": { + "h": 4, + "w": 6, + "x": 9, + "y": 38 + }, + "id": 80, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "How many GB / CPU are actually used\n\nThis can help to choose a more appropriate node type, either memory-optimized, cpu-optimized or general-purpose", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" }, "mappings": [], "thresholds": { @@ -782,31 +2968,32 @@ spec: } ] }, - "unit": "bytes" + "unit": "GB/core" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 43 + "h": 4, + "w": 7, + "x": 15, + "y": 38 }, - "id": 47, + "id": 61, "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "right", - "showLegend": true + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "textMode": "value" }, - "pluginVersion": "9.4.7", - "repeatDirection": "v", + "pluginVersion": "10.2.0", "targets": [ { "datasource": { @@ -815,30 +3002,38 @@ spec: }, "editorMode": "code", "exemplar": false, - "expr": "topk(5, sum by(namespace) (avg_over_time(container_memory_working_set_bytes{namespace=~\"rhacs-.*\", 
namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}[6h])))", - "format": "time_series", + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() kube_node_role{role=\"acscs-infra\"}) / sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() kube_node_role{role=\"acscs-infra\"}) / 1024 / 1024 / 1024", "instant": true, "legendFormat": "__auto", - "range": true, + "range": false, "refId": "A" } ], - "title": "Tenant Namespace Memory Usage", - "transformations": [], - "type": "timeseries" + "type": "stat" }, { - "collapsed": true, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 51 + "h": 3, + "w": 20, + "x": 2, + "y": 42 }, - "id": 37, - "panels": [], - "title": "Question 4: What is our overall worker node CPU to Memory Profile? (Pick a worker node type)", - "type": "row" + "id": 125, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "", + "mode": "html" + }, + "pluginVersion": "10.2.0", + "type": "text" }, { "datasource": { @@ -847,21 +3042,21 @@ spec: }, "gridPos": { "h": 3, - "w": 5, - "x": 0, - "y": 52 + "w": 20, + "x": 2, + "y": 45 }, - "id": 48, + "id": 128, "options": { "code": { "language": "plaintext", "showLineNumbers": false, "showMiniMap": false }, - "content": "# Worker Nodes Resources\n\nThis dashboard shows the resource consumption exclusively for **worker nodes**", + "content": "# Fine-Grained Adjustments\n\nFind workloads that are over or under-provisioned", "mode": "markdown" }, - "pluginVersion": "9.4.7", + "pluginVersion": "10.2.0", "type": "text" }, { @@ -874,6 +3069,13 @@ spec: "color": { "mode": "thresholds" }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, "mappings": [], "thresholds": { 
"mode": "absolute", @@ -888,32 +3090,85 @@ spec: } ] }, - "unit": "nodes" + "unit": "decbytes" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "CPU Usage / Request" + }, + "properties": [ + { + "id": "unit", + "value": "percentunit" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Memory Usage / Request" + }, + "properties": [ + { + "id": "unit", + "value": "percentunit" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "CPU Usage" + }, + "properties": [ + { + "id": "unit", + "value": "core" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "CPU Requests" + }, + "properties": [ + { + "id": "unit", + "value": "core" + } + ] + } + ] }, "gridPos": { - "h": 3, - "w": 2, - "x": 5, - "y": 52 + "h": 13, + "w": 10, + "x": 2, + "y": 48 }, - "id": 5, + "id": 127, "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], + "cellHeight": "sm", + "footer": { + "countRows": false, "fields": "", - "values": false + "reducer": [ + "sum" + ], + "show": false }, - "textMode": "auto" + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "Memory Usage / Request" + } + ] }, - "pluginVersion": "9.4.7", + "pluginVersion": "10.2.0", "targets": [ { "datasource": { @@ -921,14 +3176,137 @@ spec: "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "sum(acscs_worker_nodes)", - "legendFormat": "__auto", - "range": true, + "exemplar": false, + "expr": "sum(container_memory_usage_bytes{container!=\"\",container!=\"POD\",job=\"kubelet\"} * on (node) group_left() kube_node_role{role=\"acscs-worker\"} * on (namespace, pod) group_left(workload) namespace_workload_pod:kube_pod_owner:relabel) by (workload)", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "{{workload}}", + "range": false, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": 
"PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",container!~\"|POD\"} * on (node) group_left() kube_node_role{role=\"acscs-worker\"} * on (namespace, pod) group_left(workload) namespace_workload_pod:kube_pod_owner:relabel) by (workload)", + "format": "table", + "instant": true, + "legendFormat": "{{workload}}", + "range": false, "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!~\"|POD\"} * on (node) group_left() kube_node_role{role=\"acscs-worker\"} * on (namespace, pod) group_left(workload) namespace_workload_pod:kube_pod_owner:relabel) by (workload)", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "{{workload}}", + "range": false, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",container!~\"|POD\"} * on (node) group_left() kube_node_role{role=\"acscs-worker\"} * on (namespace, pod) group_left(workload) namespace_workload_pod:kube_pod_owner:relabel) by (workload)", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "{{workload}}", + "range": false, + "refId": "D" } ], - "title": "Worker nodes", - "type": "stat" + "transformations": [ + { + "id": "joinByField", + "options": { + "byField": "workload", + "mode": "inner" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Time 1": true, + "Time 2": true, + "Time 3": true, + "Time 4": true + }, + "indexByName": {}, + "renameByName": { + "Time 1": "", + "Time 2": "", + "Value #A": "Memory Requests", + "Value #B": "Memory Usage", + "Value #C": "CPU Usage", + "Value 
#D": "CPU Requests" + } + } + }, + { + "id": "calculateField", + "options": { + "alias": "Memory Usage / Request", + "binary": { + "left": "Memory Usage", + "operator": "/", + "right": "Memory Requests" + }, + "mode": "binary", + "reduce": { + "reducer": "sum" + } + } + }, + { + "id": "calculateField", + "options": { + "alias": "CPU Usage / Request", + "binary": { + "left": "CPU Usage", + "operator": "/", + "right": "CPU Requests" + }, + "mode": "binary", + "reduce": { + "reducer": "sum" + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "CPU Requests": 5, + "CPU Usage": 4, + "CPU Usage / Request": 6, + "Memory Requests": 2, + "Memory Usage": 1, + "Memory Usage / Request": 3, + "workload": 0 + }, + "renameByName": {} + } + } + ], + "type": "table" }, { "datasource": { @@ -940,6 +3318,13 @@ spec: "color": { "mode": "thresholds" }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, "mappings": [], "thresholds": { "mode": "absolute", @@ -954,33 +3339,101 @@ spec: } ] }, - "unit": "GB/core" + "unit": "decbytes" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "CPU Usage / Request" + }, + "properties": [ + { + "id": "unit", + "value": "percentunit" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Memory Usage / Request" + }, + "properties": [ + { + "id": "unit", + "value": "percentunit" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "CPU Usage" + }, + "properties": [ + { + "id": "unit", + "value": "core" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "CPU Requests" + }, + "properties": [ + { + "id": "unit", + "value": "core" + } + ] + } + ] }, "gridPos": { - "h": 3, - "w": 3, - "x": 7, - "y": 52 + "h": 13, + "w": 10, + "x": 12, + "y": 48 }, - "id": 22, + "id": 129, "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - 
"calcs": [ - "lastNotNull" - ], + "cellHeight": "sm", + "footer": { + "countRows": false, "fields": "", - "values": false + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "Memory Usage" + } + ] + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(container_memory_usage_bytes{container!=\"\",container!=\"POD\",job=\"kubelet\"} * on (node) group_left() kube_node_role{role=\"acscs-infra\"} * on (namespace, pod) group_left(workload) namespace_workload_pod:kube_pod_owner:relabel) by (workload)", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "{{workload}}", + "range": false, + "refId": "B" }, - "textMode": "auto" - }, - "pluginVersion": "9.4.7", - "targets": [ { "datasource": { "type": "prometheus", @@ -988,93 +3441,159 @@ spec: }, "editorMode": "code", "exemplar": false, - "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() acscs_worker_nodes) / sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() acscs_worker_nodes) / 1024 / 1024 / 1024", - "instant": false, - "legendFormat": "__auto", - "range": true, + "expr": "sum(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",container!=\"POD\",container!=\"\"} * on (node) group_left() kube_node_role{role=\"acscs-infra\"} * on (namespace, pod) group_left(workload) namespace_workload_pod:kube_pod_owner:relabel) by (workload)", + "format": "table", + "instant": true, + "legendFormat": "{{workload}}", + "range": false, "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": 
"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!=\"POD\",container!=\"\"} * on (node) group_left() kube_node_role{role=\"acscs-infra\"} * on (namespace, pod) group_left(workload) namespace_workload_pod:kube_pod_owner:relabel) by (workload)", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "{{workload}}", + "range": false, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",container!=\"POD\",container!=\"\"} * on (node) group_left() kube_node_role{role=\"acscs-infra\"} * on (namespace, pod) group_left(workload) namespace_workload_pod:kube_pod_owner:relabel) by (workload)", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "{{workload}}", + "range": false, + "refId": "D" } ], - "title": "Used GB/CPU ratio", - "type": "stat" + "transformations": [ + { + "id": "joinByField", + "options": { + "byField": "workload", + "mode": "inner" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Time 1": true, + "Time 2": true, + "Time 3": true, + "Time 4": true + }, + "indexByName": {}, + "renameByName": { + "Time 1": "", + "Time 2": "", + "Value #A": "Memory Requests", + "Value #B": "Memory Usage", + "Value #C": "CPU Usage", + "Value #D": "CPU Requests" + } + } + }, + { + "id": "calculateField", + "options": { + "alias": "Memory Usage / Request", + "binary": { + "left": "Memory Usage", + "operator": "/", + "right": "Memory Requests" + }, + "mode": "binary", + "reduce": { + "reducer": "sum" + } + } + }, + { + "id": "calculateField", + "options": { + "alias": "CPU Usage / Request", + "binary": { + "left": "CPU Usage", + "operator": "/", + "right": "CPU Requests" + }, + "mode": "binary", + "reduce": { + "reducer": "sum" + } + } + }, + { + "id": "organize", + "options": { + 
"excludeByName": {}, + "indexByName": { + "CPU Requests": 5, + "CPU Usage": 4, + "CPU Usage / Request": 6, + "Memory Requests": 2, + "Memory Usage": 1, + "Memory Usage / Request": 3, + "workload": 0 + }, + "renameByName": {} + } + } + ], + "type": "table" }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "GB/core" - }, - "overrides": [] - }, "gridPos": { "h": 3, - "w": 3, - "x": 10, - "y": 52 + "w": 20, + "x": 2, + "y": 61 }, - "id": 21, + "id": 126, "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false }, - "textMode": "auto" + "content": "", + "mode": "html" }, - "pluginVersion": "9.4.7", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "sum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() acscs_worker_nodes) / sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() acscs_worker_nodes) / 1024 / 1024 / 1024", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "Nodes GB/CPU ratio", - "type": "stat" + "pluginVersion": "10.2.0", + "type": "text" }, { - "collapsed": true, + "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 55 + "y": 64 }, - "id": 39, + "id": 2, "panels": [], - "title": "Question 5: What is our overall cluster worker node utilization? 
(Scale node count up/down)", + "repeat": "TenantContainers", + "repeatDirection": "h", + "title": "How big is ${TenantContainers}", "type": "row" }, { @@ -1083,22 +3602,22 @@ spec: "uid": "PBFA97CFB590B2093" }, "gridPos": { - "h": 3, - "w": 5, - "x": 0, - "y": 56 + "h": 8, + "w": 2, + "x": 4, + "y": 65 }, - "id": 20, + "id": 136, "options": { "code": { "language": "plaintext", "showLineNumbers": false, "showMiniMap": false }, - "content": "# Worker Nodes Resources\n\nThis dashboard shows the resource consumption exclusively for **worker nodes**", - "mode": "markdown" + "content": "\n \n
\n average ${TenantContainers}\n
", + "mode": "html" }, - "pluginVersion": "9.4.7", + "pluginVersion": "10.2.0", "type": "text" }, { @@ -1106,144 +3625,48 @@ spec: "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "decbytes" - }, - "overrides": [] - }, "gridPos": { - "h": 3, + "h": 4, "w": 2, - "x": 5, - "y": 56 + "x": 6, + "y": 65 }, - "id": 2, + "id": 165, "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "/^memory$/", - "values": false + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false }, - "textMode": "auto" + "content": "\n \n
\n memory\n
", + "mode": "markdown" }, - "pluginVersion": "9.4.7", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "exemplar": false, - "expr": "sum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() acscs_worker_nodes)\n", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "legendFormat": "memory", - "range": true, - "refId": "memory" - } - ], - "title": "Total Memory", - "type": "stat" + "pluginVersion": "10.2.0", + "type": "text" }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "decbytes" - }, - "overrides": [] - }, "gridPos": { - "h": 3, + "h": 2, "w": 2, - "x": 7, - "y": 56 + "x": 8, + "y": 65 }, - "id": 9, + "id": 166, "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "/^memory$/", - "values": false + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false }, - "textMode": "auto" + "content": "
used
", + "mode": "markdown" }, - "pluginVersion": "9.4.7", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "exemplar": false, - "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() acscs_worker_nodes)", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "legendFormat": "memory", - "range": true, - "refId": "memory" - } - ], - "title": "Used Memory", - "type": "stat" + "pluginVersion": "10.2.0", + "type": "text" }, { "datasource": { @@ -1270,17 +3693,17 @@ spec: } ] }, - "unit": "percentunit" + "unit": "decbytes" }, "overrides": [] }, "gridPos": { - "h": 3, + "h": 2, "w": 2, - "x": 9, - "y": 56 + "x": 10, + "y": 65 }, - "id": 15, + "id": 149, "options": { "colorMode": "none", "graphMode": "none", @@ -1288,33 +3711,36 @@ spec: "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "/^memory$/", "values": false }, - "textMode": "auto" + "textMode": "value" }, - "pluginVersion": "9.4.7", + "pluginVersion": "10.2.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "disableTextWrap": false, "editorMode": "code", "exemplar": false, - "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() acscs_worker_nodes) / \nsum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() acscs_worker_nodes)\n ", + "expr": "quantile(0.5, node_namespace_pod_container:container_memory_working_set_bytes{container=\"$TenantContainers\",namespace=~\"rhacs-.{20}\"})", "format": "time_series", + "fullMetaSearch": false, "hide": false, - "instant": false, + "includeNullMetadata": true, + "instant": true, "interval": "", "legendFormat": "memory", - "range": true, - "refId": "memory" + "range": false, + "refId": "memory", + "useBackend": false } ], - "title": "Used Memory", 
"type": "stat" }, { @@ -1329,6 +3755,7 @@ spec: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1342,11 +3769,13 @@ spec: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { - "type": "linear" + "log": 2, + "type": "log" }, "showPoints": "auto", "spanNulls": false, @@ -1372,62 +3801,29 @@ spec: } ] }, - "unit": "percentunit" + "unit": "bytes" }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "memory %" - }, - "properties": [ - { - "id": "custom.scaleDistribution", - "value": { - "log": 2, - "type": "log" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "memory abs" - }, - "properties": [ - { - "id": "custom.axisPlacement", - "value": "right" - }, - { - "id": "unit", - "value": "decbytes" - } - ] - } - ] + "overrides": [] }, "gridPos": { - "h": 8, - "w": 11, - "x": 0, - "y": 59 + "h": 4, + "w": 8, + "x": 12, + "y": 65 }, - "id": 18, + "id": 5, "options": { "legend": { "calcs": [], "displayMode": "list", - "placement": "bottom", - "showLegend": false + "placement": "right", + "showLegend": true }, "tooltip": { - "mode": "single", - "sort": "none" + "mode": "multi", + "sort": "desc" } }, - "pluginVersion": "9.4.7", "targets": [ { "datasource": { @@ -1435,34 +3831,61 @@ spec: "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "exemplar": false, - "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() acscs_worker_nodes) / \nsum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() acscs_worker_nodes)\n ", - "format": "time_series", + "expr": "quantile(1, container_memory_working_set_bytes{container=\"$TenantContainers\",namespace=~\"rhacs-.{20}\"})", + "legendFormat": "p100", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": 
"prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "quantile(0.95, container_memory_working_set_bytes{container=\"$TenantContainers\",namespace=~\"rhacs-.{20}\"})", + "fullMetaSearch": false, "hide": false, - "instant": false, - "interval": "", - "legendFormat": "memory %", + "includeNullMetadata": true, + "legendFormat": "p95", "range": true, - "refId": "memory %" + "refId": "B", + "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "disableTextWrap": false, "editorMode": "code", - "exemplar": false, - "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() acscs_worker_nodes) \n ", - "format": "time_series", + "expr": "quantile(0.5, container_memory_working_set_bytes{container=\"$TenantContainers\",namespace=~\"rhacs-.{20}\"})", + "fullMetaSearch": false, "hide": false, - "instant": false, - "interval": "", - "legendFormat": "memory abs", + "includeNullMetadata": true, + "legendFormat": "p50", "range": true, - "refId": "memory absolute" + "refId": "C", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "quantile(0.10, container_memory_working_set_bytes{container=\"$TenantContainers\",namespace=~\"rhacs-.{20}\"})", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "legendFormat": "p10", + "range": true, + "refId": "D", + "useBackend": false } ], - "title": "Used Memory", + "title": "Tenant $TenantContainers Container Memory Usage", "type": "timeseries" }, { @@ -1471,22 +3894,22 @@ spec: "uid": "PBFA97CFB590B2093" }, "gridPos": { - "h": 3, - "w": 5, - "x": 0, + "h": 2, + "w": 2, + "x": 8, "y": 67 }, - "id": 49, + "id": 167, "options": { "code": { "language": "plaintext", "showLineNumbers": false, "showMiniMap": false }, - "content": "# Worker Nodes 
Resources\n\nThis dashboard shows the resource consumption exclusively for **worker nodes**", + "content": "
requested
", "mode": "markdown" }, - "pluginVersion": "9.4.7", + "pluginVersion": "10.2.0", "type": "text" }, { @@ -1514,17 +3937,17 @@ spec: } ] }, - "unit": "cores" + "unit": "decbytes" }, "overrides": [] }, "gridPos": { - "h": 3, + "h": 2, "w": 2, - "x": 5, + "x": 10, "y": 67 }, - "id": 3, + "id": 168, "options": { "colorMode": "none", "graphMode": "none", @@ -1532,35 +3955,86 @@ spec: "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], - "fields": "/^cpu$/", + "fields": "/^memory$/", "values": false }, - "textMode": "auto" + "textMode": "value" }, - "pluginVersion": "9.4.7", + "pluginVersion": "10.2.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "disableTextWrap": false, "editorMode": "code", "exemplar": false, - "expr": "sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() acscs_worker_nodes)\n", + "expr": "quantile(0.5, kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",container=\"$TenantContainers\",namespace=~\"rhacs-.{20}\"})", "format": "time_series", + "fullMetaSearch": false, "hide": false, - "instant": false, + "includeNullMetadata": true, + "instant": true, "interval": "", - "legendFormat": "cpu", - "range": true, - "refId": "cpu" + "legendFormat": "memory", + "range": false, + "refId": "memory", + "useBackend": false } ], - "title": "Total CPU", "type": "stat" }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 4, + "w": 2, + "x": 6, + "y": 69 + }, + "id": 196, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "\n \n
\n cpu\n
", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 8, + "y": 69 + }, + "id": 197, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "
used
", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, { "datasource": { "type": "prometheus", @@ -1586,17 +4060,17 @@ spec: } ] }, - "unit": "cores" + "unit": "core" }, "overrides": [] }, "gridPos": { - "h": 3, + "h": 2, "w": 2, - "x": 7, - "y": 67 + "x": 10, + "y": 69 }, - "id": 10, + "id": 198, "options": { "colorMode": "none", "graphMode": "none", @@ -1604,34 +4078,198 @@ spec: "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "/^cpu$/", "values": false }, - "textMode": "auto" - }, - "pluginVersion": "9.4.7", - "targets": [ + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "code", + "exemplar": false, + "expr": "quantile(0.5, node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container=\"$TenantContainers\",namespace=~\"rhacs-.{20}\"})", + "format": "time_series", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": true, + "interval": "", + "legendFormat": "cpu", + "range": false, + "refId": "memory", + "useBackend": false + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "log" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + 
"mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "cpu" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 8, + "x": 12, + "y": 69 + }, + "id": 4, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "quantile(1, node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container=\"$TenantContainers\",namespace=~\"rhacs-.{20}\"})", + "legendFormat": "p100", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "quantile(0.95, node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container=\"$TenantContainers\",namespace=~\"rhacs-.{20}\"})", + "hide": false, + "legendFormat": "p95", + "range": true, + "refId": "B" + }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "disableTextWrap": false, "editorMode": "code", - "exemplar": false, - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() acscs_worker_nodes)", - "format": "time_series", + "expr": "quantile(0.5, node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container=\"$TenantContainers\",namespace=~\"rhacs-.{20}\"})", + "fullMetaSearch": false, "hide": false, - "instant": false, - "interval": "", - "legendFormat": "cpu", + "includeNullMetadata": true, + "legendFormat": "p50", "range": true, - "refId": "cpu" + "refId": "C", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "quantile(0.1, 
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container=\"$TenantContainers\",namespace=~\"rhacs-.{20}\"})", + "hide": false, + "legendFormat": "p10", + "range": true, + "refId": "D" } ], - "title": "Used CPU", - "type": "stat" + "title": "Tenant $TenantContainers Container CPU Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 8, + "y": 71 + }, + "id": 235, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "
requested
", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" }, { "datasource": { @@ -1658,17 +4296,17 @@ spec: } ] }, - "unit": "percentunit" + "unit": "core" }, "overrides": [] }, "gridPos": { - "h": 3, + "h": 2, "w": 2, - "x": 9, - "y": 67 + "x": 10, + "y": 71 }, - "id": 16, + "id": 236, "options": { "colorMode": "none", "graphMode": "none", @@ -1676,184 +4314,557 @@ spec: "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "/^cpu$/", "values": false }, - "textMode": "auto" + "textMode": "value" }, - "pluginVersion": "9.4.7", + "pluginVersion": "10.2.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "disableTextWrap": false, "editorMode": "code", "exemplar": false, - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() acscs_worker_nodes) / sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() acscs_worker_nodes)\n", + "expr": "quantile(0.5, kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",container=\"$TenantContainers\",namespace=~\"rhacs-.{20}\"})", "format": "time_series", + "fullMetaSearch": false, "hide": false, - "instant": false, + "includeNullMetadata": true, + "instant": true, "interval": "", "legendFormat": "cpu", - "range": true, - "refId": "cpu" + "range": false, + "refId": "memory", + "useBackend": false } ], - "title": "Used CPU", "type": "stat" }, { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 100 }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "id": 17, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - 
"fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "log" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 4, + "y": 6 + }, + "id": 28, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right", + "showLegend": true }, - "thresholdsStyle": { - "mode": "off" + "tooltip": { + "mode": "single", + "sort": "none" } }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + "pluginVersion": "9.4.7", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "cpu absolute" + "editorMode": "builder", + "exemplar": false, + "expr": "quantile(1, sum by(namespace) 
(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", + "format": "time_series", + "legendFormat": "100%ile", + "range": true, + "refId": "A" }, - "properties": [ - { - "id": "custom.axisPlacement", - "value": "right" + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" }, - { - "id": "unit", - "value": "cores" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cpu %" + "editorMode": "builder", + "exemplar": false, + "expr": "quantile(.95, sum by(namespace) (node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", + "format": "time_series", + "hide": false, + "legendFormat": "95%ile", + "range": true, + "refId": "B" }, - "properties": [ - { - "id": "custom.scaleDistribution", - "value": { + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "builder", + "exemplar": false, + "expr": "quantile(0.50, sum by(namespace) (node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", + "format": "time_series", + "hide": false, + "legendFormat": "50%ile", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "builder", + "exemplar": false, + "expr": "quantile(0.1, sum by(namespace) (node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", + "format": "time_series", + "hide": false, + "legendFormat": "10%ile", + "range": true, + "refId": 
"D" + } + ], + "title": "Tenant Namespace CPU Usage", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { "log": 2, "type": "log" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } - } - ] - } - ] - }, + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 12, + "y": 6 + }, + "id": 29, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "9.4.7", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "builder", + "exemplar": false, + "expr": "quantile(1, sum by(namespace) (container_memory_working_set_bytes{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", + "format": "time_series", + "legendFormat": "100%ile", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "builder", + "exemplar": false, + "expr": 
"quantile(0.95, sum by(namespace) (container_memory_working_set_bytes{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", + "format": "time_series", + "hide": false, + "legendFormat": "95%ile", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "builder", + "exemplar": false, + "expr": "quantile(0.5, sum by(namespace) (container_memory_working_set_bytes{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", + "format": "time_series", + "hide": false, + "legendFormat": "50%ile", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "builder", + "exemplar": false, + "expr": "quantile(0.1, sum by(namespace) (container_memory_working_set_bytes{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", + "format": "time_series", + "hide": false, + "legendFormat": "10%ile", + "range": true, + "refId": "D" + } + ], + "title": "Tenant Namespace Memory Usage", + "transformations": [], + "type": "timeseries" + } + ], + "title": "Question 2: How big are tenant namespaces?", + "type": "row" + }, + { + "collapsed": true, "gridPos": { - "h": 8, - "w": 11, + "h": 1, + "w": 24, "x": 0, - "y": 70 + "y": 101 }, - "id": 17, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "9.4.7", - "targets": [ + "id": 31, + "panels": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "editorMode": "code", - "exemplar": false, - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() 
acscs_worker_nodes) / sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() acscs_worker_nodes)\n", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "legendFormat": "cpu %", - "range": true, - "refId": "cpu %" + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "log" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 4, + "y": 7 + }, + "id": 46, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "9.4.7", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "builder", + "exemplar": false, + "expr": "topk(5, sum by(namespace) (node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", + "format": "time_series", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Tenant Namespace CPU Usage", + 
"transformations": [], + "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "editorMode": "code", - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() acscs_worker_nodes)\n", - "hide": false, - "legendFormat": "cpu absolute", - "range": true, - "refId": "cpu absolute" + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "log" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 12, + "y": 7 + }, + "id": 47, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "9.4.7", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "topk(5, sum by(namespace) (avg_over_time(container_memory_working_set_bytes{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}[6h])))", + "format": "time_series", + "instant": true, + 
"legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Tenant Namespace Memory Usage", + "transformations": [], + "type": "timeseries" } ], - "title": "Used CPU", - "type": "timeseries" + "title": "Question 3: Which tenants are in the \"big head\" and need XL overrides?", + "type": "row" } ], "refresh": "", "revision": 1, "schemaVersion": 38, - "style": "dark", "tags": [], "templating": { "list": [ diff --git a/resources/grafana/sources/rhacs-cluster-resource-adjustment.json b/resources/grafana/sources/rhacs-cluster-resource-adjustment.json index ebc6efac..8751dabf 100644 --- a/resources/grafana/sources/rhacs-cluster-resource-adjustment.json +++ b/resources/grafana/sources/rhacs-cluster-resource-adjustment.json @@ -24,7 +24,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 25, + "id": 23, "links": [], "liveNow": false, "panels": [ @@ -36,11 +36,2025 @@ "x": 0, "y": 0 }, - "id": 2, + "id": 72, "panels": [], - "title": "Question 1: How big are tenant containers?", + "title": "Overview", "type": "row" }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 3, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 76, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "

Overview

", + "mode": "html" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 3, + "w": 9, + "x": 2, + "y": 4 + }, + "id": 48, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "

acscs-worker

", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 3, + "w": 9, + "x": 13, + "y": 4 + }, + "id": 78, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "

acscs-infra

", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "nodes" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 9, + "x": 2, + "y": 7 + }, + "id": 50, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(acscs_worker_nodes)", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "nodes" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 9, + "x": 13, + "y": 7 + }, + "id": 60, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "sum(kube_node_role{role=\"acscs-infra\"})", + "legendFormat": "__auto", + "range": true, + "refId": 
"A" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 3, + "w": 20, + "x": 2, + "y": 9 + }, + "id": 92, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "

Memory

", + "mode": "html" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 2, + "y": 12 + }, + "id": 100, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "

used

", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 4, + "y": 12 + }, + "id": 9, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "/^memory$/", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() acscs_worker_nodes)", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "memory", + "range": false, + "refId": "memory" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "fixed" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "dark-red", + "value": null + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 6, + "y": 12 + }, + "id": 15, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "/^memory$/", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + 
"datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() acscs_worker_nodes) / \nsum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() acscs_worker_nodes)\n ", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "memory", + "range": false, + "refId": "memory" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 4, + "w": 1, + "x": 8, + "y": 12 + }, + "id": 96, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "\n \n
\n of\n
", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 2, + "x": 9, + "y": 12 + }, + "id": 51, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^memory$/", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() acscs_worker_nodes)\n", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "memory", + "range": false, + "refId": "memory" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 13, + "y": 12 + }, + "id": 102, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "

used

", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 15, + "y": 12 + }, + "id": 66, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^memory$/", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() kube_node_role{role=\"acscs-infra\"})", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "memory", + "range": false, + "refId": "memory" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "dark-red", + "value": null + }, + { + "color": "dark-orange", + "value": 40 + }, + { + "color": "yellow", + "value": 50 + }, + { + "color": "dark-green", + "value": 60 + }, + { + "color": "dark-yellow", + "value": 70 + }, + { + "color": "dark-orange", + "value": 80 + }, + { + "color": "dark-red", + "value": 90 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 17, 
+ "y": 12 + }, + "id": 109, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "/^memory$/", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() kube_node_role{role=\"acscs-infra\"}) / \nsum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() kube_node_role{role=\"acscs-infra\"})\n ", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "memory", + "range": false, + "refId": "memory" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 4, + "w": 1, + "x": 19, + "y": 12 + }, + "id": 97, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "\n \n
\n of\n
", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 2, + "x": 20, + "y": 12 + }, + "id": 65, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^memory$/", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() kube_node_role{role=\"acscs-infra\"})\n", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "memory", + "range": false, + "refId": "memory" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 2, + "y": 14 + }, + "id": 101, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "

requested

", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 4, + "y": 14 + }, + "id": 104, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^memory$/", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",container!=\"\"} * on (node) group_left() kube_node_role{role=\"acscs-worker\"})", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "memory", + "range": false, + "refId": "memory" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "dark-red", + "value": null + }, + { + "color": "dark-orange", + "value": 40 + }, + { + "color": "yellow", + "value": 50 + }, + { + "color": "dark-green", + "value": 60 + }, + { + "color": "dark-yellow", + "value": 70 + }, + { + "color": "dark-orange", + "value": 80 + }, + { + "color": "dark-red", + "value": 90 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + 
"x": 6, + "y": 14 + }, + "id": 108, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "/^memory$/", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",container!=\"\"} * on (node) group_left() kube_node_role{role=\"acscs-worker\"}) / \nsum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() acscs_worker_nodes)\n ", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "memory", + "range": false, + "refId": "memory" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 13, + "y": 14 + }, + "id": 103, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "

requested

", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 15, + "y": 14 + }, + "id": 106, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^memory$/", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",container!=\"\"} * on (node) group_left() kube_node_role{role=\"acscs-infra\"})", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "memory", + "range": false, + "refId": "memory" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "percentage", + "steps": [ + { + "color": "dark-red", + "value": null + }, + { + "color": "dark-orange", + "value": 40 + }, + { + "color": "yellow", + "value": 50 + }, + { + "color": "dark-green", + "value": 60 + }, + { + "color": "dark-yellow", + "value": 70 + }, + { + "color": "dark-orange", + "value": 80 + }, + { + "color": "dark-red", + "value": 90 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + 
"x": 17, + "y": 14 + }, + "id": 110, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "/^memory$/", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",container!=\"\"} * on (node) group_left() kube_node_role{role=\"acscs-infra\"}) / \nsum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() kube_node_role{role=\"acscs-infra\"})\n ", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "memory", + "range": false, + "refId": "memory" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "memory %" + }, + "properties": [ + 
{ + "id": "custom.scaleDistribution", + "value": { + "log": 2, + "type": "log" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "memory abs" + }, + "properties": [ + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "unit", + "value": "decbytes" + } + ] + } + ] + }, + "gridPos": { + "h": 4, + "w": 9, + "x": 2, + "y": 16 + }, + "id": 63, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() acscs_worker_nodes) / \nsum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() acscs_worker_nodes)\n ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "memory %", + "range": true, + "refId": "memory %" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() acscs_worker_nodes) \n ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "memory abs", + "range": true, + "refId": "memory absolute" + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, 
+ "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "memory %" + }, + "properties": [ + { + "id": "custom.scaleDistribution", + "value": { + "log": 2, + "type": "log" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "memory abs" + }, + "properties": [ + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "unit", + "value": "decbytes" + } + ] + } + ] + }, + "gridPos": { + "h": 4, + "w": 9, + "x": 13, + "y": 16 + }, + "id": 64, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "9.4.7", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() kube_node_role{role=\"acscs-infra\"}) / \nsum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() kube_node_role{role=\"acscs-infra\"})\n ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "memory %", + "range": true, + "refId": "memory %" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + 
"expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() kube_node_role{role=\"acscs-infra\"}) \n ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "memory abs", + "range": true, + "refId": "memory absolute" + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 3, + "w": 20, + "x": 2, + "y": 20 + }, + "id": 93, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "

CPU

", + "mode": "html" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 2, + "y": 23 + }, + "id": 111, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "

used

", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "cores" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 4, + "y": 23 + }, + "id": 10, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^cpu$/", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() acscs_worker_nodes)", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "cpu", + "range": false, + "refId": "cpu" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 6, + "y": 23 + }, + "id": 16, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^cpu$/", + "values": false + }, + "textMode": "value" + }, + 
"pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() acscs_worker_nodes) / sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() acscs_worker_nodes)\n", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "cpu", + "range": false, + "refId": "cpu" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 4, + "w": 1, + "x": 8, + "y": 23 + }, + "id": 98, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "\n \n
\n of\n
", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "cores" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 2, + "x": 9, + "y": 23 + }, + "id": 3, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^cpu$/", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() acscs_worker_nodes)\n", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "cpu", + "range": false, + "refId": "cpu" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 13, + "y": 23 + }, + "id": 113, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "

used

", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 1, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "cores" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 15, + "y": 23 + }, + "id": 69, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^cpu$/", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() kube_node_role{role=\"acscs-infra\"})", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "cpu", + "range": false, + "refId": "cpu" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 17, + "y": 23 + }, + "id": 70, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^cpu$/", + "values": false + }, + 
"textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() kube_node_role{role=\"acscs-infra\"}) / sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() kube_node_role{role=\"acscs-infra\"})\n", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "cpu", + "range": false, + "refId": "cpu" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 4, + "w": 1, + "x": 19, + "y": 23 + }, + "id": 99, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "\n \n
\n of\n
", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "cores" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 2, + "x": 20, + "y": 23 + }, + "id": 68, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^cpu$/", + "values": false + }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() kube_node_role{role=\"acscs-infra\"})\n", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "cpu", + "range": false, + "refId": "cpu" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 2, + "y": 25 + }, + "id": 112, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "

requested

", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, { "datasource": { "type": "prometheus", @@ -50,38 +2064,7 @@ "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "log" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "mode": "thresholds" }, "mappings": [], "thresholds": { @@ -97,82 +2080,290 @@ } ] }, - "unit": "cpu" + "unit": "core" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 1 + "h": 2, + "w": 2, + "x": 4, + "y": 25 }, - "id": 4, + "id": 114, "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "right", - "showLegend": true + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^memory$/", + "values": false }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "textMode": "value" }, - "repeat": "TenantContainers", - "repeatDirection": "v", + "pluginVersion": "10.2.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "editorMode": "builder", - "expr": "quantile(1, node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container=\"$TenantContainers\", namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\"})", - "legendFormat": "100%ile", - "range": true, - "refId": "A" + "editorMode": "code", + "exemplar": false, + "expr": 
"sum(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",container!=\"\"} * on (node) group_left() kube_node_role{role=\"acscs-worker\"})", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "memory", + "range": false, + "refId": "memory" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 6, + "y": 25 + }, + "id": 115, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^cpu$/", + "values": false }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "editorMode": "builder", - "expr": "quantile(0.95, node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container=\"$TenantContainers\", namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\"})", + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",container!=\"\"} * on (node) group_left() kube_node_role{role=\"acscs-worker\"}) / sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() acscs_worker_nodes)\n", + "format": "time_series", "hide": false, - "legendFormat": "95%ile", - "range": true, - "refId": "B" + "instant": true, + "interval": "", + "legendFormat": "cpu", + "range": false, + "refId": "cpu" + } + ], + "type": "stat" + }, + { + 
"datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 13, + "y": 25 + }, + "id": 116, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "

requested

", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "core" + }, + "overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 15, + "y": 25 + }, + "id": 117, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^memory$/", + "values": false }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "editorMode": "builder", - "expr": "quantile(0.5, node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container=\"$TenantContainers\", namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\"})", + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",container!=\"\"} * on (node) group_left() kube_node_role{role=\"acscs-infra\"})", + "format": "time_series", "hide": false, - "legendFormat": "50%ile", - "range": true, - "refId": "C" + "instant": true, + "interval": "", + "legendFormat": "memory", + "range": false, + "refId": "memory" + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + 
"overrides": [] + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 17, + "y": 25 + }, + "id": 118, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "/^cpu$/", + "values": false }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "editorMode": "builder", - "expr": "quantile(0.1, node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container=\"$TenantContainers\", namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\"})", + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",container!=\"\"} * on (node) group_left() kube_node_role{role=\"acscs-infra\"}) / sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() kube_node_role{role=\"acscs-infra\"})\n", + "format": "time_series", "hide": false, - "legendFormat": "10%ile", - "range": true, - "refId": "D" + "instant": true, + "interval": "", + "legendFormat": "cpu", + "range": false, + "refId": "cpu" } ], - "title": "Tenant $TenantContainers Container CPU Usage", - "type": "timeseries" + "type": "stat" }, { "datasource": { @@ -186,6 +2377,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -199,12 +2391,12 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { - "log": 2, - "type": "log" + "type": "linear" }, "showPoints": "auto", "spanNulls": false, @@ -230,96 +2422,94 @@ } ] }, - "unit": "bytes" + "unit": "percentunit" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "cpu absolute" + }, + "properties": [ + { + 
"id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "unit", + "value": "cores" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cpu %" + }, + "properties": [ + { + "id": "custom.scaleDistribution", + "value": { + "log": 2, + "type": "log" + } + } + ] + } + ] }, "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 1 + "h": 4, + "w": 9, + "x": 2, + "y": 27 }, - "id": 5, + "id": 52, "options": { "legend": { "calcs": [], "displayMode": "list", - "placement": "right", - "showLegend": true + "placement": "bottom", + "showLegend": false }, "tooltip": { - "mode": "single", + "mode": "multi", "sort": "none" } }, - "repeat": "TenantContainers", - "repeatDirection": "v", + "pluginVersion": "9.4.7", "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "editorMode": "builder", - "expr": "quantile(1, container_memory_working_set_bytes{container=\"$TenantContainers\", namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\"})", - "legendFormat": "100%ile", - "range": true, - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "builder", - "expr": "quantile(0.95, container_memory_working_set_bytes{container=\"$TenantContainers\", namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\"})", - "hide": false, - "legendFormat": "95%ile", - "range": true, - "refId": "B" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "builder", - "expr": "quantile(0.5, container_memory_working_set_bytes{container=\"$TenantContainers\", namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\"})", + "editorMode": "code", + "exemplar": false, + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() acscs_worker_nodes) / 
sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() acscs_worker_nodes)\n", + "format": "time_series", "hide": false, - "legendFormat": "50%ile", + "instant": false, + "interval": "", + "legendFormat": "cpu %", "range": true, - "refId": "C" + "refId": "cpu %" }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "editorMode": "builder", - "expr": "quantile(0.1, container_memory_working_set_bytes{container=\"$TenantContainers\", namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\"})", + "editorMode": "code", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() acscs_worker_nodes)\n", "hide": false, - "legendFormat": "10%ile", + "legendFormat": "cpu absolute", "range": true, - "refId": "D" + "refId": "cpu absolute" } ], - "title": "Tenant $TenantContainers Container Memory Usage", "type": "timeseries" }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 33 - }, - "id": 17, - "panels": [], - "title": "Question 2: How big are tenant namespaces?", - "type": "row" - }, { "datasource": { "type": "prometheus", @@ -332,6 +2522,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -345,12 +2536,12 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { - "log": 2, - "type": "log" + "type": "linear" }, "showPoints": "auto", "spanNulls": false, @@ -375,134 +2566,224 @@ "value": 80 } ] - } + }, + "unit": "percentunit" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "cpu absolute" + }, + "properties": [ + { + "id": "custom.axisPlacement", + "value": "right" + }, + { + "id": "unit", + "value": "cores" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cpu 
%" + }, + "properties": [ + { + "id": "custom.scaleDistribution", + "value": { + "log": 2, + "type": "log" + } + } + ] + } + ] }, "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 34 + "h": 4, + "w": 9, + "x": 13, + "y": 27 }, - "id": 28, + "id": 71, "options": { "legend": { "calcs": [], "displayMode": "list", - "placement": "right", - "showLegend": true + "placement": "bottom", + "showLegend": false }, "tooltip": { - "mode": "single", + "mode": "multi", "sort": "none" } }, "pluginVersion": "9.4.7", - "repeatDirection": "v", "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "editorMode": "builder", + "editorMode": "code", "exemplar": false, - "expr": "quantile(1, sum by(namespace) (node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() kube_node_role{role=\"acscs-infra\"}) / sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() kube_node_role{role=\"acscs-infra\"})\n", "format": "time_series", - "legendFormat": "100%ile", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "cpu %", "range": true, - "refId": "A" + "refId": "cpu %" }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "editorMode": "builder", - "exemplar": false, - "expr": "quantile(.95, sum by(namespace) (node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", - "format": "time_series", + "editorMode": "code", + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() kube_node_role{role=\"acscs-infra\"})\n", "hide": false, - 
"legendFormat": "95%ile", + "legendFormat": "cpu absolute", "range": true, - "refId": "B" + "refId": "cpu absolute" + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 3, + "w": 20, + "x": 2, + "y": 31 + }, + "id": 85, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "content": "", + "mode": "html" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "shades" }, - "editorMode": "builder", - "exemplar": false, - "expr": "quantile(0.50, sum by(namespace) (node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", - "format": "time_series", - "hide": false, - "legendFormat": "50%ile", - "range": true, - "refId": "C" + "decimals": 0, + "mappings": [], + "max": 8, + "min": 6, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "GB/core" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 7, + "x": 2, + "y": 34 + }, + "id": 21, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false }, + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" - }, - "editorMode": "builder", - "exemplar": false, - "expr": "quantile(0.1, sum by(namespace) (node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=~\"rhacs-.*\", 
namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", - "format": "time_series", - "hide": false, - "legendFormat": "10%ile", - "range": true, - "refId": "D" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() acscs_worker_nodes) / sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() acscs_worker_nodes) / 1024 / 1024 / 1024", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" } ], - "title": "Tenant Namespace CPU Usage", - "transformations": [], - "type": "timeseries" + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 4, + "w": 6, + "x": 9, + "y": 34 + }, + "id": 79, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "How many GB per CPU do nodes have?\n\nThis corresponds to either memory-optimized, cpu-optimized or general purpose GB/CPU ratios.", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "description": "", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "log" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "mode": "thresholds" }, + "decimals": 0, "mappings": [], "thresholds": { "mode": 
"absolute", @@ -517,147 +2798,60 @@ } ] }, - "unit": "bytes" + "unit": "GB/core" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 8, - "x": 8, + "h": 4, + "w": 7, + "x": 15, "y": 34 }, - "id": 29, + "id": 62, "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "right", - "showLegend": true + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "textMode": "value" }, - "pluginVersion": "9.4.7", - "repeatDirection": "v", + "pluginVersion": "10.2.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "editorMode": "builder", + "editorMode": "code", "exemplar": false, - "expr": "quantile(1, sum by(namespace) (container_memory_working_set_bytes{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", - "format": "time_series", - "legendFormat": "100%ile", - "range": true, + "expr": "sum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() kube_node_role{role=\"acscs-infra\"}) / sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() kube_node_role{role=\"acscs-infra\"}) / 1024 / 1024 / 1024", + "instant": true, + "legendFormat": "__auto", + "range": false, "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "builder", - "exemplar": false, - "expr": "quantile(0.95, sum by(namespace) (container_memory_working_set_bytes{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", - "format": "time_series", - "hide": false, - "legendFormat": "95%ile", - "range": true, - "refId": "B" - }, - { - "datasource": { - "type": "prometheus", - "uid": 
"PBFA97CFB590B2093" - }, - "editorMode": "builder", - "exemplar": false, - "expr": "quantile(0.5, sum by(namespace) (container_memory_working_set_bytes{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", - "format": "time_series", - "hide": false, - "legendFormat": "50%ile", - "range": true, - "refId": "C" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "builder", - "exemplar": false, - "expr": "quantile(0.1, sum by(namespace) (container_memory_working_set_bytes{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", - "format": "time_series", - "hide": false, - "legendFormat": "10%ile", - "range": true, - "refId": "D" } ], - "title": "Tenant Namespace Memory Usage", - "transformations": [], - "type": "timeseries" - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 42 - }, - "id": 31, - "panels": [], - "title": "Question 3: Which tenants are in the \"big head\" and need XL overrides?", - "type": "row" + "type": "stat" }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "description": "", "fieldConfig": { "defaults": { "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "log" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "mode": "thresholds" }, + "decimals": 0, "mappings": [], "thresholds": { "mode": "absolute", @@ -671,91 
+2865,83 @@ "value": 80 } ] - } + }, + "unit": "GB/core" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 43 + "h": 4, + "w": 7, + "x": 2, + "y": 38 }, - "id": 46, + "id": 22, "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "right", - "showLegend": true + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "textMode": "value" }, - "pluginVersion": "9.4.7", - "repeatDirection": "v", + "pluginVersion": "10.2.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "editorMode": "builder", + "editorMode": "code", "exemplar": false, - "expr": "topk(5, sum by(namespace) (node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", - "format": "time_series", + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() acscs_worker_nodes) / sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() acscs_worker_nodes) / 1024 / 1024 / 1024", + "instant": true, "legendFormat": "__auto", - "range": true, + "range": false, "refId": "A" } ], - "title": "Tenant Namespace CPU Usage", - "transformations": [], - "type": "timeseries" + "type": "stat" }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - 
"tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "log" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } + "gridPos": { + "h": 4, + "w": 6, + "x": 9, + "y": 38 + }, + "id": 80, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "How many GB / CPU are actually used\n\nThis can help to choose a more appropriate node type, either memory-optimized, cpu-optimized or general-purpose", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" }, "mappings": [], "thresholds": { @@ -771,31 +2957,32 @@ } ] }, - "unit": "bytes" + "unit": "GB/core" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 43 + "h": 4, + "w": 7, + "x": 15, + "y": 38 }, - "id": 47, + "id": 61, "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "right", - "showLegend": true + "colorMode": "none", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "textMode": "value" }, - "pluginVersion": "9.4.7", - "repeatDirection": "v", + "pluginVersion": "10.2.0", "targets": [ { "datasource": { @@ -804,30 +2991,38 @@ }, "editorMode": "code", "exemplar": false, - "expr": "topk(5, sum by(namespace) (avg_over_time(container_memory_working_set_bytes{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}[6h])))", - "format": "time_series", + "expr": 
"sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() kube_node_role{role=\"acscs-infra\"}) / sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() kube_node_role{role=\"acscs-infra\"}) / 1024 / 1024 / 1024", "instant": true, "legendFormat": "__auto", - "range": true, + "range": false, "refId": "A" } ], - "title": "Tenant Namespace Memory Usage", - "transformations": [], - "type": "timeseries" + "type": "stat" }, { - "collapsed": true, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 51 + "h": 3, + "w": 20, + "x": 2, + "y": 42 }, - "id": 37, - "panels": [], - "title": "Question 4: What is our overall worker node CPU to Memory Profile? (Pick a worker node type)", - "type": "row" + "id": 125, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "", + "mode": "html" + }, + "pluginVersion": "10.2.0", + "type": "text" }, { "datasource": { @@ -836,21 +3031,21 @@ }, "gridPos": { "h": 3, - "w": 5, - "x": 0, - "y": 52 + "w": 20, + "x": 2, + "y": 45 }, - "id": 48, + "id": 128, "options": { "code": { "language": "plaintext", "showLineNumbers": false, "showMiniMap": false }, - "content": "# Worker Nodes Resources\n\nThis dashboard shows the resource consumption exclusively for **worker nodes**", + "content": "# Fine-Grained Adjustments\n\nFind workloads that are over or under-provisioned", "mode": "markdown" }, - "pluginVersion": "9.4.7", + "pluginVersion": "10.2.0", "type": "text" }, { @@ -863,6 +3058,13 @@ "color": { "mode": "thresholds" }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, "mappings": [], "thresholds": { "mode": "absolute", @@ -877,32 +3079,85 @@ } ] }, - "unit": "nodes" + "unit": "decbytes" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": 
"byName", + "options": "CPU Usage / Request" + }, + "properties": [ + { + "id": "unit", + "value": "percentunit" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Memory Usage / Request" + }, + "properties": [ + { + "id": "unit", + "value": "percentunit" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "CPU Usage" + }, + "properties": [ + { + "id": "unit", + "value": "core" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "CPU Requests" + }, + "properties": [ + { + "id": "unit", + "value": "core" + } + ] + } + ] }, "gridPos": { - "h": 3, - "w": 2, - "x": 5, - "y": 52 + "h": 13, + "w": 10, + "x": 2, + "y": 48 }, - "id": 5, + "id": 127, "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], + "cellHeight": "sm", + "footer": { + "countRows": false, "fields": "", - "values": false + "reducer": [ + "sum" + ], + "show": false }, - "textMode": "auto" + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "Memory Usage / Request" + } + ] }, - "pluginVersion": "9.4.7", + "pluginVersion": "10.2.0", "targets": [ { "datasource": { @@ -910,14 +3165,137 @@ "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "sum(acscs_worker_nodes)", - "legendFormat": "__auto", - "range": true, + "exemplar": false, + "expr": "sum(container_memory_usage_bytes{container!=\"\",container!=\"POD\",job=\"kubelet\"} * on (node) group_left() kube_node_role{role=\"acscs-worker\"} * on (namespace, pod) group_left(workload) namespace_workload_pod:kube_pod_owner:relabel) by (workload)", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "{{workload}}", + "range": false, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": 
"sum(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",container!~\"|POD\"} * on (node) group_left() kube_node_role{role=\"acscs-worker\"} * on (namespace, pod) group_left(workload) namespace_workload_pod:kube_pod_owner:relabel) by (workload)", + "format": "table", + "instant": true, + "legendFormat": "{{workload}}", + "range": false, "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!~\"|POD\"} * on (node) group_left() kube_node_role{role=\"acscs-worker\"} * on (namespace, pod) group_left(workload) namespace_workload_pod:kube_pod_owner:relabel) by (workload)", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "{{workload}}", + "range": false, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",container!~\"|POD\"} * on (node) group_left() kube_node_role{role=\"acscs-worker\"} * on (namespace, pod) group_left(workload) namespace_workload_pod:kube_pod_owner:relabel) by (workload)", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "{{workload}}", + "range": false, + "refId": "D" } ], - "title": "Worker nodes", - "type": "stat" + "transformations": [ + { + "id": "joinByField", + "options": { + "byField": "workload", + "mode": "inner" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Time 1": true, + "Time 2": true, + "Time 3": true, + "Time 4": true + }, + "indexByName": {}, + "renameByName": { + "Time 1": "", + "Time 2": "", + "Value #A": "Memory Requests", + "Value #B": "Memory Usage", + "Value #C": "CPU Usage", + "Value #D": "CPU Requests" + } + } + }, + { + "id": "calculateField", + "options": { + 
"alias": "Memory Usage / Request", + "binary": { + "left": "Memory Usage", + "operator": "/", + "right": "Memory Requests" + }, + "mode": "binary", + "reduce": { + "reducer": "sum" + } + } + }, + { + "id": "calculateField", + "options": { + "alias": "CPU Usage / Request", + "binary": { + "left": "CPU Usage", + "operator": "/", + "right": "CPU Requests" + }, + "mode": "binary", + "reduce": { + "reducer": "sum" + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "CPU Requests": 5, + "CPU Usage": 4, + "CPU Usage / Request": 6, + "Memory Requests": 2, + "Memory Usage": 1, + "Memory Usage / Request": 3, + "workload": 0 + }, + "renameByName": {} + } + } + ], + "type": "table" }, { "datasource": { @@ -929,6 +3307,13 @@ "color": { "mode": "thresholds" }, + "custom": { + "align": "auto", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, "mappings": [], "thresholds": { "mode": "absolute", @@ -943,33 +3328,101 @@ } ] }, - "unit": "GB/core" + "unit": "decbytes" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "CPU Usage / Request" + }, + "properties": [ + { + "id": "unit", + "value": "percentunit" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Memory Usage / Request" + }, + "properties": [ + { + "id": "unit", + "value": "percentunit" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "CPU Usage" + }, + "properties": [ + { + "id": "unit", + "value": "core" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "CPU Requests" + }, + "properties": [ + { + "id": "unit", + "value": "core" + } + ] + } + ] }, "gridPos": { - "h": 3, - "w": 3, - "x": 7, - "y": 52 + "h": 13, + "w": 10, + "x": 12, + "y": 48 }, - "id": 22, + "id": 129, "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], + "cellHeight": "sm", + "footer": { + "countRows": false, 
"fields": "", - "values": false + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [ + { + "desc": true, + "displayName": "Memory Usage" + } + ] + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(container_memory_usage_bytes{container!=\"\",container!=\"POD\",job=\"kubelet\"} * on (node) group_left() kube_node_role{role=\"acscs-infra\"} * on (namespace, pod) group_left(workload) namespace_workload_pod:kube_pod_owner:relabel) by (workload)", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "{{workload}}", + "range": false, + "refId": "B" }, - "textMode": "auto" - }, - "pluginVersion": "9.4.7", - "targets": [ { "datasource": { "type": "prometheus", @@ -977,93 +3430,159 @@ }, "editorMode": "code", "exemplar": false, - "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() acscs_worker_nodes) / sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() acscs_worker_nodes) / 1024 / 1024 / 1024", - "instant": false, - "legendFormat": "__auto", - "range": true, + "expr": "sum(kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",container!=\"POD\",container!=\"\"} * on (node) group_left() kube_node_role{role=\"acscs-infra\"} * on (namespace, pod) group_left(workload) namespace_workload_pod:kube_pod_owner:relabel) by (workload)", + "format": "table", + "instant": true, + "legendFormat": "{{workload}}", + "range": false, "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container!=\"POD\",container!=\"\"} * on (node) group_left() kube_node_role{role=\"acscs-infra\"} * on (namespace, 
pod) group_left(workload) namespace_workload_pod:kube_pod_owner:relabel) by (workload)", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "{{workload}}", + "range": false, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",container!=\"POD\",container!=\"\"} * on (node) group_left() kube_node_role{role=\"acscs-infra\"} * on (namespace, pod) group_left(workload) namespace_workload_pod:kube_pod_owner:relabel) by (workload)", + "format": "table", + "hide": false, + "instant": true, + "legendFormat": "{{workload}}", + "range": false, + "refId": "D" } ], - "title": "Used GB/CPU ratio", - "type": "stat" + "transformations": [ + { + "id": "joinByField", + "options": { + "byField": "workload", + "mode": "inner" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Time 1": true, + "Time 2": true, + "Time 3": true, + "Time 4": true + }, + "indexByName": {}, + "renameByName": { + "Time 1": "", + "Time 2": "", + "Value #A": "Memory Requests", + "Value #B": "Memory Usage", + "Value #C": "CPU Usage", + "Value #D": "CPU Requests" + } + } + }, + { + "id": "calculateField", + "options": { + "alias": "Memory Usage / Request", + "binary": { + "left": "Memory Usage", + "operator": "/", + "right": "Memory Requests" + }, + "mode": "binary", + "reduce": { + "reducer": "sum" + } + } + }, + { + "id": "calculateField", + "options": { + "alias": "CPU Usage / Request", + "binary": { + "left": "CPU Usage", + "operator": "/", + "right": "CPU Requests" + }, + "mode": "binary", + "reduce": { + "reducer": "sum" + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "CPU Requests": 5, + "CPU Usage": 4, + "CPU Usage / Request": 6, + "Memory Requests": 2, + "Memory Usage": 1, + "Memory Usage / Request": 3, + "workload": 0 + }, 
+ "renameByName": {} + } + } + ], + "type": "table" }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "GB/core" - }, - "overrides": [] - }, "gridPos": { "h": 3, - "w": 3, - "x": 10, - "y": 52 + "w": 20, + "x": 2, + "y": 61 }, - "id": 21, + "id": 126, "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false }, - "textMode": "auto" + "content": "", + "mode": "html" }, - "pluginVersion": "9.4.7", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "sum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() acscs_worker_nodes) / sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() acscs_worker_nodes) / 1024 / 1024 / 1024", - "legendFormat": "__auto", - "range": true, - "refId": "A" - } - ], - "title": "Nodes GB/CPU ratio", - "type": "stat" + "pluginVersion": "10.2.0", + "type": "text" }, { - "collapsed": true, + "collapsed": false, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 55 + "y": 64 }, - "id": 39, + "id": 2, "panels": [], - "title": "Question 5: What is our overall cluster worker node utilization? 
(Scale node count up/down)", + "repeat": "TenantContainers", + "repeatDirection": "h", + "title": "How big is ${TenantContainers}", "type": "row" }, { @@ -1072,22 +3591,22 @@ "uid": "PBFA97CFB590B2093" }, "gridPos": { - "h": 3, - "w": 5, - "x": 0, - "y": 56 + "h": 8, + "w": 2, + "x": 4, + "y": 65 }, - "id": 20, + "id": 136, "options": { "code": { "language": "plaintext", "showLineNumbers": false, "showMiniMap": false }, - "content": "# Worker Nodes Resources\n\nThis dashboard shows the resource consumption exclusively for **worker nodes**", - "mode": "markdown" + "content": "\n \n
\n average ${TenantContainers}\n
", + "mode": "html" }, - "pluginVersion": "9.4.7", + "pluginVersion": "10.2.0", "type": "text" }, { @@ -1095,144 +3614,48 @@ "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "decbytes" - }, - "overrides": [] - }, "gridPos": { - "h": 3, + "h": 4, "w": 2, - "x": 5, - "y": 56 + "x": 6, + "y": 65 }, - "id": 2, + "id": 165, "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "/^memory$/", - "values": false + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false }, - "textMode": "auto" + "content": "\n \n
\n memory\n
", + "mode": "markdown" }, - "pluginVersion": "9.4.7", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "exemplar": false, - "expr": "sum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() acscs_worker_nodes)\n", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "legendFormat": "memory", - "range": true, - "refId": "memory" - } - ], - "title": "Total Memory", - "type": "stat" + "pluginVersion": "10.2.0", + "type": "text" }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "decbytes" - }, - "overrides": [] - }, "gridPos": { - "h": 3, + "h": 2, "w": 2, - "x": 7, - "y": 56 + "x": 8, + "y": 65 }, - "id": 9, + "id": 166, "options": { - "colorMode": "none", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "/^memory$/", - "values": false + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false }, - "textMode": "auto" + "content": "
used
", + "mode": "markdown" }, - "pluginVersion": "9.4.7", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "exemplar": false, - "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() acscs_worker_nodes)", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "legendFormat": "memory", - "range": true, - "refId": "memory" - } - ], - "title": "Used Memory", - "type": "stat" + "pluginVersion": "10.2.0", + "type": "text" }, { "datasource": { @@ -1259,17 +3682,17 @@ } ] }, - "unit": "percentunit" + "unit": "decbytes" }, "overrides": [] }, "gridPos": { - "h": 3, + "h": 2, "w": 2, - "x": 9, - "y": 56 + "x": 10, + "y": 65 }, - "id": 15, + "id": 149, "options": { "colorMode": "none", "graphMode": "none", @@ -1277,33 +3700,36 @@ "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "/^memory$/", "values": false }, - "textMode": "auto" + "textMode": "value" }, - "pluginVersion": "9.4.7", + "pluginVersion": "10.2.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "disableTextWrap": false, "editorMode": "code", "exemplar": false, - "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() acscs_worker_nodes) / \nsum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() acscs_worker_nodes)\n ", + "expr": "quantile(0.5, node_namespace_pod_container:container_memory_working_set_bytes{container=\"$TenantContainers\",namespace=~\"rhacs-.{20}\"})", "format": "time_series", + "fullMetaSearch": false, "hide": false, - "instant": false, + "includeNullMetadata": true, + "instant": true, "interval": "", "legendFormat": "memory", - "range": true, - "refId": "memory" + "range": false, + "refId": "memory", + "useBackend": false } ], - "title": "Used Memory", "type": "stat" 
}, { @@ -1318,6 +3744,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1331,11 +3758,13 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { - "type": "linear" + "log": 2, + "type": "log" }, "showPoints": "auto", "spanNulls": false, @@ -1361,62 +3790,29 @@ } ] }, - "unit": "percentunit" + "unit": "bytes" }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "memory %" - }, - "properties": [ - { - "id": "custom.scaleDistribution", - "value": { - "log": 2, - "type": "log" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "memory abs" - }, - "properties": [ - { - "id": "custom.axisPlacement", - "value": "right" - }, - { - "id": "unit", - "value": "decbytes" - } - ] - } - ] + "overrides": [] }, "gridPos": { - "h": 8, - "w": 11, - "x": 0, - "y": 59 + "h": 4, + "w": 8, + "x": 12, + "y": 65 }, - "id": 18, + "id": 5, "options": { "legend": { "calcs": [], "displayMode": "list", - "placement": "bottom", - "showLegend": false + "placement": "right", + "showLegend": true }, "tooltip": { - "mode": "single", - "sort": "none" + "mode": "multi", + "sort": "desc" } }, - "pluginVersion": "9.4.7", "targets": [ { "datasource": { @@ -1424,34 +3820,61 @@ "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "exemplar": false, - "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() acscs_worker_nodes) / \nsum(kube_node_status_capacity{resource=\"memory\", unit=\"byte\"} * on(node) group_left() acscs_worker_nodes)\n ", - "format": "time_series", + "expr": "quantile(1, container_memory_working_set_bytes{container=\"$TenantContainers\",namespace=~\"rhacs-.{20}\"})", + "legendFormat": "p100", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, 
+ "disableTextWrap": false, + "editorMode": "code", + "expr": "quantile(0.95, container_memory_working_set_bytes{container=\"$TenantContainers\",namespace=~\"rhacs-.{20}\"})", + "fullMetaSearch": false, "hide": false, - "instant": false, - "interval": "", - "legendFormat": "memory %", + "includeNullMetadata": true, + "legendFormat": "p95", "range": true, - "refId": "memory %" + "refId": "B", + "useBackend": false }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "disableTextWrap": false, "editorMode": "code", - "exemplar": false, - "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{container!=\"\"} * on (node) group_left() acscs_worker_nodes) \n ", - "format": "time_series", + "expr": "quantile(0.5, container_memory_working_set_bytes{container=\"$TenantContainers\",namespace=~\"rhacs-.{20}\"})", + "fullMetaSearch": false, "hide": false, - "instant": false, - "interval": "", - "legendFormat": "memory abs", + "includeNullMetadata": true, + "legendFormat": "p50", "range": true, - "refId": "memory absolute" + "refId": "C", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "quantile(0.10, container_memory_working_set_bytes{container=\"$TenantContainers\",namespace=~\"rhacs-.{20}\"})", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "legendFormat": "p10", + "range": true, + "refId": "D", + "useBackend": false } ], - "title": "Used Memory", + "title": "Tenant $TenantContainers Container Memory Usage", "type": "timeseries" }, { @@ -1460,22 +3883,22 @@ "uid": "PBFA97CFB590B2093" }, "gridPos": { - "h": 3, - "w": 5, - "x": 0, + "h": 2, + "w": 2, + "x": 8, "y": 67 }, - "id": 49, + "id": 167, "options": { "code": { "language": "plaintext", "showLineNumbers": false, "showMiniMap": false }, - "content": "# Worker Nodes Resources\n\nThis dashboard shows the resource consumption 
exclusively for **worker nodes**", + "content": "
requested
", "mode": "markdown" }, - "pluginVersion": "9.4.7", + "pluginVersion": "10.2.0", "type": "text" }, { @@ -1503,17 +3926,17 @@ } ] }, - "unit": "cores" + "unit": "decbytes" }, "overrides": [] }, "gridPos": { - "h": 3, + "h": 2, "w": 2, - "x": 5, + "x": 10, "y": 67 }, - "id": 3, + "id": 168, "options": { "colorMode": "none", "graphMode": "none", @@ -1521,35 +3944,86 @@ "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], - "fields": "/^cpu$/", + "fields": "/^memory$/", "values": false }, - "textMode": "auto" + "textMode": "value" }, - "pluginVersion": "9.4.7", + "pluginVersion": "10.2.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "disableTextWrap": false, "editorMode": "code", "exemplar": false, - "expr": "sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() acscs_worker_nodes)\n", + "expr": "quantile(0.5, kube_pod_container_resource_requests{resource=\"memory\",unit=\"byte\",container=\"$TenantContainers\",namespace=~\"rhacs-.{20}\"})", "format": "time_series", + "fullMetaSearch": false, "hide": false, - "instant": false, + "includeNullMetadata": true, + "instant": true, "interval": "", - "legendFormat": "cpu", - "range": true, - "refId": "cpu" + "legendFormat": "memory", + "range": false, + "refId": "memory", + "useBackend": false } ], - "title": "Total CPU", "type": "stat" }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 4, + "w": 2, + "x": 6, + "y": 69 + }, + "id": 196, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "\n \n
\n cpu\n
", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 8, + "y": 69 + }, + "id": 197, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "
used
", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" + }, { "datasource": { "type": "prometheus", @@ -1575,17 +4049,17 @@ } ] }, - "unit": "cores" + "unit": "core" }, "overrides": [] }, "gridPos": { - "h": 3, + "h": 2, "w": 2, - "x": 7, - "y": 67 + "x": 10, + "y": 69 }, - "id": 10, + "id": 198, "options": { "colorMode": "none", "graphMode": "none", @@ -1593,34 +4067,198 @@ "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "/^cpu$/", "values": false }, - "textMode": "auto" - }, - "pluginVersion": "9.4.7", - "targets": [ + "textMode": "value" + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "disableTextWrap": false, + "editorMode": "code", + "exemplar": false, + "expr": "quantile(0.5, node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container=\"$TenantContainers\",namespace=~\"rhacs-.{20}\"})", + "format": "time_series", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": true, + "instant": true, + "interval": "", + "legendFormat": "cpu", + "range": false, + "refId": "memory", + "useBackend": false + } + ], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "log" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": 
"off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "cpu" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 8, + "x": 12, + "y": 69 + }, + "id": 4, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "quantile(1, node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container=\"$TenantContainers\",namespace=~\"rhacs-.{20}\"})", + "legendFormat": "p100", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "quantile(0.95, node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container=\"$TenantContainers\",namespace=~\"rhacs-.{20}\"})", + "hide": false, + "legendFormat": "p95", + "range": true, + "refId": "B" + }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "disableTextWrap": false, "editorMode": "code", - "exemplar": false, - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() acscs_worker_nodes)", - "format": "time_series", + "expr": "quantile(0.5, node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container=\"$TenantContainers\",namespace=~\"rhacs-.{20}\"})", + "fullMetaSearch": false, "hide": false, - "instant": false, - "interval": "", - "legendFormat": "cpu", + "includeNullMetadata": true, + "legendFormat": "p50", "range": true, - "refId": "cpu" + "refId": "C", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "quantile(0.1, 
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{container=\"$TenantContainers\",namespace=~\"rhacs-.{20}\"})", + "hide": false, + "legendFormat": "p10", + "range": true, + "refId": "D" } ], - "title": "Used CPU", - "type": "stat" + "title": "Tenant $TenantContainers Container CPU Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "gridPos": { + "h": 2, + "w": 2, + "x": 8, + "y": 71 + }, + "id": 235, + "options": { + "code": { + "language": "plaintext", + "showLineNumbers": false, + "showMiniMap": false + }, + "content": "
requested
", + "mode": "markdown" + }, + "pluginVersion": "10.2.0", + "type": "text" }, { "datasource": { @@ -1647,17 +4285,17 @@ } ] }, - "unit": "percentunit" + "unit": "core" }, "overrides": [] }, "gridPos": { - "h": 3, + "h": 2, "w": 2, - "x": 9, - "y": 67 + "x": 10, + "y": 71 }, - "id": 16, + "id": 236, "options": { "colorMode": "none", "graphMode": "none", @@ -1665,184 +4303,557 @@ "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "/^cpu$/", "values": false }, - "textMode": "auto" + "textMode": "value" }, - "pluginVersion": "9.4.7", + "pluginVersion": "10.2.0", "targets": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, + "disableTextWrap": false, "editorMode": "code", "exemplar": false, - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() acscs_worker_nodes) / sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() acscs_worker_nodes)\n", + "expr": "quantile(0.5, kube_pod_container_resource_requests{resource=\"cpu\",unit=\"core\",container=\"$TenantContainers\",namespace=~\"rhacs-.{20}\"})", "format": "time_series", + "fullMetaSearch": false, "hide": false, - "instant": false, + "includeNullMetadata": true, + "instant": true, "interval": "", "legendFormat": "cpu", - "range": true, - "refId": "cpu" + "range": false, + "refId": "memory", + "useBackend": false } ], - "title": "Used CPU", "type": "stat" }, { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 100 }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "id": 17, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - 
"fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "log" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 4, + "y": 6 + }, + "id": 28, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right", + "showLegend": true }, - "thresholdsStyle": { - "mode": "off" + "tooltip": { + "mode": "single", + "sort": "none" } }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + "pluginVersion": "9.4.7", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "cpu absolute" + "editorMode": "builder", + "exemplar": false, + "expr": "quantile(1, sum by(namespace) 
(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", + "format": "time_series", + "legendFormat": "100%ile", + "range": true, + "refId": "A" }, - "properties": [ - { - "id": "custom.axisPlacement", - "value": "right" + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" }, - { - "id": "unit", - "value": "cores" - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "cpu %" + "editorMode": "builder", + "exemplar": false, + "expr": "quantile(.95, sum by(namespace) (node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", + "format": "time_series", + "hide": false, + "legendFormat": "95%ile", + "range": true, + "refId": "B" }, - "properties": [ - { - "id": "custom.scaleDistribution", - "value": { + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "builder", + "exemplar": false, + "expr": "quantile(0.50, sum by(namespace) (node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", + "format": "time_series", + "hide": false, + "legendFormat": "50%ile", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "builder", + "exemplar": false, + "expr": "quantile(0.1, sum by(namespace) (node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", + "format": "time_series", + "hide": false, + "legendFormat": "10%ile", + "range": true, + "refId": 
"D" + } + ], + "title": "Tenant Namespace CPU Usage", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { "log": 2, "type": "log" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } - } - ] - } - ] - }, + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 12, + "y": 6 + }, + "id": 29, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "9.4.7", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "builder", + "exemplar": false, + "expr": "quantile(1, sum by(namespace) (container_memory_working_set_bytes{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", + "format": "time_series", + "legendFormat": "100%ile", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "builder", + "exemplar": false, + "expr": 
"quantile(0.95, sum by(namespace) (container_memory_working_set_bytes{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", + "format": "time_series", + "hide": false, + "legendFormat": "95%ile", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "builder", + "exemplar": false, + "expr": "quantile(0.5, sum by(namespace) (container_memory_working_set_bytes{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", + "format": "time_series", + "hide": false, + "legendFormat": "50%ile", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "builder", + "exemplar": false, + "expr": "quantile(0.1, sum by(namespace) (container_memory_working_set_bytes{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", + "format": "time_series", + "hide": false, + "legendFormat": "10%ile", + "range": true, + "refId": "D" + } + ], + "title": "Tenant Namespace Memory Usage", + "transformations": [], + "type": "timeseries" + } + ], + "title": "Question 2: How big are tenant namespaces?", + "type": "row" + }, + { + "collapsed": true, "gridPos": { - "h": 8, - "w": 11, + "h": 1, + "w": 24, "x": 0, - "y": 70 + "y": 101 }, - "id": 17, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": false - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "9.4.7", - "targets": [ + "id": 31, + "panels": [ { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "editorMode": "code", - "exemplar": false, - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() 
acscs_worker_nodes) / sum(kube_node_status_capacity{resource=\"cpu\", unit=\"core\"} * on(node) group_left() acscs_worker_nodes)\n", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "legendFormat": "cpu %", - "range": true, - "refId": "cpu %" + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "log" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 4, + "y": 7 + }, + "id": 46, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "9.4.7", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "builder", + "exemplar": false, + "expr": "topk(5, sum by(namespace) (node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}))", + "format": "time_series", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Tenant Namespace CPU Usage", + 
"transformations": [], + "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "editorMode": "code", - "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate * on (node) group_left() acscs_worker_nodes)\n", - "hide": false, - "legendFormat": "cpu absolute", - "range": true, - "refId": "cpu absolute" + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "log" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 12, + "y": 7 + }, + "id": 47, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "9.4.7", + "repeatDirection": "v", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "exemplar": false, + "expr": "topk(5, sum by(namespace) (avg_over_time(container_memory_working_set_bytes{namespace=~\"rhacs-.*\", namespace!~\"rhacs-(audit-logs|cloudwatch|observability|probe|secured-cluster)\", container!~\"POD|\"}[6h])))", + "format": "time_series", + "instant": true, + 
"legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Tenant Namespace Memory Usage", + "transformations": [], + "type": "timeseries" } ], - "title": "Used CPU", - "type": "timeseries" + "title": "Question 3: Which tenants are in the \"big head\" and need XL overrides?", + "type": "row" } ], "refresh": "", "revision": 1, "schemaVersion": 38, - "style": "dark", "tags": [], "templating": { "list": [ From 463a65740ebc0cc6ba51f37e8d5f93136909b0ec Mon Sep 17 00:00:00 2001 From: Ludovic Cleroux Date: Fri, 1 Dec 2023 17:02:08 +0100 Subject: [PATCH 08/13] ROX-21046: Alerts for tenant nearing OOM --- resources/prometheus/prometheus-rules.yaml | 32 ++++++++++++ ...CSTenantWorkloadMemoryUtilizationHigh.yaml | 52 +++++++++++++++++++ 2 files changed, 84 insertions(+) create mode 100644 resources/prometheus/unit_tests/RHACSTenantWorkloadMemoryUtilizationHigh.yaml diff --git a/resources/prometheus/prometheus-rules.yaml b/resources/prometheus/prometheus-rules.yaml index f93b475f..985d4662 100644 --- a/resources/prometheus/prometheus-rules.yaml +++ b/resources/prometheus/prometheus-rules.yaml @@ -159,6 +159,38 @@ spec: description: "Fleetshard synchronizer manages `{{ $value }}` centrals. The number of Centrals should always be larger than zero in a working system. If it drops to or below zero, fleetshard synchronizer is assumed to be in a failed state." 
sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-managed-service-runbooks/blob/master/sops/dp-007-fleetshard-sync-reconciliation-error.md" + - name: tenant-resources + rules: + - expr: | + sum(container_memory_max_usage_bytes{namespace=~"rhacs-.{20}",container!="POD",container!=""}) by (namespace, container, pod) + record: rhacs_tenants:namespace:pod:container:max_memory_usage_bytes + - expr: | + sum(container_spec_memory_limit_bytes{namespace=~"rhacs-.{20}",container!="POD",container!=""}) by (namespace, container, pod) + record: rhacs_tenants:namespace:pod:container:memory_limit_bytes + - expr: | + rhacs_tenants:namespace:pod:container:max_memory_usage_bytes / rhacs_tenants:namespace:pod:container:memory_limit_bytes + record: rhacs_tenants:namespace:pod:container:max_memory_usage_ratio + - alert: RHACSTenantWorkloadMemoryUtilizationHigh + expr: | + rhacs_tenants:namespace:pod:container:max_memory_usage_ratio >= 0.75 + for: 5m + labels: + severity: warning + annotations: + summary: tenant '{{ $labels.namespace }}' container '{{ $labels.container }}' in pod '{{ $labels.pod }}' is reaching its memory limit. + description: tenant '{{ $labels.namespace }}' container '{{ $labels.container }}' in pod '{{ $labels.pod }}' reached {{ $value | humanizePercentage }} of its memory limit and is at risk of being OOM killed. + sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-managed-service-runbooks/blob/master/sops/dp-038-tenant-workload-memory-high.md" + - alert: RHACSTenantWorkloadMemoryUtilizationCritical + expr: | + rhacs_tenants:namespace:pod:container:max_memory_usage_ratio >= 0.9 + for: 5m + labels: + severity: critical + annotations: + summary: tenant '{{ $labels.namespace }}' container '{{ $labels.container }}' in pod '{{ $labels.pod }}' is critically reaching its memory limit. 
+ description: tenant '{{ $labels.namespace }}' container '{{ $labels.container }}' in pod '{{ $labels.pod }}' reached {{ $value | humanizePercentage }} of its memory limit and is at high risk of being OOM killed. + sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-managed-service-runbooks/blob/master/sops/dp-038-tenant-workload-memory-high.md" + - name: rhacs-operator rules: - expr: | diff --git a/resources/prometheus/unit_tests/RHACSTenantWorkloadMemoryUtilizationHigh.yaml b/resources/prometheus/unit_tests/RHACSTenantWorkloadMemoryUtilizationHigh.yaml new file mode 100644 index 00000000..3215c4f4 --- /dev/null +++ b/resources/prometheus/unit_tests/RHACSTenantWorkloadMemoryUtilizationHigh.yaml @@ -0,0 +1,52 @@ +rule_files: + - /tmp/prometheus-rules-test.yaml + +evaluation_interval: 1m + +tests: + - interval: 1m + input_series: + - series: container_memory_max_usage_bytes{namespace="rhacs-aaaaaaaaaaaaaaaaaaaa", pod="mypod", container="container-1"} + values: "50+0x10 75+0x10" + - series: container_spec_memory_limit_bytes{namespace="rhacs-aaaaaaaaaaaaaaaaaaaa",pod="mypod", container="container-1"} + values: "100+0x20" + alert_rule_test: + - eval_time: 1m + alertname: RHACSTenantWorkloadMemoryUtilizationHigh + exp_alerts: [] + - eval_time: 16m + alertname: RHACSTenantWorkloadMemoryUtilizationHigh + exp_alerts: + - exp_labels: + alertname: RHACSTenantWorkloadMemoryUtilizationHigh + severity: warning + namespace: rhacs-aaaaaaaaaaaaaaaaaaaa + pod: mypod + container: container-1 + exp_annotations: + summary: tenant 'rhacs-aaaaaaaaaaaaaaaaaaaa' container 'container-1' in pod 'mypod' is reaching its memory limit. + description: tenant 'rhacs-aaaaaaaaaaaaaaaaaaaa' container 'container-1' in pod 'mypod' reached 75% of its memory limit and is at risk of being OOM killed. 
+ sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-managed-service-runbooks/blob/master/sops/dp-038-tenant-workload-memory-high.md" + - interval: 1m + input_series: + - series: container_memory_max_usage_bytes{namespace="rhacs-aaaaaaaaaaaaaaaaaaaa",pod="mypod",container="container-1"} + values: "50+0x10 90+0x10" + - series: container_spec_memory_limit_bytes{namespace="rhacs-aaaaaaaaaaaaaaaaaaaa",pod="mypod",container="container-1"} + values: "100+0x20" + alert_rule_test: + - eval_time: 1m + alertname: RHACSTenantWorkloadMemoryUtilizationCritical + exp_alerts: [] + - eval_time: 17m + alertname: RHACSTenantWorkloadMemoryUtilizationCritical + exp_alerts: + - exp_labels: + alertname: RHACSTenantWorkloadMemoryUtilizationCritical + severity: critical + namespace: rhacs-aaaaaaaaaaaaaaaaaaaa + pod: mypod + container: container-1 + exp_annotations: + description: tenant 'rhacs-aaaaaaaaaaaaaaaaaaaa' container 'container-1' in pod 'mypod' reached 90% of its memory limit and is at high risk of being OOM killed. + summary: tenant 'rhacs-aaaaaaaaaaaaaaaaaaaa' container 'container-1' in pod 'mypod' is critically reaching its memory limit. 
+ sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-managed-service-runbooks/blob/master/sops/dp-038-tenant-workload-memory-high.md" From a1d4093ac0827c45bf0271d1802ca9302d4e064c Mon Sep 17 00:00:00 2001 From: Ludovic Cleroux Date: Tue, 5 Dec 2023 13:53:50 +0100 Subject: [PATCH 09/13] ROX-21046: Alerts for tenant nearing OOM --- resources/prometheus/prometheus-rules.yaml | 2 +- .../unit_tests/RHACSTenantWorkloadMemoryUtilizationHigh.yaml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/resources/prometheus/prometheus-rules.yaml b/resources/prometheus/prometheus-rules.yaml index 985d4662..49ddc557 100644 --- a/resources/prometheus/prometheus-rules.yaml +++ b/resources/prometheus/prometheus-rules.yaml @@ -162,7 +162,7 @@ spec: - name: tenant-resources rules: - expr: | - sum(container_memory_max_usage_bytes{namespace=~"rhacs-.{20}",container!="POD",container!=""}) by (namespace, container, pod) + sum(container_memory_working_set_bytes{namespace=~"rhacs-.{20}",container!="POD",container!=""}) by (namespace, container, pod) record: rhacs_tenants:namespace:pod:container:max_memory_usage_bytes - expr: | sum(container_spec_memory_limit_bytes{namespace=~"rhacs-.{20}",container!="POD",container!=""}) by (namespace, container, pod) diff --git a/resources/prometheus/unit_tests/RHACSTenantWorkloadMemoryUtilizationHigh.yaml b/resources/prometheus/unit_tests/RHACSTenantWorkloadMemoryUtilizationHigh.yaml index 3215c4f4..245da454 100644 --- a/resources/prometheus/unit_tests/RHACSTenantWorkloadMemoryUtilizationHigh.yaml +++ b/resources/prometheus/unit_tests/RHACSTenantWorkloadMemoryUtilizationHigh.yaml @@ -6,7 +6,7 @@ evaluation_interval: 1m tests: - interval: 1m input_series: - - series: container_memory_max_usage_bytes{namespace="rhacs-aaaaaaaaaaaaaaaaaaaa", pod="mypod", container="container-1"} + - series: container_memory_working_set_bytes{namespace="rhacs-aaaaaaaaaaaaaaaaaaaa", pod="mypod", container="container-1"} values: "50+0x10 75+0x10" - series: 
container_spec_memory_limit_bytes{namespace="rhacs-aaaaaaaaaaaaaaaaaaaa",pod="mypod", container="container-1"} values: "100+0x20" @@ -29,7 +29,7 @@ tests: sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-managed-service-runbooks/blob/master/sops/dp-038-tenant-workload-memory-high.md" - interval: 1m input_series: - - series: container_memory_max_usage_bytes{namespace="rhacs-aaaaaaaaaaaaaaaaaaaa",pod="mypod",container="container-1"} + - series: container_memory_working_set_bytes{namespace="rhacs-aaaaaaaaaaaaaaaaaaaa",pod="mypod",container="container-1"} values: "50+0x10 90+0x10" - series: container_spec_memory_limit_bytes{namespace="rhacs-aaaaaaaaaaaaaaaaaaaa",pod="mypod",container="container-1"} values: "100+0x20" From dc885abd3f88ec89aa88f7ec0181612700c679ec Mon Sep 17 00:00:00 2001 From: Ludovic Cleroux Date: Thu, 7 Dec 2023 13:38:00 +0100 Subject: [PATCH 10/13] ROX-21046: Fix runbook URLS --- resources/prometheus/prometheus-rules.yaml | 8 ++++---- .../RHACSTenantWorkloadMemoryUtilizationHigh.yaml | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/resources/prometheus/prometheus-rules.yaml b/resources/prometheus/prometheus-rules.yaml index 49ddc557..d3ea2b6c 100644 --- a/resources/prometheus/prometheus-rules.yaml +++ b/resources/prometheus/prometheus-rules.yaml @@ -173,23 +173,23 @@ spec: - alert: RHACSTenantWorkloadMemoryUtilizationHigh expr: | rhacs_tenants:namespace:pod:container:max_memory_usage_ratio >= 0.75 - for: 5m + for: 10m labels: severity: warning annotations: summary: tenant '{{ $labels.namespace }}' container '{{ $labels.container }}' in pod '{{ $labels.pod }}' is reaching its memory limit. description: tenant '{{ $labels.namespace }}' container '{{ $labels.container }}' in pod '{{ $labels.pod }}' reached {{ $value | humanizePercentage }} of its memory limit and is at risk of being OOM killed. 
- sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-managed-service-runbooks/blob/master/sops/dp-038-tenant-workload-memory-high.md" + sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-cloud-service/runbooks/-/blob/master/sops/dp-039-tenant-workload-memory-utilization-high.md" - alert: RHACSTenantWorkloadMemoryUtilizationCritical expr: | rhacs_tenants:namespace:pod:container:max_memory_usage_ratio >= 0.9 - for: 5m + for: 10m labels: severity: critical annotations: summary: tenant '{{ $labels.namespace }}' container '{{ $labels.container }}' in pod '{{ $labels.pod }}' is critically reaching its memory limit. description: tenant '{{ $labels.namespace }}' container '{{ $labels.container }}' in pod '{{ $labels.pod }}' reached {{ $value | humanizePercentage }} of its memory limit and is at high risk of being OOM killed. - sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-managed-service-runbooks/blob/master/sops/dp-038-tenant-workload-memory-high.md" + sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-cloud-service/runbooks/-/blob/master/sops/dp-039-tenant-workload-memory-utilization-high.md" - name: rhacs-operator rules: diff --git a/resources/prometheus/unit_tests/RHACSTenantWorkloadMemoryUtilizationHigh.yaml b/resources/prometheus/unit_tests/RHACSTenantWorkloadMemoryUtilizationHigh.yaml index 245da454..d359c65f 100644 --- a/resources/prometheus/unit_tests/RHACSTenantWorkloadMemoryUtilizationHigh.yaml +++ b/resources/prometheus/unit_tests/RHACSTenantWorkloadMemoryUtilizationHigh.yaml @@ -14,7 +14,7 @@ tests: - eval_time: 1m alertname: RHACSTenantWorkloadMemoryUtilizationHigh exp_alerts: [] - - eval_time: 16m + - eval_time: 21m alertname: RHACSTenantWorkloadMemoryUtilizationHigh exp_alerts: - exp_labels: @@ -26,7 +26,7 @@ tests: exp_annotations: summary: tenant 'rhacs-aaaaaaaaaaaaaaaaaaaa' container 'container-1' in pod 'mypod' is reaching its memory limit. 
description: tenant 'rhacs-aaaaaaaaaaaaaaaaaaaa' container 'container-1' in pod 'mypod' reached 75% of its memory limit and is at risk of being OOM killed. - sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-managed-service-runbooks/blob/master/sops/dp-038-tenant-workload-memory-high.md" + sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-cloud-service/runbooks/-/blob/master/sops/dp-039-tenant-workload-memory-utilization-high.md" - interval: 1m input_series: - series: container_memory_working_set_bytes{namespace="rhacs-aaaaaaaaaaaaaaaaaaaa",pod="mypod",container="container-1"} @@ -37,7 +37,7 @@ tests: - eval_time: 1m alertname: RHACSTenantWorkloadMemoryUtilizationCritical exp_alerts: [] - - eval_time: 17m + - eval_time: 21m alertname: RHACSTenantWorkloadMemoryUtilizationCritical exp_alerts: - exp_labels: @@ -49,4 +49,4 @@ tests: exp_annotations: description: tenant 'rhacs-aaaaaaaaaaaaaaaaaaaa' container 'container-1' in pod 'mypod' reached 90% of its memory limit and is at high risk of being OOM killed. summary: tenant 'rhacs-aaaaaaaaaaaaaaaaaaaa' container 'container-1' in pod 'mypod' is critically reaching its memory limit. - sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-managed-service-runbooks/blob/master/sops/dp-038-tenant-workload-memory-high.md" + sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-cloud-service/runbooks/-/blob/master/sops/dp-039-tenant-workload-memory-utilization-high.md" From 352c7782f46765a24af3251fe2c3cccf8537f0d8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 12 Dec 2023 00:16:20 +0100 Subject: [PATCH 11/13] Bump actions/setup-python from 4 to 5 (#176) Bumps [actions/setup-python](https://github.com/actions/setup-python) from 4 to 5. 
- [Release notes](https://github.com/actions/setup-python/releases) - [Commits](https://github.com/actions/setup-python/compare/v4...v5) --- updated-dependencies: - dependency-name: actions/setup-python dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 169a01aa..56eb39de 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -9,7 +9,7 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 - uses: actions/setup-go@v4 with: go-version: ">=1.18.0" From 12d5decdaee817b2e1ae6fed91b3c84d3169bbc8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 12 Dec 2023 00:30:26 +0100 Subject: [PATCH 12/13] Bump actions/setup-python from 4 to 5 (#176) (#175) Bumps [actions/setup-python](https://github.com/actions/setup-python) from 4 to 5. - [Release notes](https://github.com/actions/setup-python/releases) - [Commits](https://github.com/actions/setup-python/compare/v4...v5) --- updated-dependencies: - dependency-name: actions/setup-python dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 56eb39de..0ad82de2 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -10,7 +10,7 @@ jobs: steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 - - uses: actions/setup-go@v4 + - uses: actions/setup-go@v5 with: go-version: ">=1.18.0" - name: Install promtool From 4e0bbbdb70984f99c4972583097b42570671c936 Mon Sep 17 00:00:00 2001 From: Stephan Hesselmann Date: Tue, 12 Dec 2023 22:20:29 +0100 Subject: [PATCH 13/13] chore: make SLI graphs more readable (#177) --- .../rhacs-central-slo-configmap.yaml | 68 ++++++++++++------- .../rhacs-central-slo-dashboard.yaml | 68 ++++++++++++------- .../grafana/sources/rhacs-central-slo.json | 68 ++++++++++++------- 3 files changed, 126 insertions(+), 78 deletions(-) diff --git a/resources/grafana/generated/dashboards/rhacs-central-slo-configmap.yaml b/resources/grafana/generated/dashboards/rhacs-central-slo-configmap.yaml index 83d61768..d4875f02 100644 --- a/resources/grafana/generated/dashboards/rhacs-central-slo-configmap.yaml +++ b/resources/grafana/generated/dashboards/rhacs-central-slo-configmap.yaml @@ -35,7 +35,7 @@ data: "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 7, + "id": 15, "links": [], "liveNow": false, "panels": [ @@ -61,7 +61,7 @@ data: "content": "## Definition\n\nThe availability of Central is defined as a combination of pod ready status and API error rate.\n\n`Availability SLI = Pod Ready SLI * Error Rate SLI`\n\nThe SLO target is 99% availability calculated over 28 day rolling intervals.", "mode": "markdown" }, - "pluginVersion": "9.4.7", + "pluginVersion": "10.2.0", "type": "text" }, { @@ -88,6 +88,7 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, 
"axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -101,6 +102,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -460,6 +462,7 @@ data: }, "id": 18, "options": { + "cellHeight": "sm", "footer": { "countRows": false, "enablePagination": true, @@ -472,7 +475,7 @@ data: "showHeader": true, "sortBy": [] }, - "pluginVersion": "9.4.7", + "pluginVersion": "10.2.0", "targets": [ { "datasource": { @@ -640,7 +643,7 @@ data: "uid": "PBFA97CFB590B2093" }, "gridPos": { - "h": 2, + "h": 3, "w": 23, "x": 0, "y": 32 @@ -652,10 +655,10 @@ data: "showLineNumbers": false, "showMiniMap": false }, - "content": "Select instances via the variables on top of the dashbord. If multiple Centrals are selected, the SLIs/SLOs are averaged.", + "content": "Select instances via the variables on top of the dashbord. If multiple Centrals are selected, the SLIs/SLOs are averaged.\n\nWe plot a proxy of the SLI based on the number of unavailability drops per hour (percentage gauges are exact). This renders better in Grafana for long ranges. Note that the actual down time may be shorter than is rendered. 
It might still be necessary to zoom in around burn rate spikes to get full resolution.", "mode": "markdown" }, - "pluginVersion": "9.4.7", + "pluginVersion": "10.2.0", "type": "text" }, { @@ -698,10 +701,12 @@ data: "h": 8, "w": 5, "x": 0, - "y": 34 + "y": 35 }, "id": 6, "options": { + "minVizHeight": 75, + "minVizWidth": 75, "orientation": "auto", "reduceOptions": { "calcs": [ @@ -713,7 +718,7 @@ data: "showThresholdLabels": false, "showThresholdMarkers": true }, - "pluginVersion": "9.4.7", + "pluginVersion": "10.2.0", "targets": [ { "datasource": { @@ -735,13 +740,14 @@ data: "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "description": "`Pod Ready SLI * Error Rate SLI`", + "description": "`Pod Ready SLI * Error Rate SLI`\n\nWe show a proxy of the SLI based on the number of unavailability drops because it renders better in Grafana.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -755,6 +761,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -794,7 +801,7 @@ data: "h": 8, "w": 6, "x": 5, - "y": 34 + "y": 35 }, "id": 9, "options": { @@ -816,7 +823,7 @@ data: "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "avg(central:sli:availability{rhacs_instance_id=~\"$instance_id\"})", + "expr": "1 - clamp_max(avg(changes(central:sli:availability{namespace=~\"rhacs-$instance_id\"}[1h])), 1)", "legendFormat": "{{label_name}}", "range": true, "refId": "A" @@ -830,13 +837,14 @@ data: "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "description": "`1` is at least one pod is in ready state. `0` otherwise.", + "description": "`1` is at least one pod is in ready state. 
`0` otherwise.\n\nWe show a proxy of the SLI based on the number of unavailability drops because it renders better in Grafana.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -850,6 +858,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -889,7 +898,7 @@ data: "h": 8, "w": 6, "x": 11, - "y": 34 + "y": 35 }, "id": 13, "options": { @@ -911,8 +920,8 @@ data: "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "avg(central:sli:pod_ready{namespace=~\"rhacs-$instance_id\"})", - "legendFormat": "{{label_name}}", + "expr": "1 - clamp_max(avg(changes(central:sli:pod_ready{namespace=~\"rhacs-$instance_id\"}[1h])), 1)", + "legendFormat": "SLI", "range": true, "refId": "A" } @@ -925,13 +934,14 @@ data: "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "description": "`1` if the combined error rate of gRPC and HTTP requests is `<35%`. `0` otherwise.\n\nA gRPC error is defined by a response with `grpc_code != OK`. An HTTP error is defined by a response with status code `5xx`.", + "description": "`1` if the combined error rate of gRPC and HTTP requests is `<35%`. `0` otherwise.\n\nA gRPC error is defined by a response with `grpc_code != OK`. 
An HTTP error is defined by a response with status code `5xx`.\n\nWe show a proxy of the SLI based on the number of unavailability drops because it renders better in Grafana.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -945,6 +955,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -984,7 +995,7 @@ data: "h": 8, "w": 6, "x": 17, - "y": 34 + "y": 35 }, "id": 12, "options": { @@ -1006,7 +1017,7 @@ data: "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "avg(central:sli:error_rate{rhacs_instance_id=~\"$instance_id\"})", + "expr": "1 - clamp_max(avg(changes(central:sli:error_rate{namespace=~\"rhacs-$instance_id\"}[1h])), 1)", "legendFormat": "{{label_name}}", "range": true, "refId": "A" @@ -1060,10 +1071,12 @@ data: "h": 8, "w": 5, "x": 0, - "y": 42 + "y": 43 }, "id": 7, "options": { + "minVizHeight": 75, + "minVizWidth": 75, "orientation": "auto", "reduceOptions": { "calcs": [ @@ -1075,7 +1088,7 @@ data: "showThresholdLabels": false, "showThresholdMarkers": true }, - "pluginVersion": "9.4.7", + "pluginVersion": "10.2.0", "targets": [ { "datasource": { @@ -1103,6 +1116,7 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1116,6 +1130,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1154,7 +1169,7 @@ data: "h": 8, "w": 9, "x": 5, - "y": 42 + "y": 43 }, "id": 10, "options": { @@ -1196,6 +1211,7 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1209,6 +1225,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", 
"lineWidth": 1, "pointSize": 5, @@ -1247,7 +1264,7 @@ data: "h": 8, "w": 9, "x": 14, - "y": 42 + "y": 43 }, "id": 14, "options": { @@ -1279,10 +1296,9 @@ data: "type": "timeseries" } ], - "refresh": "", + "refresh": false, "revision": 1, - "schemaVersion": 37, - "style": "dark", + "schemaVersion": 38, "tags": [ "rhacs" ], @@ -1439,6 +1455,6 @@ data: "timezone": "", "title": "RHACS Dataplane - Central SLOs", "uid": "vH7ntMs4k", - "version": 2, + "version": 1, "weekStart": "" } diff --git a/resources/grafana/generated/dashboards/rhacs-central-slo-dashboard.yaml b/resources/grafana/generated/dashboards/rhacs-central-slo-dashboard.yaml index 0298d283..18572c6b 100644 --- a/resources/grafana/generated/dashboards/rhacs-central-slo-dashboard.yaml +++ b/resources/grafana/generated/dashboards/rhacs-central-slo-dashboard.yaml @@ -35,7 +35,7 @@ spec: "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 7, + "id": 15, "links": [], "liveNow": false, "panels": [ @@ -61,7 +61,7 @@ spec: "content": "## Definition\n\nThe availability of Central is defined as a combination of pod ready status and API error rate.\n\n`Availability SLI = Pod Ready SLI * Error Rate SLI`\n\nThe SLO target is 99% availability calculated over 28 day rolling intervals.", "mode": "markdown" }, - "pluginVersion": "9.4.7", + "pluginVersion": "10.2.0", "type": "text" }, { @@ -88,6 +88,7 @@ spec: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -101,6 +102,7 @@ spec: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -460,6 +462,7 @@ spec: }, "id": 18, "options": { + "cellHeight": "sm", "footer": { "countRows": false, "enablePagination": true, @@ -472,7 +475,7 @@ spec: "showHeader": true, "sortBy": [] }, - "pluginVersion": "9.4.7", + "pluginVersion": "10.2.0", "targets": [ { "datasource": { @@ -640,7 +643,7 @@ spec: "uid": 
"PBFA97CFB590B2093" }, "gridPos": { - "h": 2, + "h": 3, "w": 23, "x": 0, "y": 32 @@ -652,10 +655,10 @@ spec: "showLineNumbers": false, "showMiniMap": false }, - "content": "Select instances via the variables on top of the dashbord. If multiple Centrals are selected, the SLIs/SLOs are averaged.", + "content": "Select instances via the variables on top of the dashbord. If multiple Centrals are selected, the SLIs/SLOs are averaged.\n\nWe plot a proxy of the SLI based on the number of unavailability drops per hour (percentage gauges are exact). This renders better in Grafana for long ranges. Note that the actual down time may be shorter than is rendered. It might still be necessary to zoom in around burn rate spikes to get full resolution.", "mode": "markdown" }, - "pluginVersion": "9.4.7", + "pluginVersion": "10.2.0", "type": "text" }, { @@ -698,10 +701,12 @@ spec: "h": 8, "w": 5, "x": 0, - "y": 34 + "y": 35 }, "id": 6, "options": { + "minVizHeight": 75, + "minVizWidth": 75, "orientation": "auto", "reduceOptions": { "calcs": [ @@ -713,7 +718,7 @@ spec: "showThresholdLabels": false, "showThresholdMarkers": true }, - "pluginVersion": "9.4.7", + "pluginVersion": "10.2.0", "targets": [ { "datasource": { @@ -735,13 +740,14 @@ spec: "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "description": "`Pod Ready SLI * Error Rate SLI`", + "description": "`Pod Ready SLI * Error Rate SLI`\n\nWe show a proxy of the SLI based on the number of unavailability drops because it renders better in Grafana.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -755,6 +761,7 @@ spec: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -794,7 +801,7 @@ spec: "h": 8, "w": 6, "x": 5, - "y": 34 + "y": 35 }, "id": 9, "options": { @@ -816,7 +823,7 @@ spec: "uid": "PBFA97CFB590B2093" }, 
"editorMode": "code", - "expr": "avg(central:sli:availability{rhacs_instance_id=~\"$instance_id\"})", + "expr": "1 - clamp_max(avg(changes(central:sli:availability{namespace=~\"rhacs-$instance_id\"}[1h])), 1)", "legendFormat": "{{label_name}}", "range": true, "refId": "A" @@ -830,13 +837,14 @@ spec: "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "description": "`1` is at least one pod is in ready state. `0` otherwise.", + "description": "`1` is at least one pod is in ready state. `0` otherwise.\n\nWe show a proxy of the SLI based on the number of unavailability drops because it renders better in Grafana.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -850,6 +858,7 @@ spec: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -889,7 +898,7 @@ spec: "h": 8, "w": 6, "x": 11, - "y": 34 + "y": 35 }, "id": 13, "options": { @@ -911,8 +920,8 @@ spec: "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "avg(central:sli:pod_ready{namespace=~\"rhacs-$instance_id\"})", - "legendFormat": "{{label_name}}", + "expr": "1 - clamp_max(avg(changes(central:sli:pod_ready{namespace=~\"rhacs-$instance_id\"}[1h])), 1)", + "legendFormat": "SLI", "range": true, "refId": "A" } @@ -925,13 +934,14 @@ spec: "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "description": "`1` if the combined error rate of gRPC and HTTP requests is `<35%`. `0` otherwise.\n\nA gRPC error is defined by a response with `grpc_code != OK`. An HTTP error is defined by a response with status code `5xx`.", + "description": "`1` if the combined error rate of gRPC and HTTP requests is `<35%`. `0` otherwise.\n\nA gRPC error is defined by a response with `grpc_code != OK`. 
An HTTP error is defined by a response with status code `5xx`.\n\nWe show a proxy of the SLI based on the number of unavailability drops because it renders better in Grafana.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -945,6 +955,7 @@ spec: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -984,7 +995,7 @@ spec: "h": 8, "w": 6, "x": 17, - "y": 34 + "y": 35 }, "id": 12, "options": { @@ -1006,7 +1017,7 @@ spec: "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "avg(central:sli:error_rate{rhacs_instance_id=~\"$instance_id\"})", + "expr": "1 - clamp_max(avg(changes(central:sli:error_rate{namespace=~\"rhacs-$instance_id\"}[1h])), 1)", "legendFormat": "{{label_name}}", "range": true, "refId": "A" @@ -1060,10 +1071,12 @@ spec: "h": 8, "w": 5, "x": 0, - "y": 42 + "y": 43 }, "id": 7, "options": { + "minVizHeight": 75, + "minVizWidth": 75, "orientation": "auto", "reduceOptions": { "calcs": [ @@ -1075,7 +1088,7 @@ spec: "showThresholdLabels": false, "showThresholdMarkers": true }, - "pluginVersion": "9.4.7", + "pluginVersion": "10.2.0", "targets": [ { "datasource": { @@ -1103,6 +1116,7 @@ spec: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1116,6 +1130,7 @@ spec: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1154,7 +1169,7 @@ spec: "h": 8, "w": 9, "x": 5, - "y": 42 + "y": 43 }, "id": 10, "options": { @@ -1196,6 +1211,7 @@ spec: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1209,6 +1225,7 @@ spec: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", 
"lineWidth": 1, "pointSize": 5, @@ -1247,7 +1264,7 @@ spec: "h": 8, "w": 9, "x": 14, - "y": 42 + "y": 43 }, "id": 14, "options": { @@ -1279,10 +1296,9 @@ spec: "type": "timeseries" } ], - "refresh": "", + "refresh": false, "revision": 1, - "schemaVersion": 37, - "style": "dark", + "schemaVersion": 38, "tags": [ "rhacs" ], @@ -1439,6 +1455,6 @@ spec: "timezone": "", "title": "RHACS Dataplane - Central SLOs", "uid": "vH7ntMs4k", - "version": 2, + "version": 1, "weekStart": "" } diff --git a/resources/grafana/sources/rhacs-central-slo.json b/resources/grafana/sources/rhacs-central-slo.json index 855ebbed..c40a4f27 100644 --- a/resources/grafana/sources/rhacs-central-slo.json +++ b/resources/grafana/sources/rhacs-central-slo.json @@ -24,7 +24,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 7, + "id": 15, "links": [], "liveNow": false, "panels": [ @@ -50,7 +50,7 @@ "content": "## Definition\n\nThe availability of Central is defined as a combination of pod ready status and API error rate.\n\n`Availability SLI = Pod Ready SLI * Error Rate SLI`\n\nThe SLO target is 99% availability calculated over 28 day rolling intervals.", "mode": "markdown" }, - "pluginVersion": "9.4.7", + "pluginVersion": "10.2.0", "type": "text" }, { @@ -77,6 +77,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -90,6 +91,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -449,6 +451,7 @@ }, "id": 18, "options": { + "cellHeight": "sm", "footer": { "countRows": false, "enablePagination": true, @@ -461,7 +464,7 @@ "showHeader": true, "sortBy": [] }, - "pluginVersion": "9.4.7", + "pluginVersion": "10.2.0", "targets": [ { "datasource": { @@ -629,7 +632,7 @@ "uid": "PBFA97CFB590B2093" }, "gridPos": { - "h": 2, + "h": 3, "w": 23, "x": 0, "y": 32 @@ -641,10 +644,10 @@ "showLineNumbers": false, "showMiniMap": 
false }, - "content": "Select instances via the variables on top of the dashbord. If multiple Centrals are selected, the SLIs/SLOs are averaged.", + "content": "Select instances via the variables on top of the dashbord. If multiple Centrals are selected, the SLIs/SLOs are averaged.\n\nWe plot a proxy of the SLI based on the number of unavailability drops per hour (percentage gauges are exact). This renders better in Grafana for long ranges. Note that the actual down time may be shorter than is rendered. It might still be necessary to zoom in around burn rate spikes to get full resolution.", "mode": "markdown" }, - "pluginVersion": "9.4.7", + "pluginVersion": "10.2.0", "type": "text" }, { @@ -687,10 +690,12 @@ "h": 8, "w": 5, "x": 0, - "y": 34 + "y": 35 }, "id": 6, "options": { + "minVizHeight": 75, + "minVizWidth": 75, "orientation": "auto", "reduceOptions": { "calcs": [ @@ -702,7 +707,7 @@ "showThresholdLabels": false, "showThresholdMarkers": true }, - "pluginVersion": "9.4.7", + "pluginVersion": "10.2.0", "targets": [ { "datasource": { @@ -724,13 +729,14 @@ "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "description": "`Pod Ready SLI * Error Rate SLI`", + "description": "`Pod Ready SLI * Error Rate SLI`\n\nWe show a proxy of the SLI based on the number of unavailability drops because it renders better in Grafana.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -744,6 +750,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -783,7 +790,7 @@ "h": 8, "w": 6, "x": 5, - "y": 34 + "y": 35 }, "id": 9, "options": { @@ -805,7 +812,7 @@ "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "avg(central:sli:availability{rhacs_instance_id=~\"$instance_id\"})", + "expr": "1 - 
clamp_max(avg(changes(central:sli:availability{namespace=~\"rhacs-$instance_id\"}[1h])), 1)", "legendFormat": "{{label_name}}", "range": true, "refId": "A" @@ -819,13 +826,14 @@ "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "description": "`1` is at least one pod is in ready state. `0` otherwise.", + "description": "`1` is at least one pod is in ready state. `0` otherwise.\n\nWe show a proxy of the SLI based on the number of unavailability drops because it renders better in Grafana.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -839,6 +847,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -878,7 +887,7 @@ "h": 8, "w": 6, "x": 11, - "y": 34 + "y": 35 }, "id": 13, "options": { @@ -900,8 +909,8 @@ "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "avg(central:sli:pod_ready{namespace=~\"rhacs-$instance_id\"})", - "legendFormat": "{{label_name}}", + "expr": "1 - clamp_max(avg(changes(central:sli:pod_ready{namespace=~\"rhacs-$instance_id\"}[1h])), 1)", + "legendFormat": "SLI", "range": true, "refId": "A" } @@ -914,13 +923,14 @@ "type": "prometheus", "uid": "PBFA97CFB590B2093" }, - "description": "`1` if the combined error rate of gRPC and HTTP requests is `<35%`. `0` otherwise.\n\nA gRPC error is defined by a response with `grpc_code != OK`. An HTTP error is defined by a response with status code `5xx`.", + "description": "`1` if the combined error rate of gRPC and HTTP requests is `<35%`. `0` otherwise.\n\nA gRPC error is defined by a response with `grpc_code != OK`. 
An HTTP error is defined by a response with status code `5xx`.\n\nWe show a proxy of the SLI based on the number of unavailability drops because it renders better in Grafana.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -934,6 +944,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -973,7 +984,7 @@ "h": 8, "w": 6, "x": 17, - "y": 34 + "y": 35 }, "id": 12, "options": { @@ -995,7 +1006,7 @@ "uid": "PBFA97CFB590B2093" }, "editorMode": "code", - "expr": "avg(central:sli:error_rate{rhacs_instance_id=~\"$instance_id\"})", + "expr": "1 - clamp_max(avg(changes(central:sli:error_rate{namespace=~\"rhacs-$instance_id\"}[1h])), 1)", "legendFormat": "{{label_name}}", "range": true, "refId": "A" @@ -1049,10 +1060,12 @@ "h": 8, "w": 5, "x": 0, - "y": 42 + "y": 43 }, "id": 7, "options": { + "minVizHeight": 75, + "minVizWidth": 75, "orientation": "auto", "reduceOptions": { "calcs": [ @@ -1064,7 +1077,7 @@ "showThresholdLabels": false, "showThresholdMarkers": true }, - "pluginVersion": "9.4.7", + "pluginVersion": "10.2.0", "targets": [ { "datasource": { @@ -1092,6 +1105,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1105,6 +1119,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1143,7 +1158,7 @@ "h": 8, "w": 9, "x": 5, - "y": 42 + "y": 43 }, "id": 10, "options": { @@ -1185,6 +1200,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1198,6 +1214,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1236,7 +1253,7 @@ "h": 8, "w": 
9, "x": 14, - "y": 42 + "y": 43 }, "id": 14, "options": { @@ -1268,10 +1285,9 @@ "type": "timeseries" } ], - "refresh": "", + "refresh": false, "revision": 1, - "schemaVersion": 37, - "style": "dark", + "schemaVersion": 38, "tags": [ "rhacs" ], @@ -1428,6 +1444,6 @@ "timezone": "", "title": "RHACS Dataplane - Central SLOs", "uid": "vH7ntMs4k", - "version": 2, + "version": 1, "weekStart": "" }