From ab3268aec37d18a1d40719c03eb171ed48b8af8f Mon Sep 17 00:00:00 2001 From: Aleksandr Kurlov Date: Thu, 4 Jan 2024 00:03:38 +0100 Subject: [PATCH 1/4] ROX-21321: Add ACSCS release dashboard (#187) * Add ACSCS release dashboard * Update resources/index.json Co-authored-by: Stephan Hesselmann --------- Co-authored-by: Stephan Hesselmann --- .../rhacs-central-release-configmap.yaml | 355 ++++++++++++++++++ .../rhacs-central-release-dashboard.yaml | 355 ++++++++++++++++++ .../sources/rhacs-central-release.json | 344 +++++++++++++++++ .../rhacs-central-release-configmap.yaml | 11 + .../rhacs-central-release-dashboard.yaml | 11 + resources/index.json | 3 +- 6 files changed, 1078 insertions(+), 1 deletion(-) create mode 100644 resources/grafana/generated/dashboards/rhacs-central-release-configmap.yaml create mode 100644 resources/grafana/generated/dashboards/rhacs-central-release-dashboard.yaml create mode 100644 resources/grafana/sources/rhacs-central-release.json create mode 100644 resources/grafana/templates/dashboards/rhacs-central-release-configmap.yaml create mode 100644 resources/grafana/templates/dashboards/rhacs-central-release-dashboard.yaml diff --git a/resources/grafana/generated/dashboards/rhacs-central-release-configmap.yaml b/resources/grafana/generated/dashboards/rhacs-central-release-configmap.yaml new file mode 100644 index 00000000..aac96bd2 --- /dev/null +++ b/resources/grafana/generated/dashboards/rhacs-central-release-configmap.yaml @@ -0,0 +1,355 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + creationTimestamp: null + name: rhacs-central-release + labels: + grafana_dashboard: "true" + annotations: + grafana-folder: /grafana-dashboard-definitions/Addons +data: + json: | + { + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 23, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "The number of Central deployments with at least one pod in ready state per ACS version", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 27, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "count by (rhacs_version) (clamp_max(count by (namespace) (kube_pod_container_status_ready{namespace=~\"rhacs-$instance_id\", container=\"central\", job=~\"kube-state-metrics\"}), 1) * on(namespace) group_left(rhacs_version) count by (namespace, rhacs_version) (process_cpu_seconds_total{namespace=~\"rhacs-$instance_id\", job=\"central\", rhacs_org_id=~\"$org_id\"}))", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Central count by version", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "Reflects the current state of deployed ACSCS versions", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [], + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 16, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 28, + "options": { + "displayLabels": [ + "name" + ], + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true, + "values": [ + "value" + ] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "count by (rhacs_version) (rate(process_cpu_seconds_total{namespace=~\"rhacs-$instance_id\", job=\"central\", rhacs_org_id=~\"$org_id\"}[$__rate_interval]))", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "ACS version chart", + "type": "piechart" + } + ], + "refresh": "", + "revision": 1, + "schemaVersion": 38, + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "definition": "label_values(process_cpu_seconds_total{job=\"central\", rhacs_org_id!=\"16536854\",cluster_id=~\"$cluster_id\"}, rhacs_org_name)", + "description": "Red Hat SSO Organisation Name", + "hide": 0, + "includeAll": true, + "label": "Organisation", + "multi": true, + "name": "org_name", + "options": [], + "query": { + "query": "label_values(process_cpu_seconds_total{job=\"central\", rhacs_org_id!=\"16536854\",cluster_id=~\"$cluster_id\"}, rhacs_org_name)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "definition": "label_values(process_cpu_seconds_total{job=\"central\", rhacs_org_name=~\"$org_name\", rhacs_org_id!=\"16536854\",cluster_id=~\"$cluster_id\"}, rhacs_org_id)", + "description": "Red Hat SSO Organisation ID", + "hide": 0, + "includeAll": true, + "label": "Organisation ID", + "multi": true, + "name": "org_id", + "options": [], + "query": { + "query": "label_values(process_cpu_seconds_total{job=\"central\", rhacs_org_name=~\"$org_name\", rhacs_org_id!=\"16536854\",cluster_id=~\"$cluster_id\"}, rhacs_org_id)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "definition": "label_values(process_cpu_seconds_total{job=\"central\", rhacs_org_name=~\"$org_name\", rhacs_org_id=~\"$org_id\",cluster_id=~\"$cluster_id\"}, rhacs_instance_id)", + "description": "RHACS Central Instance ID", + "hide": 0, + "includeAll": true, + "label": "Central", + "multi": true, + "name": "instance_id", + "options": [], + "query": { + "query": "label_values(process_cpu_seconds_total{job=\"central\", rhacs_org_name=~\"$org_name\", rhacs_org_id=~\"$org_id\",cluster_id=~\"$cluster_id\"}, rhacs_instance_id)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "definition": "label_values(process_cpu_seconds_total{job=\"central\"}, cluster_id)", + "description": "RHACS Cluster ID", + "hide": 0, + "includeAll": true, + "label": "Cluster ID", + "multi": true, + "name": "cluster_id", + "options": [], + "query": { + "query": "label_values(process_cpu_seconds_total{job=\"central\"}, cluster_id)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-24h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "RHACS Release", + "uid": "bbcb22ed-4982-488c-82bb-d5cf9280eaa3", + "version": 1, + "weekStart": "" + } diff --git a/resources/grafana/generated/dashboards/rhacs-central-release-dashboard.yaml b/resources/grafana/generated/dashboards/rhacs-central-release-dashboard.yaml new file mode 100644 index 00000000..2c9fbf58 --- /dev/null +++ b/resources/grafana/generated/dashboards/rhacs-central-release-dashboard.yaml @@ -0,0 +1,355 @@ +apiVersion: integreatly.org/v1alpha1 +kind: GrafanaDashboard +metadata: + labels: + app: rhacs + monitoring-key: middleware + name: rhacs-central-release-dashboard + namespace: +spec: + name: rhacs-central-release.json + json: | + { + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 23, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "The number of Central deployments with at least one pod in ready state per ACS version", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 27, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "count by (rhacs_version) (clamp_max(count by (namespace) (kube_pod_container_status_ready{namespace=~\"rhacs-$instance_id\", container=\"central\", job=~\"kube-state-metrics\"}), 1) * on(namespace) group_left(rhacs_version) count by (namespace, rhacs_version) (process_cpu_seconds_total{namespace=~\"rhacs-$instance_id\", job=\"central\", rhacs_org_id=~\"$org_id\"}))", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Central count by version", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "Reflects the current state of deployed ACSCS versions", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [], + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 16, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 28, + "options": { + "displayLabels": [ + "name" + ], + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true, + "values": [ + "value" + ] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "count by (rhacs_version) (rate(process_cpu_seconds_total{namespace=~\"rhacs-$instance_id\", job=\"central\", rhacs_org_id=~\"$org_id\"}[$__rate_interval]))", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "ACS version chart", + "type": "piechart" + } + ], + "refresh": "", + "revision": 1, + "schemaVersion": 38, + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "definition": "label_values(process_cpu_seconds_total{job=\"central\", rhacs_org_id!=\"16536854\",cluster_id=~\"$cluster_id\"}, rhacs_org_name)", + "description": "Red Hat SSO Organisation Name", + "hide": 0, + "includeAll": true, + "label": "Organisation", + "multi": true, + "name": "org_name", + "options": [], + "query": { + "query": "label_values(process_cpu_seconds_total{job=\"central\", rhacs_org_id!=\"16536854\",cluster_id=~\"$cluster_id\"}, rhacs_org_name)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "definition": "label_values(process_cpu_seconds_total{job=\"central\", rhacs_org_name=~\"$org_name\", rhacs_org_id!=\"16536854\",cluster_id=~\"$cluster_id\"}, rhacs_org_id)", + "description": "Red Hat SSO Organisation ID", + "hide": 0, + "includeAll": true, + "label": "Organisation ID", + "multi": true, + "name": "org_id", + "options": [], + "query": { + "query": "label_values(process_cpu_seconds_total{job=\"central\", rhacs_org_name=~\"$org_name\", rhacs_org_id!=\"16536854\",cluster_id=~\"$cluster_id\"}, rhacs_org_id)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "definition": "label_values(process_cpu_seconds_total{job=\"central\", rhacs_org_name=~\"$org_name\", rhacs_org_id=~\"$org_id\",cluster_id=~\"$cluster_id\"}, rhacs_instance_id)", + "description": "RHACS Central Instance ID", + "hide": 0, + "includeAll": true, + "label": "Central", + "multi": true, + "name": "instance_id", + "options": [], + "query": { + "query": "label_values(process_cpu_seconds_total{job=\"central\", rhacs_org_name=~\"$org_name\", rhacs_org_id=~\"$org_id\",cluster_id=~\"$cluster_id\"}, rhacs_instance_id)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "definition": "label_values(process_cpu_seconds_total{job=\"central\"}, cluster_id)", + "description": "RHACS Cluster ID", + "hide": 0, + "includeAll": true, + "label": "Cluster ID", + "multi": true, + "name": "cluster_id", + "options": [], + "query": { + "query": "label_values(process_cpu_seconds_total{job=\"central\"}, cluster_id)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-24h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "RHACS Release", + "uid": "bbcb22ed-4982-488c-82bb-d5cf9280eaa3", + "version": 1, + "weekStart": "" + } diff --git a/resources/grafana/sources/rhacs-central-release.json b/resources/grafana/sources/rhacs-central-release.json new file mode 100644 index 00000000..ca7e3051 --- /dev/null +++ b/resources/grafana/sources/rhacs-central-release.json @@ -0,0 +1,344 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 23, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "The number of Central deployments with at least one pod in ready state per ACS version", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 27, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "count by (rhacs_version) (clamp_max(count by (namespace) (kube_pod_container_status_ready{namespace=~\"rhacs-$instance_id\", container=\"central\", job=~\"kube-state-metrics\"}), 1) * on(namespace) group_left(rhacs_version) count by (namespace, rhacs_version) (process_cpu_seconds_total{namespace=~\"rhacs-$instance_id\", job=\"central\", rhacs_org_id=~\"$org_id\"}))", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Central count by version", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "description": "Reflects the current state of deployed ACSCS versions", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "mappings": [], + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 16, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 28, + "options": { + "displayLabels": [ + "name" + ], + "legend": { + "displayMode": "list", + "placement": "bottom", + "showLegend": true, + "values": [ + "value" + ] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "10.2.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "editorMode": "code", + "expr": "count by (rhacs_version) (rate(process_cpu_seconds_total{namespace=~\"rhacs-$instance_id\", job=\"central\", rhacs_org_id=~\"$org_id\"}[$__rate_interval]))", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "ACS version chart", + "type": "piechart" + } + ], + "refresh": "", + "revision": 1, + "schemaVersion": 38, + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "definition": "label_values(process_cpu_seconds_total{job=\"central\", rhacs_org_id!=\"16536854\",cluster_id=~\"$cluster_id\"}, rhacs_org_name)", + "description": "Red Hat SSO Organisation Name", + "hide": 0, + "includeAll": true, + "label": "Organisation", + "multi": true, + "name": "org_name", + "options": [], + "query": { + "query": "label_values(process_cpu_seconds_total{job=\"central\", rhacs_org_id!=\"16536854\",cluster_id=~\"$cluster_id\"}, rhacs_org_name)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "definition": "label_values(process_cpu_seconds_total{job=\"central\", rhacs_org_name=~\"$org_name\", rhacs_org_id!=\"16536854\",cluster_id=~\"$cluster_id\"}, rhacs_org_id)", + "description": "Red Hat SSO Organisation ID", + "hide": 0, + "includeAll": true, + "label": "Organisation ID", + "multi": true, + "name": "org_id", + "options": [], + "query": { + "query": "label_values(process_cpu_seconds_total{job=\"central\", rhacs_org_name=~\"$org_name\", rhacs_org_id!=\"16536854\",cluster_id=~\"$cluster_id\"}, rhacs_org_id)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "definition": "label_values(process_cpu_seconds_total{job=\"central\", rhacs_org_name=~\"$org_name\", rhacs_org_id=~\"$org_id\",cluster_id=~\"$cluster_id\"}, rhacs_instance_id)", + "description": "RHACS Central Instance ID", + "hide": 0, + "includeAll": true, + "label": "Central", + "multi": true, + "name": "instance_id", + "options": [], + "query": { + "query": "label_values(process_cpu_seconds_total{job=\"central\", rhacs_org_name=~\"$org_name\", rhacs_org_id=~\"$org_id\",cluster_id=~\"$cluster_id\"}, rhacs_instance_id)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "prometheus", + "uid": "PBFA97CFB590B2093" + }, + "definition": "label_values(process_cpu_seconds_total{job=\"central\"}, cluster_id)", + "description": "RHACS Cluster ID", + "hide": 0, + "includeAll": true, + "label": "Cluster ID", + "multi": true, + "name": "cluster_id", + "options": [], + "query": { + "query": "label_values(process_cpu_seconds_total{job=\"central\"}, cluster_id)", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-24h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "RHACS Release", + "uid": "bbcb22ed-4982-488c-82bb-d5cf9280eaa3", + "version": 1, + "weekStart": "" +} diff --git a/resources/grafana/templates/dashboards/rhacs-central-release-configmap.yaml b/resources/grafana/templates/dashboards/rhacs-central-release-configmap.yaml new file mode 100644 index 00000000..35525621 --- /dev/null +++ b/resources/grafana/templates/dashboards/rhacs-central-release-configmap.yaml @@ -0,0 +1,11 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + creationTimestamp: null + name: rhacs-central-release + labels: + grafana_dashboard: "true" + annotations: + grafana-folder: /grafana-dashboard-definitions/Addons +data: + json: | diff --git a/resources/grafana/templates/dashboards/rhacs-central-release-dashboard.yaml b/resources/grafana/templates/dashboards/rhacs-central-release-dashboard.yaml new file mode 100644 index 00000000..7dde7e63 --- /dev/null +++ b/resources/grafana/templates/dashboards/rhacs-central-release-dashboard.yaml @@ -0,0 +1,11 @@ +apiVersion: integreatly.org/v1alpha1 +kind: GrafanaDashboard +metadata: + labels: + app: rhacs + monitoring-key: middleware + name: rhacs-central-release-dashboard + namespace: +spec: + name: rhacs-central-release.json + json: | diff --git a/resources/index.json b/resources/index.json index f02b3373..b843e500 100644 --- a/resources/index.json +++ b/resources/index.json @@ -47,7 +47,8 @@ "grafana/generated/dashboards/rhacs-central-dashboard.yaml", "grafana/generated/dashboards/rhacs-central-slo-dashboard.yaml", "grafana/generated/dashboards/rhacs-cluster-overview-dashboard.yaml", - "grafana/generated/dashboards/rhacs-cluster-resource-adjustment-dashboard.yaml" + "grafana/generated/dashboards/rhacs-cluster-resource-adjustment-dashboard.yaml", + "grafana/generated/dashboards/rhacs-central-release-dashboard.yaml" ], "grafanaVersion": "10.2.0" }, From 7ddf7b1df47dd00a33863d3619b48bdaa5b8788b Mon Sep 17 00:00:00 2001 From: Ludovic Cleroux Date: Tue, 9 Jan 2024 12:51:09 +0100 Subject: [PATCH 2/4] ROX-21047: Widgets are hard to read --- .../rhacs-cluster-overview-configmap.yaml | 271 +++--------------- .../rhacs-cluster-overview-dashboard.yaml | 271 +++--------------- .../sources/rhacs-cluster-overview.json | 271 +++--------------- 3 files changed, 114 insertions(+), 699 deletions(-) diff --git a/resources/grafana/generated/dashboards/rhacs-cluster-overview-configmap.yaml b/resources/grafana/generated/dashboards/rhacs-cluster-overview-configmap.yaml index 731156ef..8da6e3d9 100644 --- a/resources/grafana/generated/dashboards/rhacs-cluster-overview-configmap.yaml +++ b/resources/grafana/generated/dashboards/rhacs-cluster-overview-configmap.yaml @@ -35,7 +35,7 @@ data: "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 12, + "id": 19, "links": [], "liveNow": false, "panels": [ @@ -64,6 +64,7 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -77,6 +78,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -160,6 +162,7 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -173,6 +176,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -256,6 +260,7 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -269,6 +274,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -364,6 +370,7 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -377,6 +384,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -460,6 +468,7 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -473,6 +482,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -556,6 +566,7 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -569,6 +580,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -653,224 +665,6 @@ data: "title": "Resources", "type": "row" }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 26 - }, - "id": 12, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "availability_zone:acscs_worker_nodes:cpu_limit_ratio", - "interval": "", - "legendFormat": "Limit / {{availability_zone}}", - "range": true, - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "availability_zone:acscs_worker_nodes:cpu_request_ratio", - "hide": false, - "interval": "", - "legendFormat": "Request / {{availability_zone}}", - "range": true, - "refId": "B" - } - ], - "title": "Availability Zone CPU Usage", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 26 - }, - "id": 26, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "availability_zone:acscs_worker_nodes:memory_limit_ratio", - "interval": "", - "legendFormat": "Limit / {{availability_zone}}", - "range": true, - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "availability_zone:acscs_worker_nodes:memory_request_ratio", - "hide": false, - "interval": "", - "legendFormat": "Request / {{availability_zone}}", - "range": true, - "refId": "B" - } - ], - "title": "Availability Zone Memory Usage", - "type": "timeseries" - }, { "datasource": { "type": "prometheus", @@ -882,6 +676,7 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -896,6 +691,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -934,7 +730,7 @@ data: "h": 8, "w": 12, "x": 0, - "y": 34 + "y": 26 }, "id": 6, "options": { @@ -979,6 +775,7 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -992,6 +789,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1030,7 +828,7 @@ data: "h": 8, "w": 12, "x": 12, - "y": 34 + "y": 26 }, "id": 5, "options": { @@ -1074,6 +872,7 @@ data: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1087,6 +886,7 @@ data: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1125,7 +925,7 @@ data: "h": 8, "w": 12, "x": 0, - "y": 42 + "y": 34 }, "id": 4, "options": { @@ -1164,7 +964,7 @@ data: "h": 1, "w": 24, "x": 0, - "y": 50 + "y": 42 }, "id": 23, "panels": [], @@ -1370,10 +1170,11 @@ data: "h": 12, "w": 24, "x": 0, - "y": 51 + "y": 43 }, "id": 19, "options": { + "cellHeight": "sm", "footer": { "countRows": false, "enablePagination": true, @@ -1391,7 +1192,7 @@ data: } ] }, - "pluginVersion": "9.4.7", + "pluginVersion": "10.2.0", "targets": [ { "datasource": { @@ -1605,7 +1406,8 @@ data: "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1642,7 +1444,8 @@ data: "mode": "percentage", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "orange", @@ -1708,7 +1511,8 @@ data: "mode": "percentage", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "orange", @@ -1754,7 +1558,8 @@ data: "mode": "percentage", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "orange", @@ -1779,10 +1584,11 @@ data: "h": 13, "w": 24, "x": 0, - "y": 63 + "y": 55 }, "id": 21, "options": { + "cellHeight": "sm", "footer": { "countRows": false, "fields": "", @@ -1799,7 +1605,7 @@ data: } ] }, - "pluginVersion": "9.4.7", + "pluginVersion": "10.2.0", "targets": [ { "datasource": { @@ -1952,7 +1758,6 @@ data: "refresh": "", "revision": 1, "schemaVersion": 38, - "style": "dark", "tags": [ "rhacs" ], @@ -2092,6 +1897,6 @@ data: "timezone": "", "title": "RHACS Dataplane - Cluster Metrics", "uid": "4032f3c17643119901e107a0a1786d5b9e4c9565", - "version": 3, + "version": 4, "weekStart": "" } diff --git a/resources/grafana/generated/dashboards/rhacs-cluster-overview-dashboard.yaml b/resources/grafana/generated/dashboards/rhacs-cluster-overview-dashboard.yaml index e95bf615..37f8ff3e 100644 --- a/resources/grafana/generated/dashboards/rhacs-cluster-overview-dashboard.yaml +++ b/resources/grafana/generated/dashboards/rhacs-cluster-overview-dashboard.yaml @@ -35,7 +35,7 @@ spec: "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 12, + "id": 19, "links": [], "liveNow": false, "panels": [ @@ -64,6 +64,7 @@ spec: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -77,6 +78,7 @@ spec: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -160,6 +162,7 @@ spec: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -173,6 +176,7 @@ spec: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -256,6 +260,7 @@ spec: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -269,6 +274,7 @@ spec: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -364,6 +370,7 @@ spec: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -377,6 +384,7 @@ spec: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -460,6 +468,7 @@ spec: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -473,6 +482,7 @@ spec: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -556,6 +566,7 @@ spec: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -569,6 +580,7 @@ spec: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -653,224 +665,6 @@ spec: "title": "Resources", "type": "row" }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 26 - }, - "id": 12, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "availability_zone:acscs_worker_nodes:cpu_limit_ratio", - "interval": "", - "legendFormat": "Limit / {{availability_zone}}", - "range": true, - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "availability_zone:acscs_worker_nodes:cpu_request_ratio", - "hide": false, - "interval": "", - "legendFormat": "Request / {{availability_zone}}", - "range": true, - "refId": "B" - } - ], - "title": "Availability Zone CPU Usage", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 26 - }, - "id": 26, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "availability_zone:acscs_worker_nodes:memory_limit_ratio", - "interval": "", - "legendFormat": "Limit / {{availability_zone}}", - "range": true, - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "availability_zone:acscs_worker_nodes:memory_request_ratio", - "hide": false, - "interval": "", - "legendFormat": "Request / {{availability_zone}}", - "range": true, - "refId": "B" - } - ], - "title": "Availability Zone Memory Usage", - "type": "timeseries" - }, { "datasource": { "type": "prometheus", @@ -882,6 +676,7 @@ spec: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -896,6 +691,7 @@ spec: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -934,7 +730,7 @@ spec: "h": 8, "w": 12, "x": 0, - "y": 34 + "y": 26 }, "id": 6, "options": { @@ -979,6 +775,7 @@ spec: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -992,6 +789,7 @@ spec: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1030,7 +828,7 @@ spec: "h": 8, "w": 12, "x": 12, - "y": 34 + "y": 26 }, "id": 5, "options": { @@ -1074,6 +872,7 @@ spec: "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1087,6 +886,7 @@ spec: "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1125,7 +925,7 @@ spec: "h": 8, "w": 12, "x": 0, - "y": 42 + "y": 34 }, "id": 4, "options": { @@ -1164,7 +964,7 @@ spec: "h": 1, "w": 24, "x": 0, - "y": 50 + "y": 42 }, "id": 23, "panels": [], @@ -1370,10 +1170,11 @@ spec: "h": 12, "w": 24, "x": 0, - "y": 51 + "y": 43 }, "id": 19, "options": { + "cellHeight": "sm", "footer": { "countRows": false, "enablePagination": true, @@ -1391,7 +1192,7 @@ spec: } ] }, - "pluginVersion": "9.4.7", + "pluginVersion": "10.2.0", "targets": [ { "datasource": { @@ -1605,7 +1406,8 @@ spec: "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1642,7 +1444,8 @@ spec: "mode": "percentage", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "orange", @@ -1708,7 +1511,8 @@ spec: "mode": "percentage", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "orange", @@ -1754,7 +1558,8 @@ spec: "mode": "percentage", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "orange", @@ -1779,10 +1584,11 @@ spec: "h": 13, "w": 24, "x": 0, - "y": 63 + "y": 55 }, "id": 21, "options": { + "cellHeight": "sm", "footer": { "countRows": false, "fields": "", @@ -1799,7 +1605,7 @@ spec: } ] }, - "pluginVersion": "9.4.7", + "pluginVersion": "10.2.0", "targets": [ { "datasource": { @@ -1952,7 +1758,6 @@ spec: "refresh": "", "revision": 1, "schemaVersion": 38, - "style": "dark", "tags": [ "rhacs" ], @@ -2092,6 +1897,6 @@ spec: "timezone": "", "title": "RHACS Dataplane - Cluster Metrics", "uid": "4032f3c17643119901e107a0a1786d5b9e4c9565", - "version": 3, + "version": 4, "weekStart": "" } diff --git a/resources/grafana/sources/rhacs-cluster-overview.json b/resources/grafana/sources/rhacs-cluster-overview.json index b3c85539..4ed669ad 100644 --- a/resources/grafana/sources/rhacs-cluster-overview.json +++ b/resources/grafana/sources/rhacs-cluster-overview.json @@ -24,7 +24,7 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 12, + "id": 19, "links": [], "liveNow": false, "panels": [ @@ -53,6 +53,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -66,6 +67,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -149,6 +151,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -162,6 +165,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -245,6 +249,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -258,6 +263,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -353,6 +359,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -366,6 +373,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -449,6 +457,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -462,6 +471,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -545,6 +555,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -558,6 +569,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -642,224 +654,6 @@ "title": "Resources", "type": "row" }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 26 - }, - "id": 12, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "availability_zone:acscs_worker_nodes:cpu_limit_ratio", - "interval": "", - "legendFormat": "Limit / {{availability_zone}}", - "range": true, - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "availability_zone:acscs_worker_nodes:cpu_request_ratio", - "hide": false, - "interval": "", - "legendFormat": "Request / {{availability_zone}}", - "range": true, - "refId": "B" - } - ], - "title": "Availability Zone CPU Usage", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 26 - }, - "id": 26, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "multi", - "sort": "none" - } - }, - "pluginVersion": "9.1.0", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "availability_zone:acscs_worker_nodes:memory_limit_ratio", - "interval": "", - "legendFormat": "Limit / {{availability_zone}}", - "range": true, - "refId": "A" - }, - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "editorMode": "code", - "expr": "availability_zone:acscs_worker_nodes:memory_request_ratio", - "hide": false, - "interval": "", - "legendFormat": "Request / {{availability_zone}}", - "range": true, - "refId": "B" - } - ], - "title": "Availability Zone Memory Usage", - "type": "timeseries" - }, { "datasource": { "type": "prometheus", @@ -871,6 +665,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -885,6 +680,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -923,7 +719,7 @@ "h": 8, "w": 12, "x": 0, - "y": 34 + "y": 26 }, "id": 6, "options": { @@ -968,6 +764,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -981,6 +778,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1019,7 +817,7 @@ "h": 8, "w": 12, "x": 12, - "y": 34 + "y": 26 }, "id": 5, "options": { @@ -1063,6 +861,7 @@ "mode": "palette-classic" }, "custom": { + "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", @@ -1076,6 +875,7 @@ "tooltip": false, "viz": false }, + "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, @@ -1114,7 +914,7 @@ "h": 8, "w": 12, "x": 0, - "y": 42 + "y": 34 }, "id": 4, "options": { @@ -1153,7 +953,7 @@ "h": 1, "w": 24, "x": 0, - "y": 50 + "y": 42 }, "id": 23, "panels": [], @@ -1359,10 +1159,11 @@ "h": 12, "w": 24, "x": 0, - "y": 51 + "y": 43 }, "id": 19, "options": { + "cellHeight": "sm", "footer": { "countRows": false, "enablePagination": true, @@ -1380,7 +1181,7 @@ } ] }, - "pluginVersion": "9.4.7", + "pluginVersion": "10.2.0", "targets": [ { "datasource": { @@ -1594,7 +1395,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1631,7 +1433,8 @@ "mode": "percentage", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "orange", @@ -1697,7 +1500,8 @@ "mode": "percentage", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "orange", @@ -1743,7 +1547,8 @@ "mode": "percentage", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "orange", @@ -1768,10 +1573,11 @@ "h": 13, "w": 24, "x": 0, - "y": 63 + "y": 55 }, "id": 21, "options": { + "cellHeight": "sm", "footer": { "countRows": false, "fields": "", @@ -1788,7 +1594,7 @@ } ] }, - "pluginVersion": "9.4.7", + "pluginVersion": "10.2.0", "targets": [ { "datasource": { @@ -1941,7 +1747,6 @@ "refresh": "", "revision": 1, "schemaVersion": 38, - "style": "dark", "tags": [ "rhacs" ], @@ -2081,6 +1886,6 @@ "timezone": "", "title": "RHACS Dataplane - Cluster Metrics", "uid": "4032f3c17643119901e107a0a1786d5b9e4c9565", - "version": 3, + "version": 4, "weekStart": "" } From 434ae0d1c021b7d5beee92259100b699d54fa17f Mon Sep 17 00:00:00 2001 From: Ludovic Cleroux Date: Tue, 9 Jan 2024 13:43:32 +0100 Subject: [PATCH 3/4] ROX-21047: Fixed legend width --- .../rhacs-cluster-overview-configmap.yaml | 21 +++++++------------ .../rhacs-cluster-overview-dashboard.yaml | 21 +++++++------------ .../sources/rhacs-cluster-overview.json | 21 +++++++------------ 3 files changed, 21 insertions(+), 42 deletions(-) diff --git a/resources/grafana/generated/dashboards/rhacs-cluster-overview-configmap.yaml b/resources/grafana/generated/dashboards/rhacs-cluster-overview-configmap.yaml index 8da6e3d9..a4691d2b 100644 --- a/resources/grafana/generated/dashboards/rhacs-cluster-overview-configmap.yaml +++ b/resources/grafana/generated/dashboards/rhacs-cluster-overview-configmap.yaml @@ -681,7 +681,6 @@ data: "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisWidth": -3, "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, @@ -996,8 +995,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1034,8 +1032,7 @@ data: "mode": "percentage", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "orange", @@ -1406,8 +1403,7 @@ data: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1444,8 +1440,7 @@ data: "mode": "percentage", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "orange", @@ -1511,8 +1506,7 @@ data: "mode": "percentage", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "orange", @@ -1558,8 +1552,7 @@ data: "mode": "percentage", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "orange", @@ -1897,6 +1890,6 @@ data: "timezone": "", "title": "RHACS Dataplane - Cluster Metrics", "uid": "4032f3c17643119901e107a0a1786d5b9e4c9565", - "version": 4, + "version": 5, "weekStart": "" } diff --git a/resources/grafana/generated/dashboards/rhacs-cluster-overview-dashboard.yaml b/resources/grafana/generated/dashboards/rhacs-cluster-overview-dashboard.yaml index 37f8ff3e..baa19d82 100644 --- a/resources/grafana/generated/dashboards/rhacs-cluster-overview-dashboard.yaml +++ b/resources/grafana/generated/dashboards/rhacs-cluster-overview-dashboard.yaml @@ -681,7 +681,6 @@ spec: "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisWidth": -3, "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, @@ -996,8 +995,7 @@ spec: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1034,8 +1032,7 @@ spec: "mode": "percentage", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "orange", @@ -1406,8 +1403,7 @@ spec: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1444,8 +1440,7 @@ spec: "mode": "percentage", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "orange", @@ -1511,8 +1506,7 @@ spec: "mode": "percentage", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "orange", @@ -1558,8 +1552,7 @@ spec: "mode": "percentage", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "orange", @@ -1897,6 +1890,6 @@ spec: "timezone": "", "title": "RHACS Dataplane - Cluster Metrics", "uid": "4032f3c17643119901e107a0a1786d5b9e4c9565", - "version": 4, + "version": 5, "weekStart": "" } diff --git a/resources/grafana/sources/rhacs-cluster-overview.json b/resources/grafana/sources/rhacs-cluster-overview.json index 4ed669ad..bc8cfdb0 100644 --- a/resources/grafana/sources/rhacs-cluster-overview.json +++ b/resources/grafana/sources/rhacs-cluster-overview.json @@ -670,7 +670,6 @@ "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", - "axisWidth": -3, "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, @@ -985,8 +984,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1023,8 +1021,7 @@ "mode": "percentage", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "orange", @@ -1395,8 +1392,7 @@ "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1433,8 +1429,7 @@ "mode": "percentage", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "orange", @@ -1500,8 +1495,7 @@ "mode": "percentage", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "orange", @@ -1547,8 +1541,7 @@ "mode": "percentage", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "orange", @@ -1886,6 +1879,6 @@ "timezone": "", "title": "RHACS Dataplane - Cluster Metrics", "uid": "4032f3c17643119901e107a0a1786d5b9e4c9565", - "version": 4, + "version": 5, "weekStart": "" } From e023c334cd481c1339f5d6c159f3154064a3255a Mon Sep 17 00:00:00 2001 From: Stephan Hesselmann Date: Tue, 9 Jan 2024 13:43:58 +0100 Subject: [PATCH 4/4] chore: relax overcommit alerts (#189) --- resources/prometheus/prometheus-rules.yaml | 26 ++++++++++++++--- .../WorkerNodesCPUQuotaOverCommit.yaml | 28 +++++++++++++++++-- .../WorkerNodesMemoryQuotaOverCommit.yaml | 28 +++++++++++++++++-- 3 files changed, 74 insertions(+), 8 deletions(-) diff --git a/resources/prometheus/prometheus-rules.yaml b/resources/prometheus/prometheus-rules.yaml index 69ab872e..591aac01 100644 --- a/resources/prometheus/prometheus-rules.yaml +++ b/resources/prometheus/prometheus-rules.yaml @@ -691,23 +691,41 @@ spec: cpu_resource_limits:acscs_worker_nodes:by_availability_zone:sum / availability_zone:acscs_worker_nodes:allocatable_cpu + - alert: WorkerNodesMemoryQuotaOverCommitWarning + expr: avg(availability_zone:acscs_worker_nodes:memory_request_ratio) > 0.85 + for: 5m + labels: + severity: warning + annotations: + summary: "There is a risk of over-committing Memory resources on worker nodes." + description: "During the last 5 minutes, the average memory request commitment on worker nodes was {{ $value | humanizePercentage }}. This is above the recommended threshold of 85%." + sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-cloud-service/runbooks/-/blob/master/sops/dp-027-cluster-scale-up.md" - alert: WorkerNodesMemoryQuotaOverCommit - expr: avg(availability_zone:acscs_worker_nodes:memory_request_ratio) > 0.8 + expr: avg(availability_zone:acscs_worker_nodes:memory_request_ratio) > 0.95 for: 5m labels: severity: critical annotations: summary: "There is a high risk of over-committing Memory resources on worker nodes." - description: "During the last 5 minutes, the average memory request commitment on worker nodes was {{ $value | humanizePercentage }}. This is above the recommended threshold of 80%." + description: "During the last 5 minutes, the average memory request commitment on worker nodes was {{ $value | humanizePercentage }}. This is above the critical threshold of 95%." + sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-cloud-service/runbooks/-/blob/master/sops/dp-027-cluster-scale-up.md" + - alert: WorkerNodesCPUQuotaOverCommitWarning + expr: avg(availability_zone:acscs_worker_nodes:cpu_request_ratio) > 0.85 + for: 5m + labels: + severity: warning + annotations: + summary: "There is a risk of over-committing CPU resources on worker nodes." + description: "During the last 5 minutes, the average CPU request commitment on worker nodes was {{ $value | humanizePercentage }}. This is above the recommended threshold of 85%." sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-cloud-service/runbooks/-/blob/master/sops/dp-027-cluster-scale-up.md" - alert: WorkerNodesCPUQuotaOverCommit - expr: avg(availability_zone:acscs_worker_nodes:cpu_request_ratio) > 0.8 + expr: avg(availability_zone:acscs_worker_nodes:cpu_request_ratio) > 0.95 for: 5m labels: severity: critical annotations: summary: "There is a high risk of over-committing CPU resources on worker nodes." - description: "During the last 5 minutes, the average CPU request commitment on worker nodes was {{ $value | humanizePercentage }}. This is above the recommended threshold of 80%." + description: "During the last 5 minutes, the average CPU request commitment on worker nodes was {{ $value | humanizePercentage }}. This is above the critical threshold of 95%." sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-cloud-service/runbooks/-/blob/master/sops/dp-027-cluster-scale-up.md" - alert: WorkerNodesMemoryOverCommit expr: avg(availability_zone:acscs_worker_nodes:memory_limit_ratio) > 2 diff --git a/resources/prometheus/unit_tests/WorkerNodesCPUQuotaOverCommit.yaml b/resources/prometheus/unit_tests/WorkerNodesCPUQuotaOverCommit.yaml index 4648c0b9..1a8af897 100644 --- a/resources/prometheus/unit_tests/WorkerNodesCPUQuotaOverCommit.yaml +++ b/resources/prometheus/unit_tests/WorkerNodesCPUQuotaOverCommit.yaml @@ -13,7 +13,31 @@ tests: - series: kube_node_status_allocatable{node="worker-1", resource="cpu", job="kube-state-metrics"} values: "100" - series: cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{node="worker-1", resource="cpu", job="kube-state-metrics"} - values: "81" + values: "86" + alert_rule_test: + - eval_time: 1m + alertname: WorkerNodesCPUQuotaOverCommitWarning + exp_alerts: [] + - eval_time: 5m + alertname: WorkerNodesCPUQuotaOverCommitWarning + exp_alerts: + - exp_labels: + alertname: WorkerNodesCPUQuotaOverCommitWarning + severity: warning + exp_annotations: + description: "During the last 5 minutes, the average CPU request commitment on worker nodes was 86%. This is above the recommended threshold of 85%." + summary: "There is a risk of over-committing CPU resources on worker nodes." + sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-cloud-service/runbooks/-/blob/master/sops/dp-027-cluster-scale-up.md" + - interval: 1m + input_series: + - series: kube_node_role{node="worker-1", role="acscs-worker"} + values: "1" + - series: kube_node_labels{node="worker-1", label_failure_domain_beta_kubernetes_io_zone="us-east-1a"} + values: "1" + - series: kube_node_status_allocatable{node="worker-1", resource="cpu", job="kube-state-metrics"} + values: "100" + - series: cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{node="worker-1", resource="cpu", job="kube-state-metrics"} + values: "96" alert_rule_test: - eval_time: 1m alertname: WorkerNodesCPUQuotaOverCommit @@ -25,6 +49,6 @@ tests: alertname: WorkerNodesCPUQuotaOverCommit severity: critical exp_annotations: - description: "During the last 5 minutes, the average CPU request commitment on worker nodes was 81%. This is above the recommended threshold of 80%." + description: "During the last 5 minutes, the average CPU request commitment on worker nodes was 96%. This is above the critical threshold of 95%." summary: "There is a high risk of over-committing CPU resources on worker nodes." sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-cloud-service/runbooks/-/blob/master/sops/dp-027-cluster-scale-up.md" diff --git a/resources/prometheus/unit_tests/WorkerNodesMemoryQuotaOverCommit.yaml b/resources/prometheus/unit_tests/WorkerNodesMemoryQuotaOverCommit.yaml index 52374d01..86399746 100644 --- a/resources/prometheus/unit_tests/WorkerNodesMemoryQuotaOverCommit.yaml +++ b/resources/prometheus/unit_tests/WorkerNodesMemoryQuotaOverCommit.yaml @@ -13,7 +13,31 @@ tests: - series: kube_node_status_allocatable{node="worker-1", resource="memory", job="kube-state-metrics"} values: "100" - series: cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{node="worker-1", resource="memory", job="kube-state-metrics"} - values: "81" + values: "86" + alert_rule_test: + - eval_time: 1m + alertname: WorkerNodesMemoryQuotaOverCommitWarning + exp_alerts: [] + - eval_time: 5m + alertname: WorkerNodesMemoryQuotaOverCommitWarning + exp_alerts: + - exp_labels: + alertname: WorkerNodesMemoryQuotaOverCommitWarning + severity: warning + exp_annotations: + description: "During the last 5 minutes, the average memory request commitment on worker nodes was 86%. This is above the recommended threshold of 85%." + summary: "There is a risk of over-committing Memory resources on worker nodes." + sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-cloud-service/runbooks/-/blob/master/sops/dp-027-cluster-scale-up.md" + - interval: 1m + input_series: + - series: kube_node_role{node="worker-1", role="acscs-worker"} + values: "1" + - series: kube_node_labels{node="worker-1", label_failure_domain_beta_kubernetes_io_zone="us-east-1a"} + values: "1" + - series: kube_node_status_allocatable{node="worker-1", resource="memory", job="kube-state-metrics"} + values: "100" + - series: cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{node="worker-1", resource="memory", job="kube-state-metrics"} + values: "96" alert_rule_test: - eval_time: 1m alertname: WorkerNodesMemoryQuotaOverCommit @@ -25,6 +49,6 @@ tests: alertname: WorkerNodesMemoryQuotaOverCommit severity: critical exp_annotations: - description: "During the last 5 minutes, the average memory request commitment on worker nodes was 81%. This is above the recommended threshold of 80%." + description: "During the last 5 minutes, the average memory request commitment on worker nodes was 96%. This is above the critical threshold of 95%." summary: "There is a high risk of over-committing Memory resources on worker nodes." sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-cloud-service/runbooks/-/blob/master/sops/dp-027-cluster-scale-up.md"