From 70d58b2b2f24ed5d5ccc62d233c7bd3c2726067d Mon Sep 17 00:00:00 2001 From: Alex Rukletsov Date: Thu, 22 Sep 2022 20:34:43 +0200 Subject: [PATCH 1/4] Disambiguate alert names. --- resources/prometheus/prometheus-rules.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/resources/prometheus/prometheus-rules.yaml b/resources/prometheus/prometheus-rules.yaml index da862651..99d3e75e 100644 --- a/resources/prometheus/prometheus-rules.yaml +++ b/resources/prometheus/prometheus-rules.yaml @@ -38,7 +38,7 @@ spec: summary: "Central container `{{ $labels.pod }}/{{ $labels.container }}` in namespace `{{ $labels.namespace }}` restarted more than 3 times." description: "Central container `{{ $labels.pod }}/{{ $labels.container }}` in namespace `{{ $labels.namespace }}` has restarted more than 3 times during the last 10 minutes." sop_url: "" # TODO: Add SOP - - alert: RHACSCentralDatabasePersistentVolumeFillingUp + - alert: RHACSCentralDatabasePersistentVolumeFillingUp (< 10% left) expr: kubelet_volume_stats_available_bytes{persistentvolumeclaim="stackrox-db"} / kubelet_volume_stats_capacity_bytes{persistentvolumeclaim="stackrox-db"} < 0.1 for: 5m labels: @@ -47,7 +47,7 @@ spec: summary: "Central database storage in namespace `{{ $labels.namespace }}` is filing up." description: "Central database storage in namespace `{{ $labels.namespace }}` is filling up for PVC `{{ $labels.persistentvolumeclaim }}`. Available storage quota is `{{ $value | humanizePercentage }}`." sop_url: "" # TODO: Add SOP - - alert: RHACSCentralDatabasePersistentVolumeFillingUp + - alert: RHACSCentralDatabasePersistentVolumeFillingUp (~4 days left) expr: kubelet_volume_stats_available_bytes{persistentvolumeclaim="stackrox-db"} / kubelet_volume_stats_capacity_bytes{persistentvolumeclaim="stackrox-db"} < 0.25 and predict_linear(kubelet_volume_stats_available_bytes{persistentvolumeclaim="stackrox-db"}[6h], 4 * 24 * 3600) < 0 for: 5m labels: @@ -190,7 +190,7 @@ spec: - name: observability-operator rules: - - alert: ObservabilityOperatorPrometheusPersistentVolumeFillingUp + - alert: ObservabilityOperatorPrometheusPersistentVolumeFillingUp (< 10% left) expr: kubelet_volume_stats_available_bytes{persistentvolumeclaim=~"managed-services-prometheus-kafka-prometheus-[0-9]"} / kubelet_volume_stats_capacity_bytes{persistentvolumeclaim=~"managed-services-prometheus-kafka-prometheus-[0-9]"} < 0.1 for: 5m labels: @@ -200,7 +200,7 @@ spec: description: "The Observability Operator's Prometheus storage in namespace `{{ $labels.namespace }}` is filling up for PVC `{{ $labels.persistentvolumeclaim }}`. Available storage quota is `{{ $value | humanizePercentage }}`." sop_url: "" # TODO: Add SOP - - alert: ObservabilityOperatorPrometheusPersistentVolumeFillingUp + - alert: ObservabilityOperatorPrometheusPersistentVolumeFillingUp (~4 days left) expr: kubelet_volume_stats_available_bytes{persistentvolumeclaim=~"managed-services-prometheus-kafka-prometheus-[0-9]"} / kubelet_volume_stats_capacity_bytes{persistentvolumeclaim=~"managed-services-prometheus-kafka-prometheus-[0-9]"} < 0.25 and predict_linear(kubelet_volume_stats_available_bytes{persistentvolumeclaim=~"managed-services-prometheus-kafka-prometheus-[0-9]"}[6h], 4 * 24 * 3600) < 0 for: 5m labels: From 3a4de0100010881881c976b94df8eb8fc7111eb2 Mon Sep 17 00:00:00 2001 From: Alex Rukletsov Date: Thu, 22 Sep 2022 20:48:12 +0200 Subject: [PATCH 2/4] Update tests. --- ...vabilityOperatorPrometheusPersistentVolumeFillingUp.yaml | 6 +++--- .../RHACSCentralDatabasePersistentVolumeFillingUp.yaml | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/resources/prometheus/unit_tests/ObservabilityOperatorPrometheusPersistentVolumeFillingUp.yaml b/resources/prometheus/unit_tests/ObservabilityOperatorPrometheusPersistentVolumeFillingUp.yaml index 396fb719..a48343f3 100644 --- a/resources/prometheus/unit_tests/ObservabilityOperatorPrometheusPersistentVolumeFillingUp.yaml +++ b/resources/prometheus/unit_tests/ObservabilityOperatorPrometheusPersistentVolumeFillingUp.yaml @@ -19,7 +19,7 @@ tests: alertname: ObservabilityOperatorPrometheusPersistentVolumeFillingUp exp_alerts: - exp_labels: - alertname: ObservabilityOperatorPrometheusPersistentVolumeFillingUp + alertname: ObservabilityOperatorPrometheusPersistentVolumeFillingUp (~4 days left) severity: warning persistentvolumeclaim: managed-services-prometheus-kafka-prometheus-0 namespace: rhacs-observability @@ -31,7 +31,7 @@ tests: alertname: ObservabilityOperatorPrometheusPersistentVolumeFillingUp exp_alerts: - exp_labels: - alertname: ObservabilityOperatorPrometheusPersistentVolumeFillingUp + alertname: ObservabilityOperatorPrometheusPersistentVolumeFillingUp (< 10% left) severity: critical persistentvolumeclaim: managed-services-prometheus-kafka-prometheus-0 namespace: rhacs-observability @@ -40,7 +40,7 @@ tests: description: "The Observability Operator's Prometheus storage in namespace `rhacs-observability` is filling up for PVC `managed-services-prometheus-kafka-prometheus-0`. Available storage quota is `3.32%`." sop_url: "" - exp_labels: - alertname: ObservabilityOperatorPrometheusPersistentVolumeFillingUp + alertname: ObservabilityOperatorPrometheusPersistentVolumeFillingUp (~4 days left) severity: warning persistentvolumeclaim: managed-services-prometheus-kafka-prometheus-0 namespace: rhacs-observability diff --git a/resources/prometheus/unit_tests/RHACSCentralDatabasePersistentVolumeFillingUp.yaml b/resources/prometheus/unit_tests/RHACSCentralDatabasePersistentVolumeFillingUp.yaml index d17e1097..c87227c3 100644 --- a/resources/prometheus/unit_tests/RHACSCentralDatabasePersistentVolumeFillingUp.yaml +++ b/resources/prometheus/unit_tests/RHACSCentralDatabasePersistentVolumeFillingUp.yaml @@ -19,7 +19,7 @@ tests: alertname: RHACSCentralDatabasePersistentVolumeFillingUp exp_alerts: - exp_labels: - alertname: RHACSCentralDatabasePersistentVolumeFillingUp + alertname: RHACSCentralDatabasePersistentVolumeFillingUp (~4 days left) severity: warning persistentvolumeclaim: stackrox-db namespace: rhacs-1234 @@ -31,7 +31,7 @@ tests: alertname: RHACSCentralDatabasePersistentVolumeFillingUp exp_alerts: - exp_labels: - alertname: RHACSCentralDatabasePersistentVolumeFillingUp + alertname: RHACSCentralDatabasePersistentVolumeFillingUp (< 10% left) severity: critical persistentvolumeclaim: stackrox-db namespace: rhacs-1234 @@ -40,7 +40,7 @@ tests: description: "Central database storage in namespace `rhacs-1234` is filling up for PVC `stackrox-db`. Available storage quota is `3.32%`." sop_url: "" - exp_labels: - alertname: RHACSCentralDatabasePersistentVolumeFillingUp + alertname: RHACSCentralDatabasePersistentVolumeFillingUp (~4 days left) severity: warning persistentvolumeclaim: stackrox-db namespace: rhacs-1234 From 4f652a5e4f1b00148e1e3a7f729283a23f1e734c Mon Sep 17 00:00:00 2001 From: Alex Rukletsov Date: Thu, 13 Oct 2022 22:52:35 +0200 Subject: [PATCH 3/4] Update resources/prometheus/unit_tests/ObservabilityOperatorPrometheusPersistentVolumeFillingUp.yaml Co-authored-by: Stephan --- ...bservabilityOperatorPrometheusPersistentVolumeFillingUp.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resources/prometheus/unit_tests/ObservabilityOperatorPrometheusPersistentVolumeFillingUp.yaml b/resources/prometheus/unit_tests/ObservabilityOperatorPrometheusPersistentVolumeFillingUp.yaml index a48343f3..d8f10aca 100644 --- a/resources/prometheus/unit_tests/ObservabilityOperatorPrometheusPersistentVolumeFillingUp.yaml +++ b/resources/prometheus/unit_tests/ObservabilityOperatorPrometheusPersistentVolumeFillingUp.yaml @@ -16,7 +16,7 @@ tests: alertname: ObservabilityOperatorPrometheusPersistentVolumeFillingUp exp_alerts: [] - eval_time: 100m - alertname: ObservabilityOperatorPrometheusPersistentVolumeFillingUp + alertname: ObservabilityOperatorPrometheusPersistentVolumeFillingUp (~4 days left) exp_alerts: - exp_labels: alertname: ObservabilityOperatorPrometheusPersistentVolumeFillingUp (~4 days left) From 2dd97fc558fc4b73451c16f4e7d1b9289f5ce123 Mon Sep 17 00:00:00 2001 From: Alex Rukletsov Date: Thu, 13 Oct 2022 22:52:45 +0200 Subject: [PATCH 4/4] Update resources/prometheus/unit_tests/ObservabilityOperatorPrometheusPersistentVolumeFillingUp.yaml Co-authored-by: Stephan --- ...bservabilityOperatorPrometheusPersistentVolumeFillingUp.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resources/prometheus/unit_tests/ObservabilityOperatorPrometheusPersistentVolumeFillingUp.yaml b/resources/prometheus/unit_tests/ObservabilityOperatorPrometheusPersistentVolumeFillingUp.yaml index d8f10aca..4de6b42c 100644 --- a/resources/prometheus/unit_tests/ObservabilityOperatorPrometheusPersistentVolumeFillingUp.yaml +++ b/resources/prometheus/unit_tests/ObservabilityOperatorPrometheusPersistentVolumeFillingUp.yaml @@ -28,7 +28,7 @@ tests: description: "The Observability Operator's Prometheus storage in namespace `rhacs-observability` is filling up for PVC `managed-services-prometheus-kafka-prometheus-0`. Available storage quota is `13.09%`. The volume is expected to fill up within 4 days based on linear extrapolation over the last 6 hours." sop_url: "" - eval_time: 110m - alertname: ObservabilityOperatorPrometheusPersistentVolumeFillingUp + alertname: ObservabilityOperatorPrometheusPersistentVolumeFillingUp (< 10% left) exp_alerts: - exp_labels: alertname: ObservabilityOperatorPrometheusPersistentVolumeFillingUp (< 10% left)