diff --git a/resources/prometheus/prometheus-rules.yaml b/resources/prometheus/prometheus-rules.yaml index 5331e378..eda931cb 100644 --- a/resources/prometheus/prometheus-rules.yaml +++ b/resources/prometheus/prometheus-rules.yaml @@ -38,7 +38,7 @@ spec: summary: "Central container `{{ $labels.pod }}/{{ $labels.container }}` in namespace `{{ $labels.namespace }}` restarted more than 3 times." description: "Central container `{{ $labels.pod }}/{{ $labels.container }}` in namespace `{{ $labels.namespace }}` has restarted more than 3 times during the last 10 minutes." sop_url: "" # TODO: Add SOP - - alert: RHACSCentralDatabasePersistentVolumeFillingUp + - alert: RHACSCentralDatabasePersistentVolumeFillingUp (< 10% left) expr: kubelet_volume_stats_available_bytes{persistentvolumeclaim="stackrox-db"} / kubelet_volume_stats_capacity_bytes{persistentvolumeclaim="stackrox-db"} < 0.1 for: 5m labels: @@ -47,7 +47,7 @@ spec: summary: "Central database storage in namespace `{{ $labels.namespace }}` is filing up." description: "Central database storage in namespace `{{ $labels.namespace }}` is filling up for PVC `{{ $labels.persistentvolumeclaim }}`. Available storage quota is `{{ $value | humanizePercentage }}`." sop_url: "" # TODO: Add SOP - - alert: RHACSCentralDatabasePersistentVolumeFillingUp + - alert: RHACSCentralDatabasePersistentVolumeFillingUp (~4 days left) expr: kubelet_volume_stats_available_bytes{persistentvolumeclaim="stackrox-db"} / kubelet_volume_stats_capacity_bytes{persistentvolumeclaim="stackrox-db"} < 0.25 and predict_linear(kubelet_volume_stats_available_bytes{persistentvolumeclaim="stackrox-db"}[6h], 4 * 24 * 3600) < 0 for: 5m labels: @@ -190,7 +190,7 @@ spec: - name: observability-operator rules: - - alert: ObservabilityOperatorPrometheusPersistentVolumeFillingUp + - alert: ObservabilityOperatorPrometheusPersistentVolumeFillingUp (< 10% left) expr: kubelet_volume_stats_available_bytes{persistentvolumeclaim=~"managed-services-prometheus-kafka-prometheus-[0-9]"} / kubelet_volume_stats_capacity_bytes{persistentvolumeclaim=~"managed-services-prometheus-kafka-prometheus-[0-9]"} < 0.1 for: 5m labels: @@ -200,7 +200,7 @@ spec: description: "The Observability Operator's Prometheus storage in namespace `{{ $labels.namespace }}` is filling up for PVC `{{ $labels.persistentvolumeclaim }}`. Available storage quota is `{{ $value | humanizePercentage }}`." sop_url: "" # TODO: Add SOP - - alert: ObservabilityOperatorPrometheusPersistentVolumeFillingUp + - alert: ObservabilityOperatorPrometheusPersistentVolumeFillingUp (~4 days left) expr: kubelet_volume_stats_available_bytes{persistentvolumeclaim=~"managed-services-prometheus-kafka-prometheus-[0-9]"} / kubelet_volume_stats_capacity_bytes{persistentvolumeclaim=~"managed-services-prometheus-kafka-prometheus-[0-9]"} < 0.25 and predict_linear(kubelet_volume_stats_available_bytes{persistentvolumeclaim=~"managed-services-prometheus-kafka-prometheus-[0-9]"}[6h], 4 * 24 * 3600) < 0 for: 5m labels: diff --git a/resources/prometheus/unit_tests/ObservabilityOperatorPrometheusPersistentVolumeFillingUp.yaml b/resources/prometheus/unit_tests/ObservabilityOperatorPrometheusPersistentVolumeFillingUp.yaml index 396fb719..4de6b42c 100644 --- a/resources/prometheus/unit_tests/ObservabilityOperatorPrometheusPersistentVolumeFillingUp.yaml +++ b/resources/prometheus/unit_tests/ObservabilityOperatorPrometheusPersistentVolumeFillingUp.yaml @@ -16,10 +16,10 @@ tests: alertname: ObservabilityOperatorPrometheusPersistentVolumeFillingUp exp_alerts: [] - eval_time: 100m - alertname: ObservabilityOperatorPrometheusPersistentVolumeFillingUp + alertname: ObservabilityOperatorPrometheusPersistentVolumeFillingUp (~4 days left) exp_alerts: - exp_labels: - alertname: ObservabilityOperatorPrometheusPersistentVolumeFillingUp + alertname: ObservabilityOperatorPrometheusPersistentVolumeFillingUp (~4 days left) severity: warning persistentvolumeclaim: managed-services-prometheus-kafka-prometheus-0 namespace: rhacs-observability @@ -28,10 +28,10 @@ tests: description: "The Observability Operator's Prometheus storage in namespace `rhacs-observability` is filling up for PVC `managed-services-prometheus-kafka-prometheus-0`. Available storage quota is `13.09%`. The volume is expected to fill up within 4 days based on linear extrapolation over the last 6 hours." sop_url: "" - eval_time: 110m - alertname: ObservabilityOperatorPrometheusPersistentVolumeFillingUp + alertname: ObservabilityOperatorPrometheusPersistentVolumeFillingUp (< 10% left) exp_alerts: - exp_labels: - alertname: ObservabilityOperatorPrometheusPersistentVolumeFillingUp + alertname: ObservabilityOperatorPrometheusPersistentVolumeFillingUp (< 10% left) severity: critical persistentvolumeclaim: managed-services-prometheus-kafka-prometheus-0 namespace: rhacs-observability @@ -40,7 +40,7 @@ tests: description: "The Observability Operator's Prometheus storage in namespace `rhacs-observability` is filling up for PVC `managed-services-prometheus-kafka-prometheus-0`. Available storage quota is `3.32%`." sop_url: "" - exp_labels: - alertname: ObservabilityOperatorPrometheusPersistentVolumeFillingUp + alertname: ObservabilityOperatorPrometheusPersistentVolumeFillingUp (~4 days left) severity: warning persistentvolumeclaim: managed-services-prometheus-kafka-prometheus-0 namespace: rhacs-observability diff --git a/resources/prometheus/unit_tests/RHACSCentralDatabasePersistentVolumeFillingUp.yaml b/resources/prometheus/unit_tests/RHACSCentralDatabasePersistentVolumeFillingUp.yaml index d17e1097..c87227c3 100644 --- a/resources/prometheus/unit_tests/RHACSCentralDatabasePersistentVolumeFillingUp.yaml +++ b/resources/prometheus/unit_tests/RHACSCentralDatabasePersistentVolumeFillingUp.yaml @@ -19,7 +19,7 @@ tests: alertname: RHACSCentralDatabasePersistentVolumeFillingUp exp_alerts: - exp_labels: - alertname: RHACSCentralDatabasePersistentVolumeFillingUp + alertname: RHACSCentralDatabasePersistentVolumeFillingUp (~4 days left) severity: warning persistentvolumeclaim: stackrox-db namespace: rhacs-1234 @@ -31,7 +31,7 @@ tests: alertname: RHACSCentralDatabasePersistentVolumeFillingUp exp_alerts: - exp_labels: - alertname: RHACSCentralDatabasePersistentVolumeFillingUp + alertname: RHACSCentralDatabasePersistentVolumeFillingUp (< 10% left) severity: critical persistentvolumeclaim: stackrox-db namespace: rhacs-1234 @@ -40,7 +40,7 @@ tests: description: "Central database storage in namespace `rhacs-1234` is filling up for PVC `stackrox-db`. Available storage quota is `3.32%`." sop_url: "" - exp_labels: - alertname: RHACSCentralDatabasePersistentVolumeFillingUp + alertname: RHACSCentralDatabasePersistentVolumeFillingUp (~4 days left) severity: warning persistentvolumeclaim: stackrox-db namespace: rhacs-1234