From 8cf487ea728c505caaa3b97cb8f115bdd1d4e2e8 Mon Sep 17 00:00:00 2001
From: Jose Armesto
Date: Tue, 19 Nov 2024 22:58:50 +0100
Subject: [PATCH] Add alert to monitor the HelmRelease for vertical-pod-autoscaler-crd app

---
Reviewer notes (free-form commentary between the "---" separator and the
first "diff --git" line; not part of the commit message or the diff):

  * The new FluxHelmReleaseFailed rule selects gotk_reconcile_condition
    series with type="Ready", status="False", kind="HelmRelease",
    cluster_type="management_cluster", a namespace other than
    flux-giantswarm, and a name matching ".*(vertical-pod-autoscaler-crd)".
    It must hold for 20m before firing, with severity "page" for team
    "turtles".
  * The description and the "namespace" label are wrapped in {{` ... `}};
    presumably this keeps Helm from evaluating the inner {{ $labels.* }}
    expressions when the chart is rendered -- confirm against the chart.
  * NOTE(review): the YAML indentation in the second hunk was reconstructed
    from the usual PrometheusRule layout; verify it against the target file
    before applying.

 CHANGELOG.md | 4 ++++
 .../vertical-pod-autoscaler.rules.yml | 17 +++++++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e8e79e800..9a1604ea4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+### Added
+
+- Add alert to monitor the HelmRelease for vertical-pod-autoscaler-crd app.
+
 ## [4.26.1] - 2024-11-19
 
 ### Changed
diff --git a/helm/prometheus-rules/templates/kaas/turtles/alerting-rules/vertical-pod-autoscaler.rules.yml b/helm/prometheus-rules/templates/kaas/turtles/alerting-rules/vertical-pod-autoscaler.rules.yml
index 64a6f509d..fda1b7b4a 100644
--- a/helm/prometheus-rules/templates/kaas/turtles/alerting-rules/vertical-pod-autoscaler.rules.yml
+++ b/helm/prometheus-rules/templates/kaas/turtles/alerting-rules/vertical-pod-autoscaler.rules.yml
@@ -28,3 +28,20 @@ spec:
         severity: page
         team: turtles
         topic: autoscaling
+    - alert: FluxHelmReleaseFailed
+      annotations:
+        description: |-
+          {{`Flux HelmRelease {{ $labels.name }} in ns {{ $labels.exported_namespace }} on {{ $labels.installation }}/{{ $labels.cluster_id }} is stuck in Failed state.`}}
+        opsrecipe: fluxcd-failing-helmrelease/
+      expr: gotk_reconcile_condition{type="Ready", status="False", kind="HelmRelease", cluster_type="management_cluster", exported_namespace!="flux-giantswarm", name=~".*(vertical-pod-autoscaler-crd)"} > 0
+      for: 20m
+      labels:
+        area: kaas
+        cancel_if_outside_working_hours: "true"
+        cancel_if_kube_state_metrics_down: "true"
+        cancel_if_monitoring_agent_down: "true"
+        severity: page
+        team: turtles
+        topic: autoscaling
+        namespace: |-
+          {{`{{ $labels.exported_namespace }}`}}