giantswarm · QuentinBisson · Jun 9, 2024 · Jun 5, 2024 · Jun 6, 2024 · Jun 5, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -14,7 +14,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Changed
 
-- Review phoenix alerts towards Mimir.
+- Split the phoenix job alerts into two files:
+  - a new file named job.aws.rules that contains the AWS-specific alerts
+  - the rest of job.rules, which moves into the shared alerts because it is provider-independent
+- Move the management cluster certificate alerts into the shared alerts because they are provider-independent.
+- Review and fix phoenix alerts towards Mimir and multi-provider MCs.
 - Moves cluster-autoscaler and vpa alerts to turtles.
 
 ### Fixed

@@ -0,0 +1,32 @@
+## TODO Remove with vintage
+# Pages when the route53-manager CronJob has not been scheduled for more than
+# 2 hours (7200s), sustained for 15 minutes.
+# This rule applies to vintage AWS management clusters only, so it is rendered
+# only when the provider kind is "aws" and the flavor is "vintage".
+{{- if and (eq .Values.managementCluster.provider.kind "aws") (eq .Values.managementCluster.provider.flavor "vintage") }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  creationTimestamp: null
+  labels:
+    {{- include "labels.common" . | nindent 4 }}
+    # No need for .Values.mimir.enabled condition - will be gone with Vintage
+    cluster_type: "management_cluster"
+  name: aws.job.rules
+  namespace: {{ .Values.namespace }}
+spec:
+  groups:
+  - name: aws-jobs
+    rules:
+    - alert: JobHasNotBeenScheduledForTooLong
+      annotations:
+        description: '{{`CronJob {{ $labels.namespace }}/{{ $labels.cronjob }} has not been scheduled for more than 2 hours.`}}'
+        opsrecipe: job-has-not-been-scheduled-for-too-long/
+      expr: (time() - kube_cronjob_status_last_schedule_time{cronjob="route53-manager"}) > 7200
+      for: 15m
+      labels:
+        area: kaas
+        severity: page
+        team: phoenix
+        topic: managementcluster
+{{- end }}
@@ -23,13 +23,13 @@ spec:
         area: kaas
         cancel_if_outside_working_hours: "true"
         severity: page
-        team: phoenix
+        team: {{ include "providerTeam" . }}
         topic: security
-    - alert: ManagementClusterAWSCertificateWillExpireInLessThanOneMonth
+    - alert: ManagementClusterCertificateWillExpireInLessThanOneMonth
       annotations:
         description: '{{`Certificate {{ $labels.path }} on {{ $labels.node }} will expire in less than one month.`}}'
         opsrecipe: renew-certificates/
-      expr: (cert_exporter_not_after{cluster_type="management_cluster", provider="aws", path!="/etc/kubernetes/ssl/service-account-crt.pem"} - time()) < 4 * 7 * 24 * 60 * 60
+      expr: (cert_exporter_not_after{cluster_type="management_cluster", path!="/etc/kubernetes/ssl/service-account-crt.pem"} - time()) < 4 * 7 * 24 * 60 * 60
       for: 5m
       labels:
         area: kaas

@@ -21,16 +21,3 @@ spec:
         severity: notify
         team: {{ include "providerTeam" . }}
         topic: managementcluster
-{{- if eq .Values.managementCluster.provider.kind "aws" }}
-    - alert: JobHasNotBeenScheduledForTooLong
-      annotations:
-        description: '{{`CronJob {{ $labels.namespace }}/{{ $labels.cronjob }} has not been scheduled for more than 2 hours.`}}'
-        opsrecipe: job-has-not-been-scheduled-for-too-long/
-      expr: (time() - kube_cronjob_status_last_schedule_time{cronjob="route53-manager"}) > 7200
-      for: 15m
-      labels:
-        area: kaas
-        severity: page
-        team: phoenix
-        topic: managementcluster
-{{- end }}