Skip to content

Commit

Permalink
package ramen alerts
Browse files Browse the repository at this point in the history
VolumeSynchronizationDelay

Signed-off-by: rakeshgm <[email protected]>
  • Loading branch information
rakeshgm committed Jul 27, 2023
1 parent 1c45155 commit a2c0d1d
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 0 deletions.
32 changes: 32 additions & 0 deletions config/prometheus/alerts.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
---
# PrometheusRule carrying the Ramen volume-synchronization alerts.
# One recording rule computes a ratio; two alerts with the same name but
# different severities are driven from that recorded series.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  labels:
    prometheus: k8s
    role: alert-rules
  name: alerting-rules
  namespace: system
spec:
  groups:
    - name: alerts
      rules:
        # Ratio of the observed sync duration to the policy's schedule
        # interval. The two series are matched on (policyname, job);
        # group_left() lets several sync-duration series share one
        # schedule-interval series.
        - record: ramen_rpo_difference
          expr: >-
            ramen_sync_duration_seconds{job="ramen-hub-operator-metrics-service"}
            / on(policyname, job) group_left()
            (ramen_policy_schedule_interval_seconds{job="ramen-hub-operator-metrics-service"})

        # Critical: the ratio is 3 or higher.
        - alert: VolumeSynchronizationDelay
          expr: ramen_rpo_difference >= 3
          # The condition must hold for this long before the alert fires.
          for: 5s
          labels:
            cluster: "{{ $labels.cluster }}"
            severity: critical
          annotations:
            description: "Syncing of volumes (DRPC: {{ $labels.obj_name }}, Namespace: {{ $labels.obj_namespace }}) is taking more than thrice the scheduled snapshot interval. This may cause data loss and a backlog of replication requests. To get around the delay, follow the instructions provided in the documentation."
            alert_type: "DisasterRecovery"

        # Warning: the ratio is strictly between 2 and 3, so this alert and
        # the critical one above never match the same sample.
        - alert: VolumeSynchronizationDelay
          expr: ramen_rpo_difference > 2 and ramen_rpo_difference < 3
          for: 5s
          labels:
            cluster: "{{ $labels.cluster }}"
            severity: warning
          annotations:
            description: "Syncing of volumes (DRPC: {{ $labels.obj_name }}, Namespace: {{ $labels.obj_namespace }}) is taking more than twice the scheduled snapshot interval. This may cause data loss and impact replication requests. Check the documentation for instructions on how to get around the delay."
            alert_type: "DisasterRecovery"
1 change: 1 addition & 0 deletions config/prometheus/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
---
# Manifests bundled by this kustomization, in the order listed.
resources:
  - monitor.yaml
  - alerts.yaml

0 comments on commit a2c0d1d

Please sign in to comment.