Skip to content

Commit

Permalink
Alerts for failing OLM operators
Browse files Browse the repository at this point in the history
- Add a Prometheus rule that fires an alert when the csv_abnormal metric
  is emitted with phase="Failed".
- The alert message contains the name and version of the operator, plus the failure reason.
  • Loading branch information
anik120 committed Nov 1, 2019
1 parent 0943c4f commit bf784ab
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 0 deletions.
20 changes: 20 additions & 0 deletions deploy/chart/templates/0000_90_olm_01-prometheus-rule.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{{- if and .Values.installType (eq .Values.installType "ocp") }}
# PrometheusRule declaring the OLM alert for operators whose CSV has failed.
# This resource is rendered only when .Values.installType equals "ocp".
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: olm-alert-rules
  namespace: {{ .Values.namespace }}
  labels:
    prometheus: alert-rules
    role: alert-rules
spec:
  groups:
    - name: olm.failing_operators.rules
      rules:
        # One alert per csv_abnormal series whose phase label is "Failed".
        - alert: FailingOperator
          annotations:
            # The $labels placeholders belong to Prometheus, not to Helm.
            # Each one is wrapped in a Helm string literal so Helm prints it
            # verbatim instead of failing on the undefined variable $labels.
            # The folded scalar joins the lines below with single spaces.
            message: >-
              Failed to install Operator {{ "{{ $labels.name }}" }}
              version {{ "{{ $labels.version }}" }}.
              Reason-{{ "{{ $labels.reason }}" }}
          expr: 'csv_abnormal{phase="Failed"}'
          labels:
            severity: info
{{- end }}
19 changes: 19 additions & 0 deletions manifests/0000_90_olm_01-prometheus-rule.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@

# PrometheusRule declaring the OLM alert for operators whose CSV has failed.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: olm-alert-rules
  namespace: openshift-operator-lifecycle-manager
  labels:
    prometheus: alert-rules
    role: alert-rules
spec:
  groups:
    - name: olm.failing_operators.rules
      rules:
        # One alert per csv_abnormal series whose phase label is "Failed".
        - alert: FailingOperator
          annotations:
            # The $labels placeholders are left as-is in this file; they take
            # their values from the labels of the csv_abnormal series.
            # The folded scalar joins the lines below with single spaces.
            message: >-
              Failed to install Operator {{ $labels.name }}
              version {{ $labels.version }}.
              Reason-{{ $labels.reason }}
          expr: 'csv_abnormal{phase="Failed"}'
          labels:
            severity: info

0 comments on commit bf784ab

Please sign in to comment.