Skip to content

Commit

Permalink
Add keda alerting rules (#942)
Browse files Browse the repository at this point in the history
* Add keda alerting rules

Signed-off-by: QuentinBisson <[email protected]>

* move to atlas and make alerts notify only

---------

Signed-off-by: QuentinBisson <[email protected]>
  • Loading branch information
QuentinBisson authored Nov 7, 2023
1 parent 3bbfb6f commit 9cf0d30
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 0 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Added

- Add KEDA alerting rules.

### Changed

- Added `namespace` label to Flux helm release related alerts
Expand Down
72 changes: 72 additions & 0 deletions helm/prometheus-rules/templates/alerting-rules/keda.rules.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# PrometheusRule defining the KEDA alerts: one availability alert (KedaDown)
# and three alerts on KEDA error counters. Every alert carries
# severity "notify", team "atlas" and topic "autoscaling", plus the
# cancel_if_* labels listed under each rule.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  creationTimestamp: null
  labels:
    {{- include "labels.common" . | nindent 4 }}
  name: keda.rules
  namespace: {{ .Values.namespace }}
spec:
  groups:
  - name: Keda
    rules:
    # Fires when at least one scrape target whose container label matches
    # "keda-.*" has reported up == 0 for 10 minutes.
    # NOTE(review): the expression returns nothing when no matching "up"
    # series exists at all, so a completely missing KEDA target does not
    # trigger this alert - confirm this is intended.
    - alert: KedaDown
      annotations:
        description: 'Keda is down.'
      expr: count (up{container=~"keda-.*"} == 0) > 0
      for: 10m
      labels:
        area: kaas
        cancel_if_apiserver_down: "true"
        cancel_if_cluster_status_creating: "true"
        cancel_if_cluster_status_deleting: "true"
        cancel_if_cluster_status_updating: "true"
        cancel_if_outside_working_hours: "true"
        severity: notify
        team: atlas
        topic: autoscaling
    # Fires when keda_scaled_object_errors increased within the last
    # 10 minutes and that condition held for 15 minutes.
    - alert: KedaScaledObjectErrors
      annotations:
        description: '{{`Errors detected in scaled object {{ $labels.scaledObject }} in namespace {{ $labels.namespace}}.`}}'
      expr: increase(keda_scaled_object_errors[10m]) > 0
      for: 15m
      labels:
        area: kaas
        cancel_if_apiserver_down: "true"
        cancel_if_cluster_status_creating: "true"
        cancel_if_cluster_status_deleting: "true"
        cancel_if_cluster_status_updating: "true"
        cancel_if_outside_working_hours: "true"
        severity: notify
        team: atlas
        topic: autoscaling
    # Fires when keda_webhook_scaled_object_validation_errors increased within
    # the last 10 minutes and that condition held for 15 minutes.
    # NOTE(review): the description reads the scaledObject label; verify that
    # this webhook metric actually exposes it, otherwise that part of the
    # message renders empty.
    - alert: KedaWebhookScaledObjectValidationErrors
      annotations:
        description: '{{`Validation errors detected in webhook for scaled object {{ $labels.scaledObject }} in namespace {{ $labels.namespace}}.`}}'
      expr: increase(keda_webhook_scaled_object_validation_errors[10m]) > 0
      for: 15m
      labels:
        area: kaas
        cancel_if_apiserver_down: "true"
        cancel_if_cluster_status_creating: "true"
        cancel_if_cluster_status_deleting: "true"
        cancel_if_cluster_status_updating: "true"
        cancel_if_outside_working_hours: "true"
        severity: notify
        team: atlas
        topic: autoscaling
    # Fires when keda_scaler_errors increased within the last 10 minutes and
    # that condition held for 15 minutes.
    - alert: KedaScalerErrors
      annotations:
        description: '{{`Errors detected in scaler {{ $labels.scaler }} for scaled object {{ $labels.scaledObject }} in namespace {{ $labels.namespace}}.`}}'
      expr: increase(keda_scaler_errors[10m]) > 0
      for: 15m
      labels:
        area: kaas
        cancel_if_apiserver_down: "true"
        cancel_if_cluster_status_creating: "true"
        cancel_if_cluster_status_deleting: "true"
        cancel_if_cluster_status_updating: "true"
        cancel_if_outside_working_hours: "true"
        severity: notify
        team: atlas
        topic: autoscaling

0 comments on commit 9cf0d30

Please sign in to comment.