# unschedulable_pods_test.yaml
# promtool unit test for the UnschedulablePods alert loaded from
# prometheus.pod_alerts.yaml.
#
# Every input series below is a kube_pod_status_unschedulable sample stream
# (1 = unschedulable, 0 = scheduled) sampled once per minute. Only one of
# them is expected to be firing when the alert is checked at 60m.
evaluation_interval: 1m

rule_files:
  - prometheus.pod_alerts.yaml

tests:
  - interval: 1m
    input_series:
      # The pod starts out unschedulable but later becomes scheduled, so no
      # alert is expected for it.
      - series: 'kube_pod_status_unschedulable{namespace="prod", pod="prod-pod-1", source_cluster="cluster01"}'
        values: '1x40 0x20'
      # The pod is unschedulable the whole time, so an alert is expected.
      - series: 'kube_pod_status_unschedulable{namespace="prod-test", pod="prod-pod-2", source_cluster="cluster02"}'
        values: '1x60'
      # The pod is unschedulable most of the time, but a single scheduled
      # sample in the middle interrupts the streak, so no alert is expected.
      - series: 'kube_pod_status_unschedulable{namespace="prod-test", pod="prod-pod-3", source_cluster="cluster03"}'
        values: '1x29 0 1x28'
      # Unschedulable the whole time, but the namespace ends with "tenant",
      # so it is ignored.
      - series: 'kube_pod_status_unschedulable{namespace="prod-tenant", pod="prod-pod-3", source_cluster="cluster04"}'
        values: '1x60'
      # Unschedulable the whole time, but the namespace starts with
      # "openshift", so it is ignored.
      - series: 'kube_pod_status_unschedulable{namespace="openshift-prod-test", pod="oc-pod", source_cluster="cluster05"}'
        values: '1x60'
      # Unschedulable the whole time, but the namespace starts with "kube",
      # so it is ignored.
      - series: 'kube_pod_status_unschedulable{namespace="kube-test", pod="kube-pod", source_cluster="cluster06"}'
        values: '1x60'
      # Unschedulable the whole time, but the namespace is "default", so it
      # is ignored.
      - series: 'kube_pod_status_unschedulable{namespace="default", pod="test-pod", source_cluster="cluster07"}'
        values: '1x60'
      # Unschedulable the whole time, but the namespace ends with "env", so
      # it is ignored.
      - series: 'kube_pod_status_unschedulable{namespace="test-env", pod="test-pod", source_cluster="cluster01"}'
        values: '1x60'
      # Unschedulable the whole time, but it is in the "tekton-ci" namespace,
      # so it is ignored.
      - series: 'kube_pod_status_unschedulable{namespace="tekton-ci", pod="test-pod", source_cluster="cluster01"}'
        values: '1x60'
      # Unschedulable the whole time, but it is in the "build-templates-e2e"
      # namespace, so it is ignored.
      - series: 'kube_pod_status_unschedulable{namespace="build-templates-e2e", pod="test-pod", source_cluster="cluster01"}'
        values: '1x60'

    alert_rule_test:
      # At the 60 minute mark only prod-pod-2 should be firing; exp_alerts
      # must match the firing set exactly, so this also asserts that none of
      # the other series above raise the alert.
      - eval_time: 60m
        alertname: UnschedulablePods
        exp_alerts:
          - exp_labels:
              severity: warning
              namespace: prod-test
              pod: prod-pod-2
              source_cluster: cluster02
            exp_annotations:
              summary: Pod prod-pod-2 cannot be scheduled.
              # Folded scalar: the two lines below are joined with a single
              # space and no trailing newline.
              description: >-
                Pod prod-pod-2 for namespace prod-test on cluster cluster02 is
                unscheduled for more than 30 minutes.
              alert_routing_key: prod-test
              runbook_url: https://gitlab.cee.redhat.com/konflux/docs/sop/-/blob/main/o11y/alert-rule-unschedualablePods.md