Skip to content

Commit

Permalink
some easy fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
QuentinBisson committed Oct 29, 2024
1 parent f1d7e4f commit 6bc8e0b
Show file tree
Hide file tree
Showing 8 changed files with 60 additions and 50 deletions.
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Changed

- Move `Inhibition` from a suffix to a prefix for the prometheus-agent inhibition alerts to match the other inhibition alerts:
- `PrometheusAgentFailingInhibition` => `InhibitionPrometheusAgentFailing`
- `PrometheusAgentShardsMissingInhibition` => `InhibitionPrometheusAgentShardsMissing`

### Fixed

- Fix the name of the `statefulset.rules` rule: it was named `deployment.management-cluster.rules` and was therefore replacing the deployment alerts.

## [4.22.0] - 2024-10-29

### Changed
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ spec:
cancel_if_cluster_status_deleting: "true"
cancel_if_cluster_has_no_workers: "true"
## Same as PrometheusAgentFailing, but triggers inhibition earlier and does not page.
- alert: PrometheusAgentFailingInhibition
- alert: InhibitionPrometheusAgentFailing
annotations:
description: '{{`Prometheus agent remote write is failing.`}}'
summary: Prometheus agent fails to send samples to remote write endpoint.
Expand Down Expand Up @@ -125,7 +125,7 @@ spec:
cancel_if_cluster_status_deleting: "true"
cancel_if_outside_working_hours: "true"
## Same as PrometheusAgentShardsMissing but triggers inhibition earlier, and does not page.
- alert: PrometheusAgentShardsMissingInhibition
- alert: InhibitionPrometheusAgentShardsMissing
annotations:
description: '{{`Prometheus agent is missing shards.`}}'
summary: Prometheus agent is missing shards.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ metadata:
{{- if not .Values.mimir.enabled }}
cluster_type: "management_cluster"
{{- end }}
name: deployment.management-cluster.rules
name: statefulset.rules
namespace: {{ .Values.namespace }}
spec:
groups:
Expand All @@ -18,8 +18,8 @@ spec:
description: '{{`Statefulset {{ $labels.namespace}}/{{ $labels.statefulset }} is not satisfied.`}}'
opsrecipe: deployment-not-satisfied/
expr: |-
kube_statefulset_status_replicas{cluster_type="management_cluster", statefulset=~"loki.*|mimir.*"}
- kube_statefulset_status_replicas_ready{cluster_type="management_cluster", statefulset=~"loki.*|mimir.*"}
kube_statefulset_status_replicas{cluster_type="management_cluster", statefulset=~"loki.*|mimir.*|pyroscope.*|tempo.*"}
- kube_statefulset_status_replicas_ready{cluster_type="management_cluster", statefulset=~"loki.*|mimir.*|pyroscope.*|tempo.*"}
> 0
for: 30m
labels:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ tests:
description: "Prometheus agent remote write is failing."
opsrecipe: "prometheus-agent/"
summary: "Prometheus agent fails to send samples to remote write endpoint."
- alertname: PrometheusAgentFailingInhibition
- alertname: InhibitionPrometheusAgentFailing
eval_time: 30m
exp_alerts:
- exp_labels:
Expand Down Expand Up @@ -95,7 +95,7 @@ tests:
description: "Prometheus agent remote write is failing."
opsrecipe: "prometheus-agent/"
summary: "Prometheus agent fails to send samples to remote write endpoint."
- alertname: PrometheusAgentFailingInhibition
- alertname: InhibitionPrometheusAgentFailing
eval_time: 90m
exp_alerts:
- exp_labels:
Expand Down Expand Up @@ -124,7 +124,7 @@ tests:
summary: "Prometheus agent fails to send samples to remote write endpoint."
- alertname: PrometheusAgentFailing
eval_time: 150m
- alertname: PrometheusAgentFailingInhibition
- alertname: InhibitionPrometheusAgentFailing
eval_time: 150m
# Tests for `PrometheusAgentShardsMissing` alert
- interval: 1m
Expand All @@ -142,7 +142,7 @@ tests:
alert_rule_test:
- alertname: PrometheusAgentShardsMissing
eval_time: 40m
- alertname: PrometheusAgentShardsMissingInhibition
- alertname: InhibitionPrometheusAgentShardsMissing
eval_time: 40m
- alertname: PrometheusAgentShardsMissing
eval_time: 120m
Expand All @@ -165,7 +165,7 @@ tests:
description: "Prometheus agent is missing shards."
opsrecipe: "prometheus-agent/"
summary: "Prometheus agent is missing shards."
- alertname: PrometheusAgentShardsMissingInhibition
- alertname: InhibitionPrometheusAgentShardsMissing
eval_time: 100m
exp_alerts:
- exp_labels:
Expand Down Expand Up @@ -207,7 +207,7 @@ tests:
description: "Prometheus agent is missing shards."
opsrecipe: "prometheus-agent/"
summary: "Prometheus agent is missing shards."
- alertname: PrometheusAgentShardsMissingInhibition
- alertname: InhibitionPrometheusAgentShardsMissing
eval_time: 125m
exp_alerts:
- exp_labels:
Expand All @@ -230,7 +230,7 @@ tests:
summary: "Prometheus agent is missing shards."
- alertname: PrometheusAgentShardsMissing
eval_time: 130m
- alertname: PrometheusAgentShardsMissingInhibition
- alertname: InhibitionPrometheusAgentShardsMissing
eval_time: 130m
# Tests for `PrometheusAgentShardsMissing` alert with missing `prometheus_operator_spec_shards` metric
- interval: 1m
Expand All @@ -246,7 +246,7 @@ tests:
alert_rule_test:
- alertname: PrometheusAgentShardsMissing
eval_time: 40m
- alertname: PrometheusAgentShardsMissingInhibition
- alertname: InhibitionPrometheusAgentShardsMissing
eval_time: 40m
- alertname: PrometheusAgentShardsMissing
eval_time: 120m
Expand All @@ -269,7 +269,7 @@ tests:
description: "Prometheus agent is missing shards."
opsrecipe: "prometheus-agent/"
summary: "Prometheus agent is missing shards."
- alertname: PrometheusAgentShardsMissingInhibition
- alertname: InhibitionPrometheusAgentShardsMissing
eval_time: 100m
exp_alerts:
- exp_labels:
Expand Down Expand Up @@ -311,7 +311,7 @@ tests:
description: "Prometheus agent is missing shards."
opsrecipe: "prometheus-agent/"
summary: "Prometheus agent is missing shards."
- alertname: PrometheusAgentShardsMissingInhibition
- alertname: InhibitionPrometheusAgentShardsMissing
eval_time: 125m
exp_alerts:
- exp_labels:
Expand All @@ -334,5 +334,5 @@ tests:
summary: "Prometheus agent is missing shards."
- alertname: PrometheusAgentShardsMissing
eval_time: 130m
- alertname: PrometheusAgentShardsMissingInhibition
- alertname: InhibitionPrometheusAgentShardsMissing
eval_time: 130m
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ tests:
description: "Prometheus agent remote write is failing."
opsrecipe: "prometheus-agent/"
summary: "Prometheus agent fails to send samples to remote write endpoint."
- alertname: PrometheusAgentFailingInhibition
- alertname: InhibitionPrometheusAgentFailing
eval_time: 30m
exp_alerts:
- exp_labels:
Expand Down Expand Up @@ -71,7 +71,7 @@ tests:
description: "Prometheus agent remote write is failing."
opsrecipe: "prometheus-agent/"
summary: "Prometheus agent fails to send samples to remote write endpoint."
- alertname: PrometheusAgentFailingInhibition
- alertname: InhibitionPrometheusAgentFailing
eval_time: 90m
exp_alerts:
- exp_labels:
Expand All @@ -94,7 +94,7 @@ tests:
summary: "Prometheus agent fails to send samples to remote write endpoint."
- alertname: PrometheusAgentFailing
eval_time: 150m
- alertname: PrometheusAgentFailingInhibition
- alertname: InhibitionPrometheusAgentFailing
eval_time: 150m
# Tests for `PrometheusAgentShardsMissing` alert
- interval: 1m
Expand All @@ -112,7 +112,7 @@ tests:
alert_rule_test:
- alertname: PrometheusAgentShardsMissing
eval_time: 40m
- alertname: PrometheusAgentShardsMissingInhibition
- alertname: InhibitionPrometheusAgentShardsMissing
eval_time: 40m
- alertname: PrometheusAgentShardsMissing
eval_time: 120m
Expand All @@ -135,7 +135,7 @@ tests:
description: "Prometheus agent is missing shards."
opsrecipe: "prometheus-agent/"
summary: "Prometheus agent is missing shards."
- alertname: PrometheusAgentShardsMissingInhibition
- alertname: InhibitionPrometheusAgentShardsMissing
eval_time: 100m
exp_alerts:
- exp_labels:
Expand Down Expand Up @@ -177,7 +177,7 @@ tests:
description: "Prometheus agent is missing shards."
opsrecipe: "prometheus-agent/"
summary: "Prometheus agent is missing shards."
- alertname: PrometheusAgentShardsMissingInhibition
- alertname: InhibitionPrometheusAgentShardsMissing
eval_time: 125m
exp_alerts:
- exp_labels:
Expand All @@ -200,7 +200,7 @@ tests:
summary: "Prometheus agent is missing shards."
- alertname: PrometheusAgentShardsMissing
eval_time: 130m
- alertname: PrometheusAgentShardsMissingInhibition
- alertname: InhibitionPrometheusAgentShardsMissing
eval_time: 130m
# Tests for `PrometheusAgentShardsMissing` alert with missing `prometheus_operator_spec_shards` metric
- interval: 1m
Expand All @@ -216,7 +216,7 @@ tests:
alert_rule_test:
- alertname: PrometheusAgentShardsMissing
eval_time: 40m
- alertname: PrometheusAgentShardsMissingInhibition
- alertname: InhibitionPrometheusAgentShardsMissing
eval_time: 40m
- alertname: PrometheusAgentShardsMissing
eval_time: 120m
Expand All @@ -239,7 +239,7 @@ tests:
description: "Prometheus agent is missing shards."
opsrecipe: "prometheus-agent/"
summary: "Prometheus agent is missing shards."
- alertname: PrometheusAgentShardsMissingInhibition
- alertname: InhibitionPrometheusAgentShardsMissing
eval_time: 100m
exp_alerts:
- exp_labels:
Expand Down Expand Up @@ -281,7 +281,7 @@ tests:
description: "Prometheus agent is missing shards."
opsrecipe: "prometheus-agent/"
summary: "Prometheus agent is missing shards."
- alertname: PrometheusAgentShardsMissingInhibition
- alertname: InhibitionPrometheusAgentShardsMissing
eval_time: 125m
exp_alerts:
- exp_labels:
Expand All @@ -304,5 +304,5 @@ tests:
summary: "Prometheus agent is missing shards."
- alertname: PrometheusAgentShardsMissing
eval_time: 130m
- alertname: PrometheusAgentShardsMissingInhibition
- alertname: InhibitionPrometheusAgentShardsMissing
eval_time: 130m
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ tests:
description: "Prometheus agent remote write is failing."
opsrecipe: "prometheus-agent/"
summary: "Prometheus agent fails to send samples to remote write endpoint."
- alertname: PrometheusAgentFailingInhibition
- alertname: InhibitionPrometheusAgentFailing
eval_time: 30m
exp_alerts:
- exp_labels:
Expand Down Expand Up @@ -71,7 +71,7 @@ tests:
description: "Prometheus agent remote write is failing."
opsrecipe: "prometheus-agent/"
summary: "Prometheus agent fails to send samples to remote write endpoint."
- alertname: PrometheusAgentFailingInhibition
- alertname: InhibitionPrometheusAgentFailing
eval_time: 90m
exp_alerts:
- exp_labels:
Expand All @@ -94,7 +94,7 @@ tests:
summary: "Prometheus agent fails to send samples to remote write endpoint."
- alertname: PrometheusAgentFailing
eval_time: 150m
- alertname: PrometheusAgentFailingInhibition
- alertname: InhibitionPrometheusAgentFailing
eval_time: 150m
# Tests for `PrometheusAgentShardsMissing` alert
- interval: 1m
Expand All @@ -112,7 +112,7 @@ tests:
alert_rule_test:
- alertname: PrometheusAgentShardsMissing
eval_time: 40m
- alertname: PrometheusAgentShardsMissingInhibition
- alertname: InhibitionPrometheusAgentShardsMissing
eval_time: 40m
- alertname: PrometheusAgentShardsMissing
eval_time: 120m
Expand All @@ -135,7 +135,7 @@ tests:
description: "Prometheus agent is missing shards."
opsrecipe: "prometheus-agent/"
summary: "Prometheus agent is missing shards."
- alertname: PrometheusAgentShardsMissingInhibition
- alertname: InhibitionPrometheusAgentShardsMissing
eval_time: 100m
exp_alerts:
- exp_labels:
Expand Down Expand Up @@ -177,7 +177,7 @@ tests:
description: "Prometheus agent is missing shards."
opsrecipe: "prometheus-agent/"
summary: "Prometheus agent is missing shards."
- alertname: PrometheusAgentShardsMissingInhibition
- alertname: InhibitionPrometheusAgentShardsMissing
eval_time: 125m
exp_alerts:
- exp_labels:
Expand All @@ -200,7 +200,7 @@ tests:
summary: "Prometheus agent is missing shards."
- alertname: PrometheusAgentShardsMissing
eval_time: 130m
- alertname: PrometheusAgentShardsMissingInhibition
- alertname: InhibitionPrometheusAgentShardsMissing
eval_time: 130m
# Tests for `PrometheusAgentShardsMissing` alert with missing `prometheus_operator_spec_shards` metric
- interval: 1m
Expand All @@ -216,7 +216,7 @@ tests:
alert_rule_test:
- alertname: PrometheusAgentShardsMissing
eval_time: 40m
- alertname: PrometheusAgentShardsMissingInhibition
- alertname: InhibitionPrometheusAgentShardsMissing
eval_time: 40m
- alertname: PrometheusAgentShardsMissing
eval_time: 120m
Expand All @@ -239,7 +239,7 @@ tests:
description: "Prometheus agent is missing shards."
opsrecipe: "prometheus-agent/"
summary: "Prometheus agent is missing shards."
- alertname: PrometheusAgentShardsMissingInhibition
- alertname: InhibitionPrometheusAgentShardsMissing
eval_time: 100m
exp_alerts:
- exp_labels:
Expand Down Expand Up @@ -281,7 +281,7 @@ tests:
description: "Prometheus agent is missing shards."
opsrecipe: "prometheus-agent/"
summary: "Prometheus agent is missing shards."
- alertname: PrometheusAgentShardsMissingInhibition
- alertname: InhibitionPrometheusAgentShardsMissing
eval_time: 125m
exp_alerts:
- exp_labels:
Expand All @@ -304,5 +304,5 @@ tests:
summary: "Prometheus agent is missing shards."
- alertname: PrometheusAgentShardsMissing
eval_time: 130m
- alertname: PrometheusAgentShardsMissingInhibition
- alertname: InhibitionPrometheusAgentShardsMissing
eval_time: 130m
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
rule_files:
- statefulset.management-cluster.rules.yml
- statefulset.rules.yml

tests:
- interval: 1m
Expand Down
Loading

0 comments on commit 6bc8e0b

Please sign in to comment.