From 40817c2dc878ea1377ca62d180a12fe9ec439135 Mon Sep 17 00:00:00 2001 From: Quentin Bisson Date: Wed, 30 Oct 2024 11:50:05 +0100 Subject: [PATCH] some easy fixes (#1409) * some easy fixes * Update CHANGELOG.md * Update CHANGELOG.md Co-authored-by: Zirko <64951262+QuantumEnigmaa@users.noreply.github.com> --------- Co-authored-by: Zirko <64951262+QuantumEnigmaa@users.noreply.github.com> --- CHANGELOG.md | 10 +++++++++ .../alerting-rules/prometheus-agent.rules.yml | 4 ++-- ...luster.rules.yml => statefulset.rules.yml} | 6 ++--- .../prometheus-agent.rules.test.yml | 22 +++++++++---------- .../prometheus-agent.rules.test.yml | 22 +++++++++---------- .../prometheus-agent.rules.test.yml | 22 +++++++++---------- ...es.test.yml => statefulset.rules.test.yml} | 2 +- .../prometheus-agent.rules.test.yml | 22 +++++++++---------- 8 files changed, 60 insertions(+), 50 deletions(-) rename helm/prometheus-rules/templates/platform/atlas/alerting-rules/{statefulset.management-cluster.rules.yml => statefulset.rules.yml} (86%) rename test/tests/providers/global/platform/atlas/alerting-rules/{statefulset.management-cluster.rules.test.yml => statefulset.rules.test.yml} (97%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 06a4153c..4821cecf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Changed + +- Move `Inhibition` from a suffix to a prefix for the prometheus-agent inhibitions to match with the other inhibition alerts: +- `PrometheusAgentFailingInhibition` => `InhibitionPrometheusAgentFailing` +- `PrometheusAgentShardsMissingInhibition` => `InhibitionPrometheusAgentShardsMissing` + +### Fixed + +- Fixes the statefulset.rules name as it is currently replacing the deployment.rules alerts. 
+ ## [4.22.0] - 2024-10-29 ### Changed diff --git a/helm/prometheus-rules/templates/platform/atlas/alerting-rules/prometheus-agent.rules.yml b/helm/prometheus-rules/templates/platform/atlas/alerting-rules/prometheus-agent.rules.yml index 0dfbc0c9..2aadb8d9 100644 --- a/helm/prometheus-rules/templates/platform/atlas/alerting-rules/prometheus-agent.rules.yml +++ b/helm/prometheus-rules/templates/platform/atlas/alerting-rules/prometheus-agent.rules.yml @@ -53,7 +53,7 @@ spec: cancel_if_cluster_status_deleting: "true" cancel_if_cluster_has_no_workers: "true" ## Same as PrometheusAgentFailing, but triggers inhibition earlier and does not page. - - alert: PrometheusAgentFailingInhibition + - alert: InhibitionPrometheusAgentFailing annotations: description: '{{`Prometheus agent remote write is failing.`}}' summary: Prometheus agent fails to send samples to remote write endpoint. @@ -125,7 +125,7 @@ spec: cancel_if_cluster_status_deleting: "true" cancel_if_outside_working_hours: "true" ## Same as PrometheusAgentShardsMissing but triggers inhibition earlier, and does not page. - - alert: PrometheusAgentShardsMissingInhibition + - alert: InhibitionPrometheusAgentShardsMissing annotations: description: '{{`Prometheus agent is missing shards.`}}' summary: Prometheus agent is missing shards. 
diff --git a/helm/prometheus-rules/templates/platform/atlas/alerting-rules/statefulset.management-cluster.rules.yml b/helm/prometheus-rules/templates/platform/atlas/alerting-rules/statefulset.rules.yml similarity index 86% rename from helm/prometheus-rules/templates/platform/atlas/alerting-rules/statefulset.management-cluster.rules.yml rename to helm/prometheus-rules/templates/platform/atlas/alerting-rules/statefulset.rules.yml index 473be318..1c546f35 100644 --- a/helm/prometheus-rules/templates/platform/atlas/alerting-rules/statefulset.management-cluster.rules.yml +++ b/helm/prometheus-rules/templates/platform/atlas/alerting-rules/statefulset.rules.yml @@ -7,7 +7,7 @@ metadata: {{- if not .Values.mimir.enabled }} cluster_type: "management_cluster" {{- end }} - name: deployment.management-cluster.rules + name: statefulset.rules namespace: {{ .Values.namespace }} spec: groups: @@ -18,8 +18,8 @@ spec: description: '{{`Statefulset {{ $labels.namespace}}/{{ $labels.statefulset }} is not satisfied.`}}' opsrecipe: deployment-not-satisfied/ expr: |- - kube_statefulset_status_replicas{cluster_type="management_cluster", statefulset=~"loki.*|mimir.*"} - - kube_statefulset_status_replicas_ready{cluster_type="management_cluster", statefulset=~"loki.*|mimir.*"} + kube_statefulset_status_replicas{cluster_type="management_cluster", statefulset=~"loki.*|mimir.*|pyroscope.*|tempo.*"} + - kube_statefulset_status_replicas_ready{cluster_type="management_cluster", statefulset=~"loki.*|mimir.*|pyroscope.*|tempo.*"} > 0 for: 30m labels: diff --git a/test/tests/providers/capi/capa-mimir/platform/atlas/alerting-rules/prometheus-agent.rules.test.yml b/test/tests/providers/capi/capa-mimir/platform/atlas/alerting-rules/prometheus-agent.rules.test.yml index 204fe576..7bb92ce3 100644 --- a/test/tests/providers/capi/capa-mimir/platform/atlas/alerting-rules/prometheus-agent.rules.test.yml +++ b/test/tests/providers/capi/capa-mimir/platform/atlas/alerting-rules/prometheus-agent.rules.test.yml @@ 
-40,7 +40,7 @@ tests: description: "Prometheus agent remote write is failing." opsrecipe: "prometheus-agent/" summary: "Prometheus agent fails to send samples to remote write endpoint." - - alertname: PrometheusAgentFailingInhibition + - alertname: InhibitionPrometheusAgentFailing eval_time: 30m exp_alerts: - exp_labels: @@ -95,7 +95,7 @@ tests: description: "Prometheus agent remote write is failing." opsrecipe: "prometheus-agent/" summary: "Prometheus agent fails to send samples to remote write endpoint." - - alertname: PrometheusAgentFailingInhibition + - alertname: InhibitionPrometheusAgentFailing eval_time: 90m exp_alerts: - exp_labels: @@ -124,7 +124,7 @@ tests: summary: "Prometheus agent fails to send samples to remote write endpoint." - alertname: PrometheusAgentFailing eval_time: 150m - - alertname: PrometheusAgentFailingInhibition + - alertname: InhibitionPrometheusAgentFailing eval_time: 150m # Tests for `PrometheusAgentShardsMissing` alert - interval: 1m @@ -142,7 +142,7 @@ tests: alert_rule_test: - alertname: PrometheusAgentShardsMissing eval_time: 40m - - alertname: PrometheusAgentShardsMissingInhibition + - alertname: InhibitionPrometheusAgentShardsMissing eval_time: 40m - alertname: PrometheusAgentShardsMissing eval_time: 120m @@ -165,7 +165,7 @@ tests: description: "Prometheus agent is missing shards." opsrecipe: "prometheus-agent/" summary: "Prometheus agent is missing shards." - - alertname: PrometheusAgentShardsMissingInhibition + - alertname: InhibitionPrometheusAgentShardsMissing eval_time: 100m exp_alerts: - exp_labels: @@ -207,7 +207,7 @@ tests: description: "Prometheus agent is missing shards." opsrecipe: "prometheus-agent/" summary: "Prometheus agent is missing shards." - - alertname: PrometheusAgentShardsMissingInhibition + - alertname: InhibitionPrometheusAgentShardsMissing eval_time: 125m exp_alerts: - exp_labels: @@ -230,7 +230,7 @@ tests: summary: "Prometheus agent is missing shards." 
- alertname: PrometheusAgentShardsMissing eval_time: 130m - - alertname: PrometheusAgentShardsMissingInhibition + - alertname: InhibitionPrometheusAgentShardsMissing eval_time: 130m # Tests for `PrometheusAgentShardsMissing` alert with missing `prometheus_operator_spec_shards` metric - interval: 1m @@ -246,7 +246,7 @@ tests: alert_rule_test: - alertname: PrometheusAgentShardsMissing eval_time: 40m - - alertname: PrometheusAgentShardsMissingInhibition + - alertname: InhibitionPrometheusAgentShardsMissing eval_time: 40m - alertname: PrometheusAgentShardsMissing eval_time: 120m @@ -269,7 +269,7 @@ tests: description: "Prometheus agent is missing shards." opsrecipe: "prometheus-agent/" summary: "Prometheus agent is missing shards." - - alertname: PrometheusAgentShardsMissingInhibition + - alertname: InhibitionPrometheusAgentShardsMissing eval_time: 100m exp_alerts: - exp_labels: @@ -311,7 +311,7 @@ tests: description: "Prometheus agent is missing shards." opsrecipe: "prometheus-agent/" summary: "Prometheus agent is missing shards." - - alertname: PrometheusAgentShardsMissingInhibition + - alertname: InhibitionPrometheusAgentShardsMissing eval_time: 125m exp_alerts: - exp_labels: @@ -334,5 +334,5 @@ tests: summary: "Prometheus agent is missing shards." - alertname: PrometheusAgentShardsMissing eval_time: 130m - - alertname: PrometheusAgentShardsMissingInhibition + - alertname: InhibitionPrometheusAgentShardsMissing eval_time: 130m diff --git a/test/tests/providers/capi/capa/platform/atlas/alerting-rules/prometheus-agent.rules.test.yml b/test/tests/providers/capi/capa/platform/atlas/alerting-rules/prometheus-agent.rules.test.yml index 79e4a1fc..23422850 100644 --- a/test/tests/providers/capi/capa/platform/atlas/alerting-rules/prometheus-agent.rules.test.yml +++ b/test/tests/providers/capi/capa/platform/atlas/alerting-rules/prometheus-agent.rules.test.yml @@ -31,7 +31,7 @@ tests: description: "Prometheus agent remote write is failing." 
opsrecipe: "prometheus-agent/" summary: "Prometheus agent fails to send samples to remote write endpoint." - - alertname: PrometheusAgentFailingInhibition + - alertname: InhibitionPrometheusAgentFailing eval_time: 30m exp_alerts: - exp_labels: @@ -71,7 +71,7 @@ tests: description: "Prometheus agent remote write is failing." opsrecipe: "prometheus-agent/" summary: "Prometheus agent fails to send samples to remote write endpoint." - - alertname: PrometheusAgentFailingInhibition + - alertname: InhibitionPrometheusAgentFailing eval_time: 90m exp_alerts: - exp_labels: @@ -94,7 +94,7 @@ tests: summary: "Prometheus agent fails to send samples to remote write endpoint." - alertname: PrometheusAgentFailing eval_time: 150m - - alertname: PrometheusAgentFailingInhibition + - alertname: InhibitionPrometheusAgentFailing eval_time: 150m # Tests for `PrometheusAgentShardsMissing` alert - interval: 1m @@ -112,7 +112,7 @@ tests: alert_rule_test: - alertname: PrometheusAgentShardsMissing eval_time: 40m - - alertname: PrometheusAgentShardsMissingInhibition + - alertname: InhibitionPrometheusAgentShardsMissing eval_time: 40m - alertname: PrometheusAgentShardsMissing eval_time: 120m @@ -135,7 +135,7 @@ tests: description: "Prometheus agent is missing shards." opsrecipe: "prometheus-agent/" summary: "Prometheus agent is missing shards." - - alertname: PrometheusAgentShardsMissingInhibition + - alertname: InhibitionPrometheusAgentShardsMissing eval_time: 100m exp_alerts: - exp_labels: @@ -177,7 +177,7 @@ tests: description: "Prometheus agent is missing shards." opsrecipe: "prometheus-agent/" summary: "Prometheus agent is missing shards." - - alertname: PrometheusAgentShardsMissingInhibition + - alertname: InhibitionPrometheusAgentShardsMissing eval_time: 125m exp_alerts: - exp_labels: @@ -200,7 +200,7 @@ tests: summary: "Prometheus agent is missing shards." 
- alertname: PrometheusAgentShardsMissing eval_time: 130m - - alertname: PrometheusAgentShardsMissingInhibition + - alertname: InhibitionPrometheusAgentShardsMissing eval_time: 130m # Tests for `PrometheusAgentShardsMissing` alert with missing `prometheus_operator_spec_shards` metric - interval: 1m @@ -216,7 +216,7 @@ tests: alert_rule_test: - alertname: PrometheusAgentShardsMissing eval_time: 40m - - alertname: PrometheusAgentShardsMissingInhibition + - alertname: InhibitionPrometheusAgentShardsMissing eval_time: 40m - alertname: PrometheusAgentShardsMissing eval_time: 120m @@ -239,7 +239,7 @@ tests: description: "Prometheus agent is missing shards." opsrecipe: "prometheus-agent/" summary: "Prometheus agent is missing shards." - - alertname: PrometheusAgentShardsMissingInhibition + - alertname: InhibitionPrometheusAgentShardsMissing eval_time: 100m exp_alerts: - exp_labels: @@ -281,7 +281,7 @@ tests: description: "Prometheus agent is missing shards." opsrecipe: "prometheus-agent/" summary: "Prometheus agent is missing shards." - - alertname: PrometheusAgentShardsMissingInhibition + - alertname: InhibitionPrometheusAgentShardsMissing eval_time: 125m exp_alerts: - exp_labels: @@ -304,5 +304,5 @@ tests: summary: "Prometheus agent is missing shards." - alertname: PrometheusAgentShardsMissing eval_time: 130m - - alertname: PrometheusAgentShardsMissingInhibition + - alertname: InhibitionPrometheusAgentShardsMissing eval_time: 130m diff --git a/test/tests/providers/capi/capz/platform/atlas/alerting-rules/prometheus-agent.rules.test.yml b/test/tests/providers/capi/capz/platform/atlas/alerting-rules/prometheus-agent.rules.test.yml index a2e3ed4b..838887c5 100644 --- a/test/tests/providers/capi/capz/platform/atlas/alerting-rules/prometheus-agent.rules.test.yml +++ b/test/tests/providers/capi/capz/platform/atlas/alerting-rules/prometheus-agent.rules.test.yml @@ -31,7 +31,7 @@ tests: description: "Prometheus agent remote write is failing." 
opsrecipe: "prometheus-agent/" summary: "Prometheus agent fails to send samples to remote write endpoint." - - alertname: PrometheusAgentFailingInhibition + - alertname: InhibitionPrometheusAgentFailing eval_time: 30m exp_alerts: - exp_labels: @@ -71,7 +71,7 @@ tests: description: "Prometheus agent remote write is failing." opsrecipe: "prometheus-agent/" summary: "Prometheus agent fails to send samples to remote write endpoint." - - alertname: PrometheusAgentFailingInhibition + - alertname: InhibitionPrometheusAgentFailing eval_time: 90m exp_alerts: - exp_labels: @@ -94,7 +94,7 @@ tests: summary: "Prometheus agent fails to send samples to remote write endpoint." - alertname: PrometheusAgentFailing eval_time: 150m - - alertname: PrometheusAgentFailingInhibition + - alertname: InhibitionPrometheusAgentFailing eval_time: 150m # Tests for `PrometheusAgentShardsMissing` alert - interval: 1m @@ -112,7 +112,7 @@ tests: alert_rule_test: - alertname: PrometheusAgentShardsMissing eval_time: 40m - - alertname: PrometheusAgentShardsMissingInhibition + - alertname: InhibitionPrometheusAgentShardsMissing eval_time: 40m - alertname: PrometheusAgentShardsMissing eval_time: 120m @@ -135,7 +135,7 @@ tests: description: "Prometheus agent is missing shards." opsrecipe: "prometheus-agent/" summary: "Prometheus agent is missing shards." - - alertname: PrometheusAgentShardsMissingInhibition + - alertname: InhibitionPrometheusAgentShardsMissing eval_time: 100m exp_alerts: - exp_labels: @@ -177,7 +177,7 @@ tests: description: "Prometheus agent is missing shards." opsrecipe: "prometheus-agent/" summary: "Prometheus agent is missing shards." - - alertname: PrometheusAgentShardsMissingInhibition + - alertname: InhibitionPrometheusAgentShardsMissing eval_time: 125m exp_alerts: - exp_labels: @@ -200,7 +200,7 @@ tests: summary: "Prometheus agent is missing shards." 
- alertname: PrometheusAgentShardsMissing eval_time: 130m - - alertname: PrometheusAgentShardsMissingInhibition + - alertname: InhibitionPrometheusAgentShardsMissing eval_time: 130m # Tests for `PrometheusAgentShardsMissing` alert with missing `prometheus_operator_spec_shards` metric - interval: 1m @@ -216,7 +216,7 @@ tests: alert_rule_test: - alertname: PrometheusAgentShardsMissing eval_time: 40m - - alertname: PrometheusAgentShardsMissingInhibition + - alertname: InhibitionPrometheusAgentShardsMissing eval_time: 40m - alertname: PrometheusAgentShardsMissing eval_time: 120m @@ -239,7 +239,7 @@ tests: description: "Prometheus agent is missing shards." opsrecipe: "prometheus-agent/" summary: "Prometheus agent is missing shards." - - alertname: PrometheusAgentShardsMissingInhibition + - alertname: InhibitionPrometheusAgentShardsMissing eval_time: 100m exp_alerts: - exp_labels: @@ -281,7 +281,7 @@ tests: description: "Prometheus agent is missing shards." opsrecipe: "prometheus-agent/" summary: "Prometheus agent is missing shards." - - alertname: PrometheusAgentShardsMissingInhibition + - alertname: InhibitionPrometheusAgentShardsMissing eval_time: 125m exp_alerts: - exp_labels: @@ -304,5 +304,5 @@ tests: summary: "Prometheus agent is missing shards." 
- alertname: PrometheusAgentShardsMissing eval_time: 130m - - alertname: PrometheusAgentShardsMissingInhibition + - alertname: InhibitionPrometheusAgentShardsMissing eval_time: 130m diff --git a/test/tests/providers/global/platform/atlas/alerting-rules/statefulset.management-cluster.rules.test.yml b/test/tests/providers/global/platform/atlas/alerting-rules/statefulset.rules.test.yml similarity index 97% rename from test/tests/providers/global/platform/atlas/alerting-rules/statefulset.management-cluster.rules.test.yml rename to test/tests/providers/global/platform/atlas/alerting-rules/statefulset.rules.test.yml index 2b4c9297..4f9e5ba9 100644 --- a/test/tests/providers/global/platform/atlas/alerting-rules/statefulset.management-cluster.rules.test.yml +++ b/test/tests/providers/global/platform/atlas/alerting-rules/statefulset.rules.test.yml @@ -1,6 +1,6 @@ --- rule_files: - - statefulset.management-cluster.rules.yml + - statefulset.rules.yml tests: - interval: 1m diff --git a/test/tests/providers/vintage/aws/platform/atlas/alerting-rules/prometheus-agent.rules.test.yml b/test/tests/providers/vintage/aws/platform/atlas/alerting-rules/prometheus-agent.rules.test.yml index a2e3ed4b..838887c5 100644 --- a/test/tests/providers/vintage/aws/platform/atlas/alerting-rules/prometheus-agent.rules.test.yml +++ b/test/tests/providers/vintage/aws/platform/atlas/alerting-rules/prometheus-agent.rules.test.yml @@ -31,7 +31,7 @@ tests: description: "Prometheus agent remote write is failing." opsrecipe: "prometheus-agent/" summary: "Prometheus agent fails to send samples to remote write endpoint." - - alertname: PrometheusAgentFailingInhibition + - alertname: InhibitionPrometheusAgentFailing eval_time: 30m exp_alerts: - exp_labels: @@ -71,7 +71,7 @@ tests: description: "Prometheus agent remote write is failing." opsrecipe: "prometheus-agent/" summary: "Prometheus agent fails to send samples to remote write endpoint." 
- - alertname: PrometheusAgentFailingInhibition + - alertname: InhibitionPrometheusAgentFailing eval_time: 90m exp_alerts: - exp_labels: @@ -94,7 +94,7 @@ tests: summary: "Prometheus agent fails to send samples to remote write endpoint." - alertname: PrometheusAgentFailing eval_time: 150m - - alertname: PrometheusAgentFailingInhibition + - alertname: InhibitionPrometheusAgentFailing eval_time: 150m # Tests for `PrometheusAgentShardsMissing` alert - interval: 1m @@ -112,7 +112,7 @@ tests: alert_rule_test: - alertname: PrometheusAgentShardsMissing eval_time: 40m - - alertname: PrometheusAgentShardsMissingInhibition + - alertname: InhibitionPrometheusAgentShardsMissing eval_time: 40m - alertname: PrometheusAgentShardsMissing eval_time: 120m @@ -135,7 +135,7 @@ tests: description: "Prometheus agent is missing shards." opsrecipe: "prometheus-agent/" summary: "Prometheus agent is missing shards." - - alertname: PrometheusAgentShardsMissingInhibition + - alertname: InhibitionPrometheusAgentShardsMissing eval_time: 100m exp_alerts: - exp_labels: @@ -177,7 +177,7 @@ tests: description: "Prometheus agent is missing shards." opsrecipe: "prometheus-agent/" summary: "Prometheus agent is missing shards." - - alertname: PrometheusAgentShardsMissingInhibition + - alertname: InhibitionPrometheusAgentShardsMissing eval_time: 125m exp_alerts: - exp_labels: @@ -200,7 +200,7 @@ tests: summary: "Prometheus agent is missing shards." 
- alertname: PrometheusAgentShardsMissing eval_time: 130m - - alertname: PrometheusAgentShardsMissingInhibition + - alertname: InhibitionPrometheusAgentShardsMissing eval_time: 130m # Tests for `PrometheusAgentShardsMissing` alert with missing `prometheus_operator_spec_shards` metric - interval: 1m @@ -216,7 +216,7 @@ tests: alert_rule_test: - alertname: PrometheusAgentShardsMissing eval_time: 40m - - alertname: PrometheusAgentShardsMissingInhibition + - alertname: InhibitionPrometheusAgentShardsMissing eval_time: 40m - alertname: PrometheusAgentShardsMissing eval_time: 120m @@ -239,7 +239,7 @@ tests: description: "Prometheus agent is missing shards." opsrecipe: "prometheus-agent/" summary: "Prometheus agent is missing shards." - - alertname: PrometheusAgentShardsMissingInhibition + - alertname: InhibitionPrometheusAgentShardsMissing eval_time: 100m exp_alerts: - exp_labels: @@ -281,7 +281,7 @@ tests: description: "Prometheus agent is missing shards." opsrecipe: "prometheus-agent/" summary: "Prometheus agent is missing shards." - - alertname: PrometheusAgentShardsMissingInhibition + - alertname: InhibitionPrometheusAgentShardsMissing eval_time: 125m exp_alerts: - exp_labels: @@ -304,5 +304,5 @@ tests: summary: "Prometheus agent is missing shards." - alertname: PrometheusAgentShardsMissing eval_time: 130m - - alertname: PrometheusAgentShardsMissingInhibition + - alertname: InhibitionPrometheusAgentShardsMissing eval_time: 130m