From e6e8d5f0f00624919e57c1543f7eb967a4035407 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Herv=C3=A9=20Nicol?= Date: Thu, 26 Sep 2024 16:32:01 +0200 Subject: [PATCH] fix lokicompactorfailedcompaction rule (#1381) Co-authored-by: Herve Nicol <12008875+hervenicol@users.noreply.github.com> --- CHANGELOG.md | 4 ++++ .../templates/platform/atlas/alerting-rules/loki.rules.yml | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5d2736fb..e0151490 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Fixed + +- fix `LokiFailedCompaction` to take latest successful compaction across multiple compactor/backend pods + ## [4.16.0] - 2024-09-26 ### Added diff --git a/helm/prometheus-rules/templates/platform/atlas/alerting-rules/loki.rules.yml b/helm/prometheus-rules/templates/platform/atlas/alerting-rules/loki.rules.yml index 4b23f57b..31840cae 100644 --- a/helm/prometheus-rules/templates/platform/atlas/alerting-rules/loki.rules.yml +++ b/helm/prometheus-rules/templates/platform/atlas/alerting-rules/loki.rules.yml @@ -143,7 +143,7 @@ spec: description: 'Loki compactor has been failing compactions for more than 2 hours since last compaction.' opsrecipe: loki#lokicompactorfailedcompaction # This alert checks if Loki's the last successful compaction run is older than 2 hours - expr: (time() - (loki_boltdb_shipper_compact_tables_operation_last_successful_run_timestamp_seconds > 0) > 60 * 60 * 2) + expr: (min by (cluster_id, installation, provider, pipeline) (time() - (loki_boltdb_shipper_compact_tables_operation_last_successful_run_timestamp_seconds > 0)) > 60 * 60 * 2) for: 1h labels: area: platform