From 2218eac8c3c062d2833ac2b10199331cd2f88c3e Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Fri, 6 Dec 2024 09:20:17 +0100 Subject: [PATCH 01/12] chore(deps): update dependency architect to v5.11.2 (#1451) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index d156e41c..70d6e84a 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,6 +1,6 @@ version: 2.1 orbs: - architect: giantswarm/architect@5.11.1 + architect: giantswarm/architect@5.11.2 workflows: package-and-push-chart-on-tag: From 454bfb90c66b130c137d92698becf8846004ed96 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Fri, 6 Dec 2024 09:22:35 +0100 Subject: [PATCH 02/12] chore(deps): update dependency go to v1.23.4 (#1450) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- test/hack/checkLabels/go.mod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/hack/checkLabels/go.mod b/test/hack/checkLabels/go.mod index 0420bae2..5b03eb01 100644 --- a/test/hack/checkLabels/go.mod +++ b/test/hack/checkLabels/go.mod @@ -2,7 +2,7 @@ module checkLabels go 1.23 -toolchain go1.23.3 +toolchain go1.23.4 require ( // Try to keep version in sync with our prometheus rule CRD version. From 913f4c2f50928e130ed821a5e6d8522acb0689da Mon Sep 17 00:00:00 2001 From: Zirko <64951262+QuantumEnigmaa@users.noreply.github.com> Date: Mon, 9 Dec 2024 14:10:31 +0100 Subject: [PATCH 03/12] increase time to trigger promtailrequestserrors alert (#1453) * increase time to trigger promtailrequestserrors alert * changelog --- CHANGELOG.md | 4 ++++ .../platform/atlas/alerting-rules/promtail.rules.yml | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 782ec8aa..140d421a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Changed + +- Increase time to trigger `PromtailRequestsErrors` alert from 15 to 25m. + ## [4.28.0] - 2024-12-02 ### Added diff --git a/helm/prometheus-rules/templates/platform/atlas/alerting-rules/promtail.rules.yml b/helm/prometheus-rules/templates/platform/atlas/alerting-rules/promtail.rules.yml index 422a9c9b..7069c486 100644 --- a/helm/prometheus-rules/templates/platform/atlas/alerting-rules/promtail.rules.yml +++ b/helm/prometheus-rules/templates/platform/atlas/alerting-rules/promtail.rules.yml @@ -38,7 +38,7 @@ spec: opsrecipe: promtail/ expr: | 100 * (sum(rate(promtail_request_duration_seconds_count{status_code!~"2.."}[5m])) by (cluster_id, installation, provider, pipeline, namespace, job, route, instance) / sum(rate(promtail_request_duration_seconds_count[5m])) by (cluster_id, installation, provider, pipeline, namespace, job, route, instance)) > 10 - for: 15m + for: 25m labels: area: platform severity: page From b6664306613ddc768dbeb9061a9459328c0bc528 Mon Sep 17 00:00:00 2001 From: Taylor Bot Date: Mon, 9 Dec 2024 22:26:23 +0900 Subject: [PATCH 04/12] Release v4.29.0 (#1454) --- CHANGELOG.md | 5 ++++- helm/prometheus-rules/Chart.yaml | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 140d421a..3e54912b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [4.29.0] - 2024-12-09 + ### Changed - Increase time to trigger `PromtailRequestsErrors` alert from 15 to 25m. @@ -3303,7 +3305,8 @@ Fix `PromtailRequestsErrors` alerts as promtail retries after some backoff so ac - Add existing rules from https://github.com/giantswarm/prometheus-meta-operator/pull/637/commits/bc6a26759eb955de92b41ed5eb33fa37980660f2 -[Unreleased]: https://github.com/giantswarm/prometheus-rules/compare/v4.28.0...HEAD +[Unreleased]: https://github.com/giantswarm/prometheus-rules/compare/v4.29.0...HEAD +[4.29.0]: https://github.com/giantswarm/prometheus-rules/compare/v4.28.0...v4.29.0 [4.28.0]: https://github.com/giantswarm/prometheus-rules/compare/v4.27.0...v4.28.0 [4.27.0]: https://github.com/giantswarm/prometheus-rules/compare/v4.26.2...v4.27.0 [4.26.2]: https://github.com/giantswarm/prometheus-rules/compare/v4.26.1...v4.26.2 diff --git a/helm/prometheus-rules/Chart.yaml b/helm/prometheus-rules/Chart.yaml index bce88b57..078811b7 100644 --- a/helm/prometheus-rules/Chart.yaml +++ b/helm/prometheus-rules/Chart.yaml @@ -5,7 +5,7 @@ home: https://github.com/giantswarm/prometheus-rules icon: https://s.giantswarm.io/app-icons/1/png/default-app-light.png name: prometheus-rules appVersion: '0.1.0' -version: '4.28.0' +version: '4.29.0' annotations: application.giantswarm.io/team: "atlas" config.giantswarm.io/version: 1.x.x From 1a163b2e153e388f9bc2e77070641a83eae5592d Mon Sep 17 00:00:00 2001 From: Jose Armesto Date: Mon, 9 Dec 2024 15:23:39 +0100 Subject: [PATCH 05/12] Add karpenter alerts (#1449) * Add karpenter alerts * Add cluster_id label --- CHANGELOG.md | 4 ++ .../alerting-rules/karpenter.rules.yml | 54 +++++++++++++++++++ 2 files changed, 58 insertions(+) create mode 100644 helm/prometheus-rules/templates/kaas/phoenix/alerting-rules/karpenter.rules.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index 3e54912b..37c8ce4a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- Add alerts for `karpenter` issues. + ## [4.29.0] - 2024-12-09 ### Changed diff --git a/helm/prometheus-rules/templates/kaas/phoenix/alerting-rules/karpenter.rules.yml b/helm/prometheus-rules/templates/kaas/phoenix/alerting-rules/karpenter.rules.yml new file mode 100644 index 00000000..75e9a4cd --- /dev/null +++ b/helm/prometheus-rules/templates/kaas/phoenix/alerting-rules/karpenter.rules.yml @@ -0,0 +1,54 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + labels: + labels: {{- include "labels.common" . | nindent 4}} + name: karpenter.rules + namespace: {{.Values.namespace}} + name: karpenter +spec: + groups: + - name: karpenter + rules: + - alert: KarpenterCanNotRegisterNewNodes + annotations: + description: | + Karpenter provisioner {{`{{ $labels.provisioner }}`}} on cluster {{`{{ $labels.cluster_id }}`}} launched new nodes, but some of nodes did not registered in the cluster + opsrecipe: karpenter/ + expr: sum by (provisioner, cluster_id, installation, pipeline, provider) (karpenter_machines_launched) - sum by (provisioner, cluster_id, installation, pipeline, provider)(karpenter_machines_registered) != 0 + for: 1h + labels: + area: kaas + cancel_if_monitoring_agent_down: "true" + cancel_if_outside_working_hours: "true" + severity: page + team: {{ include "providerTeam" . }} + topic: karpenter + - alert: KarpenterProvisionerAlmostFull + annotations: + description: | + Provisioner {{`{{ $labels.provisioner }}`}} on cluster {{`{{ $labels.cluster_id }}`}} is almost full. + opsrecipe: karpenter/ + expr: karpenter_provisioner_usage_pct > 90 + for: 72h + labels: + area: kaas + cancel_if_monitoring_agent_down: "true" + cancel_if_outside_working_hours: "true" + severity: page + team: {{ include "providerTeam" . }} + topic: karpenter + - alert: KarpenterCloudproviderErrors + annotations: + description: | + Karpenter on cluster {{`{{ $labels.cluster_id }}`}} is getting errors during API calls to the cloud provider. + opsrecipe: karpenter/ + expr: rate(karpenter_cloudprovider_errors_total{}[5m]) > 0.1 + for: 10m + labels: + area: kaas + cancel_if_monitoring_agent_down: "true" + cancel_if_outside_working_hours: "true" + severity: page + team: {{ include "providerTeam" . }} + topic: karpenter From a9c718ca1d884cb4fab591e1314a1ac23d346713 Mon Sep 17 00:00:00 2001 From: Jose Armesto Date: Tue, 10 Dec 2024 12:54:43 +0100 Subject: [PATCH 06/12] Ignore test clusters (#1455) --- .../phoenix/alerting-rules/cloud-provider-controller.rules.yml | 2 +- .../turtles/alerting-rules/vertical-pod-autoscaler.rules.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/helm/prometheus-rules/templates/kaas/phoenix/alerting-rules/cloud-provider-controller.rules.yml b/helm/prometheus-rules/templates/kaas/phoenix/alerting-rules/cloud-provider-controller.rules.yml index 54dc0de4..f1e328ca 100644 --- a/helm/prometheus-rules/templates/kaas/phoenix/alerting-rules/cloud-provider-controller.rules.yml +++ b/helm/prometheus-rules/templates/kaas/phoenix/alerting-rules/cloud-provider-controller.rules.yml @@ -17,7 +17,7 @@ spec: description: |- {{`Flux HelmRelease {{ $labels.name }} in ns {{ $labels.exported_namespace }} on {{ $labels.installation }}/{{ $labels.cluster_id }} is stuck in Failed state.`}} opsrecipe: fluxcd-failing-helmrelease/ - expr: gotk_reconcile_condition{type="Ready", status="False", kind="HelmRelease", cluster_type="management_cluster", exported_namespace!="flux-giantswarm", name=~".*(aws-ebs-csi-driver|cloud-provider-aws|azure-cloud-controller-manager|azure-cloud-node-manager|azuredisk-csi-driver|azurefile-csi-driver|cloud-provider-vsphere|cloud-provider-cloud-director)"} > 0 + expr: gotk_reconcile_condition{type="Ready", status="False", kind="HelmRelease", cluster_type="management_cluster", exported_namespace!="flux-giantswarm", exported_namespace!~"org-t-.*", name=~".*(aws-ebs-csi-driver|cloud-provider-aws|azure-cloud-controller-manager|azure-cloud-node-manager|azuredisk-csi-driver|azurefile-csi-driver|cloud-provider-vsphere|cloud-provider-cloud-director)"} > 0 for: 20m labels: area: kaas diff --git a/helm/prometheus-rules/templates/kaas/turtles/alerting-rules/vertical-pod-autoscaler.rules.yml b/helm/prometheus-rules/templates/kaas/turtles/alerting-rules/vertical-pod-autoscaler.rules.yml index fda1b7b4..8526d6a3 100644 --- a/helm/prometheus-rules/templates/kaas/turtles/alerting-rules/vertical-pod-autoscaler.rules.yml +++ b/helm/prometheus-rules/templates/kaas/turtles/alerting-rules/vertical-pod-autoscaler.rules.yml @@ -33,7 +33,7 @@ spec: description: |- {{`Flux HelmRelease {{ $labels.name }} in ns {{ $labels.exported_namespace }} on {{ $labels.installation }}/{{ $labels.cluster_id }} is stuck in Failed state.`}} opsrecipe: fluxcd-failing-helmrelease/ - expr: gotk_reconcile_condition{type="Ready", status="False", kind="HelmRelease", cluster_type="management_cluster", exported_namespace!="flux-giantswarm", name=~".*(vertical-pod-autoscaler-crd)"} > 0 + expr: gotk_reconcile_condition{type="Ready", status="False", kind="HelmRelease", cluster_type="management_cluster", exported_namespace!="flux-giantswarm", exported_namespace!~"org-t-.*", name=~".*(vertical-pod-autoscaler-crd)"} > 0 for: 20m labels: area: kaas From bb96c0a9257be5d1a7c58c683502c60e65071fff Mon Sep 17 00:00:00 2001 From: Taylor Bot Date: Tue, 10 Dec 2024 22:24:25 +0900 Subject: [PATCH 07/12] Release v4.30.0 (#1456) --- CHANGELOG.md | 5 ++++- helm/prometheus-rules/Chart.yaml | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 37c8ce4a..f28c572c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [4.30.0] - 2024-12-10 + ### Added - Add alerts for `karpenter` issues. @@ -3309,7 +3311,8 @@ Fix `PromtailRequestsErrors` alerts as promtail retries after some backoff so ac - Add existing rules from https://github.com/giantswarm/prometheus-meta-operator/pull/637/commits/bc6a26759eb955de92b41ed5eb33fa37980660f2 -[Unreleased]: https://github.com/giantswarm/prometheus-rules/compare/v4.29.0...HEAD +[Unreleased]: https://github.com/giantswarm/prometheus-rules/compare/v4.30.0...HEAD +[4.30.0]: https://github.com/giantswarm/prometheus-rules/compare/v4.29.0...v4.30.0 [4.29.0]: https://github.com/giantswarm/prometheus-rules/compare/v4.28.0...v4.29.0 [4.28.0]: https://github.com/giantswarm/prometheus-rules/compare/v4.27.0...v4.28.0 [4.27.0]: https://github.com/giantswarm/prometheus-rules/compare/v4.26.2...v4.27.0 diff --git a/helm/prometheus-rules/Chart.yaml b/helm/prometheus-rules/Chart.yaml index 078811b7..83deb913 100644 --- a/helm/prometheus-rules/Chart.yaml +++ b/helm/prometheus-rules/Chart.yaml @@ -5,7 +5,7 @@ home: https://github.com/giantswarm/prometheus-rules icon: https://s.giantswarm.io/app-icons/1/png/default-app-light.png name: prometheus-rules appVersion: '0.1.0' -version: '4.29.0' +version: '4.30.0' annotations: application.giantswarm.io/team: "atlas" config.giantswarm.io/version: 1.x.x From cd22123adfdadba6e1f25899d5a1de6c8ec9dfdb Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 16 Dec 2024 10:01:12 +0100 Subject: [PATCH 08/12] fix(deps): update module github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring to v0.79.0 (#1457) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- test/hack/checkLabels/go.mod | 22 +++++++++++----------- test/hack/checkLabels/go.sum | 20 ++++++++++++++++++++ 2 files changed, 31 insertions(+), 11 deletions(-) diff --git a/test/hack/checkLabels/go.mod b/test/hack/checkLabels/go.mod index 5b03eb01..6c2d254d 100644 --- a/test/hack/checkLabels/go.mod +++ b/test/hack/checkLabels/go.mod @@ -1,13 +1,13 @@ module checkLabels -go 1.23 +go 1.23.0 toolchain go1.23.4 require ( // Try to keep version in sync with our prometheus rule CRD version. // see https://github.com/giantswarm/prometheus-operator-crd/blob/master/helm/prometheus-operator-crd/Chart.yaml#L11 - github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.78.2 + github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.79.0 sigs.k8s.io/yaml v1.4.0 ) @@ -42,18 +42,18 @@ require ( github.com/prometheus/procfs v0.12.0 // indirect github.com/rogpeppe/go-internal v1.12.0 // indirect github.com/x448/float16 v0.8.4 // indirect - golang.org/x/net v0.29.0 // indirect + golang.org/x/net v0.32.0 // indirect golang.org/x/oauth2 v0.16.0 // indirect - golang.org/x/sys v0.25.0 // indirect - golang.org/x/text v0.18.0 // indirect + golang.org/x/sys v0.28.0 // indirect + golang.org/x/text v0.21.0 // indirect google.golang.org/appengine v1.6.7 // indirect - google.golang.org/protobuf v1.34.2 // indirect + google.golang.org/protobuf v1.35.1 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect - k8s.io/api v0.31.2 // indirect - k8s.io/apimachinery v0.31.2 // indirect + k8s.io/api v0.32.0 // indirect + k8s.io/apimachinery v0.32.0 // indirect k8s.io/klog/v2 v2.130.1 // indirect - k8s.io/utils v0.0.0-20240902221715-702e33fdd3c3 // indirect - sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect - sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect + k8s.io/utils v0.0.0-20241210054802-24370beab758 // indirect + sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect + sigs.k8s.io/structured-merge-diff/v4 v4.5.0 // indirect ) diff --git a/test/hack/checkLabels/go.sum b/test/hack/checkLabels/go.sum index 1ec7c0f9..dd419aad 100644 --- a/test/hack/checkLabels/go.sum +++ b/test/hack/checkLabels/go.sum @@ -561,6 +561,8 @@ github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.78.1 h github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.78.1/go.mod h1:SvsRXw4m1F2vk7HquU5h475bFpke27mIUswfyw9u3ug= github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.78.2 h1:SyoVBXD/r0PntR1rprb90ClI32FSUNOCWqqTatnipHM= github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.78.2/go.mod h1:SvsRXw4m1F2vk7HquU5h475bFpke27mIUswfyw9u3ug= +github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.79.0 h1:IiCqr23V8SexkXkPmK+6tS/Ped/oCVhXSSmLacEATy4= +github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.79.0/go.mod h1:AVMP4QEW8xuGWnxaWSpI3kKjP9fDA31nO68zsyREJZA= github.com/prometheus/alertmanager v0.22.2 h1:JrDZalSEMb2/2bqGAhls6ZnvOxbC5jMIu29JV+uWTC0= github.com/prometheus/alertmanager v0.22.2/go.mod h1:rYinOWxFuCnNssc3iOjn2oMTlhLaPcUuqV5yk5JKUAE= github.com/prometheus/alertmanager v0.25.0 h1:vbXKUR6PYRiZPRIKfmXaG+dmCKG52RtPL4Btl8hQGvg= @@ -842,6 +844,8 @@ golang.org/x/net v0.28.0 h1:a9JDOJc5GMUJ0+UDqmLT86WiEy7iWyIhz8gz8E4e5hE= golang.org/x/net v0.28.0/go.mod h1:yqtgsTWOOnlGLG9GFRrK3++bGOUEkNBoHZc8MEDWPNg= golang.org/x/net v0.29.0 h1:5ORfpBpCs4HzDYoodCDBbwHzdR5UrLBZ3sOnUJmFoHo= golang.org/x/net v0.29.0/go.mod h1:gLkgy8jTGERgjzMic6DS9+SP0ajcu6Xu3Orq/SpETg0= +golang.org/x/net v0.32.0 h1:ZqPmj8Kzc+Y6e0+skZsuACbx+wzMgo5MQsJh9Qd6aYI= +golang.org/x/net v0.32.0/go.mod h1:CwU0IoeOlnQQWJ6ioyFrfRuomB8GKF6KbYXZVyeXNfs= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -955,6 +959,8 @@ golang.org/x/sys v0.23.0 h1:YfKFowiIMvtgl1UERQoTPPToxltDeZfbj4H7dVUCwmM= golang.org/x/sys v0.23.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.25.0 h1:r+8e+loiHxRqhXVl6ML1nO3l1+oFoWbnlu2Ehimmi34= golang.org/x/sys v0.25.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA= +golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= @@ -991,6 +997,8 @@ golang.org/x/text v0.17.0 h1:XtiM5bkSOt+ewxlOE/aE/AKEHibwj/6gvWMl9Rsh0Qc= golang.org/x/text v0.17.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= golang.org/x/text v0.18.0 h1:XvMDiNzPAl0jr17s6W9lcaIhGUfUORdGCNsuLmPG224= golang.org/x/text v0.18.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= +golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo= +golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= @@ -1155,6 +1163,8 @@ google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGm google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= google.golang.org/protobuf v1.34.2/go.mod h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= +google.golang.org/protobuf v1.35.1 h1:m3LfL6/Ca+fqnjnlqQXNpFPABW1UD7mjh8KO2mKFytA= +google.golang.org/protobuf v1.35.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= @@ -1220,6 +1230,8 @@ k8s.io/api v0.31.1 h1:Xe1hX/fPW3PXYYv8BlozYqw63ytA92snr96zMW9gWTU= k8s.io/api v0.31.1/go.mod h1:sbN1g6eY6XVLeqNsZGLnI5FwVseTrZX7Fv3O26rhAaI= k8s.io/api v0.31.2 h1:3wLBbL5Uom/8Zy98GRPXpJ254nEFpl+hwndmk9RwmL0= k8s.io/api v0.31.2/go.mod h1:bWmGvrGPssSK1ljmLzd3pwCQ9MgoTsRCuK35u6SygUk= +k8s.io/api v0.32.0 h1:OL9JpbvAU5ny9ga2fb24X8H6xQlVp+aJMFlgtQjR9CE= +k8s.io/api v0.32.0/go.mod h1:4LEwHZEf6Q/cG96F3dqR965sYOfmPM7rq81BLgsE0p0= k8s.io/apimachinery v0.25.4 h1:CtXsuaitMESSu339tfhVXhQrPET+EiWnIY1rcurKnAc= k8s.io/apimachinery v0.25.4/go.mod h1:jaF9C/iPNM1FuLl7Zuy5b9v+n35HGSh6AQ4HYRkCqwo= k8s.io/apimachinery v0.26.1 h1:8EZ/eGJL+hY/MYCNwhmDzVqq2lPl3N3Bo8rvweJwXUQ= @@ -1248,6 +1260,8 @@ k8s.io/apimachinery v0.31.1 h1:mhcUBbj7KUjaVhyXILglcVjuS4nYXiwC+KKFBgIVy7U= k8s.io/apimachinery v0.31.1/go.mod h1:rsPdaZJfTfLsNJSQzNHQvYoTmxhoOEofxtOsF3rtsMo= k8s.io/apimachinery v0.31.2 h1:i4vUt2hPK56W6mlT7Ry+AO8eEsyxMD1U44NR22CLTYw= k8s.io/apimachinery v0.31.2/go.mod h1:rsPdaZJfTfLsNJSQzNHQvYoTmxhoOEofxtOsF3rtsMo= +k8s.io/apimachinery v0.32.0 h1:cFSE7N3rmEEtv4ei5X6DaJPHHX0C+upp+v5lVPiEwpg= +k8s.io/apimachinery v0.32.0/go.mod h1:GpHVgxoKlTxClKcteaeuF1Ul/lDVb74KpZcxcmLDElE= k8s.io/klog/v2 v2.80.1 h1:atnLQ121W371wYYFawwYx1aEY2eUfs4l3J72wtgAwV4= k8s.io/klog/v2 v2.80.1/go.mod h1:y1WjHnz7Dj687irZUWR/WLkLc5N1YHtjLdmgWjndZn0= k8s.io/klog/v2 v2.90.0 h1:VkTxIV/FjRXn1fgNNcKGM8cfmL1Z33ZjXRTVxKCoF5M= @@ -1282,6 +1296,8 @@ k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 h1:pUdcCO1Lk/tbT5ztQWOBi5HBgbBP1 k8s.io/utils v0.0.0-20240711033017-18e509b52bc8/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= k8s.io/utils v0.0.0-20240902221715-702e33fdd3c3 h1:b2FmK8YH+QEwq/Sy2uAEhmqL5nPfGYbJOcaqjeYYZoA= k8s.io/utils v0.0.0-20240902221715-702e33fdd3c3/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +k8s.io/utils v0.0.0-20241210054802-24370beab758 h1:sdbE21q2nlQtFh65saZY+rRM6x6aJJI8IUa1AmH/qa0= +k8s.io/utils v0.0.0-20241210054802-24370beab758/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA= @@ -1289,12 +1305,16 @@ sigs.k8s.io/json v0.0.0-20220713155537-f223a00ba0e2 h1:iXTIw73aPyC+oRdyqqvVJuloN sigs.k8s.io/json v0.0.0-20220713155537-f223a00ba0e2/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= +sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 h1:gBQPwqORJ8d8/YNZWEjoZs7npUVDpVXUUOFfW6CgAqE= +sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= sigs.k8s.io/structured-merge-diff/v4 v4.2.3 h1:PRbqxJClWWYMNV1dhaG4NsibJbArud9kFxnAMREiWFE= sigs.k8s.io/structured-merge-diff/v4 v4.2.3/go.mod h1:qjx8mGObPmV2aSZepjQjbmb2ihdVs8cGKBraizNC69E= sigs.k8s.io/structured-merge-diff/v4 v4.3.0 h1:UZbZAZfX0wV2zr7YZorDz6GXROfDFj6LvqCRm4VUVKk= sigs.k8s.io/structured-merge-diff/v4 v4.3.0/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= +sigs.k8s.io/structured-merge-diff/v4 v4.5.0 h1:nbCitCK2hfnhyiKo6uf2HxUPTCodY6Qaf85SbDIaMBk= +sigs.k8s.io/structured-merge-diff/v4 v4.5.0/go.mod h1:N8f93tFZh9U6vpxwRArLiikrE5/2tiu1w1AGfACIGE4= sigs.k8s.io/yaml v1.1.0/go.mod h1:UJmg0vDUVViEyp3mgSv9WPwZCDxu4rQW1olrI1uml+o= sigs.k8s.io/yaml v1.3.0 h1:a2VclLzOGrwOHDiV8EfBGhvjHvP46CtW5j6POvhYGGo= sigs.k8s.io/yaml v1.3.0/go.mod h1:GeOyir5tyXNByN85N/dRIT9es5UQNerPYEKK56eTBm8= From 9d4d10b13228d896bf51b2d5c5a3c3ade0e2e2e4 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Tue, 17 Dec 2024 13:35:31 +0100 Subject: [PATCH 09/12] fix(deps): update module github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring to v0.79.1 (#1458) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- test/hack/checkLabels/go.mod | 2 +- test/hack/checkLabels/go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/test/hack/checkLabels/go.mod b/test/hack/checkLabels/go.mod index 6c2d254d..ca86a9b7 100644 --- a/test/hack/checkLabels/go.mod +++ b/test/hack/checkLabels/go.mod @@ -7,7 +7,7 @@ toolchain go1.23.4 require ( // Try to keep version in sync with our prometheus rule CRD version. // see https://github.com/giantswarm/prometheus-operator-crd/blob/master/helm/prometheus-operator-crd/Chart.yaml#L11 - github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.79.0 + github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.79.1 sigs.k8s.io/yaml v1.4.0 ) diff --git a/test/hack/checkLabels/go.sum b/test/hack/checkLabels/go.sum index dd419aad..2e86b6d7 100644 --- a/test/hack/checkLabels/go.sum +++ b/test/hack/checkLabels/go.sum @@ -563,6 +563,8 @@ github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.78.2 h github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.78.2/go.mod h1:SvsRXw4m1F2vk7HquU5h475bFpke27mIUswfyw9u3ug= github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.79.0 h1:IiCqr23V8SexkXkPmK+6tS/Ped/oCVhXSSmLacEATy4= github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.79.0/go.mod h1:AVMP4QEW8xuGWnxaWSpI3kKjP9fDA31nO68zsyREJZA= +github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.79.1 h1:Dwk9xYZOd8gq+nhlZREvHbQ6enj3yjC5HPFOdcReqGw= +github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.79.1/go.mod h1:AVMP4QEW8xuGWnxaWSpI3kKjP9fDA31nO68zsyREJZA= github.com/prometheus/alertmanager v0.22.2 h1:JrDZalSEMb2/2bqGAhls6ZnvOxbC5jMIu29JV+uWTC0= github.com/prometheus/alertmanager v0.22.2/go.mod h1:rYinOWxFuCnNssc3iOjn2oMTlhLaPcUuqV5yk5JKUAE= github.com/prometheus/alertmanager v0.25.0 h1:vbXKUR6PYRiZPRIKfmXaG+dmCKG52RtPL4Btl8hQGvg= From 1c0ac16a2dbdbf89ebdf30c70ec38ce14e712459 Mon Sep 17 00:00:00 2001 From: Antonia <56017655+anvddriesch@users.noreply.github.com> Date: Wed, 18 Dec 2024 17:13:47 +0900 Subject: [PATCH 10/12] push to proxmox app collection (#1460) --- .circleci/config.yml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index 70d6e84a..3f197f5c 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -90,3 +90,17 @@ workflows: ignore: /.*/ tags: only: /^v.*/ + + - architect/push-to-app-collection: + context: architect + name: proxmox-app-collection + app_name: prometheus-rules + app_namespace: monitoring + app_collection_repo: proxmox-app-collection + requires: + - app-catalog + filters: + branches: + ignore: /.*/ + tags: + only: /^v.*/ From 4476a3516bbf2574a16ca100b82c76ff5455e396 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Brigitte?= Date: Thu, 2 Jan 2025 11:56:10 +0100 Subject: [PATCH 11/12] Fix duplicate series in PromtailDown alert (#1462) * Fix duplicate series in PromtailDown alert * add namespace=kube-system to unit tests --- CHANGELOG.md | 4 ++++ .../atlas/alerting-rules/promtail.rules.yml | 4 ++-- .../atlas/alerting-rules/promtail.rules.test.yml | 16 ++++++++++------ 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f28c572c..dddd4339 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Fixed + +- Fix duplicate series in `PromtailDown` alert. + ## [4.30.0] - 2024-12-10 ### Added diff --git a/helm/prometheus-rules/templates/platform/atlas/alerting-rules/promtail.rules.yml b/helm/prometheus-rules/templates/platform/atlas/alerting-rules/promtail.rules.yml index 7069c486..d7ad11bd 100644 --- a/helm/prometheus-rules/templates/platform/atlas/alerting-rules/promtail.rules.yml +++ b/helm/prometheus-rules/templates/platform/atlas/alerting-rules/promtail.rules.yml @@ -16,10 +16,10 @@ spec: description: '{{`Scraping of all promtail pods to check if one failed every 30 minutes.`}}' opsrecipe: promtail/ expr: |- - kube_pod_info{pod=~"promtail.*"} + kube_pod_info{pod=~"promtail.*", namespace="kube-system"} * on(cluster_id, pod) group_left () - up{container="promtail"} == 0 + up{container="promtail", namespace="kube-system"} == 0 for: 30m labels: area: platform diff --git a/test/tests/providers/global/platform/atlas/alerting-rules/promtail.rules.test.yml b/test/tests/providers/global/platform/atlas/alerting-rules/promtail.rules.test.yml index 75dbf3c7..90e92e97 100644 --- a/test/tests/providers/global/platform/atlas/alerting-rules/promtail.rules.test.yml +++ b/test/tests/providers/global/platform/atlas/alerting-rules/promtail.rules.test.yml @@ -6,18 +6,18 @@ tests: - interval: 1m input_series: # For the first 60min: test with 1 pod: none, up, down - - series: 'up{container="promtail", cluster_id="gauss", cluster_type="management_cluster", installation="gauss", pod="promtail-1xxxx", provider="aws", pipeline="testing"}' + - series: 'up{container="promtail", cluster_id="gauss", cluster_type="management_cluster", installation="gauss", namespace="kube-system", pod="promtail-1xxxx", provider="aws", pipeline="testing"}' values: "_x20 1+0x20 0+0x40" - - series: kube_pod_info{cluster_id="gauss", cluster_type="management_cluster", installation="gauss", pod="promtail-1xxxx", node="ip-10-0-5-1.eu-west-1.compute.internal", provider="aws", pipeline="testing"} + - series: kube_pod_info{cluster_id="gauss", cluster_type="management_cluster", installation="gauss", namespace="kube-system", pod="promtail-1xxxx", node="ip-10-0-5-1.eu-west-1.compute.internal", provider="aws", pipeline="testing"} values: "1x180" # From 60min: test with 2 pods: 1 up and 1 down, 2 up, 2 down. - - series: 'up{container="promtail", cluster_id="gauss", cluster_type="management_cluster", installation="gauss", pod="promtail-2xxxx", provider="aws", pipeline="testing"}' + - series: 'up{container="promtail", cluster_id="gauss", cluster_type="management_cluster", installation="gauss", namespace="kube-system", pod="promtail-2xxxx", provider="aws", pipeline="testing"}' values: "_x80 1+0x40 1+0x20 0+0x40" - - series: kube_pod_info{cluster_id="gauss", cluster_type="management_cluster", installation="gauss", pod="promtail-2xxxx", node="ip-10-0-5-2.eu-west-1.compute.internal", provider="aws", pipeline="testing"} + - series: kube_pod_info{cluster_id="gauss", cluster_type="management_cluster", installation="gauss", namespace="kube-system", pod="promtail-2xxxx", node="ip-10-0-5-2.eu-west-1.compute.internal", provider="aws", pipeline="testing"} values: "1x180" - - series: 'up{container="promtail", cluster_type="management_cluster", cluster_id="gauss", installation="gauss", pod="promtail-3xxxx", provider="aws", pipeline="testing"}' + - series: 'up{container="promtail", cluster_type="management_cluster", cluster_id="gauss", installation="gauss", namespace="kube-system", pod="promtail-3xxxx", provider="aws", pipeline="testing"}' values: "_x80 0+0x40 1+0x20 0+0x40" - - series: kube_pod_info{cluster_id="gauss", cluster_type="management_cluster", installation="gauss", pod="promtail-3xxxx", node="ip-10-0-5-3.eu-west-1.compute.internal", provider="aws", pipeline="testing"} + - series: kube_pod_info{cluster_id="gauss", cluster_type="management_cluster", installation="gauss", namespace="kube-system", pod="promtail-3xxxx", node="ip-10-0-5-3.eu-west-1.compute.internal", provider="aws", pipeline="testing"} values: "1x180" alert_rule_test: - alertname: PromtailDown @@ -38,6 +38,7 @@ tests: cluster_id: gauss cluster_type: management_cluster installation: gauss + namespace: kube-system node: ip-10-0-5-1.eu-west-1.compute.internal pipeline: testing pod: promtail-1xxxx @@ -63,6 +64,7 @@ tests: cluster_id: gauss cluster_type: management_cluster installation: gauss + namespace: kube-system node: ip-10-0-5-3.eu-west-1.compute.internal pipeline: testing pod: promtail-3xxxx @@ -89,6 +91,7 @@ tests: cluster_id: gauss cluster_type: management_cluster installation: gauss + namespace: kube-system node: ip-10-0-5-2.eu-west-1.compute.internal pipeline: testing pod: promtail-2xxxx @@ -110,6 +113,7 @@ tests: cluster_id: gauss cluster_type: management_cluster installation: gauss + namespace: kube-system node: ip-10-0-5-3.eu-west-1.compute.internal pipeline: testing pod: promtail-3xxxx From 43a9cfe6889d22d3faa0e74ca075fdbf15b6557e Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Thu, 2 Jan 2025 12:00:27 +0100 Subject: [PATCH 12/12] fix(deps): update module github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring to v0.79.2 (#1461) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- test/hack/checkLabels/go.mod | 2 +- test/hack/checkLabels/go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/test/hack/checkLabels/go.mod b/test/hack/checkLabels/go.mod index ca86a9b7..9e085575 100644 --- a/test/hack/checkLabels/go.mod +++ b/test/hack/checkLabels/go.mod @@ -7,7 +7,7 @@ toolchain go1.23.4 require ( // Try to keep version in sync with our prometheus rule CRD version. // see https://github.com/giantswarm/prometheus-operator-crd/blob/master/helm/prometheus-operator-crd/Chart.yaml#L11 - github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.79.1 + github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.79.2 sigs.k8s.io/yaml v1.4.0 ) diff --git a/test/hack/checkLabels/go.sum b/test/hack/checkLabels/go.sum index 2e86b6d7..11e4c92f 100644 --- a/test/hack/checkLabels/go.sum +++ b/test/hack/checkLabels/go.sum @@ -565,6 +565,8 @@ github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.79.0 h github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.79.0/go.mod h1:AVMP4QEW8xuGWnxaWSpI3kKjP9fDA31nO68zsyREJZA= github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.79.1 h1:Dwk9xYZOd8gq+nhlZREvHbQ6enj3yjC5HPFOdcReqGw= github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.79.1/go.mod h1:AVMP4QEW8xuGWnxaWSpI3kKjP9fDA31nO68zsyREJZA= +github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.79.2 h1:DGv150w4UyxnjNHlkCw85R3+lspOxegtdnbpP2vKRrk= +github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.79.2/go.mod h1:AVMP4QEW8xuGWnxaWSpI3kKjP9fDA31nO68zsyREJZA= github.com/prometheus/alertmanager v0.22.2 h1:JrDZalSEMb2/2bqGAhls6ZnvOxbC5jMIu29JV+uWTC0= github.com/prometheus/alertmanager v0.22.2/go.mod h1:rYinOWxFuCnNssc3iOjn2oMTlhLaPcUuqV5yk5JKUAE= github.com/prometheus/alertmanager v0.25.0 h1:vbXKUR6PYRiZPRIKfmXaG+dmCKG52RtPL4Btl8hQGvg=