diff --git a/CHANGELOG.md b/CHANGELOG.md
index 187a4a3c5..1e76dde01 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 
 - Add Heartbeat alert for mimir.
+- Add missing alert about Loki containers not running to ensure we do not suffer from [extra cloud cost](https://github.com/giantswarm/giantswarm/issues/30124).
+- Add missing alert about Mimir containers not running to ensure we do not suffer from [extra cloud cost](https://github.com/giantswarm/giantswarm/issues/30124).
 
 ## [3.5.0] - 2024-03-27
diff --git a/README.md b/README.md
index a016b43e1..d485a7718 100644
--- a/README.md
+++ b/README.md
@@ -60,6 +60,10 @@ Any Alert includes:
 
 - `cancel_if_.*`
 
+### Specific alert labels
+
+- `all_pipelines: true`: Adding this label to an alert makes sure the alert is sent to Opsgenie even if the installation is not a stable installation.
+
 #### Routing
 
 Alertmanager does the routing based on the labels mentioned above.
diff --git a/helm/prometheus-rules/templates/alerting-rules/loki.all.rules.yml b/helm/prometheus-rules/templates/alerting-rules/loki.rules.yml
similarity index 74%
rename from helm/prometheus-rules/templates/alerting-rules/loki.all.rules.yml
rename to helm/prometheus-rules/templates/alerting-rules/loki.rules.yml
index 9eb724803..9f87870bf 100644
--- a/helm/prometheus-rules/templates/alerting-rules/loki.all.rules.yml
+++ b/helm/prometheus-rules/templates/alerting-rules/loki.rules.yml
@@ -3,12 +3,32 @@ kind: PrometheusRule
 metadata:
   labels:
     {{- include "labels.common" . | nindent 4 }}
-  name: grafana.all.rules
+  name: loki.rules
   namespace: {{ .Values.namespace }}
 spec:
   groups:
   - name: loki
     rules:
+    # Coming from https://github.com/giantswarm/giantswarm/issues/30124
+    # This alert ensures Loki containers are not restarting too often (flappiness).
+    # If that is not the case, it can incur high costs from cloud providers (S3 API calls are quite expensive).
+    - alert: LokiRestartingTooOften
+      annotations:
+        description: '{{`Loki containers are restarting too often.`}}'
+        opsrecipe: loki/
+      expr: |
+        increase(
+          kube_pod_container_status_restarts_total{cluster_type="management_cluster", namespace="loki"}[1h]
+        ) > 5
+      for: 5m
+      labels:
+        area: managedservices
+        # This label is used to ensure the alert goes through even for non-stable installations
+        all_pipelines: "true"
+        cancel_if_outside_working_hours: "true"
+        severity: page
+        team: atlas
+        topic: observability
     # Rules inspired from loki-mixins - https://github.com/grafana/loki/blob/main/production/loki-mixin-compiled/alerts.yaml
     - alert: LokiRequestErrors
       annotations:
diff --git a/helm/prometheus-rules/templates/alerting-rules/mimir.rules.yml b/helm/prometheus-rules/templates/alerting-rules/mimir.rules.yml
index f19b9ca05..281e7479f 100644
--- a/helm/prometheus-rules/templates/alerting-rules/mimir.rules.yml
+++ b/helm/prometheus-rules/templates/alerting-rules/mimir.rules.yml
@@ -24,6 +24,26 @@ spec:
         type: "heartbeat" # TODO(@team-atlas): remove once we use mimir alertmanager
         namespace: "monitoring" # Needed due to https://github.com/prometheus-operator/prometheus-operator/issues/3737
 
+    # Coming from https://github.com/giantswarm/giantswarm/issues/30124
+    # This alert ensures Mimir containers are not restarting too often (flappiness).
+    # If that is not the case, it can incur high costs from cloud providers (S3 API calls are quite expensive).
+    # This alert will not page for the prometheus-buddy.
+    - alert: MimirRestartingTooOften
+      annotations:
+        description: '{{`Mimir containers are restarting too often.`}}'
+      expr: |
+        increase(
+          kube_pod_container_status_restarts_total{cluster_type="management_cluster", namespace="mimir", container!="prometheus"}[1h]
+        ) > 5
+      for: 5m
+      labels:
+        area: managedservices
+        # This label is used to ensure the alert goes through even for non-stable installations
+        all_pipelines: "true"
+        cancel_if_outside_working_hours: "true"
+        severity: page
+        team: atlas
+        topic: observability
     - alert: MimirComponentDown
       annotations:
         description: '{{`Mimir component : {{ $labels.service }} is down.`}}'
@@ -31,11 +51,9 @@ spec:
       for: 5m
       labels:
         area: managedservices
-        cancel_if_apiserver_down: "true"
         cancel_if_cluster_status_creating: "true"
         cancel_if_cluster_status_deleting: "true"
         cancel_if_cluster_status_updating: "true"
-        cancel_if_scrape_timeout: "true"
         cancel_if_outside_working_hours: "true"
         severity: page
         team: atlas
@@ -47,7 +65,6 @@ spec:
       for: 1h
       labels:
         area: managedservices
-        cancel_if_apiserver_down: "true"
         cancel_if_cluster_status_creating: "true"
         cancel_if_cluster_status_deleting: "true"
         cancel_if_cluster_status_updating: "true"
@@ -62,7 +79,6 @@ spec:
       for: 1h
       labels:
         area: managedservices
-        cancel_if_apiserver_down: "true"
         cancel_if_cluster_status_creating: "true"
         cancel_if_cluster_status_deleting: "true"
         cancel_if_cluster_status_updating: "true"
diff --git a/test/tests/providers/global/loki.all.rules.test.yml b/test/tests/providers/global/loki.rules.test.yml
similarity index 83%
rename from test/tests/providers/global/loki.all.rules.test.yml
rename to test/tests/providers/global/loki.rules.test.yml
index 03bb95fe6..d3cfab8b9 100644
--- a/test/tests/providers/global/loki.all.rules.test.yml
+++ b/test/tests/providers/global/loki.rules.test.yml
@@ -1,6 +1,6 @@
 ---
 rule_files:
-  - loki.all.rules.yml
+  - loki.rules.yml
 
 tests:
   - interval: 1m
@@ -98,3 +98,29 @@
             exp_annotations:
               description: "Loki pod loki-compactor-676b8c897b-rq298 (namespace loki) sees 1 unhealthy ring members"
               opsrecipe: "loki/"
+  - interval: 1m
+    input_series:
+      - series: 'kube_pod_container_status_restarts_total{cluster_type="management_cluster", namespace="loki"}'
+        values: "0+0x20 0+5x20 100+0x140" # 0 restarts for the first 20 minutes, then 5 restarts per minute for 20 minutes, then no further restarts for 140 minutes
+    alert_rule_test:
+      - alertname: LokiRestartingTooOften
+        eval_time: 15m # no alert expected at 15 minutes
+        exp_alerts:
+      - alertname: LokiRestartingTooOften
+        eval_time: 85m # at 85 minutes, the alert should be firing
+        exp_alerts:
+          - exp_labels:
+              all_pipelines: "true"
+              area: managedservices
+              cancel_if_outside_working_hours: "true"
+              cluster_type: management_cluster
+              namespace: loki
+              severity: page
+              team: atlas
+              topic: observability
+            exp_annotations:
+              description: Loki containers are restarting too often.
+              opsrecipe: loki/
+      - alertname: LokiRestartingTooOften
+        eval_time: 140m # at 140 minutes, everything should be back to normal
+        exp_alerts:
diff --git a/test/tests/providers/global/mimir.rules.test.yml b/test/tests/providers/global/mimir.rules.test.yml
index ce66772fc..d67e708c4 100644
--- a/test/tests/providers/global/mimir.rules.test.yml
+++ b/test/tests/providers/global/mimir.rules.test.yml
@@ -71,11 +71,9 @@ tests:
               severity: page
               team: atlas
               topic: observability
-              cancel_if_apiserver_down: "true"
               cancel_if_cluster_status_creating: "true"
               cancel_if_cluster_status_deleting: "true"
               cancel_if_cluster_status_updating: "true"
-              cancel_if_scrape_timeout: "true"
               cancel_if_outside_working_hours: "true"
               cluster_id: gauss
             exp_annotations:
@@ -95,7 +93,6 @@
         exp_alerts:
           - exp_labels:
               area: managedservices
-              cancel_if_apiserver_down: "true"
               cancel_if_outside_working_hours: "true"
               cancel_if_cluster_status_creating: "true"
               cancel_if_cluster_status_deleting: "true"
@@ -119,7 +116,6 @@
         exp_alerts:
           - exp_labels:
               area: managedservices
-              cancel_if_apiserver_down: "true"
               cancel_if_outside_working_hours: "true"
               cancel_if_cluster_status_creating: "true"
               cancel_if_cluster_status_deleting: "true"
@@ -135,3 +131,31 @@
               description: "Mimir ruler is failing to process PrometheusRules."
       - alertname: MimirRulerEventsFailed
         eval_time: 160m
+  - interval: 1m
+    input_series:
+      - series: 'kube_pod_container_status_restarts_total{cluster_type="management_cluster", namespace="mimir", container="mimir-ingester"}'
+        values: "0+0x20 0+5x20 100+0x140" # 0 restarts for the first 20 minutes, then 5 restarts per minute for 20 minutes, then no further restarts for 140 minutes
+      - series: 'kube_pod_container_status_restarts_total{cluster_type="management_cluster", namespace="mimir", container="prometheus"}'
+        values: "0+5x180" # prometheus container restarts 5 times per minute for 180 minutes
+    alert_rule_test:
+      - alertname: MimirRestartingTooOften
+        eval_time: 15m # no alert expected at 15 minutes
+        exp_alerts:
+      - alertname: MimirRestartingTooOften
+        eval_time: 85m # at 85 minutes, the alert should be firing
+        exp_alerts:
+          - exp_labels:
+              all_pipelines: "true"
+              area: managedservices
+              cancel_if_outside_working_hours: "true"
+              cluster_type: management_cluster
+              container: mimir-ingester
+              namespace: mimir
+              severity: page
+              team: atlas
+              topic: observability
+            exp_annotations:
+              description: Mimir containers are restarting too often.
+      - alertname: MimirRestartingTooOften
+        eval_time: 140m # at 140 minutes, everything should be back to normal
+        exp_alerts:
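
The unit tests above use promtool's rule-test format (rule_files, input_series, alert_rule_test). A minimal sketch of how they could be run locally, assuming the Helm templates have already been rendered into plain rule files placed next to the test files; that rendering step and the exact paths are assumptions about the repository's test harness, not something this diff defines:

    # Hypothetical invocation; promtool cannot read the un-rendered Helm templates directly.
    promtool test rules test/tests/providers/global/loki.rules.test.yml
    promtool test rules test/tests/providers/global/mimir.rules.test.yml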