diff --git a/resources/prometheus/prometheus-rules.yaml b/resources/prometheus/prometheus-rules.yaml index 22bed52..98eef14 100644 --- a/resources/prometheus/prometheus-rules.yaml +++ b/resources/prometheus/prometheus-rules.yaml @@ -241,7 +241,7 @@ spec: record: rhacs_tenants:namespace:pod:container:max_memory_usage_ratio - alert: RHACSTenantWorkloadMemoryUtilizationHigh expr: | - rhacs_tenants:namespace:pod:container:max_memory_usage_ratio{container="central"} >= 0.85 + rhacs_tenants:namespace:pod:container:max_memory_usage_ratio{container="central"} >= 0.9 for: 30m labels: severity: warning diff --git a/resources/prometheus/unit_tests/RHACSTenantWorkloadMemoryUtilizationHigh.yaml b/resources/prometheus/unit_tests/RHACSTenantWorkloadMemoryUtilizationHigh.yaml index 58c6489..7ccf1f8 100644 --- a/resources/prometheus/unit_tests/RHACSTenantWorkloadMemoryUtilizationHigh.yaml +++ b/resources/prometheus/unit_tests/RHACSTenantWorkloadMemoryUtilizationHigh.yaml @@ -7,8 +7,8 @@ tests: - interval: 1m input_series: - series: container_memory_working_set_bytes{namespace="rhacs-aaaaaaaaaaaaaaaaaaaa", pod="mypod", container="central"} - # first 10 minutes no alert and then 85% CPU usage for 40 minutes - values: "50+0x10 85+0x40" + # first 10 minutes no alert and then 90% memory usage for 40 minutes + values: "50+0x10 90+0x40" - series: container_spec_memory_limit_bytes{namespace="rhacs-aaaaaaaaaaaaaaaaaaaa",pod="mypod", container="central"} values: "100+0x40" alert_rule_test: @@ -26,7 +26,7 @@ tests: container: central exp_annotations: summary: tenant 'rhacs-aaaaaaaaaaaaaaaaaaaa' container 'central' in pod 'mypod' is reaching its memory limit. - description: tenant 'rhacs-aaaaaaaaaaaaaaaaaaaa' container 'central' in pod 'mypod' reached 85% of its memory limit and is at risk of being OOM killed. + description: tenant 'rhacs-aaaaaaaaaaaaaaaaaaaa' container 'central' in pod 'mypod' reached 90% of its memory limit and is at risk of being OOM killed. 
sop_url: "https://gitlab.cee.redhat.com/stackrox/acs-cloud-service/runbooks/-/blob/master/sops/dp-039-tenant-workload-memory-utilization-high.md" - interval: 1m input_series: