Skip to content

Commit

Permalink
[bosh_alerts][bosh_tsdb_system.alerts] Adjust alerts to provide possi…
Browse files Browse the repository at this point in the history
…bility to disable values from alert message
  • Loading branch information
benjaminguttmann-avtq committed Jul 8, 2019
1 parent abe7249 commit 429af3f
Showing 1 changed file with 6 additions and 6 deletions.
12 changes: 6 additions & 6 deletions jobs/bosh_alerts/templates/bosh_tsdb_system.alerts.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ groups:
severity: warning
annotations:
summary: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` is reporting a high CPU load average"
description: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` had a CPU load average (1m) above <%= p('bosh_alerts.job_high_cpu_load.threshold') %> for <%= p('bosh_alerts.job_high_cpu_load.evaluation_time') %><% if_p('bosh_alerts.disable_values_in_alert_msg') do | disabled | %> <%= disabled ? \": {{$value}}\" %> <% end %>"
description: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` had a CPU load average (1m) above <%= p('bosh_alerts.job_high_cpu_load.threshold') %> for <%= p('bosh_alerts.job_high_cpu_load.evaluation_time') %><% if_p('bosh_alerts.disable_values_in_alert_msg') do | disabled | %> <% if !(disabled) %> <%= ': {{$value}}' %> <% end %> <% end %>"

- alert: BOSHTSDBJobLowFreeRAM
expr: avg(bosh_tsdb_job_mem_percent{bosh_job_name!~"^compilation.*",bosh_deployment!="bosh-health-check"}) by(environment, bosh_deployment, bosh_job_name, bosh_job_index) > <%= p('bosh_alerts.job_low_free_ram.threshold') %>
Expand All @@ -19,7 +19,7 @@ groups:
severity: warning
annotations:
summary: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` is reporting low free RAM"
description: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` has used more than <%= p('bosh_alerts.job_low_free_ram.threshold') %>% of its RAM for <%= p('bosh_alerts.job_low_free_ram.evaluation_time') %><% if_p('bosh_alerts.disable_values_in_alert_msg') do | disabled | %> <%= disabled ? \"<% if_p('bosh_alerts.disable_values_in_alert_msg') do | disabled | %> <%= disabled ? \": {{$value}}%\" %> <% end %>"
description: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` has used more than <%= p('bosh_alerts.job_low_free_ram.threshold') %>% of its RAM for <%= p('bosh_alerts.job_low_free_ram.evaluation_time') %><% if_p('bosh_alerts.disable_values_in_alert_msg') do | disabled | %> <% if !(disabled) %> <%= ': {{$value}}%' %> <% end %> <% end %>"

- alert: BOSHTSDBJobLowSwap
expr: avg(bosh_tsdb_job_swap_percent{bosh_job_name!~"^compilation.*",bosh_deployment!="bosh-health-check"}) by(environment, bosh_deployment, bosh_job_name, bosh_job_index) > <%= p('bosh_alerts.job_low_swap.threshold') %>
Expand All @@ -29,7 +29,7 @@ groups:
severity: warning
annotations:
summary: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` is reporting low swap"
description: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` has used more than <%= p('bosh_alerts.job_low_swap.threshold') %>% of its swap for <%= p('bosh_alerts.job_low_swap.evaluation_time') %><% if_p('bosh_alerts.disable_values_in_alert_msg') do | disabled | %> <%= disabled ? \"<% if_p('bosh_alerts.disable_values_in_alert_msg') do | disabled | %> <%= disabled ? \": {{$value}}%\" %> <% end %>"
description: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` has used more than <%= p('bosh_alerts.job_low_swap.threshold') %>% of its swap for <%= p('bosh_alerts.job_low_swap.evaluation_time') %><% if_p('bosh_alerts.disable_values_in_alert_msg') do | disabled | %> <% if !(disabled) %> <%= ': {{$value}}%' %> <% end %> <% end %>"

- alert: BOSHTSDBJobSystemDiskFull
expr: avg(bosh_tsdb_job_system_disk_percent{bosh_job_name!~"^compilation.*",bosh_deployment!="bosh-health-check"}) by(environment, bosh_deployment, bosh_job_name, bosh_job_index) > <%= p('bosh_alerts.job_system_disk_full.threshold') %>
Expand All @@ -39,7 +39,7 @@ groups:
severity: critical
annotations:
summary: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` is running out of system disk"
description: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` has used more than <%= p('bosh_alerts.job_system_disk_full.threshold') %>% of its system disk for <%= p('bosh_alerts.job_system_disk_full.evaluation_time') %><% if_p('bosh_alerts.disable_values_in_alert_msg') do | disabled | %> <%= disabled ? \"<% if_p('bosh_alerts.disable_values_in_alert_msg') do | disabled | %> <%= disabled ? \": {{$value}}%\" %> <% end %>"
description: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` has used more than <%= p('bosh_alerts.job_system_disk_full.threshold') %>% of its system disk for <%= p('bosh_alerts.job_system_disk_full.evaluation_time') %><% if_p('bosh_alerts.disable_values_in_alert_msg') do | disabled | %> <% if !(disabled) %> <%= ': {{$value}}%' %> <% end %> <% end %>"

- alert: BOSHTSDBJobEphemeralDiskFull
expr: avg(bosh_tsdb_job_ephemeral_disk_percent{bosh_job_name!~"^compilation.*",bosh_deployment!="bosh-health-check"}) by(environment, bosh_deployment, bosh_job_name, bosh_job_index) > <%= p('bosh_alerts.job_ephemeral_disk_full.threshold') %>
Expand All @@ -49,7 +49,7 @@ groups:
severity: critical
annotations:
summary: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` is running out of ephemeral disk"
description: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` has used more than <%= p('bosh_alerts.job_ephemeral_disk_full.threshold') %>% of its ephemeral disk for <%= p('bosh_alerts.job_ephemeral_disk_full.evaluation_time') %><% if_p('bosh_alerts.disable_values_in_alert_msg') do | disabled | %> <%= disabled ? \"<% if_p('bosh_alerts.disable_values_in_alert_msg') do | disabled | %> <%= disabled ? \": {{$value}}%\" %> <% end %>"
description: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` has used more than <%= p('bosh_alerts.job_ephemeral_disk_full.threshold') %>% of its ephemeral disk for <%= p('bosh_alerts.job_ephemeral_disk_full.evaluation_time') %><% if_p('bosh_alerts.disable_values_in_alert_msg') do | disabled | %> <% if !(disabled) %> <%= ': {{$value}}%' %> <% end %> <% end %>"

- alert: BOSHTSDBJobPersistentDiskFull
expr: avg(bosh_tsdb_job_persistent_disk_percent{bosh_job_name!~"^compilation.*",bosh_deployment!="bosh-health-check"}) by(environment, bosh_deployment, bosh_job_name, bosh_job_index) > <%= p('bosh_alerts.job_persistent_disk_full.threshold') %>
Expand All @@ -59,4 +59,4 @@ groups:
severity: critical
annotations:
summary: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` is running out of persistent disk"
description: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` has used more than <%= p('bosh_alerts.job_persistent_disk_full.threshold') %>% of its persistent disk for <%= p('bosh_alerts.job_persistent_disk_full.evaluation_time') %><% if_p('bosh_alerts.disable_values_in_alert_msg') do | disabled | %> <%= disabled ? \"<% if_p('bosh_alerts.disable_values_in_alert_msg') do | disabled | %> <%= disabled ? \": {{$value}}%\" %> <% end %>"
description: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` has used more than <%= p('bosh_alerts.job_persistent_disk_full.threshold') %>% of its persistent disk for <%= p('bosh_alerts.job_persistent_disk_full.evaluation_time') %><% if_p('bosh_alerts.disable_values_in_alert_msg') do | disabled | %> <% if !(disabled) %> <%= ': {{$value}}%' %> <% end %> <% end %>"

0 comments on commit 429af3f

Please sign in to comment.