Skip to content

Commit

Permalink
Merge pull request #322 from benjaminguttmann-avtq/disable_alert_values
Browse files Browse the repository at this point in the history
Add property to remove {{$value}} from alert descriptions
  • Loading branch information
frodenas authored Aug 29, 2019
2 parents 27e24c5 + 429af3f commit 9d7534f
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 13 deletions.
3 changes: 3 additions & 0 deletions jobs/bosh_alerts/spec
Original file line number Diff line number Diff line change
Expand Up @@ -117,3 +117,6 @@ properties:
bosh_alerts.tsdb_message_too_old.evaluation_time:
description: "TSDB Message too old alert evaluation time"
default: 5m
bosh_alerts.disable_values_in_alert_msg:
description: "When true, omit the current metric value ({{$value}}) from alert descriptions"
default: false
14 changes: 7 additions & 7 deletions jobs/bosh_alerts/templates/bosh_system.alerts.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ groups:
severity: warning
annotations:
summary: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_name}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` is reporting a high CPU load average"
description: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_name}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` had a CPU load average (1m) above <%= p('bosh_alerts.job_high_cpu_load.threshold') %> for <%= p('bosh_alerts.job_high_cpu_load.evaluation_time') %>: {{$value}}"
# Append the current value only when bosh_alerts.disable_values_in_alert_msg is false.
description: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_name}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` had a CPU load average (1m) above <%= p('bosh_alerts.job_high_cpu_load.threshold') %> for <%= p('bosh_alerts.job_high_cpu_load.evaluation_time') %><% unless p('bosh_alerts.disable_values_in_alert_msg') %>: {{$value}}<% end %>"

- alert: BOSHJobLowFreeRAM
expr: avg(bosh_job_mem_percent{bosh_job_name!~"^compilation.*",bosh_deployment!="bosh-health-check"}) by(environment, bosh_name, bosh_deployment, bosh_job_name, bosh_job_index) > <%= p('bosh_alerts.job_low_free_ram.threshold') %>
Expand All @@ -19,7 +19,7 @@ groups:
severity: warning
annotations:
summary: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_name}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` is reporting low free RAM"
description: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_name}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` has used more than <%= p('bosh_alerts.job_low_free_ram.threshold') %>% of its RAM for <%= p('bosh_alerts.job_low_free_ram.evaluation_time') %>: {{$value}}%"
# Append the current value only when bosh_alerts.disable_values_in_alert_msg is false.
description: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_name}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` has used more than <%= p('bosh_alerts.job_low_free_ram.threshold') %>% of its RAM for <%= p('bosh_alerts.job_low_free_ram.evaluation_time') %><% unless p('bosh_alerts.disable_values_in_alert_msg') %>: {{$value}}%<% end %>"

- alert: BOSHJobLowSwap
expr: avg(bosh_job_swap_percent{bosh_job_name!~"^compilation.*",bosh_deployment!="bosh-health-check"}) by(environment, bosh_name, bosh_deployment, bosh_job_name, bosh_job_index) > <%= p('bosh_alerts.job_low_swap.threshold') %>
Expand All @@ -29,7 +29,7 @@ groups:
severity: warning
annotations:
summary: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_name}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` is reporting low swap"
description: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_name}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` has used more than <%= p('bosh_alerts.job_low_swap.threshold') %>% of its swap for <%= p('bosh_alerts.job_low_swap.evaluation_time') %>: {{$value}}%"
# Append the current value only when bosh_alerts.disable_values_in_alert_msg is false.
description: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_name}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` has used more than <%= p('bosh_alerts.job_low_swap.threshold') %>% of its swap for <%= p('bosh_alerts.job_low_swap.evaluation_time') %><% unless p('bosh_alerts.disable_values_in_alert_msg') %>: {{$value}}%<% end %>"

- alert: BOSHJobSystemDiskFull
expr: avg(bosh_job_system_disk_percent{bosh_job_name!~"^compilation.*",bosh_deployment!="bosh-health-check"}) by(environment, bosh_name, bosh_deployment, bosh_job_name, bosh_job_index) > <%= p('bosh_alerts.job_system_disk_full.threshold') %>
Expand All @@ -39,7 +39,7 @@ groups:
severity: critical
annotations:
summary: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_name}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` is running out of system disk"
description: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_name}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` has used more than <%= p('bosh_alerts.job_system_disk_full.threshold') %>% of its system disk for <%= p('bosh_alerts.job_system_disk_full.evaluation_time') %>: {{$value}}%"
# Append the current value only when bosh_alerts.disable_values_in_alert_msg is false.
description: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_name}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` has used more than <%= p('bosh_alerts.job_system_disk_full.threshold') %>% of its system disk for <%= p('bosh_alerts.job_system_disk_full.evaluation_time') %><% unless p('bosh_alerts.disable_values_in_alert_msg') %>: {{$value}}%<% end %>"

- alert: BOSHJobEphemeralDiskFull
expr: avg(bosh_job_ephemeral_disk_percent{bosh_job_name!~"^compilation.*",bosh_deployment!="bosh-health-check"}) by(environment, bosh_name, bosh_deployment, bosh_job_name, bosh_job_index) > <%= p('bosh_alerts.job_ephemeral_disk_full.threshold') %>
Expand All @@ -49,7 +49,7 @@ groups:
severity: critical
annotations:
summary: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_name}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` is running out of ephemeral disk"
description: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_name}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` has used more than <%= p('bosh_alerts.job_ephemeral_disk_full.threshold') %>% of its ephemeral disk for <%= p('bosh_alerts.job_ephemeral_disk_full.evaluation_time') %>s: {{$value}}%"
# Append the current value only when bosh_alerts.disable_values_in_alert_msg is false.
description: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_name}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` has used more than <%= p('bosh_alerts.job_ephemeral_disk_full.threshold') %>% of its ephemeral disk for <%= p('bosh_alerts.job_ephemeral_disk_full.evaluation_time') %><% unless p('bosh_alerts.disable_values_in_alert_msg') %>: {{$value}}%<% end %>"

- alert: BOSHJobPersistentDiskFull
expr: avg(bosh_job_persistent_disk_percent{bosh_job_name!~"^compilation.*",bosh_deployment!="bosh-health-check"}) by(environment, bosh_name, bosh_deployment, bosh_job_name, bosh_job_index) > <%= p('bosh_alerts.job_persistent_disk_full.threshold') %>
Expand All @@ -59,7 +59,7 @@ groups:
severity: critical
annotations:
summary: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_name}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` is running out of persistent disk"
description: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_name}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` has used more than <%= p('bosh_alerts.job_persistent_disk_full.threshold') %>% of its persistent disk for <%= p('bosh_alerts.job_persistent_disk_full.evaluation_time') %>: {{$value}}%"
# Append the current value only when bosh_alerts.disable_values_in_alert_msg is false.
description: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_name}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` has used more than <%= p('bosh_alerts.job_persistent_disk_full.threshold') %>% of its persistent disk for <%= p('bosh_alerts.job_persistent_disk_full.evaluation_time') %><% unless p('bosh_alerts.disable_values_in_alert_msg') %>: {{$value}}%<% end %>"

- alert: BOSHJobPersistentDiskInodesExhausted
expr: avg(bosh_job_persistent_disk_inode_percent{bosh_job_name!~"^compilation.*",bosh_deployment!="bosh-health-check"}) by(environment, bosh_name, bosh_deployment, bosh_job_name, bosh_job_index) > <%= p('bosh_alerts.job_persistent_disk_inodes_exhausted.threshold') %>
Expand All @@ -69,4 +69,4 @@ groups:
severity: critical
annotations:
summary: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_name}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` is running out of inodes"
description: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_name}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` has used more than <%= p('bosh_alerts.job_persistent_disk_inodes_exhausted.threshold') %>% of its persistent disk inodes for <%= p('bosh_alerts.job_persistent_disk_inodes_exhausted.evaluation_time') %>: {{$value}}%"
# Append the current value only when bosh_alerts.disable_values_in_alert_msg is false.
description: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_name}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` has used more than <%= p('bosh_alerts.job_persistent_disk_inodes_exhausted.threshold') %>% of its persistent disk inodes for <%= p('bosh_alerts.job_persistent_disk_inodes_exhausted.evaluation_time') %><% unless p('bosh_alerts.disable_values_in_alert_msg') %>: {{$value}}%<% end %>"
12 changes: 6 additions & 6 deletions jobs/bosh_alerts/templates/bosh_tsdb_system.alerts.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ groups:
severity: warning
annotations:
summary: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` is reporting a high CPU load average"
description: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` had a CPU load average (1m) above <%= p('bosh_alerts.job_high_cpu_load.threshold') %> for <%= p('bosh_alerts.job_high_cpu_load.evaluation_time') %>: {{$value}}"
# Append the current value only when bosh_alerts.disable_values_in_alert_msg is false.
description: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` had a CPU load average (1m) above <%= p('bosh_alerts.job_high_cpu_load.threshold') %> for <%= p('bosh_alerts.job_high_cpu_load.evaluation_time') %><% unless p('bosh_alerts.disable_values_in_alert_msg') %>: {{$value}}<% end %>"

- alert: BOSHTSDBJobLowFreeRAM
expr: avg(bosh_tsdb_job_mem_percent{bosh_job_name!~"^compilation.*",bosh_deployment!="bosh-health-check"}) by(environment, bosh_deployment, bosh_job_name, bosh_job_index) > <%= p('bosh_alerts.job_low_free_ram.threshold') %>
Expand All @@ -19,7 +19,7 @@ groups:
severity: warning
annotations:
summary: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` is reporting low free RAM"
description: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` has used more than <%= p('bosh_alerts.job_low_free_ram.threshold') %>% of its RAM for <%= p('bosh_alerts.job_low_free_ram.evaluation_time') %>: {{$value}}%"
# Append the current value only when bosh_alerts.disable_values_in_alert_msg is false.
description: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` has used more than <%= p('bosh_alerts.job_low_free_ram.threshold') %>% of its RAM for <%= p('bosh_alerts.job_low_free_ram.evaluation_time') %><% unless p('bosh_alerts.disable_values_in_alert_msg') %>: {{$value}}%<% end %>"

- alert: BOSHTSDBJobLowSwap
expr: avg(bosh_tsdb_job_swap_percent{bosh_job_name!~"^compilation.*",bosh_deployment!="bosh-health-check"}) by(environment, bosh_deployment, bosh_job_name, bosh_job_index) > <%= p('bosh_alerts.job_low_swap.threshold') %>
Expand All @@ -29,7 +29,7 @@ groups:
severity: warning
annotations:
summary: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` is reporting low swap"
description: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` has used more than <%= p('bosh_alerts.job_low_swap.threshold') %>% of its swap for <%= p('bosh_alerts.job_low_swap.evaluation_time') %>: {{$value}}%"
# Append the current value only when bosh_alerts.disable_values_in_alert_msg is false.
description: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` has used more than <%= p('bosh_alerts.job_low_swap.threshold') %>% of its swap for <%= p('bosh_alerts.job_low_swap.evaluation_time') %><% unless p('bosh_alerts.disable_values_in_alert_msg') %>: {{$value}}%<% end %>"

- alert: BOSHTSDBJobSystemDiskFull
expr: avg(bosh_tsdb_job_system_disk_percent{bosh_job_name!~"^compilation.*",bosh_deployment!="bosh-health-check"}) by(environment, bosh_deployment, bosh_job_name, bosh_job_index) > <%= p('bosh_alerts.job_system_disk_full.threshold') %>
Expand All @@ -39,7 +39,7 @@ groups:
severity: critical
annotations:
summary: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` is running out of system disk"
description: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` has used more than <%= p('bosh_alerts.job_system_disk_full.threshold') %>% of its system disk for <%= p('bosh_alerts.job_system_disk_full.evaluation_time') %>: {{$value}}%"
# Append the current value only when bosh_alerts.disable_values_in_alert_msg is false.
description: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` has used more than <%= p('bosh_alerts.job_system_disk_full.threshold') %>% of its system disk for <%= p('bosh_alerts.job_system_disk_full.evaluation_time') %><% unless p('bosh_alerts.disable_values_in_alert_msg') %>: {{$value}}%<% end %>"

- alert: BOSHTSDBJobEphemeralDiskFull
expr: avg(bosh_tsdb_job_ephemeral_disk_percent{bosh_job_name!~"^compilation.*",bosh_deployment!="bosh-health-check"}) by(environment, bosh_deployment, bosh_job_name, bosh_job_index) > <%= p('bosh_alerts.job_ephemeral_disk_full.threshold') %>
Expand All @@ -49,7 +49,7 @@ groups:
severity: critical
annotations:
summary: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` is running out of ephemeral disk"
description: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` has used more than <%= p('bosh_alerts.job_ephemeral_disk_full.threshold') %>% of its ephemeral disk for <%= p('bosh_alerts.job_ephemeral_disk_full.evaluation_time') %>: {{$value}}%"
# Append the current value only when bosh_alerts.disable_values_in_alert_msg is false.
description: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` has used more than <%= p('bosh_alerts.job_ephemeral_disk_full.threshold') %>% of its ephemeral disk for <%= p('bosh_alerts.job_ephemeral_disk_full.evaluation_time') %><% unless p('bosh_alerts.disable_values_in_alert_msg') %>: {{$value}}%<% end %>"

- alert: BOSHTSDBJobPersistentDiskFull
expr: avg(bosh_tsdb_job_persistent_disk_percent{bosh_job_name!~"^compilation.*",bosh_deployment!="bosh-health-check"}) by(environment, bosh_deployment, bosh_job_name, bosh_job_index) > <%= p('bosh_alerts.job_persistent_disk_full.threshold') %>
Expand All @@ -59,4 +59,4 @@ groups:
severity: critical
annotations:
summary: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` is running out of persistent disk"
description: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` has used more than <%= p('bosh_alerts.job_persistent_disk_full.threshold') %>% of its persistent disk for <%= p('bosh_alerts.job_persistent_disk_full.evaluation_time') %>: {{$value}}%"
# Append the current value only when bosh_alerts.disable_values_in_alert_msg is false.
description: "BOSH Job `{{$labels.environment}}/{{$labels.bosh_deployment}}/{{$labels.bosh_job_name}}/{{$labels.bosh_job_index}}` has used more than <%= p('bosh_alerts.job_persistent_disk_full.threshold') %>% of its persistent disk for <%= p('bosh_alerts.job_persistent_disk_full.evaluation_time') %><% unless p('bosh_alerts.disable_values_in_alert_msg') %>: {{$value}}%<% end %>"

0 comments on commit 9d7534f

Please sign in to comment.