From 5eedfa9e63ab1e692dbb86637363f0b3b48f385f Mon Sep 17 00:00:00 2001
From: lance6716
Date: Wed, 5 Aug 2020 16:40:59 +0800
Subject: [PATCH] add alert

---
Reviewer notes (this section is commentary only, not part of the change):

  * Appends three alert rules after the existing rule whose summary is
    "dm syncer binlog file not catch up relay exceed 10 min":
      DM_worker_offline  fires when dm_master_worker_state == 0 has held
                         for 1h.
      DM_pending_DDL     fires when dm_master_ddl_state_number > 0 has
                         held for 1h.
      DM_DDL_error       fires when increase(dm_master_shard_ddl_error[1m])
                         is > 0; it has no "for:" clause, so no hold time
                         is required.
  * Every rule repeats its expression in labels.expr and is labelled
    level: critical with env: ENV_LABELS_ENV.
  * The trailing "No newline at end of file" marker was dropped so the
    resulting YAML file ends with a newline. The post-image hash on the
    "index" line below was not recomputed for that change.
  * NOTE(review): the indentation of the hunk lines was reconstructed from
    the usual Prometheus rule-file layout (2/4/6 columns); confirm it
    matches the target file before applying.

 dm/dm-ansible/conf/dm_worker.rules.yml | 35 ++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/dm/dm-ansible/conf/dm_worker.rules.yml b/dm/dm-ansible/conf/dm_worker.rules.yml
index f83a50549a..8436c6e40a 100644
--- a/dm/dm-ansible/conf/dm_worker.rules.yml
+++ b/dm/dm-ansible/conf/dm_worker.rules.yml
@@ -136,3 +136,38 @@ groups:
       description: 'cluster: ENV_LABELS_ENV, instance: {{ $labels.instance }}, task: {{ $labels.task }}, values: {{ $value }}'
       value: '{{ $value }}'
       summary: dm syncer binlog file not catch up relay exceed 10 min
+
+  - alert: DM_worker_offline
+    expr: dm_master_worker_state == 0
+    for: 1h
+    labels:
+      env: ENV_LABELS_ENV
+      level: critical
+      expr: dm_master_worker_state == 0
+    annotations:
+      description: 'cluster: ENV_LABELS_ENV, instance: {{ $labels.instance }}, task: {{ $labels.task }}, values: {{ $value }}'
+      value: '{{ $value }}'
+      summary: dm worker offline exceed 1h
+
+  - alert: DM_pending_DDL
+    expr: dm_master_ddl_state_number > 0
+    for: 1h
+    labels:
+      env: ENV_LABELS_ENV
+      level: critical
+      expr: dm_master_ddl_state_number > 0
+    annotations:
+      description: 'cluster: ENV_LABELS_ENV, instance: {{ $labels.instance }}, task: {{ $labels.task }}, values: {{ $value }}'
+      value: '{{ $value }}'
+      summary: DDL pending exceed 1h
+
+  - alert: DM_DDL_error
+    expr: increase(dm_master_shard_ddl_error[1m]) > 0
+    labels:
+      env: ENV_LABELS_ENV
+      level: critical
+      expr: increase(dm_master_shard_ddl_error[1m]) > 0
+    annotations:
+      description: 'cluster: ENV_LABELS_ENV, instance: {{ $labels.instance }}, task: {{ $labels.task }}, values: {{ $value }}'
+      value: '{{ $value }}'
+      summary: DDL error happens