From 8245ccace50791af19e6422e0abcc3a437fb2618 Mon Sep 17 00:00:00 2001
From: amyangfei
Date: Mon, 15 Apr 2019 10:42:54 +0800
Subject: [PATCH] tests: add retry in metric check, make it more robust (#103)

---
 tests/README.md           |  2 +-
 tests/_utils/check_metric | 23 ++++++++++++++++-------
 tests/all_mode/run.sh     |  4 ++--
 3 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/tests/README.md b/tests/README.md
index 748bdae8c6..63f61f0b36 100644
--- a/tests/README.md
+++ b/tests/README.md
@@ -57,5 +57,5 @@ Several convenient commands are provided:
 * `check_port_alive <port>` - Wrapper to check a port is alive, at most 20 times.
 * `check_port <host> <port>` - Checks a host:port is alive.
 * `wait_process_exit <process-name>` - Wait for one or more processes to exit by given process name.
-* `check_metric <port> <metric-name> <valid-value> ...` - check metric value from prometheus.
+* `check_metric <port> <metric-name> <retry-count> <valid-value> ...` - check metric value from prometheus.

diff --git a/tests/_utils/check_metric b/tests/_utils/check_metric
index 188452cb4b..f41f5e6dee 100755
--- a/tests/_utils/check_metric
+++ b/tests/_utils/check_metric
@@ -1,19 +1,28 @@
 #!/bin/bash
 # parameter 1: port
 # parameter 2: metric name
-# parameter 3...: valid value list
+# parameter 3: retry count, if check failed we will wait 1s before next retry, until retry time exceeds retry count
+# parameter 4...: valid value list

 set -eu

 port=$1
 metric_name=$2
+retry_count=$3

-metric=$(curl -s http://127.0.0.1:$port/metrics | grep $metric_name | awk '{print $2}')
-shift 2
-for pattern in "$@"; do
-    if [ "$metric" == "${pattern}" ]; then
-        exit 0
-    fi
+shift 3
+
+counter=0
+while [ $counter -lt $retry_count ]; do
+    metric=$(curl -s http://127.0.0.1:$port/metrics | grep $metric_name | awk '{print $2}')
+    for pattern in "$@"; do
+        if [ "$metric" == "${pattern}" ]; then
+            exit 0
+        fi
+    done
+    ((counter+=1))
+    echo "wait for valid metric for $counter-th time"
+    sleep 1
 done

 echo "metric $metric_name has invalid value $metric"
diff --git a/tests/all_mode/run.sh b/tests/all_mode/run.sh
index 3dd191b69e..f4521265e0 100755
--- a/tests/all_mode/run.sh
+++ b/tests/all_mode/run.sh
@@ -40,8 +40,8 @@ function run() {

     # use sync_diff_inspector to check data now!
     check_sync_diff $WORK_DIR $cur/conf/diff_config.toml
-    check_metric $WORKER1_PORT 'dm_syncer_replication_lag{task="test"}' 0 1
-    check_metric $WORKER2_PORT 'dm_syncer_replication_lag{task="test"}' 0 1
+    check_metric $WORKER1_PORT 'dm_syncer_replication_lag{task="test"}' 3 0 1
+    check_metric $WORKER2_PORT 'dm_syncer_replication_lag{task="test"}' 3 0 1
 }

 cleanup1 all_mode