Skip to content

Commit

Permalink
Merge pull request flux-framework#4131 from garlick/overlay_errstr
Browse files Browse the repository at this point in the history
flux-overlay: fix timeout error message
  • Loading branch information
mergify[bot] authored Feb 15, 2022
2 parents 6f0165b + d6e2aee commit a0bfb7f
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 2 deletions.
6 changes: 4 additions & 2 deletions src/cmd/builtin/overlay.c
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,7 @@ static int status_healthwalk (struct status *ctx,
struct status_node node = { .ghost = false, .connector = connector };
flux_future_t *f;
json_t *children;
const char *errstr;
int rc = 0;

monotime (&ctx->start);
Expand All @@ -436,19 +437,20 @@ static int status_healthwalk (struct status *ctx,
"status", &node.status,
"duration", &node.duration,
"children", &children) < 0) {
errstr = future_strerror (f, errno);
/* RPC failed.
* An error at level 0 should be fatal, e.g. unknown wait argument,
* bad rank, timeout. An error at level > 0 should return -1 so
* ghostwalk() can be tried (parent hasn't noticed child crash?)
* and sibling subtrees can be probed.
*/
if (level == 0)
- log_msg_exit ("%s", future_strerror (f, errno));
+ log_msg_exit ("%s", errstr);
printf ("%s%s%s: %s%s\n",
status_indent (ctx, level),
connector_string (connector),
status_getname (ctx, rank),
- future_strerror (f, errno),
+ errstr,
status_rpctime (ctx));
rc = -1;
goto done;
Expand Down
13 changes: 13 additions & 0 deletions t/t3303-system-healthcheck.t
Original file line number Diff line number Diff line change
Expand Up @@ -229,4 +229,17 @@ test_expect_success 'flux overlay disconnect fails on bad input' '
grep "TARGET must be a valid rank or hostname" disconn3.err
'

test_expect_success 'stop broker 12' '
$startctl kill 12 19
'

test_expect_success 'flux overlay status prints connection timed out on 12' '
flux overlay status --no-pretty --no-color >status.out &&
grep "fake12: Connection timed out" status.out
'

test_expect_success 'continue broker 12' '
$startctl kill 12 18
'

test_done

0 comments on commit a0bfb7f

Please sign in to comment.