Skip to content

Commit

Permalink
job-info: avoid error response on failed rpc
Browse files Browse the repository at this point in the history
Problem: Common usage in flux-core is to not respond with an RPC error
(such as through flux_respond_error()) if a prior RPC attempt fails (such
as through flux_respond_pack()).  This pattern is violated in several
places in job-info.

Solution: If flux_respond_pack() fails, do not call flux_respond_error()
in error handling paths.

Fixes #6498
  • Loading branch information
chu11 committed Dec 12, 2024
1 parent 69aaf2a commit 2354a68
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 3 deletions.
7 changes: 6 additions & 1 deletion src/modules/job-info/guest_watch.c
Original file line number Diff line number Diff line change
Expand Up @@ -587,6 +587,7 @@ static void guest_namespace_watch_continuation (flux_future_t *f, void *arg)
struct guest_watch_ctx *gw = arg;
struct info_ctx *ctx = gw->ctx;
const char *event;
bool skip_respond_error = false;

if (flux_job_event_watch_get (f, &event) < 0) {
if (errno == ENOTSUP) {
Expand Down Expand Up @@ -639,6 +640,7 @@ static void guest_namespace_watch_continuation (flux_future_t *f, void *arg)
if (flux_respond_pack (ctx->h, gw->msg, "{s:s}", "event", event) < 0) {
flux_log_error (ctx->h, "%s: flux_respond_pack",
__FUNCTION__);
skip_respond_error = true;

Check warning on line 643 in src/modules/job-info/guest_watch.c

View check run for this annotation

Codecov / codecov/patch

src/modules/job-info/guest_watch.c#L643

Added line #L643 was not covered by tests
goto error_cancel;
}

Expand All @@ -657,6 +659,9 @@ static void guest_namespace_watch_continuation (flux_future_t *f, void *arg)
errno = save_errno;
}

if (skip_respond_error)
goto cleanup;

Check warning on line 663 in src/modules/job-info/guest_watch.c

View check run for this annotation

Codecov / codecov/patch

src/modules/job-info/guest_watch.c#L662-L663

Added lines #L662 - L663 were not covered by tests

error:
if (flux_respond_error (ctx->h, gw->msg, errno, NULL) < 0)
flux_log_error (ctx->h, "%s: flux_respond_error", __FUNCTION__);
Expand Down Expand Up @@ -763,7 +768,7 @@ static void main_namespace_lookup_continuation (flux_future_t *f, void *arg)
"event", tok, toklen) < 0) {
flux_log_error (ctx->h, "%s: flux_respond_pack",
__FUNCTION__);
goto error;
goto cleanup;

Check warning on line 771 in src/modules/job-info/guest_watch.c

View check run for this annotation

Codecov / codecov/patch

src/modules/job-info/guest_watch.c#L771

Added line #L771 was not covered by tests
}
}

Expand Down
10 changes: 8 additions & 2 deletions src/modules/job-info/update.c
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ static void eventlog_continuation (flux_future_t *f, void *arg)
json_t *context = NULL;
const char *errmsg = NULL;
const flux_msg_t *msg;
bool skip_respond_error = false;

if (flux_rpc_get (f, NULL) < 0) {
/* ENODATA is normal when job finishes or we've sent cancel */
Expand Down Expand Up @@ -196,6 +197,7 @@ static void eventlog_continuation (flux_future_t *f, void *arg)
"{s:O}",
uc->key, uc->update_object) < 0) {
flux_log_error (ctx->h, "%s: flux_respond", __FUNCTION__);
skip_respond_error = true;

Check warning on line 200 in src/modules/job-info/update.c

View check run for this annotation

Codecov / codecov/patch

src/modules/job-info/update.c#L200

Added line #L200 was not covered by tests
goto error_cancel;
}
msg = flux_msglist_next (uc->msglist);
Expand All @@ -212,6 +214,9 @@ static void eventlog_continuation (flux_future_t *f, void *arg)
* freed */
eventlog_watch_cancel (uc);

if (skip_respond_error)
goto cleanup;

Check warning on line 218 in src/modules/job-info/update.c

View check run for this annotation

Codecov / codecov/patch

src/modules/job-info/update.c#L217-L218

Added lines #L217 - L218 were not covered by tests

error:
msg = flux_msglist_first (uc->msglist);
while (msg) {
Expand Down Expand Up @@ -360,7 +365,7 @@ static void lookup_continuation (flux_future_t *f, void *arg)
if (flux_respond_pack (uc->ctx->h, msg, "{s:O}",
uc->key, uc->update_object) < 0) {
flux_log_error (ctx->h, "%s: flux_respond", __FUNCTION__);
goto error;
goto cleanup;

Check warning on line 368 in src/modules/job-info/update.c

View check run for this annotation

Codecov / codecov/patch

src/modules/job-info/update.c#L368

Added line #L368 was not covered by tests
}

next:
Expand Down Expand Up @@ -520,7 +525,7 @@ void update_watch_cb (flux_t *h,
"{s:O}",
uc->key, uc->update_object) < 0) {
flux_log_error (ctx->h, "%s: flux_respond", __FUNCTION__);
goto error;
goto cleanup;

Check warning on line 528 in src/modules/job-info/update.c

View check run for this annotation

Codecov / codecov/patch

src/modules/job-info/update.c#L528

Added line #L528 was not covered by tests
}
}
/* if uc->update_object has not been set, the initial lookup
Expand All @@ -536,6 +541,7 @@ void update_watch_cb (flux_t *h,
error:
if (flux_respond_error (h, msg, errno, errmsg) < 0)
flux_log_error (h, "%s: flux_respond_error", __FUNCTION__);
cleanup:
free (index_key);
}

Expand Down
5 changes: 5 additions & 0 deletions src/modules/job-info/watch.c
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,7 @@ static void watch_continuation (flux_future_t *f, void *arg)
const char *tok;
size_t toklen;
const char *errmsg = NULL;
bool skip_respond_error = false;

if (flux_kvs_lookup_get (f, &s) < 0) {
if (errno != ENOENT && errno != ENODATA && errno != ENOTSUP)
Expand Down Expand Up @@ -266,6 +267,7 @@ static void watch_continuation (flux_future_t *f, void *arg)
flux_log_error (ctx->h,
"%s: flux_respond_pack",
__FUNCTION__);
skip_respond_error = true;

Check warning on line 270 in src/modules/job-info/watch.c

View check run for this annotation

Codecov / codecov/patch

src/modules/job-info/watch.c#L270

Added line #L270 was not covered by tests
goto error_cancel;
}

Expand Down Expand Up @@ -306,6 +308,9 @@ static void watch_continuation (flux_future_t *f, void *arg)
errno = save_errno;
}

if (skip_respond_error)
goto cleanup;

Check warning on line 312 in src/modules/job-info/watch.c

View check run for this annotation

Codecov / codecov/patch

src/modules/job-info/watch.c#L311-L312

Added lines #L311 - L312 were not covered by tests

error:
if (flux_respond_error (ctx->h, w->msg, errno, errmsg) < 0)
flux_log_error (ctx->h, "%s: flux_respond_error", __FUNCTION__);
Expand Down

0 comments on commit 2354a68

Please sign in to comment.