Skip to content

Commit

Permalink
resource module: wrap traversal operations with clear_err_message ()
Browse files Browse the repository at this point in the history
Recent debugging revealed that the traverser error message wasn't
getting cleared after some traversal operations, leading to spurious or
misleading error logging that can be unrelated to the actual error cause.

Call clear_err_message () before each traverser function call (and again
after logging a failure) to ensure error messages are correctly attributed
to the cause.
  • Loading branch information
milroy committed Dec 18, 2024
1 parent 3fb0b4e commit bc93a3f
Showing 1 changed file with 8 additions and 0 deletions.
8 changes: 8 additions & 0 deletions resource/modules/resource_match.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1196,6 +1196,7 @@ static int mark_now (std::shared_ptr<resource_ctx_t> &ctx,
}
if ((rc = decode_rankset (ctx, ids, ranks)) < 0)
goto done;
ctx->traverser->clear_err_message ();
if ((rc = ctx->traverser->mark (ranks, status)) < 0) {
flux_log_error (ctx->h,
"%s: traverser::mark: %s",
Expand Down Expand Up @@ -1648,6 +1649,7 @@ static int run (std::shared_ptr<resource_ctx_t> &ctx,

dfu_traverser_t &tr = *(ctx->traverser);

tr.clear_err_message ();
if (std::string ("allocate") == cmd)
rc = tr.run (j, ctx->writers, match_op_t::MATCH_ALLOCATE, jobid, at);
else if (std::string ("allocate_with_satisfiability") == cmd)
Expand Down Expand Up @@ -1709,13 +1711,15 @@ static int run (std::shared_ptr<resource_ctx_t> &ctx,
static_cast<intmax_t> (jobid));
goto out;
}
tr.clear_err_message ();
if ((rc = tr.run (R, ctx->writers, rd, jobid, at, duration)) < 0) {
flux_log (ctx->h,
LOG_ERR,
"%s: dfu_traverser_t::run (id=%jd): %s",
__FUNCTION__,
static_cast<intmax_t> (jobid),
ctx->traverser->err_message ().c_str ());
ctx->traverser->clear_err_message ();
goto out;
}

Expand Down Expand Up @@ -1909,6 +1913,7 @@ static int run_remove (std::shared_ptr<resource_ctx_t> &ctx,
int rc = -1;
dfu_traverser_t &tr = *(ctx->traverser);

tr.clear_err_message ();
if (part_cancel) {
// RV1exec only reader supported in production currently
std::shared_ptr<resource_reader_base_t> reader;
Expand Down Expand Up @@ -1944,6 +1949,7 @@ static int run_remove (std::shared_ptr<resource_ctx_t> &ctx,
__FUNCTION__,
static_cast<intmax_t> (jobid),
ctx->traverser->err_message ().c_str ());
ctx->traverser->clear_err_message ();

Check warning on line 1952 in resource/modules/resource_match.cpp

View check run for this annotation

Codecov / codecov/patch

resource/modules/resource_match.cpp#L1952

Added line #L1952 was not covered by tests
goto out;
}
if (full_removal && is_existent_jobid (ctx, jobid))
Expand Down Expand Up @@ -2676,6 +2682,7 @@ static int run_find (std::shared_ptr<resource_ctx_t> &ctx,
match_format_t format = match_writers_factory_t::get_writers_type (format_str);
if (!(w = match_writers_factory_t::create (format)))
goto error;
ctx->traverser->clear_err_message ();
if ((rc = ctx->traverser->find (w, criteria)) < 0) {
if (ctx->traverser->err_message () != "") {
flux_log_error (ctx->h,
Expand Down Expand Up @@ -2947,6 +2954,7 @@ static void set_status_request_cb (flux_t *h,
errmsg = "unrecognized status '" + status + "'";
goto error;
}
ctx->traverser->clear_err_message ();
// mark the vertex
if (ctx->traverser->mark (resource_path, status_it->second) < 0) {
flux_log_error (h,
Expand Down

0 comments on commit bc93a3f

Please sign in to comment.