Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

job-manager: include R in sched.free request #5783

Merged
merged 1 commit into from
Mar 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/common/libschedutil/init.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ extern "C" {
typedef struct schedutil_ctx schedutil_t;

enum schedutil_flags {
SCHEDUTIL_FREE_NOLOOKUP = 1, // ops->free() will be called with R=NULL
SCHEDUTIL_FREE_NOLOOKUP = 1, // now the default so this flag is ignored
};

/* Create a handle for the schedutil convenience library.
Expand Down
59 changes: 1 addition & 58 deletions src/common/libschedutil/ops.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,73 +45,16 @@ static void cancel_cb (flux_t *h,
util->ops->cancel (h, msg, util->cb_arg);
}

static void free_continuation (flux_future_t *f, void *arg)
{
schedutil_t *util = arg;
const flux_msg_t *msg = flux_future_aux_get (f, "schedutil::msg");
flux_t *h = util->h;
const char *R;

if (flux_kvs_lookup_get (f, &R) < 0) {
flux_log_error (h, "sched.free lookup R");
goto error;
}
if (schedutil_remove_outstanding_future (util, f) < 0)
flux_log_error (h, "sched.free unable to remove outstanding future");
util->ops->free (h, msg, R, util->cb_arg);
flux_future_destroy (f);
return;
error:
flux_log_error (h, "sched.free");
if (flux_respond_error (h, msg, errno, NULL) < 0)
flux_log_error (h, "sched.free respond_error");
flux_future_destroy (f);
}

static void free_cb (flux_t *h,
flux_msg_handler_t *mh,
const flux_msg_t *msg,
void *arg)
{
schedutil_t *util = arg;
flux_jobid_t id;
flux_future_t *f;
char key[64];

assert (util);

if (util->flags & SCHEDUTIL_FREE_NOLOOKUP) {
util->ops->free (h, msg, NULL, util->cb_arg);
return;
}

if (flux_request_unpack (msg, NULL, "{s:I}", "id", &id) < 0)
goto error;
if (flux_job_kvs_key (key, sizeof (key), id, "R") < 0) {
errno = EPROTO;
goto error;
}
if (!(f = flux_kvs_lookup (h, NULL, 0, key)))
goto error;
if (flux_future_aux_set (f,
"schedutil::msg",
(void *)flux_msg_incref (msg),
(flux_free_f)flux_msg_decref) < 0) {
flux_msg_decref (msg);
goto error_future;
}
if (flux_future_then (f, -1, free_continuation, util) < 0)
goto error_future;
if (schedutil_add_outstanding_future (util, f) < 0)
flux_log_error (h, "sched.free unable to add outstanding future");

return;
error_future:
flux_future_destroy (f);
error:
flux_log_error (h, "sched.free");
if (flux_respond_error (h, msg, errno, NULL) < 0)
flux_log_error (h, "sched.free respond_error");
util->ops->free (h, msg, NULL, util->cb_arg);
}

static void prioritize_cb (flux_t *h,
Expand Down
12 changes: 4 additions & 8 deletions src/common/libschedutil/ops.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,16 +39,12 @@ struct schedutil_ops {
const flux_msg_t *msg,
void *arg);

/* Callback for a free request. R is looked up as a convenience.
* 'msg' and 'R' are only valid for the duration of this call.
/* Callback for a free request.
* Currently 'R' is always NULL and may be unpacked as a JSON object
* under the "R" key in 'msg' if needed.
* 'msg' is only valid for the duration of this call.
* You should either respond to the request immediately (see
* free.h), or cache this information for later response.
*
* If R is unneeded, it is recommended that the
* SCHEDUTIL_FREE_NOLOOKUP flag be set in schedutil_create(). By
* setting this flag, R will not be looked up, saving a KVS lookup
* on every invocation of this callback. R will be set to NULL
* instead.
*/
void (*free)(flux_t *h,
const flux_msg_t *msg,
Expand Down
5 changes: 4 additions & 1 deletion src/modules/job-manager/alloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,10 @@ int free_request (struct alloc *alloc, struct job *job)

if (!(msg = flux_request_encode ("sched.free", NULL)))
return -1;
if (flux_msg_pack (msg, "{s:I}", "id", job->id) < 0)
if (flux_msg_pack (msg,
"{s:I s:O}",
"id", job->id,
"R", job->R_redacted) < 0)
goto error;
if (flux_send (alloc->ctx->h, msg, 0) < 0)
goto error;
Expand Down
16 changes: 10 additions & 6 deletions src/modules/sched-simple/sched.c
Original file line number Diff line number Diff line change
Expand Up @@ -336,13 +336,16 @@ static void check_cb (flux_reactor_t *r, flux_watcher_t *w,
}
}

static int try_free (flux_t *h, struct simple_sched *ss, const char *R)
static int try_free (flux_t *h, struct simple_sched *ss, json_t *R)
{
int rc = -1;
char *r = NULL;
struct rlist *alloc = rlist_from_R (R);
json_error_t error;
struct rlist *alloc = rlist_from_json (R, &error);
if (!alloc) {
flux_log_error (h, "free: unable to parse R=%s", R);
char *s = json_dumps (R, JSON_COMPACT);
flux_log_error (h, "free: unable to parse R=%s: %s", s, error.text);
ERRNO_SAFE_WRAP (free, s);
return -1;
}
r = rlist_dumps (alloc);
Expand All @@ -355,12 +358,13 @@ static int try_free (flux_t *h, struct simple_sched *ss, const char *R)
return rc;
}

void free_cb (flux_t *h, const flux_msg_t *msg, const char *R, void *arg)
void free_cb (flux_t *h, const flux_msg_t *msg, const char *R_str, void *arg)
{
struct simple_sched *ss = arg;
json_t *R;

if (!R) {
flux_log (h, LOG_ERR, "free: R is NULL");
if (flux_request_unpack (msg, NULL, "{s:o}", "R", &R) < 0) {
flux_log (h, LOG_ERR, "free: error unpacking sched.free request");
if (flux_respond_error (h, msg, EINVAL, NULL) < 0)
flux_log_error (h, "free_cb: flux_respond_error");
return;
Expand Down
5 changes: 4 additions & 1 deletion t/t2300-sched-simple.t
Original file line number Diff line number Diff line change
Expand Up @@ -242,11 +242,14 @@ test_expect_success 'sched-simple: cancel jobs' '
test_expect_success 'sched-simple: reload sched-simple to cover free flags' '
flux module reload sched-simple test-free-nolookup
'
# That SCHEDUTIL_FREE_NOLOOKUP is now a no-op but since flux-sched-0.33.0
# still uses it, ensure that free still works when it is used
test_expect_success 'sched-simple: submit job and cancel it' '
flux dmesg --clear &&
flux job submit basic.json >job19.id &&
flux job wait-event --timeout=5.0 $(cat job19.id) alloc &&
flux cancel $(cat job19.id) &&
$dmesg_grep -t 10 "free: R is NULL"
$dmesg_grep -t 10 "free: rank0/core0"
'
test_expect_success 'sched-simple: remove sched-simple and cancel jobs' '
flux module remove sched-simple &&
Expand Down
Loading