Skip to content

Commit

Permalink
sched: Add GPU scheduling
Browse files Browse the repository at this point in the history
Merge @TWRS' change and slightly modify it.

Propagate the gpu request information received
from flux submit to the request input object
for scheduling.

gpu becomes a constraint for resrc's resource
type matching logic.
  • Loading branch information
dongahn committed Apr 13, 2018
1 parent 17a7129 commit f542e78
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 3 deletions.
24 changes: 21 additions & 3 deletions sched/sched.c
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,7 @@ static inline int fill_resource_req (flux_t *h, flux_lwj_t *j, json_t *jcb)
int rc = -1;
int64_t nn = 0;
int64_t nc = 0;
int64_t ngpus = 0;
int64_t walltime = 0;
json_t *o = NULL;
ssrvctx_t *ctx = getctx (h);
Expand All @@ -437,9 +438,12 @@ static inline int fill_resource_req (flux_t *h, flux_lwj_t *j, json_t *jcb)
goto done;
if (!Jget_int64 (o, JSC_RDESC_NCORES, &nc))
goto done;
if (!Jget_int64 (o, JSC_RDESC_NGPUS, &ngpus))
goto done;

j->req->nnodes = (uint64_t) nn;
j->req->ncores = (uint64_t) nc;
j->req->ngpus = (uint64_t) ngpus;
if (!Jget_int64 (o, JSC_RDESC_WALLTIME, &walltime) || !walltime) {
j->req->walltime = (uint64_t) 3600;
} else {
Expand Down Expand Up @@ -1473,8 +1477,6 @@ static resrc_reqst_t *get_resrc_reqst (ssrvctx_t *ctx, flux_lwj_t *job,
*/
req_res = Jnew ();
if (job->req->nnodes > 0) {
json_t *child_core = Jnew ();

Jadd_str (req_res, "type", "node");
Jadd_int64 (req_res, "req_qty", job->req->nnodes);
*nreqrd = job->req->nnodes;
Expand All @@ -1493,6 +1495,7 @@ static resrc_reqst_t *get_resrc_reqst (ssrvctx_t *ctx, flux_lwj_t *job,
Jadd_bool (req_res, "exclusive", false);
}

json_t *child_core = Jnew ();
Jadd_str (child_core, "type", "core");
Jadd_int64 (child_core, "req_qty", job->req->corespernode);
/* setting size == 1 devotes (all of) the core to the job */
Expand All @@ -1501,7 +1504,22 @@ static resrc_reqst_t *get_resrc_reqst (ssrvctx_t *ctx, flux_lwj_t *job,
Jadd_bool (child_core, "exclusive", true);
Jadd_int64 (child_core, "starttime", starttime);
Jadd_int64 (child_core, "endtime", starttime + job->req->walltime);
json_object_set_new (req_res, "req_child", child_core);

json_t *children = Jnew_ar();
json_array_append_new (children, child_core);
if (job->req->ngpus) {
json_t *child_gpu = Jnew ();
Jadd_str (child_gpu, "type", "gpu");
Jadd_int64 (child_gpu, "req_qty", job->req->ngpus);
/* setting size == 1 devotes (all of) the gpu to the job */
Jadd_int64 (child_gpu, "req_size", 1);
/* setting exclusive to true prevents multiple jobs per gpu */
Jadd_bool (child_gpu, "exclusive", true);
Jadd_int64 (child_gpu, "starttime", starttime);
Jadd_int64 (child_gpu, "endtime", starttime + job->req->walltime);
json_array_append_new (children, child_gpu);
}
json_object_set_new (req_res, "req_children", children);
} else if (job->req->ncores > 0) {
Jadd_str (req_res, "type", "core");
Jadd_int (req_res, "req_qty", job->req->ncores);
Expand Down
1 change: 1 addition & 0 deletions sched/scheduler.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
typedef struct flux_resources {
uint64_t nnodes; /*!< num of nodes requested by a job */
uint64_t ncores; /*!< num of cores requested by a job */
uint64_t ngpus; /*!< num of gpus requested by a job */
uint64_t corespernode; /*!< num of cores per node requested by a job */
uint64_t walltime; /*!< walltime requested by a job */
bool node_exclusive; /*!< job requires exclusive use of node if true */
Expand Down

0 comments on commit f542e78

Please sign in to comment.