From 8ed3467fc2eed317e0df2ec7640d63d02ff43d70 Mon Sep 17 00:00:00 2001 From: "Dong H. Ahn" Date: Thu, 4 Nov 2021 20:39:23 -0700 Subject: [PATCH 1/2] jobspec: add level 0 support for moldable jobspecs Problem: job-list and jobshell currently assume an integer value for the "count" key within a jobspec. While this complies with RFC 25 (Jobspec V1), it prevents users from submitting a moldable jobspec in which the "count" key instead holds a dictionary with min/max/operator/operand. Because moldability will soon be required to enable node-exclusive scheduling for our system instance work, we need level 0 support. Modify parse_res_level() functions within job-list and jobshell where this assumption is made. Unpack the "count" key as a json_t object instead of an integer object in those functions and subsequently handle the moldable jobspec case where its value is a dictionary. This does not change semantics whatsoever since this is level 0 support. As such, the min count is used for these components when a moldable jobspec is given. --- src/modules/job-list/job_state.c | 16 ++++++++++++++-- src/shell/jobspec.c | 14 ++++++++++++-- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/src/modules/job-list/job_state.c b/src/modules/job-list/job_state.c index 12ea94da05a3..2feeac139e10 100644 --- a/src/modules/job-list/job_state.c +++ b/src/modules/job-list/job_state.c @@ -357,20 +357,32 @@ static int parse_res_level (struct list_ctx *ctx, { json_error_t error; struct res_level res; + json_t *cnt_o = NULL; res.with = NULL; /* For jobspec version 1, expect exactly one array element per level. 
*/ if (json_unpack_ex (o, &error, 0, - "[{s:s s:i s?o}]", + "[{s:s s:o s?o}]", "type", &res.type, - "count", &res.count, + "count", &cnt_o, "with", &res.with) < 0) { flux_log (ctx->h, LOG_ERR, "%s: job %ju invalid jobspec: %s", __FUNCTION__, (uintmax_t)job->id, error.text); return -1; } + if (json_is_integer (cnt_o)) + res.count = json_integer_value (cnt_o); + else if (json_unpack_ex (cnt_o, &error, 0, + "{s:i}", + "min", &res.count)) { + flux_log (ctx->h, LOG_ERR, + "%s: job %ju invalid count format in jobspec: %s", + __FUNCTION__, (uintmax_t)job->id, error.text); + return -1; + } + *resp = res; return 0; } diff --git a/src/shell/jobspec.c b/src/shell/jobspec.c index 317e60ec7e92..1eda186255e9 100644 --- a/src/shell/jobspec.c +++ b/src/shell/jobspec.c @@ -40,6 +40,7 @@ static int parse_res_level (json_t *o, { json_error_t loc_error; struct res_level res; + json_t *cnt_o = NULL; if (o == NULL) { set_error (error, "level %d: missing", level); @@ -49,13 +50,22 @@ static int parse_res_level (json_t *o, /* For jobspec version 1, expect exactly one array element per level. */ if (json_unpack_ex (o, &loc_error, 0, - "{s:s s:i s?o}", + "{s:s s:o s?o}", "type", &res.type, - "count", &res.count, + "count", &cnt_o, "with", &res.with) < 0) { set_error (error, "level %d: %s", level, loc_error.text); return -1; } + if (json_is_integer (cnt_o)) + res.count = json_integer_value (cnt_o); + else if (json_unpack_ex (cnt_o, &loc_error, 0, + "{s:i}", + "min", &res.count)) { + set_error (error, "level %d (count): %s", level, loc_error.text); + return -1; + } + *resp = res; return 0; } From 20b36752f7ac81afd74e2d215cbf34f99f6913f9 Mon Sep 17 00:00:00 2001 From: "Dong H. Ahn" Date: Tue, 9 Nov 2021 22:39:51 -0800 Subject: [PATCH 2/2] testsuite: add jobshell tests with moldable jobspec Extend the existing jobspec unit tests for jobshell with moldable jobspecs. 
Add good inputs to test that the jobspec parsing code used by jobshell can handle both partially and fully qualified resource count specs where only the "min" key is mandatory. Limit testing resources to "core" and "gpu" where moldable count specs are expected to be used in the near term. --- src/shell/test/jobspec.c | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/src/shell/test/jobspec.c b/src/shell/test/jobspec.c index 7b2118e1129a..59d7b6da64b3 100644 --- a/src/shell/test/jobspec.c +++ b/src/shell/test/jobspec.c @@ -34,6 +34,30 @@ struct input good_input[] = { "flux jobspec srun hostname (slot->core)", "{\"tasks\": [{\"slot\": \"task\", \"count\": {\"per_slot\": 1}, \"command\": [\"hostname\"], \"attributes\": {}}], \"attributes\": {\"system\": {\"cwd\": \"/home/garlick/proj/flux-core/src/cmd\"}}, \"version\": 1, \"resources\": [{\"count\": 1, \"with\": [{\"count\": 1, \"type\": \"core\"}], \"type\": \"slot\", \"label\": \"task\"}]}", }, + { + "flux jobspec srun hostname (slot->core[>=1])", + "{\"tasks\": [{\"slot\": \"task\", \"count\": {\"per_slot\": 1}, \"command\": [\"hostname\"], \"attributes\": {}}], \"attributes\": {\"system\": {\"cwd\": \"/home/garlick/proj/flux-core/src/cmd\"}}, \"version\": 1, \"resources\": [{\"count\": 1, \"with\": [{\"count\": {\"min\": 1}, \"type\": \"core\"}], \"type\": \"slot\", \"label\": \"task\"}]}", + }, + { + "flux jobspec srun hostname (slot->core[>=1,<=8])", + "{\"tasks\": [{\"slot\": \"task\", \"count\": {\"per_slot\": 1}, \"command\": [\"hostname\"], \"attributes\": {}}], \"attributes\": {\"system\": {\"cwd\": \"/home/garlick/proj/flux-core/src/cmd\"}}, \"version\": 1, \"resources\": [{\"count\": 1, \"with\": [{\"count\": {\"min\": 1, \"max\": 8}, \"type\": \"core\"}], \"type\": \"slot\", \"label\": \"task\"}]}", + }, + { + "flux jobspec srun hostname (slot->core[>=1,<=8,oper=+])", + "{\"tasks\": [{\"slot\": \"task\", \"count\": {\"per_slot\": 1}, \"command\": [\"hostname\"], 
\"attributes\": {}}], \"attributes\": {\"system\": {\"cwd\": \"/home/garlick/proj/flux-core/src/cmd\"}}, \"version\": 1, \"resources\": [{\"count\": 1, \"with\": [{\"count\": {\"min\": 1, \"max\": 8, \"operator\": \"+\"}, \"type\": \"core\"}], \"type\": \"slot\", \"label\": \"task\"}]}", + }, + { + "flux jobspec srun hostname (slot->core[>=1,<=8,oper=+,op=2])", + "{\"tasks\": [{\"slot\": \"task\", \"count\": {\"per_slot\": 1}, \"command\": [\"hostname\"], \"attributes\": {}}], \"attributes\": {\"system\": {\"cwd\": \"/home/garlick/proj/flux-core/src/cmd\"}}, \"version\": 1, \"resources\": [{\"count\": 1, \"with\": [{\"count\": {\"min\": 1, \"max\": 8, \"operator\": \"+\", \"operand\": 2}, \"type\": \"core\"}], \"type\": \"slot\", \"label\": \"task\"}]}", + }, + { + "flux jobspec srun hostname (slot->core[>=1,<=8,oper=*,op=3])", + "{\"tasks\": [{\"slot\": \"task\", \"count\": {\"per_slot\": 1}, \"command\": [\"hostname\"], \"attributes\": {}}], \"attributes\": {\"system\": {\"cwd\": \"/home/garlick/proj/flux-core/src/cmd\"}}, \"version\": 1, \"resources\": [{\"count\": 1, \"with\": [{\"count\": {\"min\": 1, \"max\": 8, \"operator\": \"*\", \"operand\": 3}, \"type\": \"core\"}], \"type\": \"slot\", \"label\": \"task\"}]}", + }, + { + "flux jobspec srun hostname (slot->core[>=1,<=8,oper=^,op=2])", + "{\"tasks\": [{\"slot\": \"task\", \"count\": {\"per_slot\": 1}, \"command\": [\"hostname\"], \"attributes\": {}}], \"attributes\": {\"system\": {\"cwd\": \"/home/garlick/proj/flux-core/src/cmd\"}}, \"version\": 1, \"resources\": [{\"count\": 1, \"with\": [{\"count\": {\"min\": 1, \"max\": 8, \"operator\": \"^\", \"operand\": 2}, \"type\": \"core\"}], \"type\": \"slot\", \"label\": \"task\"}]}", + }, { "node->socket->slot->core", "{\"resources\": [{\"type\": \"node\", \"count\": 1, \"with\": [{\"type\": \"socket\", \"count\": 1, \"with\": [{\"type\": \"slot\", \"count\": 1, \"with\": [{\"type\": \"core\", \"count\": 1}], \"label\": \"task\"}]}]}], \"tasks\": [{\"command\": 
[\"hostname\"], \"slot\": \"task\", \"count\": {\"per_slot\": 1}}], \"attributes\": {\"system\": {\"duration\": 0, \"cwd\": \"/usr/libexec/flux\", \"environment\": {}}}, \"version\": 1}", @@ -50,6 +74,14 @@ struct input good_input[] = { "node->socket->slot->(core[2],gpu)", "{\"resources\": [{\"type\": \"node\", \"count\": 1, \"with\": [{\"type\": \"socket\", \"count\": 1, \"with\": [{\"type\": \"slot\", \"label\": \"task\", \"count\": 1, \"with\": [{\"type\": \"core\", \"count\": 2}, {\"type\": \"gpu\", \"count\": 1}]}]}]}], \"tasks\": [{\"command\": [\"hostname\"], \"slot\": \"task\", \"count\": {\"per_slot\": 1}}], \"attributes\": {\"system\": {\"duration\": 0, \"cwd\": \"/usr/libexec/flux\", \"environment\": {}}}, \"version\": 1}", }, + { + "node->socket->slot->(core[>=2],gpu[>=1])", + "{\"resources\": [{\"type\": \"node\", \"count\": 1, \"with\": [{\"type\": \"socket\", \"count\": 1, \"with\": [{\"type\": \"slot\", \"label\": \"task\", \"count\": 1, \"with\": [{\"type\": \"core\", \"count\": {\"min\": 2}}, {\"type\": \"gpu\", \"count\": {\"min\": 1}}]}]}]}], \"tasks\": [{\"command\": [\"hostname\"], \"slot\": \"task\", \"count\": {\"per_slot\": 1}}], \"attributes\": {\"system\": {\"duration\": 0, \"cwd\": \"/usr/libexec/flux\", \"environment\": {}}}, \"version\": 1}", + }, + { + "node->socket->slot->(core[>=2,<=8],gpu[>=1,<=2])", + "{\"resources\": [{\"type\": \"node\", \"count\": 1, \"with\": [{\"type\": \"socket\", \"count\": 1, \"with\": [{\"type\": \"slot\", \"label\": \"task\", \"count\": 1, \"with\": [{\"type\": \"core\", \"count\": {\"min\": 2, \"max\": 8}}, {\"type\": \"gpu\", \"count\": {\"min\": 1, \"max\": 2}}]}]}]}], \"tasks\": [{\"command\": [\"hostname\"], \"slot\": \"task\", \"count\": {\"per_slot\": 1}}], \"attributes\": {\"system\": {\"duration\": 0, \"cwd\": \"/usr/libexec/flux\", \"environment\": {}}}, \"version\": 1}", + }, { "node->socket->slot->(gpu,core)", "{\"resources\": [{\"type\": \"node\", \"count\": 1, \"with\": [{\"type\": 
\"socket\", \"count\": 1, \"with\": [{\"type\": \"slot\", \"label\": \"task\", \"count\": 1, \"with\": [{\"type\": \"gpu\", \"count\": 1}, {\"type\": \"core\", \"count\": 1}]}]}]}], \"tasks\": [{\"command\": [\"hostname\"], \"slot\": \"task\", \"count\": {\"per_slot\": 1}}], \"attributes\": {\"system\": {\"duration\": 0, \"cwd\": \"/usr/libexec/flux\", \"environment\": {}}}, \"version\": 1}", @@ -74,11 +106,19 @@ struct input good_input[] = { }; struct output good_output[] = { + {1, 1, 1, -1}, + {1, 1, 1, -1}, + {1, 1, 1, -1}, + {1, 1, 1, -1}, + {1, 1, 1, -1}, + {1, 1, 1, -1}, {1, 1, 1, -1}, {1, 1, 1, 1}, {30, 30, 3, 15}, {5, 5, 6, -1}, {1, 1, 2, 1}, + {1, 1, 2, 1}, + {1, 1, 2, 1}, {1, 1, 1, 1}, {1, 1, 2, 1}, {6, 6, 5, 3},