From b4080d343e3175dbe1eb3af96028a0e2258fc6c5 Mon Sep 17 00:00:00 2001 From: Jim Garlick Date: Wed, 20 Mar 2024 14:11:50 -0700 Subject: [PATCH] resource: support gpus in resource.status Problem: fluxion tests fail when run with the new resource.status RPC. The failing tests involve gpus, which it turns out are not supported by rlist_set_allocated() (#5807) so they do not appear in the 'allocated' object in the new resource.status RPC. Instead of marking resources allocated and then extracting the allocated object with rlist_copy_allocated(), just use the set already provided by the job-manager. However that presents one other obstacle because although fluxion includes properties in the R objects returned by sched.alloc, sched-simple does not. Therefore, add some code to copy any matching properties into the allocated set before returning it in the resource.status response. --- src/modules/resource/status.c | 158 +++++++++++++++++++++++++--------- 1 file changed, 115 insertions(+), 43 deletions(-) diff --git a/src/modules/resource/status.c b/src/modules/resource/status.c index ad68a109259f..f12667bf246b 100644 --- a/src/modules/resource/status.c +++ b/src/modules/resource/status.c @@ -13,6 +13,7 @@ #endif #include #include +#include #include "resource.h" #include "inventory.h" @@ -51,28 +52,6 @@ static int mark_down (struct rlist *rl, const struct idset *ids) return 0; } -/* Mark the resources in 'o' (an Rv1 resource object) ALLOCATED - * in the resource set 'rl'. - */ -static int mark_allocated (struct rlist *rl, json_t *o) -{ - if (o && !json_is_null (o)) { - struct rlist *r; - - if (!(r = rlist_from_json (o, NULL))) { - errno = EINVAL; - return -1; - } - if (rlist_set_allocated (rl, r) < 0) { - rlist_destroy (r); - errno = EINVAL; - return -1; - } - rlist_destroy (r); - } - return 0; -} - /* Get an Rv1 resource object that includes all resources. 
*/ static json_t *get_all (struct rlist *rl) @@ -108,27 +87,121 @@ static json_t *get_down (struct rlist *rl) return NULL; } -/* Get an Rv1 resource object that includes only ALLOCATED resources. +/* Create an empty but valid Rv1 object. */ -static json_t *get_allocated (struct rlist *rl) +static json_t *get_empty_set (void) { + struct rlist *rl; json_t *o; - struct rlist *r; - if (!(r = rlist_copy_allocated (rl)) - || !(o = rlist_to_R (r))) - goto error; - rlist_destroy (r); + if (!(rl = rlist_create ())) + return NULL; + o = rlist_to_R (rl); + rlist_destroy (rl); + return o; +} + +/* Update property 'name' in 'alloc' resource set. + * Take the intersection of the alloc ranks vs the property ranks, + * and if non-empty, add properties to 'alloc' for those ranks. + */ +static int update_one_property (struct rlist *alloc, + struct idset *alloc_ranks, + struct idset *prop_ranks, + const char *name) +{ + struct idset *ids; + char *targets = NULL; + int rc = -1; + + if (!(ids = idset_intersect (alloc_ranks, prop_ranks)) + || idset_count (ids) == 0) { + rc = 0; + goto done; + } + if (!(targets = idset_encode (ids, IDSET_FLAG_RANGE))) + goto done; + if (rlist_add_property (alloc, NULL, name, targets) < 0) + goto done; + rc = 0; +done: + free (targets); + idset_destroy (ids); + return rc; +} + +/* Fetch properties from a resource set in JSON form. + */ +static json_t *get_properties (struct rlist *rl) +{ + char *s; + json_t *o = NULL; + + if ((s = rlist_properties_encode (rl))) + o = json_loads (s, 0, NULL); + free (s); return o; +} + +/* Given a resource set 'all' with properties, assign any to 'alloc' + * that have matching ranks. 
+ */ +static int update_properties (struct rlist *alloc, struct rlist *all) +{ + struct idset *alloc_ranks; + json_t *props; + const char *name; + json_t *val; + + if (!(alloc_ranks = rlist_ranks (alloc))) + return -1; + if (!(props = get_properties (all)) + || json_object_size (props) == 0) { + json_decref (props); + return 0; + } + json_object_foreach (props, name, val) { + struct idset *prop_ranks; + + if (!(prop_ranks = idset_decode (json_string_value (val)))) + continue; + if (update_one_property (alloc, alloc_ranks, prop_ranks, name) < 0) { + idset_destroy (prop_ranks); + goto error; + } + idset_destroy (prop_ranks); + } + idset_destroy (alloc_ranks); + json_decref (props); + return 0; error: - rlist_destroy (r); - return NULL; + idset_destroy (alloc_ranks); + json_decref (props); + return -1; +} + +static json_t *update_properties_json (json_t *R, struct rlist *all) +{ + struct rlist *alloc; + json_t *R2 = NULL; + + if (!(alloc = rlist_from_json (R, NULL))) + return NULL; + if (update_properties (alloc, all) < 0) + goto done; + R2 = rlist_to_R (alloc); +done: + rlist_destroy (alloc); + return R2; } -/* Given the "all" resource set 'rl', set the "all", "down", and +/* Given the resource set 'rl' with some ranks marked down AND the + * allocated set from the job manager, set the "all", "down", and * "allocated" keys in 'obj' to Rv1 resource objects. 
*/ -static int set_resource_status (json_t *obj, struct rlist *rl) +static int set_resource_status (json_t *obj, + struct rlist *rl, + json_t *allocated) { json_t *o; @@ -142,8 +215,11 @@ static int set_resource_status (json_t *obj, struct rlist *rl) json_decref (o); goto error; } - if (!(o = get_allocated (rl)) - || json_object_set_new (obj, "allocated", o) < 0) { + if (allocated) + o = update_properties_json (allocated, rl); + else + o = get_empty_set (); + if (!o || json_object_set_new (obj, "allocated", o) < 0) { json_decref (o); goto error; } @@ -156,13 +232,11 @@ static int set_resource_status (json_t *obj, struct rlist *rl) /* Create an rlist object from R. Omit the scheduling key. Then: * - exclude the ranks in 'exclude' (if non-NULL) * - mark down the ranks in 'down' and/or 'drain' (if non-NULL) - * - mark allocated the resources in 'allocated' (if non-NULL and not json NULL) */ static struct rlist *get_resource (const json_t *R, const struct idset *exclude, const struct idset *down, - struct idset *drain, - json_t *allocated) + struct idset *drain) { json_t *cpy; struct rlist *rl; @@ -180,9 +254,7 @@ static struct rlist *get_resource (const json_t *R, if (rlist_remove_ranks (rl, (struct idset *)exclude) < 0) goto error; } - if (mark_down (rl, down) < 0 - || mark_down (rl, drain) < 0 - || mark_allocated (rl, allocated) < 0) + if (mark_down (rl, down) < 0 || mark_down (rl, drain) < 0) goto error; json_decref (cpy); return rl; @@ -206,7 +278,7 @@ static json_t *prepare_response (struct status *status, json_t *allocated) if (!(R = inventory_get (ctx->inventory)) || !(drain_info = drain_get_info (ctx->drain)) - || !(rl = get_resource (R, exclude, down, drain, allocated))) + || !(rl = get_resource (R, exclude, down, drain))) goto error; if (!(o = json_pack ("{s:O s:O}", "R", R, "drain", drain_info))) { errno = ENOMEM; @@ -215,7 +287,7 @@ static json_t *prepare_response (struct status *status, json_t *allocated) if (rutil_set_json_idset (o, "online", 
monitor_get_up (ctx->monitor)) < 0 || rutil_set_json_idset (o, "offline", down) < 0 || rutil_set_json_idset (o, "exclude", exclude) < 0 - || set_resource_status (o, rl) < 0) + || set_resource_status (o, rl, allocated) < 0) goto error; json_decref (drain_info);