Skip to content

Commit

Permalink
resource: support gpus in resource.status
Browse files Browse the repository at this point in the history
Problem: fluxion tests fail when run with the new resource.status
RPC.

The failing tests involve gpus, which it turns out are not supported
by rlist_set_allocated() (#5807) so they do not appear in the 'allocated'
object in the new resource.status RPC.

Instead of marking resources allocated and then extracting the allocated
object with rlist_copy_allocated(), just use the set already provided
by the job-manager.

However that presents one other obstacle because although fluxion includes
properties in the R objects returned by sched.alloc, sched-simple does
not.  Therefore, add some code to copy any matching properties into the
allocated set before returning it in the resource.status response.
  • Loading branch information
garlick committed Mar 21, 2024
1 parent 43ff3d0 commit b4080d3
Showing 1 changed file with 115 additions and 43 deletions.
158 changes: 115 additions & 43 deletions src/modules/resource/status.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#endif
#include <jansson.h>
#include <flux/core.h>
#include <flux/idset.h>

#include "resource.h"
#include "inventory.h"
Expand Down Expand Up @@ -51,28 +52,6 @@ static int mark_down (struct rlist *rl, const struct idset *ids)
return 0;
}

/* Flag as ALLOCATED, within resource set 'rl', every resource named by
 * the Rv1 object 'o'.  A NULL pointer or a JSON null for 'o' is treated
 * as "nothing to mark" and succeeds immediately.
 * Returns 0 on success, or -1 with errno set to EINVAL if 'o' cannot be
 * converted to an rlist or the allocation cannot be applied to 'rl'.
 */
static int mark_allocated (struct rlist *rl, json_t *o)
{
    struct rlist *alloc;
    int rc;

    /* Nothing to do when no allocation object was supplied */
    if (!o || json_is_null (o))
        return 0;

    if (!(alloc = rlist_from_json (o, NULL))) {
        errno = EINVAL;
        return -1;
    }
    /* The temporary rlist is released on both outcomes before
     * the result is inspected.
     */
    rc = rlist_set_allocated (rl, alloc);
    rlist_destroy (alloc);
    if (rc < 0) {
        errno = EINVAL;
        return -1;
    }
    return 0;
}

/* Get an Rv1 resource object that includes all resources.
*/
static json_t *get_all (struct rlist *rl)
Expand Down Expand Up @@ -108,27 +87,121 @@ static json_t *get_down (struct rlist *rl)
return NULL;

Check warning on line 87 in src/modules/resource/status.c

View check run for this annotation

Codecov / codecov/patch

src/modules/resource/status.c#L85-L87

Added lines #L85 - L87 were not covered by tests
}

/* Get an Rv1 resource object that includes only ALLOCATED resources.
/* Create an empty but valid Rv1 object.
*/
static json_t *get_allocated (struct rlist *rl)
static json_t *get_empty_set (void)
{
struct rlist *rl;
json_t *o;
struct rlist *r;

if (!(r = rlist_copy_allocated (rl))
|| !(o = rlist_to_R (r)))
goto error;
rlist_destroy (r);
if (!(rl = rlist_create ()))
return NULL;
o = rlist_to_R (rl);
rlist_destroy (rl);
return o;
}

/* Apply property 'name' to the 'alloc' resource set.
 * The ranks carrying the property ('prop_ranks') are intersected with
 * the ranks present in 'alloc' ('alloc_ranks'); when that intersection
 * is non-empty the property is added to 'alloc' for exactly those ranks.
 * Returns 0 on success or when there is nothing to add (this includes
 * the case where the intersection could not be computed), and -1 if
 * encoding the ranks or adding the property fails.
 */
static int update_one_property (struct rlist *alloc,
                                struct idset *alloc_ranks,
                                struct idset *prop_ranks,
                                const char *name)
{
    struct idset *common = idset_intersect (alloc_ranks, prop_ranks);
    char *targets = NULL;
    int rc = 0;

    /* Only ranks shared by both sets receive the property */
    if (common && idset_count (common) > 0) {
        targets = idset_encode (common, IDSET_FLAG_RANGE);
        if (!targets
            || rlist_add_property (alloc, NULL, name, targets) < 0)
            rc = -1;
    }
    free (targets);
    idset_destroy (common);
    return rc;
}

/* Return the properties of resource set 'rl' as a JSON object.
 * The properties are first encoded to a string by
 * rlist_properties_encode() and then parsed with json_loads().
 * Returns NULL if either step fails.  The caller owns the result.
 */
static json_t *get_properties (struct rlist *rl)
{
    char *encoded = rlist_properties_encode (rl);
    json_t *props;

    if (!encoded)
        return NULL;
    props = json_loads (encoded, 0, NULL);
    free (encoded);
    return props;
}

/* Given a resource set 'all' with properties, assign any to 'alloc'
 * that have matching ranks.
 *
 * Each property of 'all' maps a name to an encoded idset of ranks.
 * For every property, the ranks it shares with 'alloc' (if any) get
 * that property added in 'alloc' via update_one_property().
 *
 * Returns 0 on success, including when 'all' has no properties or they
 * could not be fetched.  Returns -1 if the ranks of 'alloc' cannot be
 * obtained or if adding a property fails.
 *
 * All exits after 'alloc_ranks' is created go through the 'done' label
 * so that the idset is released on every path, including the early
 * "no properties" return.
 */
static int update_properties (struct rlist *alloc, struct rlist *all)
{
    struct idset *alloc_ranks;
    json_t *props = NULL;
    const char *name;
    json_t *val;
    int rc = -1;

    if (!(alloc_ranks = rlist_ranks (alloc)))
        return -1;
    /* Nothing to copy: not an error, but alloc_ranks must still be freed */
    if (!(props = get_properties (all))
        || json_object_size (props) == 0) {
        rc = 0;
        goto done;
    }
    json_object_foreach (props, name, val) {
        struct idset *prop_ranks;
        int ret;

        /* Skip any property whose value does not decode as an idset */
        if (!(prop_ranks = idset_decode (json_string_value (val))))
            continue;
        ret = update_one_property (alloc, alloc_ranks, prop_ranks, name);
        idset_destroy (prop_ranks);
        if (ret < 0)
            goto done;
    }
    rc = 0;
done:
    idset_destroy (alloc_ranks);
    json_decref (props); /* safe when props is NULL */
    return rc;
}

/* Return a new Rv1 object equivalent to 'R' (an Rv1 object for an
 * allocated set) after update_properties() has copied matching
 * properties from resource set 'all' into it.
 * 'R' is decoded into a temporary rlist, updated, and re-encoded with
 * rlist_to_R(); the temporary rlist is always destroyed.
 * Returns NULL if 'R' cannot be decoded or the property update fails.
 */
static json_t *update_properties_json (json_t *R, struct rlist *all)
{
    struct rlist *alloc = rlist_from_json (R, NULL);
    json_t *result = NULL;

    if (!alloc)
        return NULL;
    if (update_properties (alloc, all) >= 0)
        result = rlist_to_R (alloc);
    rlist_destroy (alloc);
    return result;
}

/* Given the "all" resource set 'rl', set the "all", "down", and
/* Given the resource set 'rl' with some ranks marked down AND the
* allocated set from the job manager, set the "all", "down", and
* "allocated" keys in 'obj' to Rv1 resource objects.
*/
static int set_resource_status (json_t *obj, struct rlist *rl)
static int set_resource_status (json_t *obj,
struct rlist *rl,
json_t *allocated)
{
json_t *o;

Expand All @@ -142,8 +215,11 @@ static int set_resource_status (json_t *obj, struct rlist *rl)
json_decref (o);
goto error;

Check warning on line 216 in src/modules/resource/status.c

View check run for this annotation

Codecov / codecov/patch

src/modules/resource/status.c#L215-L216

Added lines #L215 - L216 were not covered by tests
}
if (!(o = get_allocated (rl))
|| json_object_set_new (obj, "allocated", o) < 0) {
if (allocated)
o = update_properties_json (allocated, rl);
else
o = get_empty_set ();
if (!o || json_object_set_new (obj, "allocated", o) < 0) {
json_decref (o);
goto error;

Check warning on line 224 in src/modules/resource/status.c

View check run for this annotation

Codecov / codecov/patch

src/modules/resource/status.c#L223-L224

Added lines #L223 - L224 were not covered by tests
}
Expand All @@ -156,13 +232,11 @@ static int set_resource_status (json_t *obj, struct rlist *rl)
/* Create an rlist object from R. Omit the scheduling key. Then:
* - exclude the ranks in 'exclude' (if non-NULL)
* - mark down the ranks in 'down' and/or 'drain' (if non-NULL)
* - mark allocated the resources in 'allocated' (if non-NULL and not json NULL)
*/
static struct rlist *get_resource (const json_t *R,
const struct idset *exclude,
const struct idset *down,
struct idset *drain,
json_t *allocated)
struct idset *drain)
{
json_t *cpy;
struct rlist *rl;
Expand All @@ -180,9 +254,7 @@ static struct rlist *get_resource (const json_t *R,
if (rlist_remove_ranks (rl, (struct idset *)exclude) < 0)
goto error;

Check warning on line 255 in src/modules/resource/status.c

View check run for this annotation

Codecov / codecov/patch

src/modules/resource/status.c#L255

Added line #L255 was not covered by tests
}
if (mark_down (rl, down) < 0
|| mark_down (rl, drain) < 0
|| mark_allocated (rl, allocated) < 0)
if (mark_down (rl, down) < 0 || mark_down (rl, drain) < 0)
goto error;

Check warning on line 258 in src/modules/resource/status.c

View check run for this annotation

Codecov / codecov/patch

src/modules/resource/status.c#L258

Added line #L258 was not covered by tests
json_decref (cpy);
return rl;
Expand All @@ -206,7 +278,7 @@ static json_t *prepare_response (struct status *status, json_t *allocated)

if (!(R = inventory_get (ctx->inventory))
|| !(drain_info = drain_get_info (ctx->drain))
|| !(rl = get_resource (R, exclude, down, drain, allocated)))
|| !(rl = get_resource (R, exclude, down, drain)))
goto error;

Check warning on line 282 in src/modules/resource/status.c

View check run for this annotation

Codecov / codecov/patch

src/modules/resource/status.c#L282

Added line #L282 was not covered by tests
if (!(o = json_pack ("{s:O s:O}", "R", R, "drain", drain_info))) {
errno = ENOMEM;
Expand All @@ -215,7 +287,7 @@ static json_t *prepare_response (struct status *status, json_t *allocated)
if (rutil_set_json_idset (o, "online", monitor_get_up (ctx->monitor)) < 0
|| rutil_set_json_idset (o, "offline", down) < 0
|| rutil_set_json_idset (o, "exclude", exclude) < 0
|| set_resource_status (o, rl) < 0)
|| set_resource_status (o, rl, allocated) < 0)
goto error;

Check warning on line 291 in src/modules/resource/status.c

View check run for this annotation

Codecov / codecov/patch

src/modules/resource/status.c#L291

Added line #L291 was not covered by tests

json_decref (drain_info);
Expand Down

0 comments on commit b4080d3

Please sign in to comment.