From 43ff3d0886da7971237a49f1a564112a8a3be949 Mon Sep 17 00:00:00 2001 From: Jim Garlick Date: Thu, 14 Mar 2024 14:21:11 -0700 Subject: [PATCH] convert users from sched to resource status RPC Problem: the sched.resource-status RPC is slow when the scheduler is busy, which makes python users and tools such as 'flux resource status' unresponsive. Now that resource.status returns a superset of sched.resource-status information, switch the python bindings, flux-top(1), and flux-resource(1) to use resource.status. --- src/bindings/python/flux/job/info.py | 2 +- src/bindings/python/flux/resource/list.py | 4 ++-- src/cmd/top/summary_pane.c | 6 +++--- src/modules/resource/monitor.c | 4 +--- 4 files changed, 7 insertions(+), 9 deletions(-) diff --git a/src/bindings/python/flux/job/info.py b/src/bindings/python/flux/job/info.py index a2170777b5e0..c1578ef919f9 100644 --- a/src/bindings/python/flux/job/info.py +++ b/src/bindings/python/flux/job/info.py @@ -227,7 +227,7 @@ def __init__(self, uri=None): if not uri or SchedResourceList is None: raise ValueError handle = flux.Flux(str(uri)) - future = handle.rpc("sched.resource-status") + future = handle.rpc("resource.status") self.stats = StatsInfo(handle).update_sync() self.resources = SchedResourceList(future.get()) self.initialized = True diff --git a/src/bindings/python/flux/resource/list.py b/src/bindings/python/flux/resource/list.py index cb7a3ecd6753..e3b43191eb2f 100644 --- a/src/bindings/python/flux/resource/list.py +++ b/src/bindings/python/flux/resource/list.py @@ -16,7 +16,7 @@ class SchedResourceList: """ - Encapsulate response from sched.resource-status query. + Encapsulate response from resource.status query. The response will contain 3 Rv1 resource sets: :ivar all: all resources known to scheduler @@ -98,4 +98,4 @@ def resource_list(flux_handle): Returns: ResourceListRPC: a future representing the request. 
""" - return ResourceListRPC(flux_handle, "sched.resource-status") + return ResourceListRPC(flux_handle, "resource.status") diff --git a/src/cmd/top/summary_pane.c b/src/cmd/top/summary_pane.c index b9d1d338edb1..abb28fb036d2 100644 --- a/src/cmd/top/summary_pane.c +++ b/src/cmd/top/summary_pane.c @@ -408,7 +408,7 @@ static void resource_continuation (flux_future_t *f, void *arg) if (flux_rpc_get_unpack (f, "o", &o) < 0) { if (errno != ENOSYS) /* Instance may not be up yet */ - fatal (errno, "sched.resource-status RPC failed"); + fatal (errno, "resource.status RPC failed"); } else { json_t *queue_constraint; @@ -432,7 +432,7 @@ static void resource_continuation (flux_future_t *f, void *arg) &sum->core.down, &sum->gpu.down, queue_constraint) < 0) - fatal (0, "error decoding sched.resource-status RPC response"); + fatal (0, "error decoding resource.status RPC response"); } flux_future_destroy (f); sum->f_resource = NULL; @@ -539,7 +539,7 @@ void summary_pane_query (struct summary_pane *sum) { if (!sum->f_resource) { if (!(sum->f_resource = flux_rpc (sum->top->h, - "sched.resource-status", + "resource.status", NULL, 0, 0)) diff --git a/src/modules/resource/monitor.c b/src/modules/resource/monitor.c index f55809cd9cc7..21a5701b5f79 100644 --- a/src/modules/resource/monitor.c +++ b/src/modules/resource/monitor.c @@ -30,9 +30,7 @@ * ranks in the initial program for the same reason as above. * - the 'resource.monitor-waitup' RPC allows a test to wait for some number * of ranks to be up, where "up" is defined as having had an online event - * posted. Thus, after waiting, resource.status (flux resource status) - * should show those ranks up, while sched.resource-status - * (flux resource list command) may still show them down. + * posted. */ #if HAVE_CONFIG_H