Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a set-status RPC for marking vertices up or down #1110

Merged
merged 4 commits into from
Dec 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 56 additions & 0 deletions resource/modules/resource_match.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,9 @@
static void params_request_cb (flux_t *h, flux_msg_handler_t *w,
const flux_msg_t *msg, void *arg);

static void set_status_request_cb (flux_t *h, flux_msg_handler_t *w,
const flux_msg_t *msg, void *arg);

static const struct flux_msg_handler_spec htab[] = {
{ FLUX_MSGTYPE_REQUEST,
"sched-fluxion-resource.match", match_request_cb, 0 },
Expand Down Expand Up @@ -288,6 +291,8 @@
"sched-fluxion-resource.satisfiability", satisfiability_request_cb, 0 },
{ FLUX_MSGTYPE_REQUEST,
"sched-fluxion-resource.params", params_request_cb, 0 },
{ FLUX_MSGTYPE_REQUEST,
"sched-fluxion-resource.set_status", set_status_request_cb, 0 },
FLUX_MSGHANDLER_TABLE_END
};

Expand Down Expand Up @@ -2633,6 +2638,57 @@
flux_log_error (h, "%s: flux_respond_error", __FUNCTION__);
}

/*
 * Mark a vertex as up or down.
 *
 * Message handler for "sched-fluxion-resource.set_status". The request
 * payload must contain two string members:
 *   "resource_path" - path of the vertex to mark; it must be a key of
 *                     ctx->db->metadata.by_path
 *   "status"        - status name; it must be a key of
 *                     resource_pool_t::str_to_status
 *
 * On success a response with no payload is sent. Every failure (payload
 * cannot be unpacked, unknown path, unknown status, traverser mark failure)
 * is answered with an EINVAL error response carrying a descriptive message.
 */
static void set_status_request_cb (flux_t *h, flux_msg_handler_t *w,
                                   const flux_msg_t *msg, void *arg)
{
    const char *rp = NULL, *st = NULL;
    std::string resource_path = "", status = "", errmsg = "";
    std::shared_ptr<resource_ctx_t> ctx = getctx ((flux_t *)arg);
    // The status table is only read here, so bind it by reference instead
    // of copying the whole map on every request.
    const resource_pool_t::string_to_status &sts =
        resource_pool_t::str_to_status;
    // Declared up front so that no 'goto error' jumps over an initialization.
    resource_pool_t::string_to_status::const_iterator status_it {};

    if (flux_request_unpack (msg, NULL, "{s:s, s:s}",
                             "resource_path", &rp,
                             "status", &st) < 0) {
        errmsg = "malformed RPC";
        goto error;
    }
    resource_path = rp;
    status = st;
    // check that the path/vertex exists
    if (ctx->db->metadata.by_path.find (resource_path)
        == ctx->db->metadata.by_path.end ()) {
        errmsg = "could not find path '" + resource_path + "' in resource graph";
        goto error;
    }
    // check that the status given is valid ('up' or 'down')
    status_it = sts.find (status);
    if (status_it == sts.end ()) {
        errmsg = "unrecognized status '" + status + "'";
        goto error;
    }
    // mark the vertex
    if (ctx->traverser->mark (resource_path, status_it->second) < 0) {
        // Log the traverser's detailed message locally, then reset it; the
        // requester only receives the generic message below.
        flux_log_error (h, "%s: traverser::mark: %s", __FUNCTION__,
                        ctx->traverser->err_message ().c_str ());
        ctx->traverser->clear_err_message ();
        errmsg = "Failed to set status of resource vertex";
        goto error;
    }
    if (flux_respond (h, msg, NULL) < 0) {
        flux_log_error (h, "%s: flux_respond", __FUNCTION__);
    }
    return;

error:
    if (flux_respond_error (h, msg, EINVAL, errmsg.c_str ()) < 0)
        flux_log_error (h, "%s: flux_respond_error", __FUNCTION__);
    return;
}

/******************************************************************************
* *
Expand Down
1 change: 1 addition & 0 deletions t/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ set(ALL_TESTS
t4009-match-update.t
t4010-match-conf.t
t4011-match-duration.t
t4012-set-status.t
t5000-valgrind.t
t6000-graph-size.t
t6001-match-formats.t
Expand Down
1 change: 1 addition & 0 deletions t/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ TESTS = \
t4009-match-update.t \
t4010-match-conf.t \
t4011-match-duration.t \
t4012-set-status.t \
t5000-valgrind.t \
t5100-issues-test-driver.t \
t6000-graph-size.t \
Expand Down
31 changes: 29 additions & 2 deletions t/scripts/flux-ion-resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
# FLUX_EXEC_PATH or `flux python flux-ion-resource` if not to
# avoid python version mismatch
#
from __future__ import print_function
import argparse
import errno
import yaml
Expand Down Expand Up @@ -94,6 +93,10 @@ def rpc_find(self, criteria, find_format=None):
def rpc_status(self):
return self.f.rpc("sched-fluxion-resource.status").get()

def rpc_set_status(self, resource_path, status):
    # Send the path and the requested status to the resource module's
    # set_status service and block on the reply via .get().
    request = self.f.rpc(
        "sched-fluxion-resource.set_status",
        {"resource_path": resource_path, "status": status},
    )
    return request.get()

def rpc_namespace_info(self, rank, type_name, identity):
payload = {"rank": rank, "type-name": type_name, "id": identity}
return self.f.rpc("sched-fluxion-resource.ns-info", payload).get()
Expand Down Expand Up @@ -288,6 +291,16 @@ def status_action(args):
print(json.dumps(resp))


"""
Action for set-status sub-command
"""


def set_status_action(args):
    # Forward the two parsed positional arguments to the resource module.
    # The RPC result is not used here, so nothing is printed.
    ResourceModuleInterface().rpc_set_status(args.resource_path, args.status)


"""
Action for ns-info sub-command
"""
Expand Down Expand Up @@ -339,8 +352,9 @@ def main():
istr = "Print info on a single job."
sstr = "Print overall performance statistics."
cstr = "Cancel an allocated or reserved job."
fstr = "Find resources matching with a crieria."
fstr = "Find resources matching with a criteria."
ststr = "Display resource status."
ssstr = "Set up/down status of a resource vertex."
pstr = "Set property-key=value for specified resource."
gstr = "Get value for specified resource and property-key."
nstr = "Get remapped ID given raw ID seen by the reader."
Expand All @@ -352,6 +366,7 @@ def main():
parser_c = subpar.add_parser("cancel", help=cstr, description=cstr)
parser_f = subpar.add_parser("find", help=fstr, description=fstr)
parser_st = subpar.add_parser("status", help=ststr, description=ststr)
parser_ss = subpar.add_parser("set-status", help=ssstr, description=ssstr)
parser_sp = subpar.add_parser("set-property", help=pstr, description=pstr)
parser_gp = subpar.add_parser("get-property", help=gstr, description=gstr)
parser_n = subpar.add_parser("ns-info", help=nstr, description=nstr)
Expand Down Expand Up @@ -419,6 +434,18 @@ def main():
st_help = "Resource status"
parser_st.set_defaults(func=status_action)

# Positional argument for set-status sub-command
#
parser_ss.add_argument(
"resource_path",
help="path to vertex",
)
parser_ss.add_argument(
"status",
help="status of vertex",
)
parser_ss.set_defaults(func=set_status_action)

#
# Positional argument for match allocate sub-command
#
Expand Down
1 change: 0 additions & 1 deletion t/scripts/flux-jsonschemalint.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
# FLUX_EXEC_PATH or `flux python flux-jsonschemalint` if not to
# avoid python version mismatch
#
from __future__ import print_function

import argparse
import errno
Expand Down
84 changes: 84 additions & 0 deletions t/t4012-set-status.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#!/bin/sh
#set -x

test_description='Test marking resource vertices up or down (set-status) within resource
'

. `dirname $0`/sharness.sh

grug="${SHARNESS_TEST_SRCDIR}/data/resource/grugs/tiny.graphml"
jobspec="${SHARNESS_TEST_SRCDIR}/data/resource/jobspecs/basics/test008.yaml"

#
# test_under_flux is under sharness.d/
#
test_under_flux 1

#
# print only with --debug
#

test_debug '
echo ${grug}
'

# Load the resource module from the tiny GRUG file; every later test in this
# file runs against the graph loaded here.
test_expect_success 'loading resource module with a tiny machine config works' '
load_resource \
load-file=${grug} load-format=grug \
prune-filters=ALL:core subsystems=containment policy=high
'

# Round trip on a single node: the status=down query is expected to print
# "null" before any change, to list node0 once it is marked down, and to
# print "null" again after node0 is marked up.
test_expect_success 'set-status basic test works' '
flux ion-resource find status=down | grep null &&
flux ion-resource set-status /tiny0/rack0/node0 down &&
flux ion-resource find status=down | grep node0 &&
flux ion-resource set-status /tiny0/rack0/node0 up &&
flux ion-resource find status=down | grep null
'

# A path that is not in the resource graph must make set-status fail.
test_expect_success 'bad resource path produces an error' '
test_must_fail flux ion-resource set-status /foobar/not/a/vertex down
'

# A status string other than the recognized ones must make set-status fail,
# even when the path itself is valid.
test_expect_success 'bad status produces an error' '
test_must_fail flux ion-resource set-status /tiny0/rack0/node0 foobar
'

# Two nodes marked down and up independently: with both down the query output
# is expected to contain the literal range "node[0-1]"; after node0 comes back
# up only node1 should remain, and finally the output is "null" again.
test_expect_success 'set-status not-so-basic test works' '
flux ion-resource find status=down | grep null &&
flux ion-resource set-status /tiny0/rack0/node0 down &&
flux ion-resource find status=down | grep node0 &&
flux ion-resource set-status /tiny0/rack0/node1 down &&
flux ion-resource find status=down | grep "node\[0-1\]" &&
flux ion-resource set-status /tiny0/rack0/node0 up &&
flux ion-resource find status=down | grep node1 &&
flux ion-resource set-status /tiny0/rack0/node1 up &&
flux ion-resource find status=down | grep null
'

# With node0 and node1 both down, the status=up query is expected to print
# "null". The jobspec must still pass the satisfiability check while an actual
# allocation must fail. Both nodes are marked up again at the end so later
# tests start from a clean state (only reached if the earlier steps pass).
test_expect_success 'jobs fail when all nodes are marked down' '
flux ion-resource set-status /tiny0/rack0/node0 down &&
flux ion-resource set-status /tiny0/rack0/node1 down &&
flux ion-resource find status=up | grep null &&
flux ion-resource match satisfiability $jobspec &&
test_must_fail flux ion-resource match allocate $jobspec &&
flux ion-resource set-status /tiny0/rack0/node0 up &&
flux ion-resource set-status /tiny0/rack0/node1 up &&
flux ion-resource find status=down | grep null
'

# Same expectations as the all-nodes-down case, but driven by marking the
# rack vertex itself down: status=up prints "null", satisfiability passes,
# allocation fails, and marking the rack up restores the "null" status=down
# output.
test_expect_success 'jobs fail when all racks are marked down' '
flux ion-resource find status=down | grep null &&
flux ion-resource set-status /tiny0/rack0 down &&
flux ion-resource find status=up | grep null &&
flux ion-resource match satisfiability $jobspec &&
test_must_fail flux ion-resource match allocate $jobspec &&
flux ion-resource set-status /tiny0/rack0 up &&
flux ion-resource find status=down | grep null
'

# Tear down: remove the resource module loaded by the first test.
test_expect_success 'removing resource works' '
remove_resource
'

test_done
Loading