Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add by_rank map to graph store #674

Merged
merged 8 commits into from
Jun 19, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions resource/modules/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ sched_fluxion_resource_la_CXXFLAGS = \
$(FLUX_CORE_CFLAGS)
sched_fluxion_resource_la_LIBADD = \
../libresource.la \
$(FLUX_IDSET_LIBS) \
$(FLUX_CORE_LIBS) \
$(DL_LIBS) \
$(HWLOC_LIBS) \
Expand Down
78 changes: 75 additions & 3 deletions resource/modules/resource_match.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ extern "C" {
#include "config.h"
#endif
#include <flux/core.h>
#include <flux/idset.h>
#include <jansson.h>
#include "src/common/libutil/shortjansson.h"
}
Expand Down Expand Up @@ -91,6 +92,7 @@ resource_ctx_t::~resource_ctx_t ()
flux_msg_handler_delvec (handlers);
}


/******************************************************************************
* *
* Request Handler Prototypes *
Expand Down Expand Up @@ -1114,26 +1116,96 @@ static void info_request_cb (flux_t *h, flux_msg_handler_t *w,
flux_log_error (h, "%s: flux_respond_error", __FUNCTION__);
}

static int get_stat_by_rank (std::shared_ptr<resource_ctx_t>& ctx, json_t *o)
{
int rc = -1;
int saved_errno = 0;
char *str = nullptr;
struct idset *ids = nullptr;
std::map<size_t, struct idset *> s2r;

for (auto &kv : ctx->db->metadata.by_rank) {
if (kv.first == -1)
continue;
SteVwonder marked this conversation as resolved.
Show resolved Hide resolved
if (s2r.find (kv.second.size ()) == s2r.end ()) {
if ( !(ids = idset_create (0, IDSET_FLAG_AUTOGROW)))
goto done;
s2r[kv.second.size ()] = ids;
}
if ( (rc = idset_set (s2r[kv.second.size ()],
static_cast<unsigned int> (kv.first))) < 0)
goto done;
}

for (auto &kv : s2r) {
if ( !(str = idset_encode (kv.second,
IDSET_FLAG_BRACKETS | IDSET_FLAG_RANGE))) {
rc = -1;
goto done;
}
if ( (rc = json_object_set_new (o, str,
json_integer (static_cast<json_int_t> (kv.first)))) < 0) {
errno = ENOMEM;
goto done;
}
saved_errno = errno;
free (str);
errno = saved_errno;
str = nullptr;
}

done:
for (auto &kv : s2r)
idset_destroy (kv.second);
saved_errno = errno;
s2r.clear ();
free (str);
errno = saved_errno;
return rc;
}

static void stat_request_cb (flux_t *h, flux_msg_handler_t *w,
const flux_msg_t *msg, void *arg)
{
std::shared_ptr<resource_ctx_t> ctx = getctx ((flux_t *)arg);
int saved_errno;
json_t *o = nullptr;
double avg = 0.0f;
double min = 0.0f;

if (ctx->perf.njobs) {
avg = ctx->perf.accum / (double)ctx->perf.njobs;
min = ctx->perf.min;
}
if (flux_respond_pack (h, msg, "{s:I s:I s:f s:I s:f s:f s:f}",
if ( !(o = json_object ())) {
errno = ENOMEM;
goto error;
}
if (get_stat_by_rank (ctx, o) < 0) {
flux_log_error (h, "%s: get_stat_by_rank", __FUNCTION__);
goto error_free;
}
if (flux_respond_pack (h, msg, "{s:I s:I s:o s:f s:I s:f s:f s:f}",
"V", num_vertices (ctx->db->resource_graph),
"E", num_edges (ctx->db->resource_graph),
"by_rank", o,
"load-time", ctx->perf.load,
"njobs", ctx->perf.njobs,
"min-match", min,
"max-match", ctx->perf.max,
"avg-match", avg) < 0)
flux_log_error (h, "%s", __FUNCTION__);
"avg-match", avg) < 0) {
flux_log_error (h, "%s: flux_respond_pack", __FUNCTION__);
}

return;

error_free:
saved_errno = errno;
json_decref (o);
errno = saved_errno;
error:
if (flux_respond_error (h, msg, errno, NULL) < 0)
flux_log_error (h, "%s: flux_respond_error", __FUNCTION__);
}

static inline int64_t next_jobid (const std::map<uint64_t,
Expand Down
1 change: 1 addition & 0 deletions resource/readers/resource_reader_grug.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,7 @@ vtx_t dfs_emitter_t::emit_vertex (ggv_t u, gge_t e, const gg_t &recipe,
m.by_path[g[v].paths[ssys]] = v;
m.by_type[g[v].type].push_back (v);
m.by_name[g[v].name].push_back (v);
m.by_rank[m_rank].push_back (v);
return v;
}

Expand Down
1 change: 1 addition & 0 deletions resource/readers/resource_reader_hwloc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ vtx_t resource_reader_hwloc_t::add_new_vertex (resource_graph_t &g,
m.by_path[g[v].paths[subsys]] = v;
m.by_type[g[v].type].push_back (v);
m.by_name[g[v].name].push_back (v);
m.by_rank[rank].push_back (v);
return v;
}

Expand Down
1 change: 1 addition & 0 deletions resource/readers/resource_reader_jgf.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,7 @@ int resource_reader_jgf_t::add_graph_metadata (vtx_t v,
}
m.by_type[g[v].type].push_back (v);
m.by_name[g[v].name].push_back (v);
m.by_rank[g[v].rank].push_back (v);
rc = 0;

done:
Expand Down
1 change: 1 addition & 0 deletions resource/store/resource_graph_store.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ struct resource_graph_metadata_t {
std::map<subsystem_t, relation_infra_t> v_rt_edges;
std::map<std::string, std::vector <vtx_t>> by_type;
std::map<std::string, std::vector <vtx_t>> by_name;
std::map<int64_t, std::vector <vtx_t>> by_rank;
milroy marked this conversation as resolved.
Show resolved Hide resolved
std::map<std::string, vtx_t> by_path;
};

Expand Down
5 changes: 5 additions & 0 deletions resource/utilities/resource-query.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -500,6 +500,11 @@ static int populate_resource_db (std::shared_ptr<resource_context_t> &ctx)
ctx->db->metadata.by_name.size () << std::endl;
std::cout << "INFO: by_path Key-Value Pairs: " <<
ctx->db->metadata.by_path.size () << std::endl;
for (auto it = ctx->db->metadata.by_rank.begin ();
it != ctx->db->metadata.by_rank.end (); ++it) {
std::cout << "INFO: number of vertices with rank "
<< it->first << ": " << it->second.size () << "\n";
}
}

done:
Expand Down
3 changes: 2 additions & 1 deletion t/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ TESTS = \
t4009-match-update.t \
t5000-valgrind.t \
t6000-graph-size.t \
t6001-match-formats.t
t6001-match-formats.t \
t6002-graph-hwloc.t

check_SCRIPTS = $(TESTS)

Expand Down
1 change: 1 addition & 0 deletions t/scripts/flux-ion-resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ def stat_action (args):
resp = r.rpc_stat ()
print ("Num. of Vertices: ", resp['V'])
print ("Num. of Edges: ", resp['E'])
print ("Num. of Vertices by Rank: ", json.dumps (resp['by_rank']))
print ("Graph Load Time: ", resp['load-time'], "Secs")
print ("Num. of Jobs Matched: ", resp['njobs'])
print ("Min. Match Time: ", resp['min-match'], "Secs")
Expand Down
42 changes: 33 additions & 9 deletions t/t6000-graph-size.t
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,10 @@ test_expect_success "vertex/edge counts for a tiny machine are correct" '
test ${edg} -eq 198
'

test_expect_success "by_type, by_name, and by_path map sizes are correct \
for GRUG" '
# Note that by default the rank is -1, meaning that the by_rank map
# contains the same number of vertices as the overall resource graph.
test_expect_success "by_type, by_name, by_path, and by_rank map sizes are \
correct for GRUG" '
echo "quit" > input2.txt &&
${query} -L ${tiny_grug} -e -S CA -P high < input2.txt > out2.txt &&
by_type=$(grep "by_type" out2.txt | sed "s/INFO: by_type Key-Value \
Expand All @@ -30,13 +32,21 @@ Pairs: //") &&
Pairs: //") &&
by_path=$(grep "by_path" out2.txt | sed "s/INFO: by_path Key-Value \
Pairs: //") &&
by_rank=$(grep "number of" out2.txt | sed "s/INFO: number of \
vertices with rank //") &&
rank=$( echo ${by_rank} | sed "s/:.*//" ) &&
nvertices=$( echo ${by_rank} | sed "s/[^:]*://" ) &&
test ${by_type} -eq 7 &&
test ${by_name} -eq 52 &&
test ${by_path} -eq 100
test ${by_path} -eq 100 &&
test ${rank} -eq -1 &&
test ${nvertices} -eq 100
'

test_expect_success "by_type, by_name, and by_path map sizes are correct \
for JGF" '
# Note that by default the rank is -1, meaning that the by_rank map
# contains the same number of vertices as the overall resource graph.
test_expect_success "by_type, by_name, by_path, and by_rank map sizes are \
milroy marked this conversation as resolved.
Show resolved Hide resolved
correct for JGF." '
echo "quit" > input3.txt &&
${query} -L ${tiny_jgf} -e -S CA -P high -f jgf < input3.txt > \
out3.txt &&
Expand All @@ -46,13 +56,21 @@ Pairs: //") &&
Pairs: //") &&
by_path=$(grep "by_path" out3.txt | sed "s/INFO: by_path Key-Value \
Pairs: //") &&
by_rank=$(grep "number of" out3.txt | sed "s/INFO: number of \
vertices with rank //") &&
rank=$( echo ${by_rank} | sed "s/:.*//" ) &&
nvertices=$( echo ${by_rank} | sed "s/[^:]*://" ) &&
test ${by_type} -eq 7 &&
test ${by_name} -eq 52 &&
test ${by_path} -eq 100
test ${by_path} -eq 100 &&
test ${rank} -eq -1 &&
test ${nvertices} -eq 100
'

test_expect_success "by_type, by_name, and by_path map sizes are correct \
for hwloc" '
# Note that by default the rank is -1, meaning that the by_rank map
# contains the same number of vertices as the overall resource graph.
test_expect_success "by_type, by_name, by_path, and by_rank map sizes are \
correct for hwloc" '
echo "quit" > input4.txt &&
${query} -L ${exclusive_001N_hwloc} -e -S CA -P high -f hwloc < \
input4.txt > out4.txt &&
Expand All @@ -62,9 +80,15 @@ Pairs: //") &&
Pairs: //") &&
by_path=$(grep "by_path" out4.txt | sed "s/INFO: by_path Key-Value \
Pairs: //") &&
by_rank=$(grep "number of" out4.txt | sed "s/INFO: number of \
vertices with rank //") &&
rank=$( echo ${by_rank} | sed "s/:.*//" ) &&
nvertices=$( echo ${by_rank} | sed "s/[^:]*://" ) &&
test ${by_type} -eq 7 &&
test ${by_name} -eq 21 &&
test ${by_path} -eq 21
test ${by_path} -eq 21 &&
test ${rank} -eq -1 &&
test ${nvertices} -eq 21
'

test_expect_success "--reserve-vtx-vec works" '
Expand Down
41 changes: 41 additions & 0 deletions t/t6002-graph-hwloc.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#!/bin/sh

test_description='Test Graph Data Store under the Fluxion Resource Module'

. `dirname $0`/sharness.sh

hwloc_basepath=`readlink -e ${SHARNESS_TEST_SRCDIR}/data/hwloc-data`
# 4 brokers, each (exclusively) have: 1 node, 2 sockets, 16 cores (8 per socket)
excl_4N4B="${hwloc_basepath}/004N/exclusive/04-brokers"

verify() {
local of=$1
echo "{\"[0-3]\": 37}" | jq ' ' > ref.out
cat ${of} | grep Rank: | awk '{ print $6 $7}' | jq ' ' > cmp.out
diff cmp.out ref.out
return $?
}

skip_all_unless_have jq

test_under_flux 4

test_expect_success 'qmanager: hwloc reload works' '
flux hwloc reload ${excl_4N4B}
'

test_expect_success 'qmanager: loading resource and qmanager modules works' '
flux module remove sched-simple &&
load_resource prune-filters=ALL:core subsystems=containment policy=low
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(Mainly a note to myself)

We may need to add an whitelist/allowlist here to make this test work under both hwloc1 and hwloc2, since the number of resources under each execution target may differ (e.g., numanode vs no numanode). Since this PR is on the critical path, I wouldn't worry about it right now. If it does turn out to be an issue, I can handle it in my upcoming hwloc2 PR.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks and this makes sense. Yes, let's target this work when hwloc2 PR is posted. We can either solve this w/ allowlist or by calculating number of ranks x per rank vertices = overall vertex count.

'

test_expect_success 'qmanager: graph stat as expected' '
flux ion-resource stat > stat.out &&
verify stat.out
'

test_expect_success 'removing resource and qmanager modules' '
remove_resource
'

test_done