From 2f11ff600848b17d246e73bf824ad23895cb21fd Mon Sep 17 00:00:00 2001 From: Jim Garlick Date: Wed, 3 Aug 2022 12:59:59 -0700 Subject: [PATCH 1/8] broker: drop unnecessary include directive Problem: kary.h is included by broker.c but that class is not used there. Drop include directive. --- src/broker/broker.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/broker/broker.c b/src/broker/broker.c index b406e3418252..3300096c0325 100644 --- a/src/broker/broker.c +++ b/src/broker/broker.c @@ -37,7 +37,6 @@ #include "src/common/libutil/cleanup.h" #include "src/common/libidset/idset.h" #include "src/common/libutil/ipaddr.h" -#include "src/common/libutil/kary.h" #include "src/common/libpmi/pmi.h" #include "src/common/libpmi/pmi_strerror.h" #include "src/common/libutil/fsd.h" From 9b77c3e0118bc58c997cd088bd167364fa57651e Mon Sep 17 00:00:00 2001 From: Jim Garlick Date: Sun, 7 Aug 2022 06:11:45 -0700 Subject: [PATCH 2/8] broker: fix comment typo Problem: overlay function comment refers to itself using the name of another function, potentially creating confusion. Correct comment. --- src/broker/overlay.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/broker/overlay.c b/src/broker/overlay.c index adce6f9afc3e..76faf71b35cc 100644 --- a/src/broker/overlay.c +++ b/src/broker/overlay.c @@ -406,7 +406,7 @@ static void update_torpid_children (struct overlay *ov) } } -/* N.B. overlay_child_status_update() ensures child_lookup() only +/* N.B. overlay_child_status_update() ensures child_lookup_online() only * succeeds for online peers. */ static struct child *child_lookup_online (struct overlay *ov, const char *id) From 7a44edd7a91b73ef1969b1f352ffb885db1107db Mon Sep 17 00:00:00 2001 From: Jim Garlick Date: Wed, 3 Aug 2022 15:24:02 -0700 Subject: [PATCH 3/8] broker: add topology class Problem: the kary class used by the overlay network to calculate routes and peers is limited to complete k-ary trees, but soon non-complete tree topologies will be required. Add a new topology class which represents arbitrary tree topologies. The default topology is flat. A complete k-ary topology may be set with topology_set_kary(). Methods are added to support the queries needed by the broker overlay network. Add unit test. --- src/broker/Makefile.am | 12 +- src/broker/test/topology.c | 337 +++++++++++++++++++++++++++++++++++++ src/broker/topology.c | 305 +++++++++++++++++++++++++++++++++ src/broker/topology.h | 59 +++++++ 4 files changed, 711 insertions(+), 2 deletions(-) create mode 100644 src/broker/test/topology.c create mode 100644 src/broker/topology.c create mode 100644 src/broker/topology.h diff --git a/src/broker/Makefile.am b/src/broker/Makefile.am index 87048e582888..3047ec300bf6 100644 --- a/src/broker/Makefile.am +++ b/src/broker/Makefile.am @@ -65,7 +65,9 @@ libbroker_la_SOURCES = \ groups.h \ groups.c \ shutdown.h \ - shutdown.c + shutdown.c \ + topology.h \ + topology.c flux_broker_LDADD = \ $(builddir)/libbroker.la \ @@ -88,7 +90,8 @@ TESTS = test_attr.t \ test_pmiutil.t \ test_boot_config.t \ test_runat.t \ - test_overlay.t + test_overlay.t \ + test_topology.t test_ldadd = \ $(builddir)/libbroker.la \ @@ -150,3 +153,8 @@ test_overlay_t_SOURCES = test/overlay.c test_overlay_t_CPPFLAGS = $(test_cppflags) test_overlay_t_LDADD = $(test_ldadd) test_overlay_t_LDFLAGS = $(test_ldflags) + +test_topology_t_SOURCES = test/topology.c +test_topology_t_CPPFLAGS = $(test_cppflags) +test_topology_t_LDADD = $(test_ldadd) +test_topology_t_LDFLAGS = $(test_ldflags) diff --git a/src/broker/test/topology.c b/src/broker/test/topology.c new file mode 100644 index 000000000000..7bd2aea242ac --- /dev/null +++ b/src/broker/test/topology.c @@ -0,0 +1,337 @@ +/************************************************************\ + * Copyright 2022 Lawrence Livermore National Security, LLC + * (c.f. AUTHORS, NOTICE.LLNS, COPYING) + * + * This file is part of the Flux resource manager framework. + * For details, see https://github.com/flux-framework. + * + * SPDX-License-Identifier: LGPL-3.0 +\************************************************************/ + +#if HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include + +#include "src/common/libtap/tap.h" + +#include "src/broker/topology.h" + +void check_subtree (json_t *o, + const char *s, + int exp_rank, + int exp_size, + size_t exp_count) +{ + int rank = -1; + int size = -1; + json_t *children = NULL; + int rc = -1; + + if (o) { + rc = json_unpack (o, + "{s:i s:i s:o}", + "rank", &rank, + "size", &size, + "children", &children); + } + + diag ("rank=%d size=%d children=%zu", + rank, + size, + children ? json_array_size (children) : -1); + + ok (rc == 0 + && rank == exp_rank + && size == exp_size + && children != NULL + && json_array_size (children) == exp_count, + "topology_get_json_subtree_at %s returns expected object", s); +} + +void test_flat (void) +{ + struct topology *topo; + int child_ranks[15]; + json_t *o; + bool pass; + + topo = topology_create (16); + ok (topo != NULL, + "topology_create size=16 works"); + ok (topology_set_kary (topo, 0) == 0, + "topology_set_kary k=0 allowed to indicate flat topo"); + ok (topology_get_size (topo) == 16, + "topology_get_size returns 16"); + ok (topology_get_rank (topo) == 0, + "topology_get_rank returns 0"); + ok (topology_get_parent (topo) < 0, + "topology_get_parent fails"); + ok (topology_get_child_ranks (topo, child_ranks, 15) == 15, + "topology_get_child_ranks returns 15"); + + pass = true; + for (int i = 0; i < 15; i++) { + if (child_ranks[i] != i + 1) + pass = false; + } + + ok (pass == true, + "child_ranks array contains ranks 1-15"); + ok (topology_get_level (topo) == 0, + "topology_get_level returns 0"); + ok (topology_get_maxlevel (topo) == 1, + "topology_get_maxlevel returns 1"); + ok (topology_get_descendant_count (topo) == 15, + "topology_get_descendant_count returns 15"); + ok (topology_get_child_route (topo, 5) == 5, + "topology_get_child_route rank=5 returns 5"); + + o = topology_get_json_subtree_at (topo, 0); + check_subtree (o, "rank=0", 0, 16, 15); + json_decref (o); + o = topology_get_json_subtree_at (topo, 15); + check_subtree (o, "rank=15", 15, 1, 0); + json_decref (o); + + ok (topology_incref (topo) == topo, + "topology_incref returns topo pointer"); + topology_decref (topo); + topology_decref (topo); +} + +void test_k1 (void) +{ + struct topology *topo; + int child_ranks[15]; + json_t *o; + + topo = topology_create (16); + ok (topo != NULL, + "topology_create size=16 works"); + ok (topology_set_kary (topo, 1) == 0, + "topology_set_kary k=1 works"); + ok (topology_get_rank (topo) == 0, + "topology_get_rank returns 0"); + ok (topology_get_size (topo) == 16, + "topology_get_size returns 16"); + ok (topology_get_parent (topo) < 0, + "topology_get_parent fails"); + ok (topology_get_child_ranks (topo, child_ranks, 15) == 1, + "topology_get_child_ranks returns 1"); + + ok (child_ranks[0] == 1, + "child_ranks array contains ranks 1"); + ok (topology_get_level (topo) == 0, + "topology_get_level returns 0"); + ok (topology_get_maxlevel (topo) == 15, + "topology_get_maxlevel returns 15"); + ok (topology_get_descendant_count (topo) == 15, + "topology_get_descendant_count returns 15"); + ok (topology_get_child_route (topo, 5) == 1, + "topology_get_child_route rank=5 returns 1"); + + o = topology_get_json_subtree_at (topo, 0); + check_subtree (o, "rank=0", 0, 16, 1); + json_decref (o); + o = topology_get_json_subtree_at (topo, 1); + check_subtree (o, "rank=1", 1, 15, 1); + json_decref (o); + o = topology_get_json_subtree_at (topo, 15); + check_subtree (o, "rank=15", 15, 1, 0); + json_decref (o); + + topology_decref (topo); +} + +void test_k2 (void) +{ + struct topology *topo; + int child_ranks[15]; + json_t *o; + + topo = topology_create (16); + ok (topo != NULL, + "topology_create size=16 works"); + ok (topology_set_kary (topo, 2) == 0, + "topology_set_kary k=2 works"); + ok (topology_get_rank (topo) == 0, + "topology_get_rank returns 0"); + ok (topology_get_size (topo) == 16, + "topology_get_size returns 16"); + ok (topology_get_parent (topo) < 0, + "topology_get_parent fails"); + ok (topology_get_child_ranks (topo, child_ranks, 15) == 2, + "topology_get_child_ranks returns 2"); + + ok (child_ranks[0] == 1 && child_ranks[1] == 2, + "child_ranks array contains ranks 1-2"); + ok (topology_get_level (topo) == 0, + "topology_get_level returns 0"); + ok (topology_get_maxlevel (topo) == 4, + "topology_get_maxlevel returns 4"); + ok (topology_get_descendant_count (topo) == 15, + "topology_get_descendant_count returns 15"); + ok (topology_get_child_route (topo, 5) == 2, + "topology_get_child_route rank=5 returns 2"); + + o = topology_get_json_subtree_at (topo, 0); + check_subtree (o, "rank=0", 0, 16, 2); + json_decref (o); + o = topology_get_json_subtree_at (topo, 1); + check_subtree (o, "rank=1", 1, 8, 2); + json_decref (o); + o = topology_get_json_subtree_at (topo, 2); + check_subtree (o, "rank=2", 2, 7, 2); + json_decref (o); + o = topology_get_json_subtree_at (topo, 3); + check_subtree (o, "rank=3", 3, 4, 2); + json_decref (o); + o = topology_get_json_subtree_at (topo, 4); + check_subtree (o, "rank=4", 4, 3, 2); + json_decref (o); + o = topology_get_json_subtree_at (topo, 15); + check_subtree (o, "rank=15", 15, 1, 0); + json_decref (o); + + topology_decref (topo); +} + +void test_k2_router (void) +{ + struct topology *topo; + int child_ranks[15]; + json_t *o; + + topo = topology_create (16); + ok (topo != NULL, + "topology_create size=16 works"); + ok (topology_set_kary (topo, 2) == 0, + "topology_set_kary k=2 works"); + ok (topology_set_rank (topo, 1) == 0, + "topology_set_rank 1 works"); + ok (topology_get_rank (topo) == 1, + "topology_get_rank returns 1"); + ok (topology_get_size (topo) == 16, + "topology_get_size returns 16"); + ok (topology_get_parent (topo) == 0, + "topology_get_parent returns 0"); + ok (topology_get_child_ranks (topo, child_ranks, 15) == 2, + "topology_get_child_ranks returns 2"); + ok (child_ranks[0] == 3 && child_ranks[1] == 4, + "child_ranks array contains ranks 3-4"); + ok (topology_get_level (topo) == 1, + "topology_get_level returns 1"); + ok (topology_get_maxlevel (topo) == 4, + "topology_get_maxlevel returns 4"); + ok (topology_get_descendant_count (topo) == 7, + "topology_get_descendant_count returns 7"); + ok (topology_get_child_route (topo, 10) == 4, + "topology_get_child_route rank=10 returns 4"); + + o = topology_get_json_subtree_at (topo, 1); + check_subtree (o, "rank=1", 1, 8, 2); + json_decref (o); + + topology_decref (topo); +} + +void test_invalid (void) +{ + struct topology *topo; + int a[16]; + + if (!(topo = topology_create (16))) + BAIL_OUT ("could not create topology"); + + errno = 0; + ok (topology_create (0) == NULL && errno == EINVAL, + "topology_create size=0 fails with EINVAL"); + + lives_ok ({topology_decref (NULL);}, + "topology_decref topo=NULL doesn't crash"); + + ok (topology_incref (NULL) == NULL, + "topology_incref topo=NULL returns NULL"); + + errno = 0; + ok (topology_set_kary (NULL, 2) < 0 && errno == EINVAL, + "topology_set_kary topo=NULL fails with EINVAL"); + + errno = 0; + ok (topology_set_rank (NULL, 0) < 0 && errno == EINVAL, + "topology_set_rank topo=NULL fails with EINVAL"); + errno = 0; + ok (topology_set_rank (topo, -1) < 0 && errno == EINVAL, + "topology_set_rank rank=-1 fails with EINVAL"); + + ok (topology_get_rank (NULL) == -1, + "topology_get_rank topo=NULL returns -1"); + ok (topology_get_size (NULL) == -1, + "topology_get_rank topo=NULL returns -1"); + ok (topology_get_parent (NULL) == -1, + "topology_get_parent topo=NULL returns -1"); + ok (topology_get_level (NULL) == 0, + "topology_get_level topo=NULL returns 0"); + ok (topology_get_maxlevel (NULL) == 0, + "topology_get_maxlevel topo=NULL returns 0"); + + errno = 0; + ok (topology_get_child_ranks (NULL, NULL, 0) == -1 && errno == EINVAL, + "topology_get_child_ranks topo=NULL fails with EINVAL"); + errno = 0; + ok (topology_get_child_ranks (topo, NULL, 2) == -1 && errno == EINVAL, + "topology_get_child_ranks buf=NULL size>0 fails with EINVAL"); + errno = 0; + ok (topology_get_child_ranks (topo, a, 2) == -1 && errno == EOVERFLOW, + "topology_get_child_ranks size=too short fails with EOVERFLOW"); + + ok (topology_get_descendant_count (NULL) == 0, + "topology_get_descendant_count topo=NULL returns 0"); + + ok (topology_get_child_route (NULL, 1) == -1, + "topology_get_child_route topo=NULL returns -1"); + ok (topology_get_child_route (topo, 0) == -1, + "topology_get_child_route rank=0 returns -1"); + ok (topology_get_child_route (topo, 99) == -1, + "topology_get_child_route rank=99 returns -1"); + + errno = 0; + ok (topology_get_json_subtree_at (NULL, 0) == NULL && errno == EINVAL, + "topology_get_json_subtree_at topo=NULL fails with EINVAL"); + errno = 0; + ok (topology_get_json_subtree_at (topo, -1) == NULL && errno == EINVAL, + "topology_get_json_subtree_at rank=-1 fails with EINVAL"); + + errno = 0; + ok (topology_aux_get (NULL, 0, "foo") == NULL && errno == EINVAL, + "topology_aux_get topo=NULL fails with EINVAL"); + errno = 0; + ok (topology_aux_get (topo, -1, "foo") == NULL && errno == EINVAL, + "topology_aux_get rank=-1 fails with EINVAL"); + errno = 0; + ok (topology_aux_get (topo, 99, "foo") == NULL && errno == EINVAL, + "topology_aux_get rank=99 fails with EINVAL"); + errno = 0; + ok (topology_aux_get (topo, 0, "foo") == NULL && errno == ENOENT, + "topology_aux_get key=unknown fails with ENOENT"); + + topology_decref (topo); +} + +int main (int argc, char *argv[]) +{ + plan (NO_PLAN); + + test_flat (); + test_k1 (); + test_k2 (); + test_k2_router (); + test_invalid (); + + done_testing (); +} + +// vi: ts=4 sw=4 expandtab diff --git a/src/broker/topology.c b/src/broker/topology.c new file mode 100644 index 000000000000..4d8f8e4469ad --- /dev/null +++ b/src/broker/topology.c @@ -0,0 +1,305 @@ +/************************************************************\ + * Copyright 2022 Lawrence Livermore National Security, LLC + * (c.f. AUTHORS, NOTICE.LLNS, COPYING) + * + * This file is part of the Flux resource manager framework. + * For details, see https://github.com/flux-framework. + * + * SPDX-License-Identifier: LGPL-3.0 +\************************************************************/ + +/* topology.c - create arbitrary TBON topology and allow useful queries */ + +#if HAVE_CONFIG_H +#include "config.h" +#endif +#include +#include + +#include "src/common/libutil/kary.h" +#include "src/common/libutil/errno_safe.h" +#include "src/common/libutil/aux.h" + +#include "topology.h" + +struct node { + int parent; + struct aux_item *aux; +}; + +struct topology { + int rank; + int size; + int refcount; + struct node *node; +}; + +void topology_decref (struct topology *topo) +{ + if (topo && --topo->refcount == 0) { + int saved_errno = errno; + for (int i = 0; i < topo->size; i++) + aux_destroy (&topo->node[i].aux); + free (topo); + errno = saved_errno; + } +} + +struct topology *topology_incref (struct topology *topo) +{ + if (topo) + topo->refcount++; + return topo; +} + +struct topology *topology_create (int size) +{ + struct topology *topo; + + if (size < 1) { + errno = EINVAL; + return NULL; + } + if (!(topo = calloc (1, sizeof (*topo) + sizeof (topo->node[0]) * size))) + return NULL; + topo->refcount = 1; + topo->size = size; + topo->node = (struct node *)(topo + 1); + topo->node[0].parent = -1; + // topo->node is 0-initialized, so rank 0 is default parent of all nodes + return topo; +} + +int topology_set_kary (struct topology *topo, int k) +{ + if (!topo || k < 0) { + errno = EINVAL; + return -1; + } + if (k > 0) { + for (int i = 0; i < topo->size; i++) { + topo->node[i].parent = kary_parentof (k, i); + if (topo->node[i].parent == KARY_NONE) + topo->node[i].parent = -1; + } + } + return 0; +} + +int topology_set_rank (struct topology *topo, int rank) +{ + if (!topo || rank < 0 || rank >= topo->size) { + errno = EINVAL; + return -1; + } + topo->rank = rank; + return 0; +} + +void *topology_aux_get (struct topology *topo, int rank, const char *name) +{ + if (!topo || rank < 0 || rank >= topo->size) { + errno = EINVAL; + return NULL; + } + return aux_get (topo->node[rank].aux, name); +} + +int topology_aux_set (struct topology *topo, + int rank, + const char *name, + void *val, + flux_free_f destroy) +{ + if (!topo || rank < 0 || rank >= topo->size) { + errno = EINVAL; + return -1; + } + return aux_set (&topo->node[rank].aux, name, val, destroy); +} + +int topology_get_rank (struct topology *topo) +{ + return topo ? topo->rank : -1; +} + +int topology_get_size (struct topology *topo) +{ + return topo ? topo->size : -1; +} + +// O(1) +int topology_get_parent (struct topology *topo) +{ + return topo ? topo->node[topo->rank].parent : -1; +} + +// O(size) +static ssize_t topology_get_child_ranks_at (struct topology *topo, + int rank, + int *child_ranks, + size_t child_ranks_length) +{ + ssize_t count = 0; + + if (!topo + || rank < 0 + || rank >= topo->size + || (child_ranks_length > 0 && child_ranks == NULL)) { + errno = EINVAL; + return -1; + } + for (int i = 0; i < topo->size; i++) { + if (topo->node[i].parent == rank) { + if (child_ranks) { + if (count >= child_ranks_length) { + errno = EOVERFLOW; + return -1; + } + child_ranks[count] = i; + } + count++; + } + } + + return count; +} + +ssize_t topology_get_child_ranks (struct topology *topo, + int *child_ranks, + size_t child_ranks_length) +{ + if (!topo) { + errno = EINVAL; + return -1; + } + return topology_get_child_ranks_at (topo, + topo->rank, + child_ranks, + child_ranks_length); +} + +// O(level) +int topology_get_level (struct topology *topo) +{ + int level = 0; + + if (topo) { + int rank = topo->rank; + while (rank != 0) { + rank = topo->node[rank].parent; + level++; + } + } + return level; +} + +// O(size*level) +int topology_get_maxlevel (struct topology *topo) +{ + int maxlevel = 0; + + if (topo) { + for (int i = 0; i < topo->size; i++) { + int rank = i; + int level = 0; + while (rank != 0) { + rank = topo->node[rank].parent; + level++; + } + if (maxlevel < level) + maxlevel = level; + } + } + return maxlevel; +} + +// O(level) +static bool is_descendant_of (struct topology *topo, int rank, int ancestor) +{ + if (rank < 0 + || ancestor < 0 + || !topo + || rank >= topo->size + || ancestor >= topo->size + || topo->node[rank].parent == -1) + return false; + if (topo->node[rank].parent == ancestor) + return true; + return is_descendant_of (topo, topo->node[rank].parent, ancestor); +} + +// O(size*level) +int topology_get_descendant_count_at (struct topology *topo, int rank) +{ + int count = 0; + if (topo) { + for (int i = 0; i < topo->size; i++) { + if (is_descendant_of (topo, i, rank)) + count++; + } + } + return count; +} + +int topology_get_descendant_count (struct topology *topo) +{ + return topology_get_descendant_count_at (topo, topo ? topo->rank : 0); +} + +// O(level) +int topology_get_child_route (struct topology *topo, int rank) +{ + if (!topo || rank <= 0 || rank >= topo->size) + return -1; + if (topo->node[rank].parent == topo->rank) + return rank; + return topology_get_child_route (topo, topo->node[rank].parent); +} + +json_t *topology_get_json_subtree_at (struct topology *topo, int rank) +{ + int child_count; + int *child_ranks = NULL; + json_t *o; + json_t *children = NULL; + json_t *child; + int size; + + if ((child_count = topology_get_child_ranks_at (topo, rank, NULL, 0)) < 0 + || !(child_ranks = calloc (child_count, sizeof (child_ranks[0]))) + || topology_get_child_ranks_at (topo, + rank, + child_ranks, + child_count) < 0) + goto error; + if (!(children = json_array())) + goto nomem; + for (int i = 0; i < child_count; i++) { + if (!(child = topology_get_json_subtree_at (topo, child_ranks[i]))) + goto error; + if (json_array_append_new (children, child) < 0) { + json_decref (child); + goto nomem; + } + } + + size = topology_get_descendant_count_at (topo, rank) + 1; + if (!(o = json_pack ("{s:i s:i s:O}", + "rank", rank, + "size", size, + "children", children))) + goto nomem; + json_decref (children); + free (child_ranks); + return o; +nomem: + errno = ENOMEM; +error: + ERRNO_SAFE_WRAP (json_decref, children); + ERRNO_SAFE_WRAP (free, child_ranks); + return NULL; + +} + +// vi:ts=4 sw=4 expandtab diff --git a/src/broker/topology.h b/src/broker/topology.h new file mode 100644 index 000000000000..64b4db2f99bd --- /dev/null +++ b/src/broker/topology.h @@ -0,0 +1,59 @@ +/************************************************************\ + * Copyright 2022 Lawrence Livermore National Security, LLC + * (c.f. AUTHORS, NOTICE.LLNS, COPYING) + * + * This file is part of the Flux resource manager framework. + * For details, see https://github.com/flux-framework. + * + * SPDX-License-Identifier: LGPL-3.0 +\************************************************************/ + +#ifndef _BROKER_TOPOLOGY_H +#define _BROKER_TOPOLOGY_H + +#include +#include +#include + +/* Create/destroy tree topology of size. + * The initial topology is "flat" (rank 0 is parent of all other ranks), + * and queries are from the perspective of rank 0. + */ +struct topology *topology_create (int size); +void topology_decref (struct topology *topo); +struct topology *topology_incref (struct topology *topo); + +/* Configure topology as a complete k-ary tree with fanout k + */ +int topology_set_kary (struct topology *topo, int k); + +/* Set "my rank", which provides the point of view for queries. + */ +int topology_set_rank (struct topology *topo, int rank); + +/* Associate aux data with rank for lookup in O(1*rank_aux_elements) + */ +void *topology_aux_get (struct topology *topo, int rank, const char *name); +int topology_aux_set (struct topology *topo, + int rank, + const char *name, + void *aux, + flux_free_f destroy); + +/* Queries + */ +int topology_get_rank (struct topology *topo); +int topology_get_size (struct topology *topo); +int topology_get_parent (struct topology *topo); +ssize_t topology_get_child_ranks (struct topology *topo, + int *child_ranks, + size_t child_ranks_length); +int topology_get_level (struct topology *topo); +int topology_get_maxlevel (struct topology *topo); +int topology_get_descendant_count (struct topology *topo); +int topology_get_child_route (struct topology *topo, int rank); +json_t *topology_get_json_subtree_at (struct topology *topo, int rank); + +#endif /* !_BROKER_TOPOLOGY_H */ + +// vi:ts=4 sw=4 expandtab From b4fa19615e36c6bb617c777841cd830869e2d4c6 Mon Sep 17 00:00:00 2001 From: Jim Garlick Date: Fri, 5 Aug 2022 11:52:58 -0700 Subject: [PATCH 4/8] broker: use topology class Problem: the broker overlay code uses the kary class to calculate peers and routes, but this will not work once the topology is allowed to have a non-complete tree shape. Switch to the topology class, which supports any tree shape. The topology is created in the bootstrap methods and passed to the overlay using overlay_set_topology(), which replaces overlay_set_geometry(). Use the topology_set_kary() method to enact a complete topology as before, but never call the kary_*() functions directly and remove all assuptions about complete trees from the code. Note that formerly the fact that child peers had contiguous ranks was used to directly index into the array of children when selecting a route. To avoid slowing down that critical path, use the topology aux container to store a pointer to the child peer struct. The container is accessed in O(1), and its items in O(N) where N is the item's position in the container. Update the overlay unit test. --- src/broker/boot_config.c | 14 ++++-- src/broker/boot_pmi.c | 39 +++++++++------ src/broker/overlay.c | 103 ++++++++++++++------------------------ src/broker/overlay.h | 5 +- src/broker/test/overlay.c | 23 ++++++--- 5 files changed, 88 insertions(+), 96 deletions(-) diff --git a/src/broker/boot_config.c b/src/broker/boot_config.c index c78447cbbc0d..2c9a8591c51a 100644 --- a/src/broker/boot_config.c +++ b/src/broker/boot_config.c @@ -22,12 +22,12 @@ #include #include "src/common/libutil/log.h" -#include "src/common/libutil/kary.h" #include "src/common/libutil/errno_safe.h" #include "src/common/libpmi/clique.h" #include "attr.h" #include "overlay.h" +#include "topology.h" #include "boot_config.h" @@ -458,6 +458,7 @@ int boot_config (flux_t *h, struct overlay *overlay, attr_t *attrs) uint32_t size; int fanout = overlay_get_fanout (overlay); json_t *hosts = NULL; + struct topology *topo = NULL; /* Ingest the [bootstrap] stanza. */ @@ -492,7 +493,10 @@ int boot_config (flux_t *h, struct overlay *overlay, attr_t *attrs) /* Tell overlay network this broker's rank and size. * If a curve certificate was provided, load it. */ - if (overlay_set_geometry (overlay, size, rank) < 0) + if (!(topo = topology_create (size)) + || topology_set_kary (topo, fanout) < 0 + || topology_set_rank (topo, rank) < 0 + || overlay_set_topology (overlay, topo) < 0) goto error; if (conf.curve_cert) { if (overlay_cert_load (overlay, conf.curve_cert) < 0) @@ -509,7 +513,7 @@ int boot_config (flux_t *h, struct overlay *overlay, attr_t *attrs) * attribute to the URI peers will connect to. If broker has no * downstream peers, set tbon.endpoint to NULL. */ - if (kary_childof (fanout, size, rank, 0) != KARY_NONE) { + if (topology_get_child_ranks (topo, NULL, 0) > 0) { char bind_uri[MAX_URI + 1]; char my_uri[MAX_URI + 1]; @@ -559,7 +563,7 @@ int boot_config (flux_t *h, struct overlay *overlay, attr_t *attrs) char parent_uri[MAX_URI + 1]; if (boot_config_geturibyrank (hosts, &conf, - kary_parentof (fanout, rank), + topology_get_parent (topo), parent_uri, sizeof (parent_uri)) < 0) goto error; @@ -584,9 +588,11 @@ int boot_config (flux_t *h, struct overlay *overlay, attr_t *attrs) goto error; } json_decref (hosts); + topology_decref (topo); return 0; error: ERRNO_SAFE_WRAP (json_decref, hosts); + ERRNO_SAFE_WRAP (topology_decref, topo); return -1; } diff --git a/src/broker/boot_pmi.c b/src/broker/boot_pmi.c index 565f3061c4ca..35406b715126 100644 --- a/src/broker/boot_pmi.c +++ b/src/broker/boot_pmi.c @@ -20,7 +20,6 @@ #include "src/common/libutil/log.h" #include "src/common/libutil/cleanup.h" #include "src/common/libutil/ipaddr.h" -#include "src/common/libutil/kary.h" #include "src/common/libutil/errno_safe.h" #include "src/common/libpmi/pmi.h" #include "src/common/libpmi/pmi_strerror.h" @@ -28,6 +27,7 @@ #include "attr.h" #include "overlay.h" +#include "topology.h" #include "boot_pmi.h" #include "pmiutil.h" @@ -179,7 +179,6 @@ static int set_hostlist_attr (attr_t *attrs, struct hostlist *hl) int boot_pmi (struct overlay *overlay, attr_t *attrs) { int fanout = overlay_get_fanout (overlay); - int rank; char key[64]; char val[1024]; char hostname[MAXHOSTNAMELEN + 1]; @@ -188,6 +187,9 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs) json_t *o; struct pmi_handle *pmi; struct pmi_params pmi_params; + struct topology *topo = NULL; + int child_count; + int *child_ranks = NULL; int result; const char *uri; int i; @@ -215,9 +217,10 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs) log_err ("error setting broker.mapping attribute"); goto error; } - if (overlay_set_geometry (overlay, - pmi_params.size, - pmi_params.rank) < 0) + if (!(topo = topology_create (pmi_params.size)) + || topology_set_kary (topo, fanout) < 0 + || topology_set_rank (topo, pmi_params.rank) < 0 + || overlay_set_topology (overlay, topo) < 0) goto error; if (gethostname (hostname, sizeof (hostname)) < 0) { log_err ("gethostname"); @@ -242,15 +245,17 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs) */ overlay_set_ipv6 (overlay, 1); + child_count = topology_get_child_ranks (topo, NULL, 0); + if (child_count > 0) { + if (!(child_ranks = calloc (child_count, sizeof (child_ranks[0]))) + || topology_get_child_ranks (topo, child_ranks, child_count) < 0) + goto error; + } + /* If there are to be downstream peers, then bind to socket and extract * the concretized URI for sharing with other ranks. - * N.B. there are no downstream peers if the 0th child of this rank - * in k-ary tree does not exist. */ - if (kary_childof (fanout, - pmi_params.size, - pmi_params.rank, - 0) != KARY_NONE) { + if (child_count > 0) { char buf[1024]; if (format_bind_uri (buf, sizeof (buf), attrs, pmi_params.rank) < 0) @@ -306,8 +311,8 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs) if (pmi_params.rank > 0) { const char *peer_pubkey; const char *peer_uri; + int rank = topology_get_parent (topo); - rank = kary_parentof (fanout, pmi_params.rank); if (snprintf (key, sizeof (key), "%d", rank) >= sizeof (key)) { log_msg ("pmi key string overflow"); goto error; @@ -343,12 +348,10 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs) /* Fetch the business card of children and inform overlay of public keys. */ - for (i = 0; i < fanout; i++) { + for (i = 0; i < child_count; i++) { const char *peer_pubkey; + int rank = child_ranks[i]; - rank = kary_childof (fanout, pmi_params.size, pmi_params.rank, i); - if (rank == KARY_NONE) - break; if (snprintf (key, sizeof (key), "%d", rank) >= sizeof (key)) { log_msg ("pmi key string overflow"); goto error; @@ -427,11 +430,15 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs) free (bizcard); broker_pmi_destroy (pmi); hostlist_destroy (hl); + free (child_ranks); + topology_decref (topo); return 0; error: free (bizcard); broker_pmi_destroy (pmi); hostlist_destroy (hl); + free (child_ranks); + topology_decref (topo); return -1; } diff --git a/src/broker/overlay.c b/src/broker/overlay.c index 76faf71b35cc..154ade9bb64f 100644 --- a/src/broker/overlay.c +++ b/src/broker/overlay.c @@ -25,7 +25,6 @@ #include "src/common/libzmqutil/monitor.h" #include "src/common/libczmqcontainers/czmq_containers.h" #include "src/common/libutil/log.h" -#include "src/common/libutil/kary.h" #include "src/common/libutil/cleanup.h" #include "src/common/libutil/fsd.h" #include "src/common/libutil/errno_safe.h" @@ -130,6 +129,7 @@ struct overlay { flux_msg_handler_t **handlers; flux_future_t *f_sync; + struct topology *topo; uint32_t size; uint32_t rank; int fanout; @@ -244,19 +244,22 @@ static void overlay_monitor_notify (struct overlay *ov, uint32_t rank) } } -static int child_count (uint32_t rank, uint32_t size, int k) +int overlay_set_topology (struct overlay *ov, struct topology *topo) { - int count; - for (count = 0; kary_childof (k, size, rank, count) != KARY_NONE; count++) - ; - return count; -} + int *child_ranks = NULL; + ssize_t child_count; -int overlay_set_geometry (struct overlay *ov, uint32_t size, uint32_t rank) -{ - ov->size = size; - ov->rank = rank; - ov->child_count = child_count (rank, size, ov->fanout); + ov->topo = topology_incref (topo); + /* Determine which ranks, if any are direct children of this one. + */ + if ((child_count = topology_get_child_ranks (topo, NULL, 0)) < 0 + || !(child_ranks = calloc (child_count, sizeof (child_ranks[0]))) + || topology_get_child_ranks (topo, child_ranks, child_count) < 0) + goto error; + + ov->size = topology_get_size (topo); + ov->rank = topology_get_rank (topo); + ov->child_count = child_count; if (ov->child_count > 0) { int i; @@ -268,24 +271,29 @@ int overlay_set_geometry (struct overlay *ov, uint32_t size, uint32_t rank) zhashx_set_key_destructor (ov->child_hash, NULL); for (i = 0; i < ov->child_count; i++) { struct child *child = &ov->children[i]; - child->rank = kary_childof (ov->fanout, size, rank, i); + child->rank = child_ranks[i]; child->status = SUBTREE_STATUS_OFFLINE; monotime (&child->status_timestamp); child->tracker = rpc_track_create (MSG_HASH_TYPE_UUID_MATCHTAG); if (!child->tracker) return -1; + if (topology_aux_set (topo, child->rank, "child", child, NULL) < 0) + return -1; } ov->status = SUBTREE_STATUS_PARTIAL; } else ov->status = SUBTREE_STATUS_FULL; monotime (&ov->status_timestamp); - if (rank > 0) { - ov->parent.rank = kary_parentof (ov->fanout, rank); + if (ov->rank > 0) { + ov->parent.rank = topology_get_parent (topo); ov->parent.tracker = rpc_track_create (MSG_HASH_TYPE_UUID_MATCHTAG); } - + free (child_ranks); return 0; +error: + free (child_ranks); + return -1; } int overlay_get_fanout (struct overlay *ov) @@ -435,31 +443,22 @@ bool overlay_msg_is_local (const flux_msg_t *msg) return (msg && flux_msg_aux_get (msg, "overlay::remote") == NULL); } -/* Given a rank, find a (direct) child peer. - * Since child ranks are numerically contiguous, perform a range check - * and index into the child array directly. +/* Lookup (direct) child peer by rank. * Returns NULL on lookup failure. */ static struct child *child_lookup_byrank (struct overlay *ov, uint32_t rank) { - uint32_t first; - int i; - - if ((first = kary_childof (ov->fanout, ov->size, ov->rank, 0)) == KARY_NONE - || (i = rank - first) < 0 - || i >= ov->child_count) - return NULL; - return &ov->children[i]; + return topology_aux_get (ov->topo, rank, "child"); } /* Look up child that provides route to 'rank' (NULL if none). */ static struct child *child_lookup_route (struct overlay *ov, uint32_t rank) { - uint32_t child_rank; + int child_rank; - child_rank = kary_child_route (ov->fanout, ov->size, ov->rank, rank); - if (child_rank == KARY_NONE) + child_rank = topology_get_child_route (ov->topo, rank); + if (child_rank < 0) return NULL; return child_lookup_byrank (ov, child_rank); } @@ -1379,19 +1378,17 @@ int overlay_register_attrs (struct overlay *overlay) return -1; if (attr_add_int (overlay->attrs, "tbon.level", - kary_levelof (overlay->fanout, overlay->rank), + topology_get_level (overlay->topo), FLUX_ATTRFLAG_IMMUTABLE) < 0) return -1; if (attr_add_int (overlay->attrs, "tbon.maxlevel", - kary_levelof (overlay->fanout, overlay->size - 1), + topology_get_maxlevel (overlay->topo), FLUX_ATTRFLAG_IMMUTABLE) < 0) return -1; if (attr_add_int (overlay->attrs, "tbon.descendants", - kary_sum_descendants (overlay->fanout, - overlay->size, - overlay->rank), + topology_get_descendant_count (overlay->topo), FLUX_ATTRFLAG_IMMUTABLE) < 0) return -1; @@ -1556,38 +1553,11 @@ const char *overlay_get_subtree_status (struct overlay *ov, int rank) */ json_t *overlay_get_subtree_topo (struct overlay *ov, int rank) { - json_t *o; - json_t *children; - json_t *child; - int i, r; - int size; - - if (!(children = json_array())) - goto nomem; - for (i = 0; i < ov->fanout; i++) { - r = kary_childof (ov->fanout, ov->size, rank, i); - if (r == KARY_NONE) - break; - if (!(child = overlay_get_subtree_topo (ov, r))) - goto error; - if (json_array_append_new (children, child) < 0) { - json_decref (child); - goto nomem; - } + if (!ov) { + errno = EINVAL; + return NULL; } - size = kary_sum_descendants (ov->fanout, ov->size, rank) + 1; - if (!(o = json_pack ("{s:i s:i s:O}", - "rank", rank, - "size", size, - "children", children))) - goto nomem; - json_decref (children); - return o; -nomem: - errno = ENOMEM; -error: - ERRNO_SAFE_WRAP (json_decref, children); - return NULL; + return topology_get_json_subtree_at (ov->topo, rank); } /* Get the topology of the subtree rooted here. @@ -1964,6 +1934,7 @@ void overlay_destroy (struct overlay *ov) free (mon); zlist_destroy (&ov->monitor_callbacks); } + topology_decref (ov->topo); free (ov); errno = saved_errno; } diff --git a/src/broker/overlay.h b/src/broker/overlay.h index 488d105a80ac..e269144f2bc6 100644 --- a/src/broker/overlay.h +++ b/src/broker/overlay.h @@ -15,6 +15,7 @@ #include #include "attr.h" +#include "topology.h" typedef enum { OVERLAY_ANY = 0, @@ -44,9 +45,9 @@ void overlay_destroy (struct overlay *ov); */ int overlay_control_start (struct overlay *ov); -/* Set the overlay network size and rank of this broker. +/* Set the overlay topology. */ -int overlay_set_geometry (struct overlay *ov, uint32_t size, uint32_t rank); +int overlay_set_topology (struct overlay *ov, struct topology *topo); /* Send a message on the overlay network. * 'where' determines whether the message is routed upstream or downstream. diff --git a/src/broker/test/overlay.c b/src/broker/test/overlay.c index 428cb14f2a4b..14f4932d7446 100644 --- a/src/broker/test/overlay.c +++ b/src/broker/test/overlay.c @@ -26,6 +26,7 @@ #include "src/broker/overlay.h" #include "src/broker/attr.h" +#include "src/broker/topology.h" static zlist_t *logs; @@ -36,6 +37,7 @@ struct context { char name[32]; int rank; int size; + struct topology *topo; const char *uuid; const flux_msg_t *msg; }; @@ -76,6 +78,7 @@ void ctx_destroy (struct context *ctx) attr_destroy (ctx->attrs); overlay_destroy (ctx->ov); flux_msg_decref (ctx->msg); + topology_decref (ctx->topo); free (ctx); } @@ -99,6 +102,10 @@ struct context *ctx_create (flux_t *h, if (attr_add_int (ctx->attrs, "tbon.fanout", fanout, 0) < 0) BAIL_OUT ("could not add tbon.fanout attribute"); } + if (!(ctx->topo = topology_create (size)) + || topology_set_kary (ctx->topo, fanout) < 0 + || topology_set_rank (ctx->topo, rank) < 0) + BAIL_OUT ("cannot create topology"); ctx->h = h; ctx->size = size; ctx->rank = rank; @@ -118,8 +125,8 @@ void single (flux_t *h) struct context *ctx = ctx_create (h, "single", 1, 0, 2, NULL); flux_msg_t *msg; - ok (overlay_set_geometry (ctx->ov, 1, 0) == 0, - "%s: overlay_set_geometry size=1 rank=0 works", ctx->name); + ok (overlay_set_topology (ctx->ov, ctx->topo) == 0, + "%s: overlay_set_topology size=1 rank=0 works", ctx->name); ok (overlay_get_size (ctx->ov) == 1, "%s: overlay_get_size returns 1", ctx->name); @@ -268,8 +275,8 @@ void trio (flux_t *h) ctx[0] = ctx_create (h, "trio", size, 0, 2, recv_cb); - ok (overlay_set_geometry (ctx[0]->ov, size, 0) == 0, - "%s: overlay_set_geometry works", ctx[0]->name); + ok (overlay_set_topology (ctx[0]->ov, ctx[0]->topo) == 0, + "%s: overlay_set_topology works", ctx[0]->name); ok ((server_pubkey = overlay_cert_pubkey (ctx[0]->ov)) != NULL, "%s: overlay_cert_pubkey works", ctx[0]->name); @@ -280,8 +287,8 @@ void trio (flux_t *h) ctx[1] = ctx_create (h, "trio", size, 1, 2, recv_cb); - ok (overlay_set_geometry (ctx[1]->ov, size, 1) == 0, - "%s: overlay_init works", ctx[1]->name); + ok (overlay_set_topology (ctx[1]->ov, ctx[1]->topo) == 0, + "%s: overlay_set_topology works", ctx[1]->name); ok ((client_pubkey = overlay_cert_pubkey (ctx[1]->ov)) != NULL, "%s: overlay_cert_pubkey works", ctx[1]->name); @@ -493,8 +500,8 @@ void test_create (flux_t *h, for (rank = 0; rank < size; rank++) { ctx[rank] = ctx_create (h, name, size, rank, fanout, recv_cb); - if (overlay_set_geometry (ctx[rank]->ov, size, rank) < 0) - BAIL_OUT ("%s: overlay_set_geometry failed", ctx[rank]->name); + if (overlay_set_topology (ctx[rank]->ov, ctx[rank]->topo) < 0) + BAIL_OUT ("%s: overlay_set_topology failed", ctx[rank]->name); if (rank == 0) { snprintf (uri, sizeof (uri), "ipc://@%s", ctx[0]->name); /* Call overlay_bind() before overlay_authorize() is called From 8f8bdc032ba73f04d60e75f93ec26ac619530ce2 Mon Sep 17 00:00:00 2001 From: Jim Garlick Date: Sun, 7 Aug 2022 08:29:23 -0700 Subject: [PATCH 5/8] broker/attr: fix strtol() error handling Problem: strtol() and strtoul() are not being checked properly in the broker attribute code. Fix the code to conform to recommendations in strtoul(3), which involves clearing errno before the call and testing it afterwards. --- src/broker/attr.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/broker/attr.c b/src/broker/attr.c index 546a8f881033..537415636735 100644 --- a/src/broker/attr.c +++ b/src/broker/attr.c @@ -242,13 +242,14 @@ static int set_int (const char *name, const char *val, void *arg) errno = EINVAL; return -1; } + errno = 0; n = strtol (val, &endptr, 0); - if (n <= INT_MIN || n >= INT_MAX) { - errno = ERANGE; + if (errno != 0 || *endptr != '\0') { + errno = EINVAL; return -1; } - if (*endptr != '\0') { - errno = EINVAL; + if (n <= INT_MIN || n >= INT_MAX) { + errno = ERANGE; return -1; } *i = (int)n; @@ -288,10 +289,9 @@ static int set_uint32 (const char *name, const char *val, void *arg) char *endptr; unsigned long n; + errno = 0; n = strtoul (val, &endptr, 0); - if (n == ULONG_MAX) /* ERANGE set by strtol */ - return -1; - if (endptr == val || *endptr != '\0') { + if (errno != 0 || *endptr != '\0') { errno = EINVAL; return -1; } From c8e7f8d036018715cbfc83572f4bc7fd437997e4 Mon Sep 17 00:00:00 2001 From: Jim Garlick Date: Sun, 7 Aug 2022 12:10:39 -0700 Subject: [PATCH 6/8] broker: add attr_get_uint32() helper Problem: code for fetching a u32 attribute will be duplicated in boot_pmi.c and boot_config.c. Add attr_get_uint32() to attrs.[ch]. --- src/broker/attr.c | 19 +++++++++++++++++++ src/broker/attr.h | 4 ++++ 2 files changed, 23 insertions(+) diff --git a/src/broker/attr.c b/src/broker/attr.c index 537415636735..53adf7b2b277 100644 --- a/src/broker/attr.c +++ b/src/broker/attr.c @@ -314,6 +314,25 @@ int attr_add_active_uint32 (attr_t *attrs, const char *name, uint32_t *val, return attr_add_active (attrs, name, flags, get_uint32, set_uint32, val); } +int attr_get_uint32 (attr_t *attrs, const char *name, uint32_t *value) +{ + const char *s; + uint32_t i; + char *endptr; + + if (attr_get (attrs, name, &s, NULL) < 0) + return -1; + + errno = 0; + i = strtoul (s, &endptr, 10); + if (errno != 0 || *endptr != '\0') { + errno = EINVAL; + return -1; + } + *value = i; + return 0; +} + const char *attr_first (attr_t *attrs) { struct entry *e = zhash_first (attrs->hash); diff --git a/src/broker/attr.h b/src/broker/attr.h index 6defe4007e4e..eda870d52312 100644 --- a/src/broker/attr.h +++ b/src/broker/attr.h @@ -75,6 +75,10 @@ int attr_add_active_int (attr_t *attrs, const char *name, int *val, int attr_add_active_uint32 (attr_t *attrs, const char *name, uint32_t *val, int flags); +/* Get an attribute and parse it as an integer value. + */ +int attr_get_uint32 (attr_t *attrs, const char *name, uint32_t *value); + /* Iterate over attribute names with internal cursor. */ const char *attr_first (attr_t *attrs); From 64e93bf8a22d51ff449ce039343b932cb0a490e5 Mon Sep 17 00:00:00 2001 From: Jim Garlick Date: Mon, 8 Aug 2022 13:46:43 -0700 Subject: [PATCH 7/8] broker: move tbon.fanout to bootstrap methods Problem: in the future, the broker bootstrap methods (config file and PMI) will handle the tbon.fanout broker attribute differently, but the attribute is currently managed in shared code in overlay.c. Move the handling of the attribute to the bootstrap methods where the code can be allowed to diverge. Drop tbon.fanout test from overlay unit test, since this is no longer a feature that belongs to the overlay abstraction. Allow a fanout value of 0 to indicate "flat", regardless of the instance size, since the topology class supports that. Update t0001-basic.t sharness test to allow that value to succeed. Update flux-broker-attributes(7) to say what a value of 0 means. --- doc/man7/flux-broker-attributes.rst | 4 +++- src/broker/boot_config.c | 16 +++++++++++++++- src/broker/boot_pmi.c | 17 ++++++++++++++++- src/broker/overlay.c | 18 ------------------ src/broker/overlay.h | 1 - src/broker/test/overlay.c | 1 - t/t0001-basic.t | 4 ++-- 7 files changed, 36 insertions(+), 25 deletions(-) diff --git a/doc/man7/flux-broker-attributes.rst b/doc/man7/flux-broker-attributes.rst index 851072ebda58..f2d363245ed9 100644 --- a/doc/man7/flux-broker-attributes.rst +++ b/doc/man7/flux-broker-attributes.rst @@ -146,7 +146,9 @@ TREE BASED OVERLAY NETWORK ========================== tbon.fanout [Updates: C] - Branching factor of the tree based overlay network. Default: ``2``. + Branching factor of the tree based overlay network. A value of ``0`` + means the topology is "flat" (rank 0 is the parent of all other ranks). + Default: ``2``. tbon.descendants Number of descendants "below" this node of the tree based diff --git a/src/broker/boot_config.c b/src/broker/boot_config.c index 2c9a8591c51a..5c6ce048fbcf 100644 --- a/src/broker/boot_config.c +++ b/src/broker/boot_config.c @@ -30,6 +30,8 @@ #include "topology.h" #include "boot_config.h" +#define DEFAULT_FANOUT 2 + /* Copy 'fmt' into 'buf', substituting the following tokens: * - %h host @@ -456,10 +458,22 @@ int boot_config (flux_t *h, struct overlay *overlay, attr_t *attrs) struct boot_conf conf; uint32_t rank; uint32_t size; - int fanout = overlay_get_fanout (overlay); + uint32_t fanout; json_t *hosts = NULL; struct topology *topo = NULL; + /* Fetch the tbon.fanout attribute and supply a default value if unset. + */ + if (attr_get_uint32 (attrs, "tbon.fanout", &fanout) < 0) + fanout = DEFAULT_FANOUT; + else + (void)attr_delete (attrs, "tbon.fanout", true); + if (attr_add_uint32 (attrs, + "tbon.fanout", + fanout, + FLUX_ATTRFLAG_IMMUTABLE) < 0) + return -1; + /* Ingest the [bootstrap] stanza. */ if (boot_config_parse (flux_get_conf (h), &conf, &hosts) < 0) diff --git a/src/broker/boot_pmi.c b/src/broker/boot_pmi.c index 35406b715126..c322a460b3a4 100644 --- a/src/broker/boot_pmi.c +++ b/src/broker/boot_pmi.c @@ -31,6 +31,8 @@ #include "boot_pmi.h" #include "pmiutil.h" +#define DEFAULT_FANOUT 2 + /* If the broker is launched via flux-shell, then the shell may opt * to set a "flux.instance-level" parameter in the PMI kvs to tell @@ -178,7 +180,7 @@ static int set_hostlist_attr (attr_t *attrs, struct hostlist *hl) int boot_pmi (struct overlay *overlay, attr_t *attrs) { - int fanout = overlay_get_fanout (overlay); + uint32_t fanout; char key[64]; char val[1024]; char hostname[MAXHOSTNAMELEN + 1]; @@ -194,6 +196,19 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs) const char *uri; int i; + /* Fetch the tbon.fanout attribute and supply a default value if unset. + */ + if (attr_get_uint32 (attrs, "tbon.fanout", &fanout) < 0) + fanout = DEFAULT_FANOUT; + else + (void)attr_delete (attrs, "tbon.fanout", true); + if (attr_add_uint32 (attrs, + "tbon.fanout", + fanout, + FLUX_ATTRFLAG_IMMUTABLE) < 0) + return -1; + + memset (&pmi_params, 0, sizeof (pmi_params)); if (!(pmi = broker_pmi_create ())) { log_err ("broker_pmi_create"); diff --git a/src/broker/overlay.c b/src/broker/overlay.c index 154ade9bb64f..1de952363577 100644 --- a/src/broker/overlay.c +++ b/src/broker/overlay.c @@ -41,8 +41,6 @@ #define FLUX_ZAP_DOMAIN "flux" -#define DEFAULT_FANOUT 2 - /* Overlay control messages */ enum control_type { @@ -132,7 +130,6 @@ struct overlay { struct topology *topo; uint32_t size; uint32_t rank; - int fanout; char uuid[UUID_STR_LEN]; int version; int zmqdebug; @@ -296,11 +293,6 @@ int overlay_set_topology (struct overlay *ov, struct topology *topo) return -1; } -int overlay_get_fanout (struct overlay *ov) -{ - return ov->fanout; -} - uint32_t overlay_get_rank (struct overlay *ov) { return ov->rank; @@ -1996,16 +1988,6 @@ struct overlay *overlay_create (flux_t *h, uuid_unparse (uuid, ov->uuid); if (!(ov->monitor_callbacks = zlist_new ())) goto nomem; - if (overlay_configure_attr_int (ov->attrs, - "tbon.fanout", - DEFAULT_FANOUT, - &ov->fanout) < 0) - goto error; - if (ov->fanout < 1) { - log_msg ("tbon.fanout must be >= 1"); - errno = EINVAL; - goto error; - } if (overlay_configure_attr_int (ov->attrs, "tbon.prefertcp", 0, NULL) < 0) goto error; if (overlay_configure_torpid (ov) < 0) diff --git a/src/broker/overlay.h b/src/broker/overlay.h index e269144f2bc6..7fcd2f6611db 100644 --- a/src/broker/overlay.h +++ b/src/broker/overlay.h @@ -75,7 +75,6 @@ int overlay_set_parent_pubkey (struct overlay *ov, const char *pubkey); /* Misc. accessors */ -int overlay_get_fanout (struct overlay *ov); uint32_t overlay_get_rank (struct overlay *ov); void overlay_set_rank (struct overlay *ov, uint32_t rank); // test only uint32_t overlay_get_size (struct overlay *ov); diff --git a/src/broker/test/overlay.c b/src/broker/test/overlay.c index 14f4932d7446..ea2bb0757b65 100644 --- a/src/broker/test/overlay.c +++ b/src/broker/test/overlay.c @@ -138,7 +138,6 @@ void single (flux_t *h) check_attr (ctx, "tbon.parent-endpoint", NULL); check_attr (ctx, "rank", "0"); check_attr (ctx, "size", "1"); - check_attr (ctx, "tbon.fanout", "2"); check_attr (ctx, "tbon.level", "0"); check_attr (ctx, "tbon.maxlevel", "0"); check_attr (ctx, "tbon.descendants", "0"); diff --git a/t/t0001-basic.t b/t/t0001-basic.t index 8e9e1a771143..500b77e42827 100755 --- a/t/t0001-basic.t +++ b/t/t0001-basic.t @@ -497,8 +497,8 @@ test_expect_success 'broker -Stbon.fanout=4 option works' ' flux getattr tbon.fanout >fanout.out && test_cmp fanout.exp fanout.out ' -test_expect_success 'broker -Stbon.fanout=0 fails' ' - test_must_fail flux start ${ARGS} -o,-Stbon.fanout=0 /bin/true +test_expect_success 'broker -Stbon.fanout=0 works' ' + flux start ${ARGS} -o,-Stbon.fanout=0 /bin/true ' test_expect_success 'broker fails on unknown option' ' test_must_fail flux start ${ARGS} -o,--not-an-option /bin/true From f004baf330d0977291c55905a966855453a18b80 Mon Sep 17 00:00:00 2001 From: Jim Garlick Date: Sun, 7 Aug 2022 22:34:57 -0700 Subject: [PATCH 8/8] broker: use default fanout of 0 for config boot Problem: the systemd unit file has to specify a large value for the tbon.fanout attribute to make the default system instance use a flat TBON topology. Make the default TBON fanout 0 (flat) when Flux is booted from config files. Drop the tbon.fanout=256 setting in the systemd unit file. --- etc/flux.service.in | 1 - src/broker/boot_config.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/etc/flux.service.in b/etc/flux.service.in index 2514e7d13cef..03f881f53af0 100644 --- a/etc/flux.service.in +++ b/etc/flux.service.in @@ -11,7 +11,6 @@ ExecStart=/bin/bash -c '\ @X_BINDIR@/flux broker \ --config-path=@X_SYSCONFDIR@/flux/system/conf.d \ -Scron.directory=@X_SYSCONFDIR@/flux/system/cron.d \ - -Stbon.fanout=256 \ -Srundir=@X_RUNSTATEDIR@/flux \ -Sstatedir=${STATE_DIRECTORY:-/var/lib/flux} \ -Slocal-uri=local://@X_RUNSTATEDIR@/flux/local \ diff --git a/src/broker/boot_config.c b/src/broker/boot_config.c index 5c6ce048fbcf..9d25a4e482c8 100644 --- a/src/broker/boot_config.c +++ b/src/broker/boot_config.c @@ -30,7 +30,7 @@ #include "topology.h" #include "boot_config.h" -#define DEFAULT_FANOUT 2 +#define DEFAULT_FANOUT 0 /* Copy 'fmt' into 'buf', substituting the following tokens: