Skip to content

Commit

Permalink
Merge pull request #6283 from garlick/issue#6272
Browse files Browse the repository at this point in the history
broker: use enclosing instance tbon.interface-hint unless overridden
  • Loading branch information
mergify[bot] authored Sep 24, 2024
2 parents fa81b09 + cc2b3a2 commit 7efc1a0
Show file tree
Hide file tree
Showing 10 changed files with 265 additions and 165 deletions.
5 changes: 5 additions & 0 deletions doc/man5/flux-config-tbon.rst
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,11 @@ interface-hint
network address in CIDR form, e.g. ``10.0.2.0/24``. NOTE: IPv6
network addresses are not supported at this time.

This configured value may be overridden by setting the
``tbon.interface-hint`` broker attribute on the command line.
It is inherited by sub-instances spawned for batch jobs and allocations.
Refer to :man7:`flux-broker-attributes` for more information.


EXAMPLE
=======
Expand Down
15 changes: 12 additions & 3 deletions doc/man7/flux-broker-attributes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -188,12 +188,12 @@ tbon.prefertcp [Updates: C]
with PMI, tcp:// endpoints will be used instead of ipc://, even if all
brokers are on a single node. Default: ``0``.

tbon.interface-hint
When bootstrapping with PMI, tcp:// endpoints are chosen heuristically
tbon.interface-hint [Updates: C, R]
When bootstrapping with PMI, tcp endpoints are chosen heuristically
using one of the following methods:

default-route
The address associated with the default route (the default hint).
The address associated with the default route (default, but see below).
hostname
The address associated with the system hostname.
*interface*
Expand All @@ -203,6 +203,15 @@ tbon.interface-hint
network address in CIDR form, e.g. ``10.0.2.0/24``. NOTE: IPv6
network addresses are not supported at this time.

If the attribute is not explicitly set, its value is assigned from
(in descending precedence):

1. the TOML configuration key of the same name
2. the :envvar:`FLUX_IPADDR_INTERFACE` or :envvar:`FLUX_IPADDR_HOSTNAME`
environment variables (these should be considered deprecated)
3. the enclosing Flux instance's value (via the PMI KVS)
4. the compiled-in default of default-route

tbon.torpid_min [Updates: C, R]
The amount of time (in RFC 23 Flux Standard Duration format) that a broker
will allow the connection to its TBON parent to remain idle before sending a
Expand Down
7 changes: 7 additions & 0 deletions src/broker/boot_config.c
Original file line number Diff line number Diff line change
Expand Up @@ -599,6 +599,13 @@ int boot_config (flux_t *h, struct overlay *overlay, attr_t *attrs)
*/
overlay_set_ipv6 (overlay, conf.enable_ipv6);

/* Ensure that tbon.interface-hint is set.
*/
if (overlay_set_tbon_interface_hint (overlay, NULL) < 0) {
log_err ("error setting tbon.interface-hint attribute");
goto error;
}

/* If broker has "downstream" peers, determine the URI to bind to
* from the config and tell overlay. Also, set the tbon.endpoint
* attribute to the URI peers will connect to. If broker has no
Expand Down
64 changes: 47 additions & 17 deletions src/broker/boot_pmi.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,27 +36,46 @@
* the booting instance at what "level" it will be running, i.e. the
* number of parents. If the PMI key is missing, this is not an error,
* instead the level of this instance is considered to be zero.
* Additionally, if level > 0, the shell will have put the instance's
* jobid in the PMI kvsname for us as well, so populate the 'jobid' attr.
*/
static int set_instance_level_attr (struct upmi *upmi,
const char *name,
attr_t *attrs)
attr_t *attrs,
bool *under_flux)
{
int rc;
char *val = NULL;
const char *level = "0";
const char *jobid = NULL;
int rc = -1;

if (upmi_get (upmi, "flux.instance-level", -1, &val, NULL) == 0) {
level = val;
jobid = name;
}
rc = attr_add (attrs, "instance-level", level, ATTR_IMMUTABLE);
free (val);
if (rc < 0 || attr_add (attrs, "jobid", jobid, ATTR_IMMUTABLE) < 0)
return -1;
return 0;
(void)upmi_get (upmi, "flux.instance-level", -1, &val, NULL);
if (attr_add (attrs, "instance-level", val ? val : "0", ATTR_IMMUTABLE) < 0)
goto error;
*under_flux = val ? true : false;
rc = 0;
error:
ERRNO_SAFE_WRAP (free, val);
return rc;
}

/* If the tbon.interface-hint broker attr is not already set, set it.
* If running under Flux, use the value, if any, placed in PMI KVS by
* the enclsoing instance. Otherwise, set a default value.
*/
static int set_tbon_interface_hint_attr (struct upmi *upmi,
attr_t *attrs,
struct overlay *ov,
bool under_flux)
{
char *val = NULL;
int rc = -1;

if (attr_get (attrs, "tbon.interface-hint", NULL, NULL) == 0)
return 0;
if (under_flux)
(void)upmi_get (upmi, "flux.tbon-interface-hint", -1, &val, NULL);
if (overlay_set_tbon_interface_hint (ov, val) < 0)
goto error;
rc = 0;
error:
ERRNO_SAFE_WRAP (free, val);
return rc;
}

static char *pmi_mapping_to_taskmap (const char *s)
Expand Down Expand Up @@ -241,6 +260,7 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
int i;
int upmi_flags = UPMI_LIBPMI_NOFLUX;
const char *upmi_method;
bool under_flux;

// N.B. overlay_create() sets the tbon.topo attribute
if (attr_get (attrs, "tbon.topo", &topo_uri, NULL) < 0) {
Expand All @@ -264,10 +284,20 @@ int boot_pmi (struct overlay *overlay, attr_t *attrs)
upmi_destroy (upmi);
return -1;
}
if (set_instance_level_attr (upmi, info.name, attrs) < 0) {
if (set_instance_level_attr (upmi, attrs, &under_flux) < 0) {
log_err ("set_instance_level_attr");
goto error;
}
if (under_flux) {
if (attr_add (attrs, "jobid", info.name, ATTR_IMMUTABLE) < 0) {
log_err ("error setting jobid attribute");
goto error;
}
}
if (set_tbon_interface_hint_attr (upmi, attrs, overlay, under_flux) < 0) {
log_err ("error setting tbon.interface-hint attribute");
goto error;
}
if (set_broker_mapping_attr (upmi, info.size, attrs) < 0) {
log_err ("error setting broker.mapping attribute");
goto error;
Expand Down
81 changes: 45 additions & 36 deletions src/broker/overlay.c
Original file line number Diff line number Diff line change
Expand Up @@ -2134,51 +2134,64 @@ static int get_torpid (const char *name, const char **val, void *arg)
return -1;
}

/* This is called from boot_*.c with a default value to set if the attribute
* is not set already.
*/
int overlay_set_tbon_interface_hint (struct overlay *ov, const char *val)
{
if (attr_get (ov->attrs, "tbon.interface-hint", NULL, NULL) == 0)
return 0;
if (!val)
val = default_interface_hint;
return attr_add (ov->attrs, "tbon.interface-hint", val, 0);
}

/* Set attribute with the following precedence:
* 1. broker attribute
* 2. TOML config
* 3. legacy environment variables
* Leave it unset if none of those are available.
* The bootstrap methods set it later, but only if not already set.
*/
static int overlay_configure_interface_hint (struct overlay *ov,
const char *table,
const char *name,
const char *default_value)
const char *name)
{
char long_name[128];
const char *val;
const char *s;
const char *val = NULL;
const char *config_val = NULL;
const char *attr_val = NULL;
int flags;
const flux_conf_t *cf;
flux_error_t error;

(void)snprintf (long_name, sizeof (long_name), "%s.%s", table, name);

/* Take initial value from compiled-in default or legacy
* environment variable settings.
*/
if ((s = getenv ("FLUX_IPADDR_INTERFACE")))
val = s;
else if (getenv ("FLUX_IPADDR_HOSTNAME"))
val = "hostname";
else
val = default_value;

/* Override with config file settings, if any.
*/
if ((cf = flux_get_conf (ov->h))) {
flux_error_t error;

s = NULL;
if (flux_conf_unpack (flux_get_conf (ov->h),
if (flux_conf_unpack (cf,
&error,
"{s?{s?s}}",
table,
name, &s) < 0) {
name, &config_val) < 0) {
log_msg ("Config file error [%s]: %s", table, error.text);
return -1;
}
if (s)
val = s;
}
/* Override with broker attribute, if any.
* Then set broker attribute to reflect current value.
*/
if (overlay_configure_attr (ov->attrs, long_name, val, NULL) < 0) {
log_err ("Error setting %s attribute", long_name);
return -1;
(void)snprintf (long_name, sizeof (long_name), "%s.%s", table, name);
(void)attr_get (ov->attrs, long_name, &attr_val, &flags);

if (attr_val)
val = attr_val;
else if (config_val)
val = config_val;
else if ((val = getenv ("FLUX_IPADDR_INTERFACE")))
;
else if (getenv ("FLUX_IPADDR_HOSTNAME"))
val = "hostname";

if (val && !attr_val) {
if (attr_add (ov->attrs, long_name, val, 0) < 0) {
log_err ("Error setting %s attribute value", long_name);
return -1;
}
}
return 0;
}
Expand Down Expand Up @@ -2502,10 +2515,7 @@ struct overlay *overlay_create (flux_t *h,
goto nomem;
if (overlay_configure_attr_int (ov->attrs, "tbon.prefertcp", 0, NULL) < 0)
goto error;
if (overlay_configure_interface_hint (ov,
"tbon",
"interface-hint",
default_interface_hint) < 0)
if (overlay_configure_interface_hint (ov, "tbon", "interface-hint") < 0)
goto error;
if (overlay_configure_torpid (ov) < 0)
goto error;
Expand Down Expand Up @@ -2567,7 +2577,6 @@ struct overlay *overlay_create (flux_t *h,
return NULL;
}


/*
* vi:tabstop=4 shiftwidth=4 expandtab
*/
1 change: 1 addition & 0 deletions src/broker/overlay.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ const char *overlay_get_uuid (struct overlay *ov);
bool overlay_uuid_is_parent (struct overlay *ov, const char *uuid);
bool overlay_uuid_is_child (struct overlay *ov, const char *uuid);
void overlay_set_ipv6 (struct overlay *ov, int enable);
int overlay_set_tbon_interface_hint (struct overlay *ov, const char *val);

/* Return an idset of critical ranks, i.e. non-leaf brokers
*/
Expand Down
11 changes: 11 additions & 0 deletions src/shell/pmi/pmi.c
Original file line number Diff line number Diff line change
Expand Up @@ -423,6 +423,16 @@ static int init_clique (struct shell_pmi *pmi)
return 0;
}

static int set_flux_tbon_interface_hint (struct shell_pmi *pmi)
{
const char *hint;

if (!(hint = flux_attr_get (pmi->shell->h, "tbon.interface-hint")))
return 0;
put_dict (pmi->locals, "flux.tbon-interface-hint", hint);
return 0;
}

static int set_flux_instance_level (struct shell_pmi *pmi)
{
char *p;
Expand Down Expand Up @@ -575,6 +585,7 @@ static struct shell_pmi *pmi_create (flux_shell_t *shell, json_t *config)
if (!nomap && init_clique (pmi) < 0)
goto error;
if (set_flux_instance_level (pmi) < 0
|| set_flux_tbon_interface_hint (pmi) < 0
|| (!nomap && set_flux_taskmap (pmi) < 0))
goto error;
return pmi;
Expand Down
1 change: 1 addition & 0 deletions t/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ TESTSCRIPTS = \
t0016-cron-faketime.t \
t0017-security.t \
t0018-content-files.t \
t0019-tbon-config.t \
t0020-terminus.t \
t0021-archive-cmd.t \
t0022-jj-reader.t \
Expand Down
Loading

0 comments on commit 7efc1a0

Please sign in to comment.