Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

broker: use enclosing instance tbon.interface-hint unless overridden #6283

Merged
merged 9 commits into from
Sep 24, 2024
5 changes: 5 additions & 0 deletions doc/man5/flux-config-tbon.rst
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,11 @@ interface-hint
network address in CIDR form, e.g. ``10.0.2.0/24``. NOTE: IPv6
network addresses are not supported at this time.

This configured value may be overridden by setting the
``tbon.interface-hint`` broker attribute on the command line.
It is inherited by sub-instances spawned for batch jobs and allocations.
Refer to :man7:`flux-broker-attributes` for more information.


EXAMPLE
=======
Expand Down
15 changes: 12 additions & 3 deletions doc/man7/flux-broker-attributes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -188,12 +188,12 @@ tbon.prefertcp [Updates: C]
with PMI, tcp:// endpoints will be used instead of ipc://, even if all
brokers are on a single node. Default: ``0``.

tbon.interface-hint
When bootstrapping with PMI, tcp:// endpoints are chosen heuristically
tbon.interface-hint [Updates: C, R]
When bootstrapping with PMI, tcp endpoints are chosen heuristically
using one of the following methods:

default-route
The address associated with the default route (the default hint).
The address associated with the default route (default, but see below).
hostname
The address associated with the system hostname.
*interface*
Expand All @@ -203,6 +203,15 @@ tbon.interface-hint
network address in CIDR form, e.g. ``10.0.2.0/24``. NOTE: IPv6
network addresses are not supported at this time.

If the attribute is not explicitly set, its value is assigned from
(in descending precedence):

1. the TOML configuration key of the same name
2. the :envvar:`FLUX_IPADDR_INTERFACE` or :envvar:`FLUX_IPADDR_HOSTNAME`
environment variables (these should be considered deprecated)
3. the enclosing Flux instance's value (via the PMI KVS)
4. the compiled-in default of default-route

tbon.torpid_min [Updates: C, R]
The amount of time (in RFC 23 Flux Standard Duration format) that a broker
will allow the connection to its TBON parent to remain idle before sending a
Expand Down
7 changes: 7 additions & 0 deletions src/broker/boot_config.c
Original file line number Diff line number Diff line change
Expand Up @@ -599,6 +599,13 @@
*/
overlay_set_ipv6 (overlay, conf.enable_ipv6);

/* Ensure that tbon.interface-hint is set.
*/
if (overlay_set_tbon_interface_hint (overlay, NULL) < 0) {
log_err ("error setting tbon.interface-hint attribute");
goto error;

Check warning on line 606 in src/broker/boot_config.c

View check run for this annotation

Codecov / codecov/patch

src/broker/boot_config.c#L605-L606

Added lines #L605 - L606 were not covered by tests
}

/* If broker has "downstream" peers, determine the URI to bind to
* from the config and tell overlay. Also, set the tbon.endpoint
* attribute to the URI peers will connect to. If broker has no
Expand Down
64 changes: 47 additions & 17 deletions src/broker/boot_pmi.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,27 +36,46 @@
* the booting instance at what "level" it will be running, i.e. the
* number of parents. If the PMI key is missing, this is not an error,
* instead the level of this instance is considered to be zero.
* Additionally, if level > 0, the shell will have put the instance's
* jobid in the PMI kvsname for us as well, so populate the 'jobid' attr.
*/
static int set_instance_level_attr (struct upmi *upmi,
const char *name,
attr_t *attrs)
attr_t *attrs,
bool *under_flux)
{
int rc;
char *val = NULL;
const char *level = "0";
const char *jobid = NULL;
int rc = -1;

if (upmi_get (upmi, "flux.instance-level", -1, &val, NULL) == 0) {
level = val;
jobid = name;
}
rc = attr_add (attrs, "instance-level", level, ATTR_IMMUTABLE);
free (val);
if (rc < 0 || attr_add (attrs, "jobid", jobid, ATTR_IMMUTABLE) < 0)
return -1;
return 0;
(void)upmi_get (upmi, "flux.instance-level", -1, &val, NULL);
if (attr_add (attrs, "instance-level", val ? val : "0", ATTR_IMMUTABLE) < 0)
goto error;

Check warning on line 49 in src/broker/boot_pmi.c

View check run for this annotation

Codecov / codecov/patch

src/broker/boot_pmi.c#L49

Added line #L49 was not covered by tests
*under_flux = val ? true : false;
rc = 0;
error:
ERRNO_SAFE_WRAP (free, val);
return rc;
}

/* If the tbon.interface-hint broker attr is not already set, set it.
* If running under Flux, use the value, if any, placed in PMI KVS by
* the enclsoing instance. Otherwise, set a default value.
*/
static int set_tbon_interface_hint_attr (struct upmi *upmi,
attr_t *attrs,
struct overlay *ov,
bool under_flux)
{
char *val = NULL;
int rc = -1;

if (attr_get (attrs, "tbon.interface-hint", NULL, NULL) == 0)
return 0;
if (under_flux)
(void)upmi_get (upmi, "flux.tbon-interface-hint", -1, &val, NULL);
if (overlay_set_tbon_interface_hint (ov, val) < 0)
goto error;

Check warning on line 74 in src/broker/boot_pmi.c

View check run for this annotation

Codecov / codecov/patch

src/broker/boot_pmi.c#L74

Added line #L74 was not covered by tests
rc = 0;
error:
ERRNO_SAFE_WRAP (free, val);
return rc;
}

static char *pmi_mapping_to_taskmap (const char *s)
Expand Down Expand Up @@ -241,6 +260,7 @@
int i;
int upmi_flags = UPMI_LIBPMI_NOFLUX;
const char *upmi_method;
bool under_flux;

// N.B. overlay_create() sets the tbon.topo attribute
if (attr_get (attrs, "tbon.topo", &topo_uri, NULL) < 0) {
Expand All @@ -264,10 +284,20 @@
upmi_destroy (upmi);
return -1;
}
if (set_instance_level_attr (upmi, info.name, attrs) < 0) {
if (set_instance_level_attr (upmi, attrs, &under_flux) < 0) {
log_err ("set_instance_level_attr");
goto error;
}
if (under_flux) {
if (attr_add (attrs, "jobid", info.name, ATTR_IMMUTABLE) < 0) {
log_err ("error setting jobid attribute");
goto error;

Check warning on line 294 in src/broker/boot_pmi.c

View check run for this annotation

Codecov / codecov/patch

src/broker/boot_pmi.c#L293-L294

Added lines #L293 - L294 were not covered by tests
}
}
if (set_tbon_interface_hint_attr (upmi, attrs, overlay, under_flux) < 0) {
log_err ("error setting tbon.interface-hint attribute");
goto error;

Check warning on line 299 in src/broker/boot_pmi.c

View check run for this annotation

Codecov / codecov/patch

src/broker/boot_pmi.c#L298-L299

Added lines #L298 - L299 were not covered by tests
}
if (set_broker_mapping_attr (upmi, info.size, attrs) < 0) {
log_err ("error setting broker.mapping attribute");
goto error;
Expand Down
81 changes: 45 additions & 36 deletions src/broker/overlay.c
Original file line number Diff line number Diff line change
Expand Up @@ -2134,51 +2134,64 @@
return -1;
}

/* This is called from boot_*.c with a default value to set if the attribute
* is not set already.
*/
int overlay_set_tbon_interface_hint (struct overlay *ov, const char *val)
{
if (attr_get (ov->attrs, "tbon.interface-hint", NULL, NULL) == 0)
return 0;
if (!val)
val = default_interface_hint;
return attr_add (ov->attrs, "tbon.interface-hint", val, 0);
}

/* Set attribute with the following precedence:
* 1. broker attribute
* 2. TOML config
* 3. legacy environment variables
* Leave it unset if none of those are available.
* The bootstrap methods set it later, but only if not already set.
*/
static int overlay_configure_interface_hint (struct overlay *ov,
const char *table,
const char *name,
const char *default_value)
const char *name)
{
char long_name[128];
const char *val;
const char *s;
const char *val = NULL;
const char *config_val = NULL;
const char *attr_val = NULL;
int flags;
const flux_conf_t *cf;
flux_error_t error;

(void)snprintf (long_name, sizeof (long_name), "%s.%s", table, name);

/* Take initial value from compiled-in default or legacy
* environment variable settings.
*/
if ((s = getenv ("FLUX_IPADDR_INTERFACE")))
val = s;
else if (getenv ("FLUX_IPADDR_HOSTNAME"))
val = "hostname";
else
val = default_value;

/* Override with config file settings, if any.
*/
if ((cf = flux_get_conf (ov->h))) {
flux_error_t error;

s = NULL;
if (flux_conf_unpack (flux_get_conf (ov->h),
if (flux_conf_unpack (cf,
&error,
"{s?{s?s}}",
table,
name, &s) < 0) {
name, &config_val) < 0) {
log_msg ("Config file error [%s]: %s", table, error.text);
return -1;
}
if (s)
val = s;
}
/* Override with broker attribute, if any.
* Then set broker attribute to reflect current value.
*/
if (overlay_configure_attr (ov->attrs, long_name, val, NULL) < 0) {
log_err ("Error setting %s attribute", long_name);
return -1;
(void)snprintf (long_name, sizeof (long_name), "%s.%s", table, name);
(void)attr_get (ov->attrs, long_name, &attr_val, &flags);

if (attr_val)
val = attr_val;
else if (config_val)
val = config_val;
else if ((val = getenv ("FLUX_IPADDR_INTERFACE")))
;
else if (getenv ("FLUX_IPADDR_HOSTNAME"))
val = "hostname";

Check warning on line 2188 in src/broker/overlay.c

View check run for this annotation

Codecov / codecov/patch

src/broker/overlay.c#L2188

Added line #L2188 was not covered by tests

if (val && !attr_val) {
if (attr_add (ov->attrs, long_name, val, 0) < 0) {
log_err ("Error setting %s attribute value", long_name);
return -1;

Check warning on line 2193 in src/broker/overlay.c

View check run for this annotation

Codecov / codecov/patch

src/broker/overlay.c#L2192-L2193

Added lines #L2192 - L2193 were not covered by tests
}
}
return 0;
}
Expand Down Expand Up @@ -2502,10 +2515,7 @@
goto nomem;
if (overlay_configure_attr_int (ov->attrs, "tbon.prefertcp", 0, NULL) < 0)
goto error;
if (overlay_configure_interface_hint (ov,
"tbon",
"interface-hint",
default_interface_hint) < 0)
if (overlay_configure_interface_hint (ov, "tbon", "interface-hint") < 0)
goto error;
if (overlay_configure_torpid (ov) < 0)
goto error;
Expand Down Expand Up @@ -2567,7 +2577,6 @@
return NULL;
}


/*
* vi:tabstop=4 shiftwidth=4 expandtab
*/
1 change: 1 addition & 0 deletions src/broker/overlay.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ const char *overlay_get_uuid (struct overlay *ov);
bool overlay_uuid_is_parent (struct overlay *ov, const char *uuid);
bool overlay_uuid_is_child (struct overlay *ov, const char *uuid);
void overlay_set_ipv6 (struct overlay *ov, int enable);
int overlay_set_tbon_interface_hint (struct overlay *ov, const char *val);

/* Return an idset of critical ranks, i.e. non-leaf brokers
*/
Expand Down
11 changes: 11 additions & 0 deletions src/shell/pmi/pmi.c
Original file line number Diff line number Diff line change
Expand Up @@ -423,6 +423,16 @@
return 0;
}

static int set_flux_tbon_interface_hint (struct shell_pmi *pmi)
{
const char *hint;

if (!(hint = flux_attr_get (pmi->shell->h, "tbon.interface-hint")))
return 0;

Check warning on line 431 in src/shell/pmi/pmi.c

View check run for this annotation

Codecov / codecov/patch

src/shell/pmi/pmi.c#L431

Added line #L431 was not covered by tests
put_dict (pmi->locals, "flux.tbon-interface-hint", hint);
return 0;
}

static int set_flux_instance_level (struct shell_pmi *pmi)
{
char *p;
Expand Down Expand Up @@ -575,6 +585,7 @@
if (!nomap && init_clique (pmi) < 0)
goto error;
if (set_flux_instance_level (pmi) < 0
|| set_flux_tbon_interface_hint (pmi) < 0
|| (!nomap && set_flux_taskmap (pmi) < 0))
goto error;
return pmi;
Expand Down
1 change: 1 addition & 0 deletions t/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ TESTSCRIPTS = \
t0016-cron-faketime.t \
t0017-security.t \
t0018-content-files.t \
t0019-tbon-config.t \
t0020-terminus.t \
t0021-archive-cmd.t \
t0022-jj-reader.t \
Expand Down
Loading
Loading