From d7e5d496689c104a8c599c43dc472b149f8baa08 Mon Sep 17 00:00:00 2001 From: "Mark A. Grondona" Date: Tue, 14 Jun 2022 16:49:59 -0700 Subject: [PATCH] shell: affinity: use cached hwloc XML Problem: Loading hwloc topology can be very slow, especially on a system with many cores and when possibly many processes are trying to simultaneously call hwloc_topology_load(3). This can occur when many short running jobs are being launched by Flux, since the job shell loads topology by default in the affinity plugin. Since the job shell now caches the hwloc XML in the shell info object, fetch this XML and use it to load topology, avoiding redundant scans of the system. This may greatly improve job throughput on many core systems. Fixes #4365 --- src/shell/affinity.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/src/shell/affinity.c b/src/shell/affinity.c index 98602fd66224..7f14289811ff 100644 --- a/src/shell/affinity.c +++ b/src/shell/affinity.c @@ -158,10 +158,30 @@ static void shell_affinity_destroy (void *arg) /* Initialize topology object for affinity processing. */ -static int shell_affinity_topology_init (struct shell_affinity *sa) +static int shell_affinity_topology_init (flux_shell_t *shell, + struct shell_affinity *sa) { + const char *xml; + + /* Fetch hwloc XML cached in job shell to avoid heavyweight + * hwloc topology load (Issue #4365) + */ + if (flux_shell_info_unpack (shell, "{s:s}", "hwloc", &xml) < 0) + return shell_log_errno ("failed to unpack hwloc object"); + if (hwloc_topology_init (&sa->topo) < 0) return shell_log_errno ("hwloc_topology_init"); + + if (hwloc_topology_set_xmlbuffer (sa->topo, xml, strlen (xml)) < 0) + return shell_log_errno ("hwloc_topology_set_xmlbuffer"); + + /* Tell hwloc that our XML loaded topology is from this system, + * O/w hwloc CPU binding will not work. 
+ */ + if (hwloc_topology_set_flags (sa->topo, + HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM) < 0) + return shell_log_errno ("hwloc_topology_set_flags"); + if (hwloc_topology_load (sa->topo) < 0) return shell_log_errno ("hwloc_topology_load"); if (topology_restrict_current (sa->topo) < 0) @@ -178,7 +198,7 @@ static struct shell_affinity * shell_affinity_create (flux_shell_t *shell) struct shell_affinity *sa = calloc (1, sizeof (*sa)); if (!sa) return NULL; - if (shell_affinity_topology_init (sa) < 0) + if (shell_affinity_topology_init (shell, sa) < 0) goto err; if (flux_shell_rank_info_unpack (shell, -1,