Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for hwloc 2.0+ #677

Merged
merged 10 commits into from
Jul 14, 2020
Merged
6 changes: 6 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@ matrix:
- ARGS="--prefix=/usr"
- TEST_INSTALL=t
- DOCKER_TAG=t
- name: "Ubuntu: 20.04"
compiler: gcc
env:
- ARGS="--prefix=/usr"
- IMG=focal
- DOCKER_TAG=t
- name: "Centos 7: docker-deploy"
compiler: gcc
env:
Expand Down
32 changes: 29 additions & 3 deletions resource/readers/resource_reader_hwloc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ vtx_t resource_reader_hwloc_t::add_new_vertex (resource_graph_t &g,

void resource_reader_hwloc_t::walk_hwloc (resource_graph_t &g,
resource_graph_metadata_t &m,
const hwloc_topology_t topo,
const hwloc_obj_t obj,
const vtx_t parent, int rank)
{
Expand Down Expand Up @@ -280,8 +281,9 @@ void resource_reader_hwloc_t::walk_hwloc (resource_graph_t &g,
g[e].name[subsys] = rev_relation;
}

for (unsigned int i = 0; i < obj->arity; i++) {
walk_hwloc (g, m, obj->children[i], valid_ancestor, rank);
hwloc_obj_t curr_child = NULL;
while ((curr_child = hwloc_get_next_child (topo, obj, curr_child)) != NULL) {
walk_hwloc (g, m, topo, curr_child, valid_ancestor, rank);
}
}

Expand All @@ -300,11 +302,35 @@ int resource_reader_hwloc_t::unpack_internal (resource_graph_t &g,
m_err_msg += "Error initializing hwloc topology; ";
goto done;
}
#if HWLOC_API_VERSION < 0x20000
if ( hwloc_topology_set_flags (topo, HWLOC_TOPOLOGY_FLAG_IO_DEVICES) != 0) {
errno = EINVAL;
m_err_msg += "Error setting hwloc topology flag; ";
goto done;
}
#else
if (hwloc_topology_set_io_types_filter (topo,
HWLOC_TYPE_FILTER_KEEP_IMPORTANT)
< 0) {
errno = EINVAL;
m_err_msg += "hwloc_topology_set_io_types_filter; ";
goto done;
}
if (hwloc_topology_set_cache_types_filter (topo,
HWLOC_TYPE_FILTER_KEEP_STRUCTURE)
< 0) {
errno = EINVAL;
m_err_msg += "hwloc_topology_set_cache_types_filter; ";
goto done;
}
if (hwloc_topology_set_icache_types_filter (topo,
HWLOC_TYPE_FILTER_KEEP_STRUCTURE)
< 0) {
errno = EINVAL;
m_err_msg += "hwloc_topology_set_icache_types_filter; ";
goto done;
}
#endif
if ( hwloc_topology_set_xmlbuffer (topo, str.c_str (), len) != 0) {
errno = EINVAL;
m_err_msg += "Error setting xmlbuffer; ";
Expand All @@ -317,7 +343,7 @@ int resource_reader_hwloc_t::unpack_internal (resource_graph_t &g,
}

hwloc_root = hwloc_get_root_obj (topo);
walk_hwloc (g, m, hwloc_root, vtx, rank);
walk_hwloc (g, m, topo, hwloc_root, vtx, rank);
hwloc_topology_destroy (topo);
rc = 0;

Expand Down
3 changes: 2 additions & 1 deletion resource/readers/resource_reader_hwloc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,8 @@ class resource_reader_hwloc_t : public resource_reader_base_t {
const std::string &subsys, const std::string &type,
const std::string &basename, int size, int rank = -1);
void walk_hwloc (resource_graph_t &g, resource_graph_metadata_t &m,
const hwloc_obj_t obj, const vtx_t parent, int rank);
const hwloc_topology_t topo, const hwloc_obj_t obj,
const vtx_t parent, int rank);
int unpack_internal (resource_graph_t &g, resource_graph_metadata_t &m,
vtx_t &vtx, const std::string &str, int rank = -1);
};
Expand Down
29 changes: 29 additions & 0 deletions src/test/docker/focal/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
FROM fluxrm/flux-core:focal

# Build-time overrides for the account the image runs as.
# UID is also passed to groupadd below, so it doubles as the group id.
ARG USER=flux
ARG UID=1000

# Install extra buildrequires for flux-sched.
# `apt-get update` is chained into the same RUN as `install` so that a
# cached package-index layer is never reused after the package list changes.
RUN sudo apt-get update \
 && sudo apt-get -qq install -y --no-install-recommends \
        libboost-graph-dev \
        libboost-system-dev \
        libboost-filesystem-dev \
        libboost-regex-dev \
        libxml2-dev \
        python-yaml \
        libyaml-cpp-dev

# Add configured user to image with sudo access.
# Skipped when USER is left at "flux" (presumably that account already
# exists in the base image -- confirm against fluxrm/flux-core:focal).
RUN \
 if test "$USER" != "flux"; then \
      sudo groupadd -g $UID $USER \
   && sudo useradd -g $USER -u $UID -d /home/$USER -m $USER \
   && sudo sh -c "printf \"$USER ALL= NOPASSWD: ALL\\n\" >> /etc/sudoers" \
   && sudo adduser $USER sudo ; \
 fi

# Switch to the configured user and run `flux keygen` from its home directory.
USER $USER
WORKDIR /home/$USER
RUN flux keygen
1,197 changes: 534 additions & 663 deletions t/data/hwloc-data/004N/exclusive/04-brokers-sierra2/0.xml

Large diffs are not rendered by default.

1,197 changes: 534 additions & 663 deletions t/data/hwloc-data/004N/exclusive/04-brokers-sierra2/1.xml

Large diffs are not rendered by default.

1,203 changes: 536 additions & 667 deletions t/data/hwloc-data/004N/exclusive/04-brokers-sierra2/2.xml

Large diffs are not rendered by default.

1,203 changes: 536 additions & 667 deletions t/data/hwloc-data/004N/exclusive/04-brokers-sierra2/3.xml

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion t/data/resource/commands/basics/cmds10.in
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# 2x slot[1]->numanode[1]->gpu[2]
# 2x slot[1]->gpu[2]
match allocate @TEST_SRCDIR@/data/resource/jobspecs/basics/test010.yaml
match allocate @TEST_SRCDIR@/data/resource/jobspecs/basics/test010.yaml
quit
2 changes: 1 addition & 1 deletion t/data/resource/commands/basics/cmds11.in
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# 2x slot[2]->numanode[1]->gpu[1]
# 2x slot[2]->socket[1]->gpu[1]
match allocate @TEST_SRCDIR@/data/resource/jobspecs/basics/test012.yaml
match allocate @TEST_SRCDIR@/data/resource/jobspecs/basics/test012.yaml
quit
32 changes: 8 additions & 24 deletions t/data/resource/expected/basics/017.R.out
Original file line number Diff line number Diff line change
@@ -1,48 +1,32 @@
------------------------core3[1:x]
---------------------L1cache3[32:s]
------------------L2cache3[256:s]
---------------L3cache0[20480:s]
------------socket0[1:s]
---------numanode0[1:s]
------------core3[1:x]
---------socket0[1:s]
------cab1251[1:s]
---cluster0[1:s]
INFO: =============================
INFO: JOBID=1
INFO: RESOURCES=ALLOCATED
INFO: SCHEDULED AT=Now
INFO: =============================
------------------------core2[1:x]
---------------------L1cache2[32:s]
------------------L2cache2[256:s]
---------------L3cache0[20480:s]
------------socket0[1:s]
---------numanode0[1:s]
------------core2[1:x]
---------socket0[1:s]
------cab1251[1:s]
---cluster0[1:s]
INFO: =============================
INFO: JOBID=2
INFO: RESOURCES=ALLOCATED
INFO: SCHEDULED AT=Now
INFO: =============================
------------------------core1[1:x]
---------------------L1cache1[32:s]
------------------L2cache1[256:s]
---------------L3cache0[20480:s]
------------socket0[1:s]
---------numanode0[1:s]
------------core1[1:x]
---------socket0[1:s]
------cab1251[1:s]
---cluster0[1:s]
INFO: =============================
INFO: JOBID=3
INFO: RESOURCES=ALLOCATED
INFO: SCHEDULED AT=Now
INFO: =============================
------------------------core0[1:x]
---------------------L1cache0[32:s]
------------------L2cache0[256:s]
---------------L3cache0[20480:s]
------------socket0[1:s]
---------numanode0[1:s]
------------core0[1:x]
---------socket0[1:s]
------cab1251[1:s]
---cluster0[1:s]
INFO: =============================
Expand Down
14 changes: 6 additions & 8 deletions t/data/resource/expected/basics/018.R.out
Original file line number Diff line number Diff line change
@@ -1,18 +1,16 @@
---------------gpu2[1:x]
---------------gpu3[1:x]
------------numanode1[1:s]
---------group0[1:s]
------------gpu2[1:x]
------------gpu3[1:x]
---------socket1[1:s]
------sierra3682[1:s]
---cluster0[1:s]
INFO: =============================
INFO: JOBID=1
INFO: RESOURCES=ALLOCATED
INFO: SCHEDULED AT=Now
INFO: =============================
---------------gpu0[1:x]
---------------gpu1[1:x]
------------numanode0[1:s]
---------group0[1:s]
------------gpu0[1:x]
------------gpu1[1:x]
---------socket0[1:s]
------sierra3682[1:s]
---cluster0[1:s]
INFO: =============================
Expand Down
12 changes: 4 additions & 8 deletions t/data/resource/expected/basics/019.R.out
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
---------------gpu2[1:x]
------------numanode5[1:x]
---------------gpu3[1:x]
------------numanode7[1:x]
------------gpu2[1:x]
------------gpu3[1:x]
---------socket1[1:s]
------corona11[1:s]
---cluster0[1:s]
Expand All @@ -10,10 +8,8 @@ INFO: JOBID=1
INFO: RESOURCES=ALLOCATED
INFO: SCHEDULED AT=Now
INFO: =============================
---------------gpu0[1:x]
------------numanode1[1:x]
---------------gpu1[1:x]
------------numanode2[1:x]
------------gpu0[1:x]
------------gpu1[1:x]
---------socket0[1:s]
------corona11[1:s]
---cluster0[1:s]
Expand Down
6 changes: 2 additions & 4 deletions t/data/resource/expected/basics/020.R.out
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
---------------gpu0[1:x]
------------numanode0[1:x]
---------------gpu0[1:x]
------------numanode1[1:x]
------------gpu0[1:x]
------------gpu0[1:x]
---------socket0[1:s]
------chimera[1:s]
---cluster0[1:s]
Expand Down
8 changes: 4 additions & 4 deletions t/data/resource/expected/basics/021.R.out
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@
------------core41[1:x]
------------core42[1:x]
------------core43[1:x]
------------gpu2[1:x]
------------gpu3[1:x]
---------socket1[1:x]
---------gpu2[1:x]
---------gpu3[1:x]
------sierra3682[1:s]
---cluster0[1:s]
INFO: =============================
Expand Down Expand Up @@ -52,9 +52,9 @@ INFO: =============================
------------core19[1:x]
------------core20[1:x]
------------core21[1:x]
------------gpu0[1:x]
------------gpu1[1:x]
---------socket0[1:x]
---------gpu0[1:x]
---------gpu1[1:x]
------sierra3682[1:s]
---cluster0[1:s]
INFO: =============================
Expand Down
5 changes: 1 addition & 4 deletions t/data/resource/jobspecs/basics/test009.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,8 @@ resources:
count: 2
label: default
with:
- type: numanode
- type: socket
count: 1
with:
- type: socket
count: 1

# a comment
attributes:
Expand Down
10 changes: 5 additions & 5 deletions t/data/resource/jobspecs/basics/test012.yaml
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
version: 1
resources:
- type: slot
count: 2
label: default
- type: socket
count: 1
with:
- type: numanode
count: 1
- type: slot
count: 2
label: default
with:
- type: gpu
count: 1
Expand Down
4 changes: 2 additions & 2 deletions t/data/resource/jobspecs/basics/test013.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ resources:
with:
- type: core
count: 22
- type: gpu
count: 2
- type: gpu
count: 2
# a comment
attributes:
system:
Expand Down
7 changes: 4 additions & 3 deletions t/scripts/flux-ion-resource.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#!/bin/false
#
# Run script as `flux ion-resource` with properly configured
# FLUX_EXEC_PATH or `flux python flux-ion-resource` if not to
Expand Down Expand Up @@ -78,7 +79,7 @@ def rpc_get_property (self, gp_resource_path, gp_key):
"""
def match_alloc_action (args):
with open (args.jobspec, 'r') as stream:
jobspec_str = yaml.dump (yaml.load (stream))
jobspec_str = yaml.dump (yaml.safe_load (stream))
SteVwonder marked this conversation as resolved.
Show resolved Hide resolved
r = ResourceModuleInterface ()
resp = r.rpc_allocate (r.rpc_next_jobid (), jobspec_str)
print (heading ())
Expand All @@ -92,7 +93,7 @@ def match_alloc_action (args):
"""
def match_alloc_sat_action (args):
with open (args.jobspec, 'r') as stream:
jobspec_str = yaml.dump (yaml.load (stream))
jobspec_str = yaml.dump (yaml.safe_load (stream))
r = ResourceModuleInterface ()
resp = r.rpc_allocate_with_satisfiability (r.rpc_next_jobid (),
jobspec_str)
Expand All @@ -107,7 +108,7 @@ def match_alloc_sat_action (args):
"""
def match_reserve_action (args):
with open (args.jobspec, 'r') as stream:
jobspec_str = yaml.dump (yaml.load (stream))
jobspec_str = yaml.dump (yaml.safe_load (stream))
r = ResourceModuleInterface ()
resp = r.rpc_reserve (r.rpc_next_jobid (), jobspec_str)
print (heading ())
Expand Down
1 change: 1 addition & 0 deletions t/scripts/flux-jsonschemalint.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#!/bin/false
#
# Run script as `flux jsonschemalint` with properly configured
# FLUX_EXEC_PATH or `flux python flux-jsonschemalint` if not to
Expand Down
4 changes: 2 additions & 2 deletions t/t2000-tree-basic.t
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,9 @@ test_expect_success 'flux-tree: multi cmdline works' '
FLUX_TREE_NCORES_PER_NODE=1
FLUX_TREE_NGPUS_PER_NODE=0
FLUX_TREE_NNODES=1
eval python hostname.py
eval flux python hostname.py
EOF
flux tree --dry-run --leaf -N 1 -c 1 python hostname.py > out.01.2 &&
flux tree --dry-run --leaf -N 1 -c 1 flux python hostname.py > out.01.2 &&
remove_prefix out.01.2 out.01.2.a &&
sed -i "s/[ \t]*$//g" out.01.2.a &&
test_cmp cmp.01.2 out.01.2.a
Expand Down
Loading