From 3e63a3458c2077c07e94c763fae127f71614d627 Mon Sep 17 00:00:00 2001 From: Todd Kordenbrock Date: Thu, 24 Sep 2015 19:19:06 -0500 Subject: [PATCH] portals4: add support for dynamic add_procs() to all Portals4 components In the default mode of operation, the Portals4 components support dynamic add_procs(). The Portals4 components have two alternate modes (flow control and logical-to-physical) that require knowledge of all procs at startup. In these modes, mtl-portals4 sets the MCA_MTL_BASE_FLAG_REQUIRE_WORLD flag and btl-portals4 sets the MCA_BTL_FLAGS_SINGLE_ADD_PROCS flag to tell the PML that we need all the procs in one add_procs() call. --- ompi/mca/coll/portals4/coll_portals4.h | 8 +- ompi/mca/mtl/portals4/mtl_portals4.c | 282 +++++++++++++----- ompi/mca/mtl/portals4/mtl_portals4.h | 31 ++ .../mca/mtl/portals4/mtl_portals4_component.c | 8 + ompi/mca/mtl/portals4/mtl_portals4_endpoint.h | 10 + ompi/mca/mtl/portals4/mtl_portals4_probe.c | 5 +- ompi/mca/mtl/portals4/mtl_portals4_recv.c | 3 +- ompi/mca/mtl/portals4/mtl_portals4_send.c | 3 +- ompi/mca/osc/portals4/osc_portals4.h | 12 +- .../osc/portals4/osc_portals4_active_target.c | 2 - ompi/mca/osc/portals4/osc_portals4_comm.c | 2 - .../portals4/osc_portals4_passive_target.c | 2 - opal/mca/btl/portals4/btl_portals4.c | 267 ++++++++++++----- opal/mca/btl/portals4/btl_portals4.h | 3 + .../mca/btl/portals4/btl_portals4_component.c | 11 + 15 files changed, 483 insertions(+), 166 deletions(-) diff --git a/ompi/mca/coll/portals4/coll_portals4.h b/ompi/mca/coll/portals4/coll_portals4.h index 9aa370dc13a..c23b4d088b2 100644 --- a/ompi/mca/coll/portals4/coll_portals4.h +++ b/ompi/mca/coll/portals4/coll_portals4.h @@ -29,6 +29,8 @@ #include "ompi/communicator/communicator.h" #include "ompi/mca/coll/base/base.h" +#include "ompi/mca/mtl/portals4/mtl_portals4.h" + BEGIN_C_DECLS #define COLL_PORTALS4_NO_OP ((ptl_op_t)-1) @@ -178,11 +180,7 @@ ompi_coll_portals4_iallreduce_intra_fini(struct ompi_coll_portals4_request_t *re static inline ptl_process_t ompi_coll_portals4_get_peer(struct ompi_communicator_t *comm, int rank) { - ompi_proc_t *proc = ompi_comm_peer_lookup(comm, rank); - if (proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4] == NULL) { - printf("ompi_coll_portals4_get_peer failure\n"); - } - return *((ptl_process_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]); + return ompi_mtl_portals4_get_peer(comm, rank); } diff --git a/ompi/mca/mtl/portals4/mtl_portals4.c b/ompi/mca/mtl/portals4/mtl_portals4.c index e80e3fe2c27..41a9a6d6652 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4.c +++ b/ompi/mca/mtl/portals4/mtl_portals4.c @@ -22,6 +22,7 @@ #include +#include "ompi/communicator/communicator.h" #include "ompi/proc/proc.h" #include "ompi/mca/mtl/mtl.h" #include "opal/class/opal_list.h" @@ -241,45 +242,26 @@ portals4_init_interface(void) return OMPI_ERROR; } -int -ompi_mtl_portals4_add_procs(struct mca_mtl_base_module_t *mtl, - size_t nprocs, - struct ompi_proc_t** procs) +static int +create_maptable(size_t nprocs, + ompi_proc_t **procs) { - int ret, me; + int ret; size_t i; - bool new_found = false; ptl_process_t *maptable; - if (ompi_mtl_portals4.use_logical) { - maptable = malloc(sizeof(ptl_process_t) * nprocs); - if (NULL == maptable) { - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "%s:%d: malloc failed\n", - __FILE__, __LINE__); - return OMPI_ERR_OUT_OF_RESOURCE; - } + maptable = malloc(sizeof(ptl_process_t) * nprocs); + if (NULL == maptable) { + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "%s:%d: malloc failed\n", + __FILE__, __LINE__); + return OMPI_ERR_OUT_OF_RESOURCE; } - /* Get the list of ptl_process_id_t from the runtime and copy into structure */ - for (i = 0 ; i < nprocs ; ++i) { + for (i=0;isuper.proc_arch != ompi_proc_local()->super.proc_arch) { - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "Portals 4 MTL does not support heterogeneous operations."); - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "Proc %s architecture %x, mine %x.", - OMPI_NAME_PRINT(&procs[i]->super.proc_name), - procs[i]->super.proc_arch, ompi_proc_local()->super.proc_arch); - return OMPI_ERR_NOT_SUPPORTED; - } - OPAL_MODEX_RECV(ret, &mca_mtl_portals4_component.mtl_version, &procs[i]->super.proc_name, (uint8_t**)&modex_id, &size); if (OMPI_SUCCESS != ret) { @@ -294,40 +276,159 @@ ompi_mtl_portals4_add_procs(struct mca_mtl_base_module_t *mtl, return OMPI_ERR_BAD_PARAM; } - if (NULL == procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]) { - ptl_process_t *peer_id; - peer_id = malloc(sizeof(ptl_process_t)); - if (NULL == peer_id) { + maptable[i].phys.pid = modex_id->phys.pid; + maptable[i].phys.nid = modex_id->phys.nid; + opal_output_verbose(50, ompi_mtl_base_framework.framework_output, + "logical: global rank=%d pid=%d nid=%d\n", + (int)i, maptable[i].phys.pid, maptable[i].phys.nid); + } + + ret = PtlSetMap(ompi_mtl_portals4.ni_h, nprocs, maptable); + if (OMPI_SUCCESS != ret) { + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "%s:%d: logical mapping failed: %d\n", + __FILE__, __LINE__, ret); + return ret; + } + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "logical mapping OK\n"); + + free(maptable); + + return OMPI_SUCCESS; +} + +static int +create_endpoint(ompi_proc_t *proc) +{ + ptl_process_t *endpoint; + + endpoint = malloc(sizeof(ptl_process_t)); + if (NULL == endpoint) { + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "%s:%d: malloc failed: %s\n", + __FILE__, __LINE__, strerror(errno)); + return OMPI_ERR_OUT_OF_RESOURCE; + } else { + if (ompi_mtl_portals4.use_logical) { + endpoint->rank = proc->super.proc_name.vpid; + } else { + int ret; + ptl_process_t *modex_id; + size_t size; + + OPAL_MODEX_RECV(ret, &mca_mtl_portals4_component.mtl_version, + &proc->super.proc_name, (uint8_t**)&modex_id, &size); + if (OMPI_SUCCESS != ret) { + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "%s:%d: ompi_modex_recv failed: %d\n", + __FILE__, __LINE__, ret); + return ret; + } else if (sizeof(ptl_process_t) != size) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "%s:%d: malloc failed: %d\n", + "%s:%d: ompi_modex_recv failed (size mismatch): %d\n", __FILE__, __LINE__, ret); - return OMPI_ERR_OUT_OF_RESOURCE; + return OMPI_ERR_BAD_PARAM; } - if (ompi_mtl_portals4.use_logical) { - peer_id->rank = i; - maptable[i].phys.pid = modex_id->phys.pid; - maptable[i].phys.nid = modex_id->phys.nid; - opal_output_verbose(50, ompi_mtl_base_framework.framework_output, - "logical: global rank=%d pid=%d nid=%d\n", - (int)i, maptable[i].phys.pid, maptable[i].phys.nid); - } else { - *peer_id = *modex_id; + + *endpoint = *modex_id; + } + } + + proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4] = endpoint; + + return OMPI_SUCCESS; +} + +ompi_proc_t * +ompi_mtl_portals4_get_proc_group(struct ompi_group_t *group, int rank) +{ + int ret; + + ompi_proc_t *proc = ompi_group_peer_lookup (group, rank); + if (NULL == proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]) { + ret = create_endpoint(proc); + if (OMPI_SUCCESS != ret) { + return NULL; + } +#if 0 + } else { + /* + * sanity check + */ + int ret; + ptl_process_t *modex_id; + size_t size; + + OPAL_MODEX_RECV(ret, &mca_mtl_portals4_component.mtl_version, + &proc->super.proc_name, (uint8_t**)&modex_id, &size); + + ptl_process_t *peer = (ptl_process_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]; + if (ompi_mtl_portals4.use_logical) { + if ((size_t)peer->rank != proc->super.proc_name.vpid) { + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "%s:%d: existing peer and rank don't match\n", + __FILE__, __LINE__); + return OMPI_ERROR; } + } + else if (peer->phys.nid != modex_id->phys.nid || + peer->phys.pid != modex_id->phys.pid) { + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "%s:%d: existing peer and modex peer don't match\n", + __FILE__, __LINE__); + return OMPI_ERROR; + } +#endif + } - procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4] = peer_id; + return proc; +} + +static int +add_endpoints(size_t nprocs, + ompi_proc_t **procs) +{ + int ret; + size_t i; - new_found = true; + /* Get the list of ptl_process_id_t from the runtime and copy into structure */ + for (i = 0 ; i < nprocs ; ++i) { + if (procs[i]->super.proc_arch != ompi_proc_local()->super.proc_arch) { + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "Portals 4 MTL does not support heterogeneous operations."); + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "Proc %s architecture %x, mine %x.", + OMPI_NAME_PRINT(&procs[i]->super.proc_name), + procs[i]->super.proc_arch, ompi_proc_local()->super.proc_arch); + return OMPI_ERR_NOT_SUPPORTED; + } + + if (NULL == procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]) { + ret = create_endpoint(procs[i]); + if (OMPI_SUCCESS != ret) { + return ret; + } +#if 0 } else { + /* + * sanity check + */ + int ret; + ptl_process_t *modex_id; + size_t size; + + OPAL_MODEX_RECV(ret, &mca_mtl_portals4_component.mtl_version, + &procs[i]->super.proc_name, (uint8_t**)&modex_id, &size); + ptl_process_t *proc = (ptl_process_t*) procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]; if (ompi_mtl_portals4.use_logical) { - if ((size_t)proc->rank != i) { + if ((size_t)proc->rank != procs[i]->super.proc_name.vpid) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: existing peer and rank don't match\n", __FILE__, __LINE__); return OMPI_ERROR; } - maptable[i].phys.pid = modex_id->phys.pid; - maptable[i].phys.nid = modex_id->phys.nid; } else if (proc->phys.nid != modex_id->phys.nid || proc->phys.pid != modex_id->phys.pid) { @@ -336,45 +437,82 @@ ompi_mtl_portals4_add_procs(struct mca_mtl_base_module_t *mtl, __FILE__, __LINE__); return OMPI_ERROR; } +#endif } } - if (ompi_mtl_portals4.use_logical) { - ret = PtlSetMap(ompi_mtl_portals4.ni_h, nprocs, maptable); - if (OMPI_SUCCESS != ret) { - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "%s:%d: logical mapping failed: %d\n", - __FILE__, __LINE__, ret); - return ret; - } - opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "logical mapping OK\n"); - free(maptable); - } + return OMPI_SUCCESS; +} - portals4_init_interface(); +#define NEED_ALL_PROCS (ompi_mtl_portals4.use_logical || ompi_mtl_portals4.use_flowctl) - /* activate progress callback */ - ret = opal_progress_register(ompi_mtl_portals4_progress); +int +ompi_mtl_portals4_add_procs(struct mca_mtl_base_module_t *mtl, + size_t nprocs, + struct ompi_proc_t** procs) +{ + int ret; + + /* + * The PML handed us a list of procs that need Portals4 + * peer info. Complete those procs here. + */ + ret = add_endpoints(nprocs, + procs); if (OMPI_SUCCESS != ret) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, - "%s:%d: opal_progress_register failed: %d\n", + "%s:%d: add_endpoints failed: %d\n", __FILE__, __LINE__, ret); return ret; } + if (1 == ompi_mtl_portals4.need_init) { + if (1 == ompi_mtl_portals4.use_logical) { + ret = create_maptable(nprocs, procs); + if (OMPI_SUCCESS != ret) { + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "%s:%d: ompi_mtl_portals4_add_procs::create_maptable() failed: %d\n", + __FILE__, __LINE__, ret); + return ret; + } + } + + /* + * This is the first time through here. Initialize + * Portals4 and register the progress thread. + */ + portals4_init_interface(); + + /* activate progress callback */ + ret = opal_progress_register(ompi_mtl_portals4_progress); + if (OMPI_SUCCESS != ret) { + opal_output_verbose(1, ompi_mtl_base_framework.framework_output, + "%s:%d: opal_progress_register failed: %d\n", + __FILE__, __LINE__, ret); + return ret; + } + #if OMPI_MTL_PORTALS4_FLOW_CONTROL - if (new_found) { - ret = ompi_mtl_portals4_flowctl_add_procs(me, nprocs, procs); + opal_output_verbose(50, ompi_mtl_base_framework.framework_output, + "add_procs() - me=%d\n", ompi_proc_local_proc->super.proc_name.vpid); + + opal_output_verbose(50, ompi_mtl_base_framework.framework_output, + "add_procs() - adding flowctl procs\n"); + + ret = ompi_mtl_portals4_flowctl_add_procs(ompi_proc_local_proc->super.proc_name.vpid, + nprocs, + procs); if (OMPI_SUCCESS != ret) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: flowctl_add_procs failed: %d\n", __FILE__, __LINE__, ret); return ret; } - } #endif + ompi_mtl_portals4.need_init = 0; + } + return OMPI_SUCCESS; } @@ -386,6 +524,9 @@ ompi_mtl_portals4_del_procs(struct mca_mtl_base_module_t *mtl, { size_t i; + opal_output_verbose(50, ompi_mtl_base_framework.framework_output, + "del_procs() - enter\n"); + for (i = 0 ; i < nprocs ; ++i) { if (NULL != procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]) { free(procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]); @@ -393,6 +534,9 @@ ompi_mtl_portals4_del_procs(struct mca_mtl_base_module_t *mtl, } } + opal_output_verbose(50, ompi_mtl_base_framework.framework_output, + "del_procs() - exit\n"); + return OMPI_SUCCESS; } diff --git a/ompi/mca/mtl/portals4/mtl_portals4.h b/ompi/mca/mtl/portals4/mtl_portals4.h index 6d7fd596d56..88da025796d 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4.h +++ b/ompi/mca/mtl/portals4/mtl_portals4.h @@ -26,9 +26,12 @@ #include "opal/class/opal_free_list.h" #include "opal/class/opal_list.h" #include "opal/datatype/opal_convertor.h" +#include "ompi/proc/proc.h" #include "ompi/mca/mtl/mtl.h" #include "ompi/mca/mtl/base/base.h" +#include "ompi/communicator/communicator.h" + #include "mtl_portals4_flowctl.h" BEGIN_C_DECLS @@ -38,8 +41,13 @@ struct mca_mtl_portals4_send_request_t; struct mca_mtl_portals4_module_t { mca_mtl_base_module_t base; + /* add_procs() can get called multiple times. this prevents multiple calls to portals4_init_interface(). */ + int need_init; + /* Use the logical to physical table to accelerate portals4 adressing: 1 (true) : 0 (false) */ int use_logical; + /* Use flow control: 1 (true) : 0 (false) */ + int use_flowctl; /** Eager limit; messages greater than this use a rendezvous protocol */ unsigned long long eager_limit; @@ -209,6 +217,29 @@ extern mca_mtl_portals4_module_t ompi_mtl_portals4; #define MTL_PORTALS4_IS_SYNC_MSG(hdr_data) \ (0 != (MTL_PORTALS4_SYNC_MSG & hdr_data)) +/* mtl-portals4 helpers */ +OMPI_DECLSPEC ompi_proc_t * +ompi_mtl_portals4_get_proc_group(struct ompi_group_t *group, int rank); + +static inline ptl_process_t +ompi_mtl_portals4_get_peer_group(struct ompi_group_t *group, int rank) +{ + return *((ptl_process_t*)ompi_mtl_portals4_get_proc_group(group, rank)); +} + +static inline ompi_proc_t * +ompi_mtl_portals4_get_proc(struct ompi_communicator_t *comm, int rank) +{ + return ompi_mtl_portals4_get_proc_group(comm->c_remote_group, rank); +} + +static inline ptl_process_t +ompi_mtl_portals4_get_peer(struct ompi_communicator_t *comm, int rank) +{ + return *((ptl_process_t*)ompi_mtl_portals4_get_proc(comm, rank)); +} + + /* MTL interface functions */ extern int ompi_mtl_portals4_finalize(struct mca_mtl_base_module_t *mtl); diff --git a/ompi/mca/mtl/portals4/mtl_portals4_component.c b/ompi/mca/mtl/portals4/mtl_portals4_component.c index 1d4d13cbbf7..b0ee9ae9b59 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_component.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_component.c @@ -229,6 +229,14 @@ ompi_mtl_portals4_component_open(void) ompi_mtl_portals4.recv_idx = (ptl_pt_index_t) ~0UL; ompi_mtl_portals4.read_idx = (ptl_pt_index_t) ~0UL; + ompi_mtl_portals4.need_init=1; + +#if OMPI_MTL_PORTALS4_FLOW_CONTROL + ompi_mtl_portals4.use_flowctl=1; +#else + ompi_mtl_portals4.use_flowctl=0; +#endif + return OMPI_SUCCESS; } diff --git a/ompi/mca/mtl/portals4/mtl_portals4_endpoint.h b/ompi/mca/mtl/portals4/mtl_portals4_endpoint.h index 41d27246a52..5670c908a8c 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_endpoint.h +++ b/ompi/mca/mtl/portals4/mtl_portals4_endpoint.h @@ -25,4 +25,14 @@ struct mca_mtl_base_endpoint_t { }; typedef struct mca_mtl_base_endpoint_t mca_mtl_base_endpoint_t; +static inline mca_mtl_base_endpoint_t * +ompi_mtl_portals4_get_endpoint (struct mca_mtl_base_module_t* mtl, ompi_proc_t *ompi_proc) +{ + if (OPAL_UNLIKELY(NULL == ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4])) { + ompi_mtl_portals4_add_procs (mtl, 1, &ompi_proc); + } + + return ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]; +} + #endif diff --git a/ompi/mca/mtl/portals4/mtl_portals4_probe.c b/ompi/mca/mtl/portals4/mtl_portals4_probe.c index c58813edf3d..fbeda2124e7 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_probe.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_probe.c @@ -22,6 +22,7 @@ #include "ompi/message/message.h" #include "mtl_portals4.h" +#include "mtl_portals4_endpoint.h" #include "mtl_portals4_request.h" #include "mtl_portals4_message.h" @@ -78,7 +79,7 @@ ompi_mtl_portals4_iprobe(struct mca_mtl_base_module_t* mtl, remote_proc.rank = src; } else { ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, src ); - remote_proc = *((ptl_process_t*) ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]); + remote_proc = *((ptl_process_t*) ompi_mtl_portals4_get_endpoint (mtl, ompi_proc)); } MTL_PORTALS4_SET_RECV_BITS(match_bits, ignore_bits, comm->c_contextid, @@ -156,7 +157,7 @@ ompi_mtl_portals4_improbe(struct mca_mtl_base_module_t *mtl, remote_proc.rank = src; } else { ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, src ); - remote_proc = *((ptl_process_t*) ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]); + remote_proc = *((ptl_process_t*) ompi_mtl_portals4_get_endpoint (mtl, ompi_proc)); } MTL_PORTALS4_SET_RECV_BITS(match_bits, ignore_bits, comm->c_contextid, diff --git a/ompi/mca/mtl/portals4/mtl_portals4_recv.c b/ompi/mca/mtl/portals4/mtl_portals4_recv.c index 190fbf7581f..de4b4834533 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_recv.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_recv.c @@ -29,6 +29,7 @@ #include "ompi/message/message.h" #include "mtl_portals4.h" +#include "mtl_portals4_endpoint.h" #include "mtl_portals4_request.h" #include "mtl_portals4_recv_short.h" #include "mtl_portals4_message.h" @@ -367,7 +368,7 @@ ompi_mtl_portals4_irecv(struct mca_mtl_base_module_t* mtl, remote_proc.rank = src; } else { ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, src ); - remote_proc = *((ptl_process_t*) ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]); + remote_proc = *((ptl_process_t*) ompi_mtl_portals4_get_endpoint (mtl, ompi_proc)); } MTL_PORTALS4_SET_RECV_BITS(match_bits, ignore_bits, comm->c_contextid, diff --git a/ompi/mca/mtl/portals4/mtl_portals4_send.c b/ompi/mca/mtl/portals4/mtl_portals4_send.c index 636584e86c4..4ee2e775322 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_send.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_send.c @@ -28,6 +28,7 @@ #include "ompi/mca/mtl/base/mtl_base_datatype.h" #include "mtl_portals4.h" +#include "mtl_portals4_endpoint.h" #include "mtl_portals4_request.h" #if OMPI_MTL_PORTALS4_FLOW_CONTROL #include "mtl_portals4_flowctl.h" @@ -405,7 +406,7 @@ ompi_mtl_portals4_send_start(struct mca_mtl_base_module_t* mtl, ptl_proc.rank = dest; } else { ompi_proc_t *ompi_proc = ompi_comm_peer_lookup(comm, dest); - ptl_proc = *((ptl_process_t*) ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]); + ptl_proc = *((ptl_process_t*) ompi_mtl_portals4_get_endpoint (mtl, ompi_proc)); } ret = ompi_mtl_datatype_pack(convertor, &start, &length, &free_after); diff --git a/ompi/mca/osc/portals4/osc_portals4.h b/ompi/mca/osc/portals4/osc_portals4.h index fcba31ffad1..4eb7eec0f6d 100644 --- a/ompi/mca/osc/portals4/osc_portals4.h +++ b/ompi/mca/osc/portals4/osc_portals4.h @@ -19,6 +19,8 @@ #include "ompi/group/group.h" #include "ompi/communicator/communicator.h" +#include "ompi/mca/mtl/portals4/mtl_portals4.h" + #define OSC_PORTALS4_MB_DATA 0x0000000000000000ULL #define OSC_PORTALS4_MB_CONTROL 0x1000000000000000ULL @@ -290,17 +292,15 @@ ompi_osc_portals4_complete_all(ompi_osc_portals4_module_t *module) } static inline ptl_process_t -ompi_osc_portals4_get_peer(ompi_osc_portals4_module_t *module, int rank) +ompi_osc_portals4_get_peer_group(struct ompi_group_t *group, int rank) { - ompi_proc_t *proc = ompi_comm_peer_lookup(module->comm, rank); - return *((ptl_process_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]); + return ompi_mtl_portals4_get_peer_group(group, rank); } static inline ptl_process_t -ompi_osc_portals4_get_peer_group(struct ompi_group_t *group, int rank) +ompi_osc_portals4_get_peer(ompi_osc_portals4_module_t *module, int rank) { - ompi_proc_t *proc = ompi_group_get_proc_ptr(group, rank, true); - return *((ptl_process_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4]); + return ompi_osc_portals4_get_peer_group(module->comm->c_remote_group, rank); } #endif diff --git a/ompi/mca/osc/portals4/osc_portals4_active_target.c b/ompi/mca/osc/portals4/osc_portals4_active_target.c index 8d4b5057900..92b605fb15f 100644 --- a/ompi/mca/osc/portals4/osc_portals4_active_target.c +++ b/ompi/mca/osc/portals4/osc_portals4_active_target.c @@ -15,8 +15,6 @@ #include "osc_portals4.h" -#include "ompi/mca/mtl/portals4/mtl_portals4_endpoint.h" - int ompi_osc_portals4_fence(int assert, struct ompi_win_t *win) diff --git a/ompi/mca/osc/portals4/osc_portals4_comm.c b/ompi/mca/osc/portals4/osc_portals4_comm.c index 1c9f6016d1a..4978435c2b0 100644 --- a/ompi/mca/osc/portals4/osc_portals4_comm.c +++ b/ompi/mca/osc/portals4/osc_portals4_comm.c @@ -21,8 +21,6 @@ #include "osc_portals4.h" #include "osc_portals4_request.h" -#include "ompi/mca/mtl/portals4/mtl_portals4_endpoint.h" - static int ompi_osc_portals4_get_op(struct ompi_op_t *op, ptl_op_t *ptl_op) diff --git a/ompi/mca/osc/portals4/osc_portals4_passive_target.c b/ompi/mca/osc/portals4/osc_portals4_passive_target.c index e0cfc2e15e1..b39d4d904fe 100644 --- a/ompi/mca/osc/portals4/osc_portals4_passive_target.c +++ b/ompi/mca/osc/portals4/osc_portals4_passive_target.c @@ -18,8 +18,6 @@ #include "osc_portals4.h" -#include "ompi/mca/mtl/portals4/mtl_portals4_endpoint.h" - enum locktype_t { lock_nocheck, lock_exclusive, diff --git a/opal/mca/btl/portals4/btl_portals4.c b/opal/mca/btl/portals4/btl_portals4.c index 9cf02049486..6594e4c0d29 100644 --- a/opal/mca/btl/portals4/btl_portals4.c +++ b/opal/mca/btl/portals4/btl_portals4.c @@ -217,114 +217,228 @@ btl_portals4_init_interface(void) return OPAL_ERROR; } -int -mca_btl_portals4_add_procs(struct mca_btl_base_module_t* btl_base, - size_t nprocs, - struct opal_proc_t **procs, - struct mca_btl_base_endpoint_t** btl_peer_data, - opal_bitmap_t* reachable) +static int +create_endpoint(int interface, + opal_proc_t *proc, + mca_btl_base_endpoint_t **endpoint) { - struct mca_btl_portals4_module_t* portals4_btl = (struct mca_btl_portals4_module_t*) btl_base; int ret; - struct opal_proc_t *curr_proc = NULL; + size_t size; ptl_process_t *id; - size_t i, size; - bool need_activate = false; - ptl_process_t *maptable; - opal_output_verbose(50, opal_btl_base_framework.framework_output, - "mca_btl_portals4_add_procs: Adding %d procs (%d) for NI %d", (int) nprocs, - (int) portals4_btl->portals_num_procs, portals4_btl->interface_num); + OPAL_MODEX_RECV(ret, &mca_btl_portals4_component.super.btl_version, + &proc->proc_name, (void**) &id, &size); + + if (OPAL_ERR_NOT_FOUND == ret) { + OPAL_OUTPUT_VERBOSE((30, opal_btl_base_framework.framework_output, + "btl/portals4: Portals 4 BTL not available on peer: %s", opal_strerror(ret))); + return ret; + } else if (OPAL_SUCCESS != ret) { + opal_output_verbose(0, opal_btl_base_framework.framework_output, + "btl/portals4: opal_modex_recv failed: %s", opal_strerror(ret)); + return ret; + } + if (size < sizeof(ptl_process_t)) { /* no available connection */ + return OPAL_ERROR; + } + if ((size % sizeof(ptl_process_t)) != 0) { + opal_output_verbose(0, opal_btl_base_framework.framework_output, + "btl/portals4: invalid format in modex"); + return OPAL_ERROR; + } + OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, + "btl/portals4: %d NI(s) declared in the modex", (int) (size/sizeof(ptl_process_t)))); - if (mca_btl_portals4_component.use_logical) { - maptable = malloc(sizeof(ptl_process_t) * nprocs); - if (NULL == maptable) { - opal_output_verbose(1, opal_btl_base_framework.framework_output, - "%s:%d: malloc failed\n", - __FILE__, __LINE__); + *endpoint = malloc(sizeof(mca_btl_base_endpoint_t)); + if (NULL == *endpoint) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + + (*endpoint)->ptl_proc = id[interface]; + + return OPAL_SUCCESS; +} + +static int +create_peer_and_endpoint(int interface, + opal_proc_t *proc, + ptl_process_t *phys_peer, + mca_btl_base_endpoint_t **endpoint) +{ + int ret; + size_t size; + ptl_process_t *id; + + OPAL_MODEX_RECV(ret, &mca_btl_portals4_component.super.btl_version, + &proc->proc_name, (void**) &id, &size); + + if (OPAL_ERR_NOT_FOUND == ret) { + OPAL_OUTPUT_VERBOSE((30, opal_btl_base_framework.framework_output, + "btl/portals4: Portals 4 BTL not available on peer: %s", opal_strerror(ret))); + return ret; + } else if (OPAL_SUCCESS != ret) { + opal_output_verbose(0, opal_btl_base_framework.framework_output, + "btl/portals4: opal_modex_recv failed: %s", opal_strerror(ret)); + return ret; + } + if (size < sizeof(ptl_process_t)) { /* no available connection */ + return OPAL_ERROR; + } + if ((size % sizeof(ptl_process_t)) != 0) { + opal_output_verbose(0, opal_btl_base_framework.framework_output, + "btl/portals4: invalid format in modex"); + return OPAL_ERROR; + } + OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, + "btl/portals4: %d NI(s) declared in the modex", (int) (size/sizeof(ptl_process_t)))); + + if (NULL == *endpoint) { + *endpoint = malloc(sizeof(mca_btl_base_endpoint_t)); + if (NULL == *endpoint) { return OPAL_ERR_OUT_OF_RESOURCE; } + (*endpoint)->ptl_proc.rank = proc->proc_name.vpid; } - if (0 == portals4_btl->portals_num_procs) { - need_activate = true; + phys_peer->phys.pid = id[interface].phys.pid; + phys_peer->phys.nid = id[interface].phys.nid; + opal_output_verbose(50, opal_btl_base_framework.framework_output, + "logical: global rank=%d pid=%d nid=%d\n", + proc->proc_name.vpid, phys_peer->phys.pid, phys_peer->phys.nid); + + return OPAL_SUCCESS; +} + +static int +create_maptable(struct mca_btl_portals4_module_t *portals4_btl, + size_t nprocs, + opal_proc_t **procs, + mca_btl_base_endpoint_t **endpoint) +{ + int ret; + ptl_process_t *maptable; + + maptable = malloc(sizeof(ptl_process_t) * nprocs); + if (NULL == maptable) { + opal_output_verbose(1, opal_btl_base_framework.framework_output, + "%s:%d: malloc failed\n", + __FILE__, __LINE__); + return OPAL_ERR_OUT_OF_RESOURCE; } - for (i = 0 ; i < nprocs ; ++i) { + for (uint32_t i = 0 ; i < nprocs ; i++) { + struct opal_proc_t *curr_proc; + curr_proc = procs[i]; /* portals doesn't support heterogeneous yet... */ if (opal_proc_local_get()->proc_arch != curr_proc->proc_arch) { - continue; + opal_output_verbose(1, opal_btl_base_framework.framework_output, + "Portals 4 BTL does not support heterogeneous operations."); + opal_output_verbose(1, opal_btl_base_framework.framework_output, + "Proc %s architecture %x, mine %x.", + OPAL_NAME_PRINT(curr_proc->proc_name), + curr_proc->proc_arch, opal_proc_local_get()->proc_arch); + return OPAL_ERR_NOT_SUPPORTED; } - OPAL_MODEX_RECV(ret, &mca_btl_portals4_component.super.btl_version, - &curr_proc->proc_name, (void**) &id, &size); - - if (OPAL_ERR_NOT_FOUND == ret) { - OPAL_OUTPUT_VERBOSE((30, opal_btl_base_framework.framework_output, - "btl/portals4: Portals 4 BTL not available on peer: %s", opal_strerror(ret))); - continue; - } else if (OPAL_SUCCESS != ret) { - opal_output_verbose(0, opal_btl_base_framework.framework_output, - "btl/portals4: opal_modex_recv failed: %s", opal_strerror(ret)); + ret = create_peer_and_endpoint(portals4_btl->interface_num, + curr_proc, + &maptable[i], + &endpoint[i]); + if (OPAL_SUCCESS != ret) { + opal_output_verbose(1, opal_btl_base_framework.framework_output, + "%s:%d: create_maptable::create_peer_and_endpoint failed: %d\n", + __FILE__, __LINE__, ret); return ret; } - if (size < sizeof(ptl_process_t)) { /* no available connection */ - return OPAL_ERROR; - } - if ((size % sizeof(ptl_process_t)) != 0) { - opal_output_verbose(0, opal_btl_base_framework.framework_output, - "btl/portals4: invalid format in modex"); - return OPAL_ERROR; - } - OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, - "btl/portals4: %d NI(s) declared in the modex", (int) (size/sizeof(ptl_process_t)))); + } - btl_peer_data[i] = malloc(sizeof(mca_btl_base_endpoint_t)); - if (NULL == btl_peer_data[i]) return OPAL_ERROR; + ret = PtlSetMap(portals4_btl->portals_ni_h, + nprocs, + maptable); + if (OPAL_SUCCESS != ret) { + opal_output_verbose(1, opal_btl_base_framework.framework_output, + "%s:%d: logical mapping failed: %d\n", + __FILE__, __LINE__, ret); + return ret; + } + opal_output_verbose(90, opal_btl_base_framework.framework_output, + "logical mapping OK\n"); + free(maptable); - /* The modex may receive more than one id (this is the - normal case if there is more than one interface). Store the id of the corresponding - interface */ + return OPAL_SUCCESS; +} - if (mca_btl_portals4_component.use_logical) { - btl_peer_data[i]->ptl_proc.rank = i; - maptable[i].phys.pid = id[portals4_btl->interface_num].phys.pid; - maptable[i].phys.nid = id[portals4_btl->interface_num].phys.nid; - opal_output_verbose(50, opal_btl_base_framework.framework_output, - "logical: global rank=%d pid=%d nid=%d\n", - (int)i, maptable[i].phys.pid, maptable[i].phys.nid); - } else { - btl_peer_data[i]->ptl_proc = id[portals4_btl->interface_num]; +#define NEED_ALL_PROCS (mca_btl_portals4_component.use_logical) + +int +mca_btl_portals4_add_procs(struct mca_btl_base_module_t* btl_base, + size_t nprocs, + struct opal_proc_t **procs, + struct mca_btl_base_endpoint_t** btl_peer_data, + opal_bitmap_t* reachable) +{ + struct mca_btl_portals4_module_t* portals4_btl = (struct mca_btl_portals4_module_t*) btl_base; + int ret; + size_t i; + bool need_activate = false; + + opal_output_verbose(50, opal_btl_base_framework.framework_output, + "mca_btl_portals4_add_procs: Adding %d procs (%d) for NI %d", + (int) nprocs, + (int) portals4_btl->portals_num_procs, + portals4_btl->interface_num); + + if (0 == portals4_btl->portals_num_procs) { + need_activate = true; + } + + /* + * The PML handed us a list of procs that need Portals4 + * peer info. Complete those procs here. + */ + for (i = 0 ; i < nprocs ; ++i) { + struct opal_proc_t *curr_proc = procs[i]; + + /* portals doesn't support heterogeneous yet... */ + if (opal_proc_local_get()->proc_arch != curr_proc->proc_arch) { + opal_output_verbose(1, opal_btl_base_framework.framework_output, + "Portals 4 BTL does not support heterogeneous operations."); + opal_output_verbose(1, opal_btl_base_framework.framework_output, + "Proc %s architecture %x, mine %x.", + OPAL_NAME_PRINT(curr_proc->proc_name), + curr_proc->proc_arch, opal_proc_local_get()->proc_arch); + return OPAL_ERR_NOT_SUPPORTED; } - OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, - "add_procs: rank=%x nid=%x pid=%x for NI %d\n", - btl_peer_data[i]->ptl_proc.rank, - btl_peer_data[i]->ptl_proc.phys.nid, - btl_peer_data[i]->ptl_proc.phys.pid, - portals4_btl->interface_num)); + ret = create_endpoint(portals4_btl->interface_num, + curr_proc, + &btl_peer_data[i]); OPAL_THREAD_ADD32(&portals4_btl->portals_num_procs, 1); /* and here we can reach */ opal_bitmap_set_bit(reachable, i); - } - if (mca_btl_portals4_component.use_logical) { - ret = PtlSetMap(portals4_btl->portals_ni_h, nprocs, maptable); - if (OPAL_SUCCESS != ret) { - opal_output_verbose(1, opal_btl_base_framework.framework_output, - "%s:%d: logical mapping failed: %d\n", - __FILE__, __LINE__, ret); - return ret; - } - opal_output_verbose(90, opal_btl_base_framework.framework_output, - "logical mapping OK\n"); - free(maptable); + OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, + "add_procs: rank=%x nid=%x pid=%x for NI %d\n", + i, + btl_peer_data[i]->ptl_proc.phys.nid, + btl_peer_data[i]->ptl_proc.phys.pid, + portals4_btl->interface_num)); } if (need_activate && portals4_btl->portals_num_procs > 0) { + if (mca_btl_portals4_component.use_logical) { + ret = create_maptable(portals4_btl, nprocs, procs, btl_peer_data); + if (OPAL_SUCCESS != ret) { + opal_output_verbose(1, opal_btl_base_framework.framework_output, + "%s:%d: mca_btl_portals4_add_procs::create_maptable() failed: %d\n", + __FILE__, __LINE__, ret); + return ret; + } + } + ret = btl_portals4_init_interface(); if (OPAL_SUCCESS != ret) { opal_output_verbose(1, opal_btl_base_framework.framework_output, @@ -333,6 +447,7 @@ mca_btl_portals4_add_procs(struct mca_btl_base_module_t* btl_base, return ret; } } + return OPAL_SUCCESS; } diff --git a/opal/mca/btl/portals4/btl_portals4.h b/opal/mca/btl/portals4/btl_portals4.h index 42f39d081d5..81bc75735b1 100644 --- a/opal/mca/btl/portals4/btl_portals4.h +++ b/opal/mca/btl/portals4/btl_portals4.h @@ -48,6 +48,9 @@ struct mca_btl_portals4_component_t { struct mca_btl_portals4_module_t** btls; /* array of available BTL modules */ + /* add_procs() can get called multiple times. this prevents multiple calls to portals4_init_interface(). */ + int need_init; + /* Use the logical to physical table to accelerate portals4 adressing: 1 (true) : 0 (false) */ int use_logical; diff --git a/opal/mca/btl/portals4/btl_portals4_component.c b/opal/mca/btl/portals4/btl_portals4_component.c index 8e4f2864f1e..2c6bb192f83 100644 --- a/opal/mca/btl/portals4/btl_portals4_component.c +++ b/opal/mca/btl/portals4/btl_portals4_component.c @@ -252,6 +252,15 @@ mca_btl_portals4_component_open(void) mca_btl_portals4_module.portals_outstanding_ops = 0; mca_btl_portals4_module.recv_idx = (ptl_pt_index_t) ~0UL; + if (1 == mca_btl_portals4_component.use_logical) { + /* + * set the MCA_BTL_FLAGS_SINGLE_ADD_PROCS flag here in the default + * module, so it gets copied into the module for each Portals4 + * interface during init(). + */ + mca_btl_portals4_module.super.btl_flags |= MCA_BTL_FLAGS_SINGLE_ADD_PROCS; + } + return OPAL_SUCCESS; } @@ -442,6 +451,8 @@ static mca_btl_base_module_t** mca_btl_portals4_component_init(int *num_btls, opal_output_verbose(1, opal_btl_base_framework.framework_output, "The btl portals4 component has been initialized and uses %d NI(s)", mca_btl_portals4_component.num_btls); + mca_btl_portals4_component.need_init = 1; + return btls; error: