From 6c4cb758da7b2d4ddaa2e9b241988bd0773eaa80 Mon Sep 17 00:00:00 2001 From: Wei Zhang Date: Sun, 26 Sep 2021 03:31:27 +0000 Subject: [PATCH] osc/rdma: use pointer to btl to replace btl_index in peer Currently, each peer keeps a state_btl_index and a data_btl_index. The btl_index is used to retrieve the btl used for data transfer and state updating, by calling the function ompi_osc_rdma_selected_btl(). This patch simplifies the code by directly storing the pointer to the btl inside the peer, thus bypassing the call to ompi_osc_rdma_selected_btl(). The function ompi_osc_rdma_selected_btl() is then removed. Signed-off-by: Wei Zhang --- ompi/mca/osc/rdma/osc_rdma.h | 5 ---- ompi/mca/osc/rdma/osc_rdma_accumulate.c | 22 ++++++++-------- ompi/mca/osc/rdma/osc_rdma_comm.c | 14 +++++----- ompi/mca/osc/rdma/osc_rdma_comm.h | 4 ++- ompi/mca/osc/rdma/osc_rdma_component.c | 10 ++++---- ompi/mca/osc/rdma/osc_rdma_dynamic.c | 4 +-- ompi/mca/osc/rdma/osc_rdma_lock.h | 20 +++++++-------- ompi/mca/osc/rdma/osc_rdma_peer.c | 34 +++++++++++++------------ ompi/mca/osc/rdma/osc_rdma_peer.h | 13 ++++++---- 9 files changed, 64 insertions(+), 62 deletions(-) diff --git a/ompi/mca/osc/rdma/osc_rdma.h b/ompi/mca/osc/rdma/osc_rdma.h index 19a2f577d19..f2243180073 100644 --- a/ompi/mca/osc/rdma/osc_rdma.h +++ b/ompi/mca/osc/rdma/osc_rdma.h @@ -645,11 +645,6 @@ static inline bool ompi_osc_rdma_oor (int rc) return (OPAL_SUCCESS != rc && (OPAL_ERR_OUT_OF_RESOURCE == rc || OPAL_ERR_TEMP_OUT_OF_RESOURCE == rc)); } -__opal_attribute_always_inline__ -static inline mca_btl_base_module_t *ompi_osc_rdma_selected_btl (ompi_osc_rdma_module_t *module, uint8_t btl_index) { - return module->selected_btls[btl_index]; -} - __opal_attribute_always_inline__ static inline void ompi_osc_rdma_selected_btl_insert (ompi_osc_rdma_module_t *module, struct mca_btl_base_module_t *btl, uint8_t btl_index) { if(btl_index == module->selected_btls_size) { diff --git a/ompi/mca/osc/rdma/osc_rdma_accumulate.c 
b/ompi/mca/osc/rdma/osc_rdma_accumulate.c index 15f0a80714e..6824668beb1 100644 --- a/ompi/mca/osc/rdma/osc_rdma_accumulate.c +++ b/ompi/mca/osc/rdma/osc_rdma_accumulate.c @@ -156,7 +156,7 @@ static int ompi_osc_rdma_fetch_and_op_atomic (ompi_osc_rdma_sync_t *sync, const mca_btl_base_registration_handle_t *target_handle, ompi_op_t *op, ompi_osc_rdma_request_t *req) { ompi_osc_rdma_module_t *module = sync->module; - mca_btl_base_module_t *selected_btl = ompi_osc_rdma_selected_btl (module, peer->data_btl_index); + mca_btl_base_module_t *selected_btl = peer->data_btl; int32_t atomic_flags = selected_btl->btl_atomic_flags; int btl_op, flags; int64_t origin; @@ -176,7 +176,7 @@ static int ompi_osc_rdma_fetch_and_op_atomic (ompi_osc_rdma_sync_t *sync, const origin = (8 == extent) ? ((int64_t *) origin_addr)[0] : ((int32_t *) origin_addr)[0]; - return ompi_osc_rdma_btl_fop (module, peer->data_btl_index, peer->data_endpoint, target_address, target_handle, btl_op, origin, flags, + return ompi_osc_rdma_btl_fop (module, peer->data_btl, peer->data_endpoint, target_address, target_handle, btl_op, origin, flags, result_addr, true, NULL, NULL, NULL); } @@ -198,7 +198,7 @@ static int ompi_osc_rdma_fetch_and_op_cas (ompi_osc_rdma_sync_t *sync, const voi OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "initiating fetch-and-op using compare-and-swap"); - ret = ompi_osc_get_data_blocking (module, peer->data_btl_index, peer->data_endpoint, address, target_handle, &old_value, 8); + ret = ompi_osc_get_data_blocking (module, peer->data_btl, peer->data_endpoint, address, target_handle, &old_value, 8); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { return ret; } @@ -213,7 +213,7 @@ static int ompi_osc_rdma_fetch_and_op_cas (ompi_osc_rdma_sync_t *sync, const voi ompi_op_reduce (op, (void *) ((intptr_t) origin_addr + dt->super.true_lb), (void*)((intptr_t) &new_value + offset), 1, dt); } - ret = ompi_osc_rdma_btl_cswap (module, peer->data_btl_index, peer->data_endpoint, address, target_handle, + ret = 
ompi_osc_rdma_btl_cswap (module, peer->data_btl, peer->data_endpoint, address, target_handle, old_value, new_value, 0, (int64_t*)&new_value); if (OPAL_SUCCESS != ret || new_value == old_value) { break; @@ -234,7 +234,7 @@ static int ompi_osc_rdma_acc_single_atomic (ompi_osc_rdma_sync_t *sync, const vo ompi_op_t *op, ompi_osc_rdma_request_t *req) { ompi_osc_rdma_module_t *module = sync->module; - mca_btl_base_module_t *selected_btl = ompi_osc_rdma_selected_btl (module, peer->data_btl_index); + mca_btl_base_module_t *selected_btl = peer->data_btl; int32_t atomic_flags = selected_btl->btl_atomic_flags; int btl_op, flags; int64_t origin; @@ -262,7 +262,7 @@ static int ompi_osc_rdma_acc_single_atomic (ompi_osc_rdma_sync_t *sync, const vo *((int64_t *) origin_addr)); /* if we locked the peer its best to wait for completion before returning */ - return ompi_osc_rdma_btl_op (module, peer->data_btl_index, peer->data_endpoint, target_address, target_handle, btl_op, origin, + return ompi_osc_rdma_btl_op (module, peer->data_btl, peer->data_endpoint, target_address, target_handle, btl_op, origin, flags, true, NULL, NULL, NULL); } @@ -375,7 +375,7 @@ static inline int ompi_osc_rdma_gacc_contig (ompi_osc_rdma_sync_t *sync, const v /* set up the request */ request->to_free = ptr; - ret = ompi_osc_get_data_blocking (module, peer->data_btl_index, peer->data_endpoint, + ret = ompi_osc_get_data_blocking (module, peer->data_btl, peer->data_endpoint, target_address, target_handle, ptr, len); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { return ret; @@ -661,7 +661,7 @@ static inline int ompi_osc_rdma_cas_atomic (ompi_osc_rdma_sync_t *sync, const vo bool lock_acquired) { ompi_osc_rdma_module_t *module = sync->module; - mca_btl_base_module_t *btl = ompi_osc_rdma_selected_btl (module, peer->data_btl_index); + mca_btl_base_module_t *btl = peer->data_btl; int32_t atomic_flags = btl->btl_atomic_flags; const size_t size = datatype->super.size; int64_t compare, source; @@ -679,7 +679,7 @@ static 
inline int ompi_osc_rdma_cas_atomic (ompi_osc_rdma_sync_t *sync, const vo OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "initiating compare-and-swap using %d-bit btl atomics. compare: 0x%" PRIx64 ", origin: 0x%" PRIx64, (int) size * 8, *((int64_t *) compare_addr), *((int64_t *) source_addr)); - ret = ompi_osc_rdma_btl_cswap (module, peer->data_btl_index, peer->data_endpoint, target_address, target_handle, + ret = ompi_osc_rdma_btl_cswap (module, peer->data_btl, peer->data_endpoint, target_address, target_handle, compare, source, flags, result_addr); if (OPAL_LIKELY(OMPI_SUCCESS == ret)) { ompi_osc_rdma_peer_accumulate_cleanup (module, peer, lock_acquired); @@ -715,7 +715,7 @@ static inline int cas_rdma (ompi_osc_rdma_sync_t *sync, const void *source_addr, mca_btl_base_registration_handle_t *target_handle, bool lock_acquired) { ompi_osc_rdma_module_t *module = sync->module; - mca_btl_base_module_t *btl = ompi_osc_rdma_selected_btl (module, peer->data_btl_index); + mca_btl_base_module_t *btl = peer->data_btl; unsigned long len = datatype->super.size; mca_btl_base_registration_handle_t *local_handle = NULL; ompi_osc_rdma_frag_t *frag = NULL; @@ -728,7 +728,7 @@ static inline int cas_rdma (ompi_osc_rdma_sync_t *sync, const void *source_addr, ", sync %p", len, target_address, (void *) sync); OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "RDMA compare-and-swap initiating blocking btl get..."); - ret = ompi_osc_get_data_blocking (module, peer->data_btl_index, peer->data_endpoint, target_address, + ret = ompi_osc_get_data_blocking (module, peer->data_btl, peer->data_endpoint, target_address, target_handle, result_addr, len); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { return ret; diff --git a/ompi/mca/osc/rdma/osc_rdma_comm.c b/ompi/mca/osc/rdma/osc_rdma_comm.c index 449bbea0641..46a012da5dc 100644 --- a/ompi/mca/osc/rdma/osc_rdma_comm.c +++ b/ompi/mca/osc/rdma/osc_rdma_comm.c @@ -54,11 +54,11 @@ static void ompi_osc_get_data_complete (struct mca_btl_base_module_t *btl, struc ((bool 
*) context)[0] = true; } -int ompi_osc_get_data_blocking (ompi_osc_rdma_module_t *module, uint8_t btl_index, +int ompi_osc_get_data_blocking (ompi_osc_rdma_module_t *module, + struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, uint64_t source_address, mca_btl_base_registration_handle_t *source_handle, void *data, size_t len) { - mca_btl_base_module_t *btl = ompi_osc_rdma_selected_btl (module, btl_index); const size_t btl_alignment_mask = ALIGNMENT_MASK(btl->btl_get_alignment); mca_btl_base_registration_handle_t *local_handle = NULL; ompi_osc_rdma_frag_t *frag = NULL; @@ -444,7 +444,7 @@ static int ompi_osc_rdma_put_real (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_pee mca_btl_base_registration_handle_t *local_handle, size_t size, mca_btl_base_rdma_completion_fn_t cb, void *context, void *cbdata) { ompi_osc_rdma_module_t *module = sync->module; - mca_btl_base_module_t *btl = ompi_osc_rdma_selected_btl (module, peer->data_btl_index); + mca_btl_base_module_t *btl = peer->data_btl; int ret; OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "initiating btl put of %lu bytes to remote address %" PRIx64 ", sync " @@ -481,7 +481,7 @@ int ompi_osc_rdma_put_contig (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_peer_t * ompi_osc_rdma_request_t *request) { ompi_osc_rdma_module_t *module = sync->module; - mca_btl_base_module_t *btl = ompi_osc_rdma_selected_btl (module, peer->data_btl_index); + mca_btl_base_module_t *btl = peer->data_btl; mca_btl_base_registration_handle_t *local_handle = NULL; mca_btl_base_rdma_completion_fn_t cbfunc = NULL; ompi_osc_rdma_frag_t *frag = NULL; @@ -600,7 +600,7 @@ static int ompi_osc_rdma_get_contig (ompi_osc_rdma_sync_t *sync, ompi_osc_rdma_p ompi_osc_rdma_request_t *request) { ompi_osc_rdma_module_t *module = sync->module; - mca_btl_base_module_t *btl = ompi_osc_rdma_selected_btl (module, peer->data_btl_index); + mca_btl_base_module_t *btl = peer->data_btl; const size_t btl_alignment_mask = ALIGNMENT_MASK(btl->btl_get_alignment); 
mca_btl_base_registration_handle_t *local_handle = NULL; ompi_osc_rdma_frag_t *frag = NULL; @@ -736,7 +736,7 @@ static inline int ompi_osc_rdma_put_w_req (ompi_osc_rdma_sync_t *sync, const voi ompi_datatype_t *target_datatype, ompi_osc_rdma_request_t *request) { ompi_osc_rdma_module_t *module = sync->module; - mca_btl_base_module_t *btl = ompi_osc_rdma_selected_btl (module, peer->data_btl_index); + mca_btl_base_module_t *btl = peer->data_btl; mca_btl_base_registration_handle_t *target_handle; uint64_t target_address; int ret; @@ -779,7 +779,7 @@ static inline int ompi_osc_rdma_get_w_req (ompi_osc_rdma_sync_t *sync, void *ori ompi_datatype_t *source_datatype, ompi_osc_rdma_request_t *request) { ompi_osc_rdma_module_t *module = sync->module; - mca_btl_base_module_t *btl = ompi_osc_rdma_selected_btl (module, peer->data_btl_index); + mca_btl_base_module_t *btl = peer->data_btl; mca_btl_base_registration_handle_t *source_handle; uint64_t source_address; ptrdiff_t source_span, source_lb; diff --git a/ompi/mca/osc/rdma/osc_rdma_comm.h b/ompi/mca/osc/rdma/osc_rdma_comm.h index efb305a571e..6427272a450 100644 --- a/ompi/mca/osc/rdma/osc_rdma_comm.h +++ b/ompi/mca/osc/rdma/osc_rdma_comm.h @@ -103,6 +103,7 @@ int ompi_osc_rdma_rget (void *origin_addr, int origin_count, ompi_datatype_t *or * @brief read data from a remote memory region (blocking) * * @param[in] module osc rdma module + * @param[in] btl btl module * @param[in] endpoint btl endpoint * @param[in] source_address remote address to read from * @param[in] source_handle btl registration handle for remote region (must be valid for the entire region) @@ -113,7 +114,8 @@ int ompi_osc_rdma_rget (void *origin_addr, int origin_count, ompi_datatype_t *or * data that is stored on the remote peer. The peer object does not have to be fully initialized to * work. Only the btl endpoint is needed. 
*/ -int ompi_osc_get_data_blocking (ompi_osc_rdma_module_t *module, uint8_t btl_index, +int ompi_osc_get_data_blocking (ompi_osc_rdma_module_t *module, + struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, uint64_t source_address, mca_btl_base_registration_handle_t *source_handle, void *data, size_t len); diff --git a/ompi/mca/osc/rdma/osc_rdma_component.c b/ompi/mca/osc/rdma/osc_rdma_component.c index 10da47f3053..80a8c7da23d 100644 --- a/ompi/mca/osc/rdma/osc_rdma_component.c +++ b/ompi/mca/osc/rdma/osc_rdma_component.c @@ -575,7 +575,7 @@ static int allocate_state_single (ompi_osc_rdma_module_t *module, void **base, s } else { /* use my endpoint handle to modify the peer's state */ my_peer->state_handle = module->state_handle; - my_peer->state_btl_index = my_peer->data_btl_index; + my_peer->state_btl = my_peer->data_btl; my_peer->state_endpoint = my_peer->data_endpoint; } @@ -845,17 +845,17 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s peer->state = (osc_rdma_counter_t) ((uintptr_t) state_region->base + state_base + module->state_size * i); if (i==0) { peer->state_endpoint = peer->data_endpoint; - peer->state_btl_index = peer->data_btl_index; + peer->state_btl = peer->data_btl; } else { peer->state_endpoint = local_leader->state_endpoint; - peer->state_btl_index = local_leader->state_btl_index; + peer->state_btl = local_leader->state_btl; } } else { assert (!module->use_memory_registration); assert (NULL != module->peer_state_array); peer->state = (osc_rdma_counter_t)module->peer_state_array[peer_rank]; peer->state_endpoint = peer->data_endpoint; - peer->state_btl_index = peer->data_btl_index; + peer->state_btl = peer->data_btl; } } @@ -867,7 +867,7 @@ static int allocate_state_shared (ompi_osc_rdma_module_t *module, void **base, s !module->use_cpu_atomics && temp[i].size && i > 0) { /* use the local leader's endpoint */ peer->data_endpoint = local_leader->data_endpoint; - peer->data_btl_index = 
local_leader->data_btl_index; + peer->data_btl = local_leader->data_btl; } ompi_osc_module_add_peer (module, peer); diff --git a/ompi/mca/osc/rdma/osc_rdma_dynamic.c b/ompi/mca/osc/rdma/osc_rdma_dynamic.c index 8adfa7f8159..73e80eb8dc7 100644 --- a/ompi/mca/osc/rdma/osc_rdma_dynamic.c +++ b/ompi/mca/osc/rdma/osc_rdma_dynamic.c @@ -392,7 +392,7 @@ static int ompi_osc_rdma_refresh_dynamic_region (ompi_osc_rdma_module_t *module, osc_rdma_counter_t remote_value; source_address = (uint64_t)(intptr_t) peer->super.state + offsetof (ompi_osc_rdma_state_t, region_count); - ret = ompi_osc_get_data_blocking (module, peer->super.state_btl_index, peer->super.state_endpoint, + ret = ompi_osc_get_data_blocking (module, peer->super.state_btl, peer->super.state_endpoint, source_address, peer->super.state_handle, &remote_value, sizeof (remote_value)); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { @@ -433,7 +433,7 @@ static int ompi_osc_rdma_refresh_dynamic_region (ompi_osc_rdma_module_t *module, OMPI_OSC_RDMA_LOCK_EXCLUSIVE); source_address = (uint64_t)(intptr_t) peer->super.state + offsetof (ompi_osc_rdma_state_t, regions); - ret = ompi_osc_get_data_blocking (module, peer->super.state_btl_index, peer->super.state_endpoint, + ret = ompi_osc_get_data_blocking (module, peer->super.state_btl, peer->super.state_endpoint, source_address, peer->super.state_handle, peer->regions, region_len); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { OPAL_THREAD_UNLOCK(&module->lock); diff --git a/ompi/mca/osc/rdma/osc_rdma_lock.h b/ompi/mca/osc/rdma/osc_rdma_lock.h index 36a30a1cc0b..0ffdcde190f 100644 --- a/ompi/mca/osc/rdma/osc_rdma_lock.h +++ b/ompi/mca/osc/rdma/osc_rdma_lock.h @@ -37,14 +37,14 @@ void ompi_osc_rdma_atomic_complete (mca_btl_base_module_t *btl, struct mca_btl_b void *context, void *data, int status); __opal_attribute_always_inline__ -static inline int ompi_osc_rdma_btl_fop (ompi_osc_rdma_module_t *module, uint8_t btl_index, +static inline int ompi_osc_rdma_btl_fop (ompi_osc_rdma_module_t 
*module, + struct mca_btl_base_module_t *selected_btl, struct mca_btl_base_endpoint_t *endpoint, uint64_t address, mca_btl_base_registration_handle_t *address_handle, int op, int64_t operand, int flags, int64_t *result, const bool wait_for_completion, ompi_osc_rdma_pending_op_cb_fn_t cbfunc, void *cbdata, void *cbcontext) { ompi_osc_rdma_pending_op_t *pending_op; - mca_btl_base_module_t *selected_btl = ompi_osc_rdma_selected_btl (module, btl_index); int ret = OPAL_ERROR; pending_op = OBJ_NEW(ompi_osc_rdma_pending_op_t); @@ -110,23 +110,23 @@ static inline int ompi_osc_rdma_lock_btl_fop (ompi_osc_rdma_module_t *module, om int op, ompi_osc_rdma_lock_t operand, ompi_osc_rdma_lock_t *result, const bool wait_for_completion) { - return ompi_osc_rdma_btl_fop (module, peer->state_btl_index, peer->state_endpoint, address, peer->state_handle, op, + return ompi_osc_rdma_btl_fop (module, peer->state_btl, peer->state_endpoint, address, peer->state_handle, op, operand, 0, result, wait_for_completion, NULL, NULL, NULL); } __opal_attribute_always_inline__ -static inline int ompi_osc_rdma_btl_op (ompi_osc_rdma_module_t *module, uint8_t btl_index, +static inline int ompi_osc_rdma_btl_op (ompi_osc_rdma_module_t *module, + struct mca_btl_base_module_t *selected_btl, struct mca_btl_base_endpoint_t *endpoint, uint64_t address, mca_btl_base_registration_handle_t *address_handle, int op, int64_t operand, int flags, const bool wait_for_completion, ompi_osc_rdma_pending_op_cb_fn_t cbfunc, void *cbdata, void *cbcontext) { ompi_osc_rdma_pending_op_t *pending_op; - mca_btl_base_module_t *selected_btl = ompi_osc_rdma_selected_btl (module, btl_index); int ret; if (!(selected_btl->btl_flags & MCA_BTL_FLAGS_ATOMIC_OPS)) { - return ompi_osc_rdma_btl_fop (module, btl_index, endpoint, address, address_handle, op, operand, flags, + return ompi_osc_rdma_btl_fop (module, selected_btl, endpoint, address, address_handle, op, operand, flags, NULL, wait_for_completion, cbfunc, cbdata, cbcontext); } @@ 
-181,18 +181,18 @@ __opal_attribute_always_inline__ static inline int ompi_osc_rdma_lock_btl_op (ompi_osc_rdma_module_t *module, ompi_osc_rdma_peer_t *peer, uint64_t address, int op, ompi_osc_rdma_lock_t operand, const bool wait_for_completion) { - return ompi_osc_rdma_btl_op (module, peer->state_btl_index, peer->state_endpoint, address, peer->state_handle, op, + return ompi_osc_rdma_btl_op (module, peer->state_btl, peer->state_endpoint, address, peer->state_handle, op, operand, 0, wait_for_completion, NULL, NULL, NULL); } __opal_attribute_always_inline__ -static inline int ompi_osc_rdma_btl_cswap (ompi_osc_rdma_module_t *module, uint8_t btl_index, +static inline int ompi_osc_rdma_btl_cswap (ompi_osc_rdma_module_t *module, + struct mca_btl_base_module_t *selected_btl, struct mca_btl_base_endpoint_t *endpoint, uint64_t address, mca_btl_base_registration_handle_t *address_handle, int64_t compare, int64_t value, int flags, int64_t *result) { ompi_osc_rdma_pending_op_t *pending_op; - mca_btl_base_module_t *selected_btl = ompi_osc_rdma_selected_btl (module, btl_index); int ret; pending_op = OBJ_NEW(ompi_osc_rdma_pending_op_t); @@ -244,7 +244,7 @@ __opal_attribute_always_inline__ static inline int ompi_osc_rdma_lock_btl_cswap (ompi_osc_rdma_module_t *module, ompi_osc_rdma_peer_t *peer, uint64_t address, ompi_osc_rdma_lock_t compare, ompi_osc_rdma_lock_t value, ompi_osc_rdma_lock_t *result) { - return ompi_osc_rdma_btl_cswap (module, peer->state_btl_index, peer->state_endpoint, address, peer->state_handle, compare, value, + return ompi_osc_rdma_btl_cswap (module, peer->state_btl, peer->state_endpoint, address, peer->state_handle, compare, value, 0, result); } diff --git a/ompi/mca/osc/rdma/osc_rdma_peer.c b/ompi/mca/osc/rdma/osc_rdma_peer.c index 41e90072c0c..30592d873d2 100644 --- a/ompi/mca/osc/rdma/osc_rdma_peer.c +++ b/ompi/mca/osc/rdma/osc_rdma_peer.c @@ -30,13 +30,15 @@ * * @param[in] module osc rdma module * @param[in] peer_id process rank in the module 
communicator - * @param[in] module_btl_index btl index to use + * @param[out] btl_out btl to be used + * @param[out] endpoint endpoint to be used * - * @returns NULL on error - * @returns btl endpoint on success + * @returns OMPI_SUCCESS on success + * @returns ompi error code on error */ static int ompi_osc_rdma_peer_btl_endpoint (struct ompi_osc_rdma_module_t *module, - int peer_id, uint8_t *btl_index_out, + int peer_id, + struct mca_btl_base_module_t **btl_out, struct mca_btl_base_endpoint_t **endpoint) { ompi_proc_t *proc = ompi_comm_peer_lookup (module->comm, peer_id); @@ -51,7 +53,7 @@ static int ompi_osc_rdma_peer_btl_endpoint (struct ompi_osc_rdma_module_t *modul for (int module_btl_index = 0 ; module_btl_index < module->btls_in_use ; ++module_btl_index) { for (int btl_index = 0 ; btl_index < num_btls ; ++btl_index) { if (bml_endpoint->btl_rdma.bml_btls[btl_index].btl == module->selected_btls[module_btl_index]) { - *btl_index_out = module_btl_index; + *btl_out = bml_endpoint->btl_rdma.bml_btls[btl_index].btl; *endpoint = bml_endpoint->btl_rdma.bml_btls[btl_index].btl_endpoint; return OMPI_SUCCESS; } @@ -64,7 +66,7 @@ static int ompi_osc_rdma_peer_btl_endpoint (struct ompi_osc_rdma_module_t *modul for (int module_btl_index = 0 ; module_btl_index < module->btls_in_use ; ++module_btl_index) { for (int btl_index = 0 ; btl_index < num_btls ; ++btl_index) { if (bml_endpoint->btl_eager.bml_btls[btl_index].btl == module->selected_btls[module_btl_index]) { - *btl_index_out = module_btl_index; + *btl_out = bml_endpoint->btl_eager.bml_btls[btl_index].btl; *endpoint = bml_endpoint->btl_eager.bml_btls[btl_index].btl_endpoint; return OMPI_SUCCESS; } @@ -77,13 +79,13 @@ static int ompi_osc_rdma_peer_btl_endpoint (struct ompi_osc_rdma_module_t *modul int ompi_osc_rdma_new_peer (struct ompi_osc_rdma_module_t *module, int peer_id, ompi_osc_rdma_peer_t **peer_out) { struct mca_btl_base_endpoint_t *endpoint; + struct mca_btl_base_module_t *btl; ompi_osc_rdma_peer_t *peer; - 
uint8_t module_btl_index = UINT8_MAX; *peer_out = NULL; /* find a btl/endpoint to use for this peer */ - int ret = ompi_osc_rdma_peer_btl_endpoint (module, peer_id, &module_btl_index, &endpoint); + int ret = ompi_osc_rdma_peer_btl_endpoint (module, peer_id, &btl, &endpoint); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret && !(module->selected_btls[0]->btl_atomic_flags & MCA_BTL_ATOMIC_SUPPORTS_GLOB) && (peer_id != ompi_comm_rank (module->comm)))) { @@ -100,7 +102,7 @@ int ompi_osc_rdma_new_peer (struct ompi_osc_rdma_module_t *module, int peer_id, } peer->data_endpoint = endpoint; - peer->data_btl_index = module_btl_index; + peer->data_btl = btl; peer->rank = peer_id; *peer_out = peer; @@ -128,7 +130,7 @@ static int ompi_osc_rdma_peer_setup (ompi_osc_rdma_module_t *module, ompi_osc_rd ompi_osc_rdma_rank_data_t rank_data; int registration_handle_size = 0; int node_id, node_rank, array_index; - uint8_t array_btl_index; + struct mca_btl_base_module_t *array_btl; int ret, disp_unit; char *peer_data; @@ -152,7 +154,7 @@ static int ompi_osc_rdma_peer_setup (ompi_osc_rdma_module_t *module, ompi_osc_rd array_pointer = array_peer_data->base + array_index * sizeof (rank_data); /* lookup the btl endpoint needed to retrieve the mapping */ - ret = ompi_osc_rdma_peer_btl_endpoint (module, node_rank, &array_btl_index, &array_endpoint); + ret = ompi_osc_rdma_peer_btl_endpoint (module, node_rank, &array_btl, &array_endpoint); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { return OMPI_ERR_UNREACH; } @@ -160,7 +162,7 @@ static int ompi_osc_rdma_peer_setup (ompi_osc_rdma_module_t *module, ompi_osc_rd OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "reading region data for %d from rank: %d, index: %d, pointer: 0x%" PRIx64 ", size: %lu", peer->rank, node_rank, array_index, array_pointer, sizeof (rank_data)); - ret = ompi_osc_get_data_blocking (module, array_btl_index, array_endpoint, array_pointer, + ret = ompi_osc_get_data_blocking (module, array_btl, array_endpoint, array_pointer, 
(mca_btl_base_registration_handle_t *) array_peer_data->btl_handle_data, &rank_data, sizeof (rank_data)); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { @@ -179,7 +181,7 @@ static int ompi_osc_rdma_peer_setup (ompi_osc_rdma_module_t *module, ompi_osc_rd } ret = ompi_osc_rdma_peer_btl_endpoint (module, NODE_ID_TO_RANK(module, node_peer_data, rank_data.node_id), - &peer->state_btl_index, &peer->state_endpoint); + &peer->state_btl, &peer->state_endpoint); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { return OPAL_ERR_UNREACH; } @@ -194,7 +196,7 @@ static int ompi_osc_rdma_peer_setup (ompi_osc_rdma_module_t *module, ompi_osc_rd * same endpoint were used to transfer data and * update state */ - peer->state_btl_index = peer->data_btl_index; + peer->state_btl = peer->data_btl; peer->state_endpoint = peer->data_endpoint; } @@ -215,7 +217,7 @@ static int ompi_osc_rdma_peer_setup (ompi_osc_rdma_module_t *module, ompi_osc_rd peer_data = alloca (peer_data_size); /* read window data from the end of the target's state structure */ - ret = ompi_osc_get_data_blocking (module, peer->state_btl_index, peer->state_endpoint, + ret = ompi_osc_get_data_blocking (module, peer->state_btl, peer->state_endpoint, peer->state + peer_data_offset, peer->state_handle, peer_data, peer_data_size); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { @@ -264,7 +266,7 @@ static int ompi_osc_rdma_peer_setup (ompi_osc_rdma_module_t *module, ompi_osc_rd if (MPI_WIN_FLAVOR_ALLOCATE == module->flavor) { ex_peer->super.super.data_endpoint = ex_peer->super.super.state_endpoint; - ex_peer->super.super.data_btl_index = ex_peer->super.super.state_btl_index; + ex_peer->super.super.data_btl = ex_peer->super.super.state_btl; } } diff --git a/ompi/mca/osc/rdma/osc_rdma_peer.h b/ompi/mca/osc/rdma/osc_rdma_peer.h index ef8f8e0605c..86a717babf6 100644 --- a/ompi/mca/osc/rdma/osc_rdma_peer.h +++ b/ompi/mca/osc/rdma/osc_rdma_peer.h @@ -45,11 +45,14 @@ struct ompi_osc_rdma_peer_t { /** peer flags */ opal_atomic_int32_t flags; - /** index 
into BTL array */ - uint8_t data_btl_index; - - /** index into BTL array */ - uint8_t state_btl_index; + /** btl used for rdma */ + struct mca_btl_base_module_t *data_btl; + + /** btl used for reading/modifying peer state. + * When the local leader optimization is used, + * peer state is read/modified through a different + * btl than the one used for rdma data */ + struct mca_btl_base_module_t *state_btl; }; typedef struct ompi_osc_rdma_peer_t ompi_osc_rdma_peer_t;