Skip to content

Commit

Permalink
osc/rdma: Use BTL am-rdma explicit interface
Browse files Browse the repository at this point in the history
Switch from using the implicit BTL interface (where the am-rdma
interface just extends missing functionality in the BTL) to the
new explicit interface (where the OSC RDMA interface is the
only maintainer of the BTL list).

With this change, alternate BTLs do not have to support
REMOTE_COMPLETION to be selected (because the AM RDMA interface
always provides remote completion when we request it, as this
patch does).  Any BTL that supports Active Messages (i.e., all of
them) should be able to support the OSC RDMA required semantics,
eliminating the problem of creating windows with no serviceable
BTLs.

Signed-off-by: Brian Barrett <[email protected]>
  • Loading branch information
bwbarrett committed Feb 9, 2022
1 parent 451c792 commit 4bb33a2
Show file tree
Hide file tree
Showing 6 changed files with 240 additions and 148 deletions.
29 changes: 24 additions & 5 deletions ompi/mca/osc/rdma/osc_rdma.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
#include "ompi/mca/osc/osc.h"
#include "ompi/mca/osc/base/base.h"
#include "opal/mca/btl/btl.h"
#include "opal/mca/btl/base/btl_base_am_rdma.h"
#include "ompi/memchecker.h"
#include "ompi/op/op.h"
#include "opal/align.h"
Expand Down Expand Up @@ -255,6 +256,8 @@ struct ompi_osc_rdma_module_t {
/** lock for peer hash table/array */
opal_mutex_t peer_lock;

/* ******************* communication *********************** */

/* we currently support two modes of operation, a single
* accelerated btl (which can use memory registration and can use
* btl_flush()) and one or more alternate btls, which cannot use
Expand All @@ -265,18 +268,27 @@ struct ompi_osc_rdma_module_t {

union {
struct {
struct mca_btl_base_module_t *accelerated_btl;
mca_btl_base_module_t *accelerated_btl;
};
struct {
struct mca_btl_base_module_t **alternate_btls;
mca_btl_base_am_rdma_module_t **alternate_am_rdmas;
uint8_t alternate_btl_count;
};
};

/** Only true if one BTL is in use. Memory registration is only supported when
* using a single BTL. */
/** Does the selected BTL require memory registration? This field
will be false when alternate BTLs are used, and the value
when an accelerated BTL is used depends on the registration
requirements of the underlying BTL. */
bool use_memory_registration;

size_t put_alignment;
size_t get_alignment;
size_t put_limit;
size_t get_limit;

uint32_t atomic_flags;

/** registered fragment used for locally buffered RDMA transfers */
struct ompi_osc_rdma_frag_t *rdma_frag;

Expand Down Expand Up @@ -650,8 +662,15 @@ static inline mca_btl_base_module_t *ompi_osc_rdma_selected_btl (ompi_osc_rdma_m
return module->accelerated_btl;
} else {
assert(btl_index < module->alternate_btl_count);
return module->alternate_btls[btl_index];
return module->alternate_am_rdmas[btl_index]->btl;
}
}


/**
 * Fetch the AM RDMA module stored at a given index of the module's
 * alternate list.
 *
 * In debug builds this asserts that the module is not using an
 * accelerated BTL and that the index is below alternate_btl_count.
 *
 * @param module     OSC RDMA module whose alternate_am_rdmas array is read
 * @param btl_index  position within module->alternate_am_rdmas
 *
 * @return the entry module->alternate_am_rdmas[btl_index]
 */
static inline mca_btl_base_am_rdma_module_t *
ompi_osc_rdma_selected_am_rdma(ompi_osc_rdma_module_t *module, uint8_t btl_index)
{
    assert(!module->use_accelerated_btl);
    assert(btl_index < module->alternate_btl_count);

    mca_btl_base_am_rdma_module_t **am_rdmas = module->alternate_am_rdmas;

    return am_rdmas[btl_index];
}

#endif /* OMPI_OSC_RDMA_H */
40 changes: 18 additions & 22 deletions ompi/mca/osc/rdma/osc_rdma_accumulate.c
Original file line number Diff line number Diff line change
Expand Up @@ -164,13 +164,11 @@ static int ompi_osc_rdma_fetch_and_op_atomic (ompi_osc_rdma_sync_t *sync, const
mca_btl_base_registration_handle_t *target_handle, ompi_op_t *op, ompi_osc_rdma_request_t *req)
{
ompi_osc_rdma_module_t *module = sync->module;
mca_btl_base_module_t *selected_btl = ompi_osc_rdma_selected_btl (module, peer->data_btl_index);
int32_t atomic_flags = selected_btl->btl_atomic_flags;
int btl_op, flags;
int64_t origin;

if ((8 != extent && !((MCA_BTL_ATOMIC_SUPPORTS_32BIT & atomic_flags) && 4 == extent)) ||
(!(OMPI_DATATYPE_FLAG_DATA_INT & dt->super.flags) && !(MCA_BTL_ATOMIC_SUPPORTS_FLOAT & atomic_flags)) ||
if ((8 != extent && !((MCA_BTL_ATOMIC_SUPPORTS_32BIT & module->atomic_flags) && 4 == extent)) ||
(!(OMPI_DATATYPE_FLAG_DATA_INT & dt->super.flags) && !(MCA_BTL_ATOMIC_SUPPORTS_FLOAT & module->atomic_flags)) ||
!ompi_op_is_intrinsic (op) || (0 == ompi_osc_rdma_op_mapping[op->op_type])) {
return OMPI_ERR_NOT_SUPPORTED;
}
Expand Down Expand Up @@ -242,13 +240,11 @@ static int ompi_osc_rdma_acc_single_atomic (ompi_osc_rdma_sync_t *sync, const vo
ompi_op_t *op, ompi_osc_rdma_request_t *req)
{
ompi_osc_rdma_module_t *module = sync->module;
mca_btl_base_module_t *selected_btl = ompi_osc_rdma_selected_btl (module, peer->data_btl_index);
int32_t atomic_flags = selected_btl->btl_atomic_flags;
int btl_op, flags;
int64_t origin;

if ((8 != extent && !((MCA_BTL_ATOMIC_SUPPORTS_32BIT & atomic_flags) && 4 == extent)) ||
(!(OMPI_DATATYPE_FLAG_DATA_INT & dt->super.flags) && !(MCA_BTL_ATOMIC_SUPPORTS_FLOAT & atomic_flags)) ||
if ((8 != extent && !((MCA_BTL_ATOMIC_SUPPORTS_32BIT & module->atomic_flags) && 4 == extent)) ||
(!(OMPI_DATATYPE_FLAG_DATA_INT & dt->super.flags) && !(MCA_BTL_ATOMIC_SUPPORTS_FLOAT & module->atomic_flags)) ||
!ompi_op_is_intrinsic (op) || (0 == ompi_osc_rdma_op_mapping[op->op_type])) {
return OMPI_ERR_NOT_SUPPORTED;
}
Expand Down Expand Up @@ -663,13 +659,11 @@ static inline int ompi_osc_rdma_cas_atomic (ompi_osc_rdma_sync_t *sync, const vo
bool lock_acquired)
{
ompi_osc_rdma_module_t *module = sync->module;
mca_btl_base_module_t *btl = ompi_osc_rdma_selected_btl (module, peer->data_btl_index);
int32_t atomic_flags = btl->btl_atomic_flags;
const size_t size = datatype->super.size;
int64_t compare, source;
int flags, ret;

if (8 != size && !(4 == size && (MCA_BTL_ATOMIC_SUPPORTS_32BIT & atomic_flags))) {
if (8 != size && !(4 == size && (MCA_BTL_ATOMIC_SUPPORTS_32BIT & module->atomic_flags))) {
return OMPI_ERR_NOT_SUPPORTED;
}

Expand Down Expand Up @@ -717,7 +711,6 @@ static inline int cas_rdma (ompi_osc_rdma_sync_t *sync, const void *source_addr,
mca_btl_base_registration_handle_t *target_handle, bool lock_acquired)
{
ompi_osc_rdma_module_t *module = sync->module;
mca_btl_base_module_t *btl = ompi_osc_rdma_selected_btl (module, peer->data_btl_index);
unsigned long len = datatype->super.size;
mca_btl_base_registration_handle_t *local_handle = NULL;
ompi_osc_rdma_frag_t *frag = NULL;
Expand All @@ -742,18 +735,21 @@ static inline int cas_rdma (ompi_osc_rdma_sync_t *sync, const void *source_addr,
return OMPI_SUCCESS;
}

if (btl->btl_register_mem && len > btl->btl_put_local_registration_threshold) {
do {
ret = ompi_osc_rdma_frag_alloc (module, len, &frag, &ptr);
if (OPAL_UNLIKELY(OMPI_SUCCESS == ret)) {
break;
}
if (module->use_memory_registration) {
mca_btl_base_module_t *btl = ompi_osc_rdma_selected_btl (module, peer->data_btl_index);
if (len > btl->btl_put_local_registration_threshold) {
do {
ret = ompi_osc_rdma_frag_alloc(module, len, &frag, &ptr);
if (OPAL_UNLIKELY(OMPI_SUCCESS == ret)) {
break;
}

ompi_osc_rdma_progress (module);
} while (1);
ompi_osc_rdma_progress (module);
} while (1);

memcpy (ptr, source_addr, len);
local_handle = frag->handle;
memcpy(ptr, source_addr, len);
local_handle = frag->handle;
}
}

OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "RDMA compare-and-swap initiating blocking btl put...");
Expand Down
77 changes: 55 additions & 22 deletions ompi/mca/osc/rdma/osc_rdma_btl_comm.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,17 @@ ompi_osc_rdma_btl_put(ompi_osc_rdma_module_t *module, uint8_t btl_index,
mca_btl_base_rdma_completion_fn_t cbfunc,
void *cbcontext, void *cbdata)
{
mca_btl_base_module_t *btl = ompi_osc_rdma_selected_btl(module, btl_index);

return btl->btl_put(btl, endpoint, local_address, remote_address,
local_handle, remote_handle, size, flags, order,
cbfunc, cbcontext, cbdata);
if (module->use_accelerated_btl) {
mca_btl_base_module_t *btl = ompi_osc_rdma_selected_btl(module, btl_index);
return btl->btl_put(btl, endpoint, local_address, remote_address,
local_handle, remote_handle, size, flags, order,
cbfunc, cbcontext, cbdata);
} else {
mca_btl_base_am_rdma_module_t *am_rdma = ompi_osc_rdma_selected_am_rdma(module, btl_index);
return am_rdma->am_btl_put(am_rdma, endpoint, local_address, remote_address,
local_handle, remote_handle, size, flags, order,
cbfunc, cbcontext, cbdata);
}
}


Expand All @@ -54,11 +60,18 @@ ompi_osc_rdma_btl_get(ompi_osc_rdma_module_t *module, uint8_t btl_index,
mca_btl_base_rdma_completion_fn_t cbfunc,
void *cbcontext, void *cbdata)
{
mca_btl_base_module_t *btl = ompi_osc_rdma_selected_btl(module, btl_index);

return btl->btl_get(btl, endpoint, local_address, remote_address,
local_handle, remote_handle, size, flags, order,
cbfunc, cbcontext, cbdata);
if (module->use_accelerated_btl) {
mca_btl_base_module_t *btl = ompi_osc_rdma_selected_btl(module, btl_index);
return btl->btl_get(btl, endpoint, local_address, remote_address,
local_handle, remote_handle, size, flags, order,
cbfunc, cbcontext, cbdata);
} else {
mca_btl_base_am_rdma_module_t *am_rdma = ompi_osc_rdma_selected_am_rdma(module, btl_index);
return am_rdma->am_btl_get(am_rdma, endpoint, local_address, remote_address,
local_handle, remote_handle, size, flags, order,
cbfunc, cbcontext, cbdata);
}
}


Expand All @@ -71,6 +84,9 @@ ompi_osc_rdma_btl_atomic_op(ompi_osc_rdma_module_t *module, uint8_t btl_index,
{
mca_btl_base_module_t *btl = ompi_osc_rdma_selected_btl(module, btl_index);

/* the AM BTL interface does not currently support op calls */
assert(module->use_accelerated_btl);

return btl->btl_atomic_op(btl, endpoint, remote_address, remote_handle,
op, operand, flags, order,
cbfunc, cbcontext, cbdata);
Expand All @@ -87,12 +103,19 @@ ompi_osc_rdma_btl_atomic_fop(ompi_osc_rdma_module_t *module, uint8_t btl_index,
mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)

{
mca_btl_base_module_t *btl = ompi_osc_rdma_selected_btl(module, btl_index);

return btl->btl_atomic_fop(btl, endpoint, local_address, remote_address,
local_handle, remote_handle,
op, operand, flags, order,
cbfunc, cbcontext, cbdata);
if (module->use_accelerated_btl) {
mca_btl_base_module_t *btl = ompi_osc_rdma_selected_btl(module, btl_index);
return btl->btl_atomic_fop(btl, endpoint, local_address, remote_address,
local_handle, remote_handle,
op, operand, flags, order,
cbfunc, cbcontext, cbdata);
} else {
mca_btl_base_am_rdma_module_t *am_rdma = ompi_osc_rdma_selected_am_rdma(module, btl_index);
return am_rdma->am_btl_atomic_fop(am_rdma, endpoint, local_address, remote_address,
local_handle, remote_handle,
op, operand, flags, order,
cbfunc, cbcontext, cbdata);
}
}


Expand All @@ -105,12 +128,19 @@ ompi_osc_rdma_btl_atomic_cswap(ompi_osc_rdma_module_t *module, uint8_t btl_index
uint64_t compare, uint64_t value, int flags, int order,
mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{
mca_btl_base_module_t *btl = ompi_osc_rdma_selected_btl(module, btl_index);

return btl->btl_atomic_cswap(btl, endpoint, local_address, remote_address,
local_handle, remote_handle,
compare, value, flags, order,
cbfunc, cbcontext, cbdata);
if (module->use_accelerated_btl) {
mca_btl_base_module_t *btl = ompi_osc_rdma_selected_btl(module, btl_index);
return btl->btl_atomic_cswap(btl, endpoint, local_address, remote_address,
local_handle, remote_handle,
compare, value, flags, order,
cbfunc, cbcontext, cbdata);
} else {
mca_btl_base_am_rdma_module_t *am_rdma = ompi_osc_rdma_selected_am_rdma(module, btl_index);
return am_rdma->am_btl_atomic_cswap(am_rdma, endpoint, local_address, remote_address,
local_handle, remote_handle,
compare, value, flags, order,
cbfunc, cbcontext, cbdata);
}
}


Expand Down Expand Up @@ -195,7 +225,10 @@ ompi_osc_rdma_btl_op(ompi_osc_rdma_module_t *module, uint8_t btl_index,
mca_btl_base_module_t *selected_btl = ompi_osc_rdma_selected_btl (module, btl_index);
int ret;

if (!(selected_btl->btl_flags & MCA_BTL_FLAGS_ATOMIC_OPS)) {
/* if using the AM RDMA interface with alternate BTLs or if the
accelerated BTL does not support atomic ops, emulate the atomic
op over a fetch and atomic op */
if (!module->use_accelerated_btl || !(selected_btl->btl_flags & MCA_BTL_FLAGS_ATOMIC_OPS)) {
return ompi_osc_rdma_btl_fop (module, btl_index, endpoint, address, address_handle, op, operand, flags,
NULL, wait_for_completion, cbfunc, cbdata, cbcontext);
}
Expand Down
Loading

0 comments on commit 4bb33a2

Please sign in to comment.