Skip to content

Commit

Permalink
ADAPT: fallback to previous coll module on non-commutative operations (
Browse files Browse the repository at this point in the history
…open-mpi#30)

Signed-off-by: Joseph Schuchart <[email protected]>
  • Loading branch information
devreal authored Sep 8, 2020
1 parent 5c271b9 commit e7f0d53
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 0 deletions.
33 changes: 33 additions & 0 deletions ompi/mca/coll/adapt/coll_adapt.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,11 +73,44 @@ typedef struct mca_coll_adapt_component_t {

} mca_coll_adapt_component_t;

/*
* Structure used to store what is necessary for the collective operations
* routines in case of fallback.
*/
typedef struct mca_coll_adapt_collective_fallback_s {
union {
mca_coll_base_module_reduce_fn_t reduce;
mca_coll_base_module_ireduce_fn_t ireduce;
} previous_routine;
mca_coll_base_module_t *previous_module;
} mca_coll_adapt_collective_fallback_t;


typedef enum mca_coll_adapt_colltype {
ADAPT_REDUCE = 0,
ADAPT_IREDUCE = 1,
ADAPT_COLLCOUNT
} mca_coll_adapt_colltype_t;

/*
* Some defines to stick to the naming used in the other components in terms of
* fallback routines
*/
#define previous_reduce previous_routines[ADAPT_REDUCE].previous_routine.reduce
#define previous_ireduce previous_routines[ADAPT_IREDUCE].previous_routine.ireduce

#define previous_reduce_module previous_routines[ADAPT_REDUCE].previous_module
#define previous_ireduce_module previous_routines[ADAPT_IREDUCE].previous_module


/* Coll adapt module per communicator*/
struct mca_coll_adapt_module_t {
/* Base module */
mca_coll_base_module_t super;

/* To be able to fallback when the cases are not supported */
struct mca_coll_adapt_collective_fallback_s previous_routines[ADAPT_COLLCOUNT];

/* Whether this module has been lazily initialized or not yet */
bool adapt_enabled;
};
Expand Down
12 changes: 12 additions & 0 deletions ompi/mca/coll/adapt/coll_adapt_ireduce.c
Original file line number Diff line number Diff line change
Expand Up @@ -521,6 +521,18 @@ int ompi_coll_adapt_ireduce(const void *sbuf, void *rbuf, int count, struct ompi
struct ompi_op_t *op, int root, struct ompi_communicator_t *comm,
ompi_request_t ** request, mca_coll_base_module_t * module)
{

/* Fall-back if operation is commutative */
if (!ompi_op_is_commute(op)){
mca_coll_adapt_module_t *adapt_module = (mca_coll_adapt_module_t *) module;
OPAL_OUTPUT_VERBOSE((30, mca_coll_adapt_component.adapt_output,
"ADAPT cannot handle reduce with this (non-commutative) operation. It needs to fall back on another component\n"));
return adapt_module->previous_ireduce(sbuf, rbuf, count, dtype, op, root,
comm, request,
adapt_module->previous_reduce_module);
}


OPAL_OUTPUT_VERBOSE((10, mca_coll_adapt_component.adapt_output,
"ireduce root %d, algorithm %d, coll_adapt_ireduce_segment_size %zu, coll_adapt_ireduce_max_send_requests %d, coll_adapt_ireduce_max_recv_requests %d\n",
root, mca_coll_adapt_component.adapt_ireduce_algorithm,
Expand Down
25 changes: 25 additions & 0 deletions ompi/mca/coll/adapt/coll_adapt_module.c
Original file line number Diff line number Diff line change
Expand Up @@ -69,12 +69,37 @@ OBJ_CLASS_INSTANCE(mca_coll_adapt_module_t,
adapt_module_construct,
adapt_module_destruct);

/*
* In this macro, the following variables are supposed to have been declared
* in the caller:
* . ompi_communicator_t *comm
* . mca_coll_adapt_module_t *adapt_module
*/
#define ADAPT_SAVE_PREV_COLL_API(__api) \
do { \
adapt_module->previous_ ## __api = comm->c_coll->coll_ ## __api; \
adapt_module->previous_ ## __api ## _module = comm->c_coll->coll_ ## __api ## _module; \
if (!comm->c_coll->coll_ ## __api || !comm->c_coll->coll_ ## __api ## _module) { \
opal_output_verbose(1, ompi_coll_base_framework.framework_output, \
"(%d/%s): no underlying " # __api"; disqualifying myself", \
comm->c_contextid, comm->c_name); \
return OMPI_ERROR; \
} \
OBJ_RETAIN(adapt_module->previous_ ## __api ## _module); \
} while(0)


/*
* Init module on the communicator
*/
static int adapt_module_enable(mca_coll_base_module_t * module,
struct ompi_communicator_t *comm)
{
mca_coll_adapt_module_t * adapt_module = (mca_coll_adapt_module_t*) module;

ADAPT_SAVE_PREV_COLL_API(reduce);
ADAPT_SAVE_PREV_COLL_API(ireduce);

return OMPI_SUCCESS;
}

Expand Down
12 changes: 12 additions & 0 deletions ompi/mca/coll/adapt/coll_adapt_reduce.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
* $HEADER$
*/


#include "ompi/op/op.h"
#include "coll_adapt.h"
#include "coll_adapt_algorithms.h"

Expand All @@ -17,6 +19,16 @@ int ompi_coll_adapt_reduce(const void *sbuf, void *rbuf, int count, struct ompi_
struct ompi_op_t *op, int root, struct ompi_communicator_t *comm,
mca_coll_base_module_t * module)
{
/* Fall-back if operation is commutative */
if (!ompi_op_is_commute(op)){
mca_coll_adapt_module_t *adapt_module = (mca_coll_adapt_module_t *) module;
OPAL_OUTPUT_VERBOSE((30, mca_coll_adapt_component.adapt_output,
"ADAPT cannot handle reduce with this (commutative) operation. It needs to fall back on another component\n"));
return adapt_module->previous_reduce(sbuf, rbuf, count, dtype, op, root,
comm,
adapt_module->previous_reduce_module);
}

ompi_request_t *request = NULL;
int err = ompi_coll_adapt_ireduce(sbuf, rbuf, count, dtype, op, root, comm, &request, module);
if( MPI_SUCCESS != err ) {
Expand Down

0 comments on commit e7f0d53

Please sign in to comment.