-
Notifications
You must be signed in to change notification settings - Fork 189
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fixes #3812. Description of changes: - Cleanup the mpi init code - GIve the python interface shared ownership of the mpi env
- Loading branch information
Showing
5 changed files
with
86 additions
and
50 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -81,7 +81,7 @@ using namespace std; | |
|
||
namespace Communication { | ||
auto const &mpi_datatype_cache = boost::mpi::detail::mpi_datatype_cache(); | ||
std::unique_ptr<boost::mpi::environment> mpi_env; | ||
std::shared_ptr<boost::mpi::environment> mpi_env; | ||
} // namespace Communication | ||
|
||
boost::mpi::communicator comm_cart; | ||
|
@@ -137,67 +137,62 @@ int mpi_check_runtime_errors(); | |
**********************************************/ | ||
|
||
#if defined(OPEN_MPI) | ||
namespace { | ||
/** Workaround for "Read -1, expected XXXXXXX, errno = 14" that sometimes | ||
* appears when CUDA is used. This is a bug in OpenMPI 2.0-2.1.2 and 3.0.0 | ||
* according to | ||
* https://www.mail-archive.com/[email protected]/msg32357.html, | ||
* so we set btl_vader_single_copy_mechanism = none. | ||
*/ | ||
static void openmpi_fix_vader() { | ||
if (OMPI_MAJOR_VERSION < 2 || OMPI_MAJOR_VERSION > 3) | ||
return; | ||
if (OMPI_MAJOR_VERSION == 2 && OMPI_MINOR_VERSION == 1 && | ||
OMPI_RELEASE_VERSION >= 3) | ||
return; | ||
if (OMPI_MAJOR_VERSION == 3 && | ||
(OMPI_MINOR_VERSION > 0 || OMPI_RELEASE_VERSION > 0)) | ||
return; | ||
|
||
std::string varname = "btl_vader_single_copy_mechanism"; | ||
std::string varval = "none"; | ||
|
||
setenv((std::string("OMPI_MCA_") + varname).c_str(), varval.c_str(), 0); | ||
void openmpi_fix_vader() { | ||
if ((OMPI_MAJOR_VERSION == 2 && OMPI_MINOR_VERSION == 1 && | ||
OMPI_RELEASE_VERSION < 3) or | ||
(OMPI_MAJOR_VERSION == 3 && OMPI_MINOR_VERSION == 0 && | ||
OMPI_RELEASE_VERSION == 0)) { | ||
setenv("OMPI_MCA_btl_vader_single_copy_mechanism", "none", 0); | ||
} | ||
} | ||
#endif | ||
|
||
void mpi_init() { | ||
#ifdef OPEN_MPI | ||
openmpi_fix_vader(); | ||
|
||
void *handle = nullptr; | ||
int mode = RTLD_NOW | RTLD_GLOBAL; | ||
/** | ||
* @brief Assure that openmpi is loaded to the global namespace. | ||
* | ||
* This was originally inspired by mpi4py | ||
* (https://github.com/mpi4py/mpi4py/blob/4e3f47b6691c8f5a038e73f84b8d43b03f16627f/src/lib-mpi/compat/openmpi.h). | ||
* It's needed because OpenMPI dlopens its submodules. These are unable to find | ||
* the top-level OpenMPI library if that was dlopened itself, i.e. when the | ||
* Python interpreter dlopens a module that is linked against OpenMPI. It's | ||
* about some weird two-level symbol namespace thing. | ||
*/ | ||
void openmpi_global_namespace() { | ||
#ifdef RTLD_NOLOAD | ||
mode |= RTLD_NOLOAD; | ||
const int mode = RTLD_NOW | RTLD_GLOBAL | RTLD_NOLOAD; | ||
#else | ||
const int mode = RTLD_NOW | RTLD_GLOBAL; | ||
#endif | ||
void *_openmpi_symbol = dlsym(RTLD_DEFAULT, "MPI_Init"); | ||
|
||
const void *_openmpi_symbol = dlsym(RTLD_DEFAULT, "MPI_Init"); | ||
if (!_openmpi_symbol) { | ||
fprintf(stderr, "%d: Aborting because unable to find OpenMPI symbol.\n", | ||
this_node); | ||
fprintf(stderr, "Aborting because unable to find OpenMPI symbol.\n"); | ||
errexit(); | ||
} | ||
|
||
Dl_info _openmpi_info; | ||
dladdr(_openmpi_symbol, &_openmpi_info); | ||
|
||
if (!handle) | ||
handle = dlopen(_openmpi_info.dli_fname, mode); | ||
const void *handle = dlopen(_openmpi_info.dli_fname, mode); | ||
|
||
if (!handle) { | ||
fprintf(stderr, | ||
"%d: Aborting because unable to load libmpi into the " | ||
"global symbol space.\n", | ||
this_node); | ||
fprintf(stderr, "Aborting because unable to load libmpi into the " | ||
"global symbol space.\n"); | ||
errexit(); | ||
} | ||
} | ||
} // namespace | ||
#endif | ||
|
||
#ifdef BOOST_MPI_HAS_NOARG_INITIALIZATION | ||
Communication::mpi_env = std::make_unique<boost::mpi::environment>(); | ||
#else | ||
int argc{}; | ||
char **argv{}; | ||
Communication::mpi_env = | ||
std::make_unique<boost::mpi::environment>(argc, argv); | ||
#endif | ||
namespace Communication { | ||
void init(std::shared_ptr<boost::mpi::environment> mpi_env) { | ||
Communication::mpi_env = std::move(mpi_env); | ||
|
||
MPI_Comm_size(MPI_COMM_WORLD, &n_nodes); | ||
node_grid = Utils::Mpi::dims_create<3>(n_nodes); | ||
|
@@ -218,6 +213,16 @@ void mpi_init() { | |
|
||
on_program_start(); | ||
} | ||
} // namespace Communication | ||
|
||
std::shared_ptr<boost::mpi::environment> mpi_init() { | ||
#ifdef OPEN_MPI | ||
openmpi_fix_vader(); | ||
openmpi_global_namespace(); | ||
#endif | ||
|
||
return std::make_shared<boost::mpi::environment>(); | ||
} | ||
|
||
/****************** PLACE/PLACE NEW PARTICLE ************/ | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
from libcpp.memory cimport shared_ptr | ||
from boost cimport environment | ||
|
||
cdef extern from "MpiCallbacks.hpp" namespace "Communication": | ||
cppclass MpiCallbacks: | ||
pass | ||
|
||
cdef extern from "communication.hpp": | ||
shared_ptr[environment] mpi_init() | ||
void mpi_loop() | ||
int this_node | ||
|
||
cdef extern from "communication.hpp" namespace "Communication": | ||
MpiCallbacks & mpiCallbacks() | ||
void init(shared_ptr[environment]) |