Skip to content

Commit

Permalink
squashme -- try again with cuda support
Browse files Browse the repository at this point in the history
  • Loading branch information
jsquyres committed Feb 23, 2015
1 parent eb79693 commit 0915ae2
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 94 deletions.
127 changes: 41 additions & 86 deletions opal/mca/common/cuda/common_cuda.c
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,6 @@ static void cuda_dump_memhandle(int, void *, char *) __opal_attribute_unused__ ;
int mca_common_cuda_stage_one_init(void)
{
int retval, i, j;
int advise_support = 1;
char *cudalibs[] = {"libcuda.so.1", "libcuda.dylib", NULL};
char *searchpaths[] = {"", "/usr/lib64", NULL};
char **errmsgs = NULL;
Expand Down Expand Up @@ -337,107 +336,63 @@ int mca_common_cuda_stage_one_init(void)
}

if (!OPAL_HAVE_DL_SUPPORT) {
if (OPAL_ERR_NOT_SUPPORTED == retval) {
opal_show_help("help-mpi-common-cuda.txt", "dlopen disabled", true);
} else {
opal_show_help("help-mpi-common-cuda.txt", "unknown ltdl error", true,
"opal_lt_dlinit", retval, opal_lt_dlerror());
}
opal_show_help("help-mpi-common-cuda.txt", "dlopen disabled", true);
return 1;
}

/* Now walk through all the potential names libcuda and find one
* that works. If it does, all is good. If not, print out all
* the messages about why things failed. This code was careful
* to try and save away all error messages if the loading ultimately
* failed to help with debugging.
* failed to help with debugging.
*
* NOTE: On the first loop we just utilize the default loading
* paths from the system. For the second loop, set /usr/lib64 to
* the search path and try again. This is done to handle the case
* where we have both 32 and 64 bit libcuda.so libraries installed.
* Even when running in 64-bit mode, the /usr/lib directory
* is searched first and we may find a 32-bit libcuda.so.1 library.
* Loading of this library will fail as libtool does not handle having
* the wrong ABI in the search path (unlike ld or ld.so). Note that
* we only set this search path after the original search. This is
* so that LD_LIBRARY_PATH and run path settings are respected.
* Setting this search path overrides them (rather than being appended). */
if (advise_support) {
if (0 != (retval = opal_lt_dladvise_global(&advise))) {
opal_show_help("help-mpi-common-cuda.txt", "unknown ltdl error", true,
"opal_lt_dladvise_global", retval, opal_lt_dlerror());
opal_lt_dladvise_destroy(&advise);
return 1;
* where we have both 32 and 64 bit libcuda.so libraries
* installed. Even when running in 64-bit mode, the /usr/lib
* directory is searched first and we may find a 32-bit
* libcuda.so.1 library. Loading of this library will fail as the
* OPAL DL framework does not handle having the wrong ABI in the
* search path (unlike ld or ld.so). Note that we only set this
* search path after the original search. This is so that
* LD_LIBRARY_PATH and run path settings are respected. Setting
* this search path overrides them (rather than being
* appended). */
j = 0;
while (searchpaths[j] != NULL) {
/* Set explicit search path if entry is not empty string */
if (strcmp("", searchpaths[j])) {
opal_lt_dlsetsearchpath(searchpaths[j]);
}
j = 0;
while (searchpaths[j] != NULL) {
/* Set explicit search path if entry is not empty string */
if (strcmp("", searchpaths[j])) {
opal_lt_dlsetsearchpath(searchpaths[j]);
}
i = 0;
while (cudalibs[i] != NULL) {
const char *str;
libcuda_handle = opal_lt_dlopenadvise(cudalibs[i], advise);
if (NULL == libcuda_handle) {
str = opal_lt_dlerror();
if (NULL != str) {
opal_argv_append(&errsize, &errmsgs, str);
} else {
opal_argv_append(&errsize, &errmsgs, "lt_dlerror() returned NULL.");
}
opal_output_verbose(10, mca_common_cuda_output,
"CUDA: Library open error: %s",
errmsgs[errsize-1]);
i = 0;
while (cudalibs[i] != NULL) {
const char *str;
retval = opal_dl_open(cudalibs[i], true, false,
&libcuda_handle, &str);
if (OPAL_SUCCESS != retval || NULL == libcuda_handle) {
if (NULL != str) {
opal_argv_append(&errsize, &errmsgs, str);
} else {
opal_output_verbose(10, mca_common_cuda_output,
"CUDA: Library successfully opened %s",
cudalibs[i]);
stage_one_init_passed = true;
break;
opal_argv_append(&errsize, &errmsgs,
"opal_dl_open() returned NULL.");
}
i++;
opal_output_verbose(10, mca_common_cuda_output,
"CUDA: Library open error: %s",
errmsgs[errsize-1]);
} else {
opal_output_verbose(10, mca_common_cuda_output,
"CUDA: Library successfully opened %s",
cudalibs[i]);
stage_one_init_passed = true;
break;
}
if (true == stage_one_init_passed) break; /* Break out of outer loop */
j++;
i++;
}
opal_lt_dladvise_destroy(&advise);
} else {
j = 0;
/* No lt_dladvise support. This should rarely happen. */
while (searchpaths[j] != NULL) {
/* Set explicit search path if entry is not empty string */
if (strcmp("", searchpaths[j])) {
opal_lt_dlsetsearchpath(searchpaths[j]);
}
i = 0;
while (cudalibs[i] != NULL) {
const char *str;
libcuda_handle = opal_lt_dlopen(cudalibs[i]);
if (NULL == libcuda_handle) {
str = opal_lt_dlerror();
if (NULL != str) {
opal_argv_append(&errsize, &errmsgs, str);
} else {
opal_argv_append(&errsize, &errmsgs, "lt_dlerror() returned NULL.");
}

opal_output_verbose(10, mca_common_cuda_output,
"CUDA: Library open error: %s",
errmsgs[errsize-1]);

} else {
opal_output_verbose(10, mca_common_cuda_output,
"CUDA: Library successfully opened %s",
cudalibs[i]);
stage_one_init_passed = true;
break;
}
i++;
}
if (true == stage_one_init_passed) break; /* Break out of outer loop */
j++;
if (true == stage_one_init_passed) {
break; /* Break out of outer loop */
}
j++;
}

if (true != stage_one_init_passed) {
Expand Down
9 changes: 1 addition & 8 deletions opal/mca/common/cuda/help-mpi-common-cuda.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# -*- text -*-
#
# Copyright (c) 2011-2014 NVIDIA. All rights reserved.
# Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
Expand Down Expand Up @@ -158,14 +159,6 @@ Open MPI was compiled without dynamic library support (e.g., with the

If you need CUDA support, reconfigure Open MPI with dynamic library support enabled.
#
[unknown ltdl error]
While attempting to load the supporting libcuda.so library, an error
occurred. This really should rarely happen. Please notify the Open
MPI developers.
Function: %s
Return Value: %d
Error string: %s
#
[dlopen failed]
The library attempted to open the following supporting CUDA libraries,
but each of them failed. CUDA-aware support is disabled.
Expand Down

0 comments on commit 0915ae2

Please sign in to comment.