diff --git a/opal/mca/common/cuda/common_cuda.c b/opal/mca/common/cuda/common_cuda.c index 56183043c26..14169678bb8 100644 --- a/opal/mca/common/cuda/common_cuda.c +++ b/opal/mca/common/cuda/common_cuda.c @@ -232,7 +232,6 @@ static void cuda_dump_memhandle(int, void *, char *) __opal_attribute_unused__ ; int mca_common_cuda_stage_one_init(void) { int retval, i, j; - int advise_support = 1; char *cudalibs[] = {"libcuda.so.1", "libcuda.dylib", NULL}; char *searchpaths[] = {"", "/usr/lib64", NULL}; char **errmsgs = NULL; @@ -337,12 +336,7 @@ int mca_common_cuda_stage_one_init(void) } if (!OPAL_HAVE_DL_SUPPORT) { - if (OPAL_ERR_NOT_SUPPORTED == retval) { - opal_show_help("help-mpi-common-cuda.txt", "dlopen disabled", true); - } else { - opal_show_help("help-mpi-common-cuda.txt", "unknown ltdl error", true, - "opal_lt_dlinit", retval, opal_lt_dlerror()); - } + opal_show_help("help-mpi-common-cuda.txt", "dlopen disabled", true); return 1; } @@ -350,94 +344,55 @@ int mca_common_cuda_stage_one_init(void) * that works. If it does, all is good. If not, print out all * the messages about why things failed. This code was careful * to try and save away all error messages if the loading ultimately - * failed to help with debugging. + * failed to help with debugging. + * * NOTE: On the first loop we just utilize the default loading * paths from the system. For the second loop, set /usr/lib64 to * the search path and try again. This is done to handle the case - * where we have both 32 and 64 bit libcuda.so libraries installed. - * Even when running in 64-bit mode, the /usr/lib directory - * is searched first and we may find a 32-bit libcuda.so.1 library. - * Loading of this library will fail as libtool does not handle having - * the wrong ABI in the search path (unlike ld or ld.so). Note that - * we only set this search path after the original search. This is - * so that LD_LIBRARY_PATH and run path settings are respected. 
- * Setting this search path overrides them (rather then being appended). */ - if (advise_support) { - if (0 != (retval = opal_lt_dladvise_global(&advise))) { - opal_show_help("help-mpi-common-cuda.txt", "unknown ltdl error", true, - "opal_lt_dladvise_global", retval, opal_lt_dlerror()); - opal_lt_dladvise_destroy(&advise); - return 1; + * where we have both 32 and 64 bit libcuda.so libraries + * installed. Even when running in 64-bit mode, the /usr/lib + * directory is searched first and we may find a 32-bit + * libcuda.so.1 library. Loading of this library will fail as the + * OPAL DL framework does not handle having the wrong ABI in the + * search path (unlike ld or ld.so). Note that we only set this + * search path after the original search. This is so that + * LD_LIBRARY_PATH and run path settings are respected. Setting + * this search path overrides them (rather than being + * appended). */ + j = 0; + while (searchpaths[j] != NULL) { + /* Set explicit search path if entry is not empty string */ + if (strcmp("", searchpaths[j])) { + opal_lt_dlsetsearchpath(searchpaths[j]); } - j = 0; - while (searchpaths[j] != NULL) { - /* Set explicit search path if entry is not empty string */ - if (strcmp("", searchpaths[j])) { - opal_lt_dlsetsearchpath(searchpaths[j]); - } - i = 0; - while (cudalibs[i] != NULL) { - const char *str; - libcuda_handle = opal_lt_dlopenadvise(cudalibs[i], advise); - if (NULL == libcuda_handle) { - str = opal_lt_dlerror(); - if (NULL != str) { - opal_argv_append(&errsize, &errmsgs, str); - } else { - opal_argv_append(&errsize, &errmsgs, "lt_dlerror() returned NULL."); - } - opal_output_verbose(10, mca_common_cuda_output, - "CUDA: Library open error: %s", - errmsgs[errsize-1]); + i = 0; + while (cudalibs[i] != NULL) { + const char *str; + retval = opal_dl_open(cudalibs[i], true, false, + &libcuda_handle, &str); + if (OPAL_SUCCESS != retval || NULL == libcuda_handle) { + if (NULL != str) { + opal_argv_append(&errsize, &errmsgs, str); } else { - 
opal_output_verbose(10, mca_common_cuda_output, - "CUDA: Library successfully opened %s", - cudalibs[i]); - stage_one_init_passed = true; - break; + opal_argv_append(&errsize, &errmsgs, + "opal_dl_open() returned NULL."); } - i++; + opal_output_verbose(10, mca_common_cuda_output, + "CUDA: Library open error: %s", + errmsgs[errsize-1]); + } else { + opal_output_verbose(10, mca_common_cuda_output, + "CUDA: Library successfully opened %s", + cudalibs[i]); + stage_one_init_passed = true; + break; } - if (true == stage_one_init_passed) break; /* Break out of outer loop */ - j++; + i++; } - opal_lt_dladvise_destroy(&advise); - } else { - j = 0; - /* No lt_dladvise support. This should rarely happen. */ - while (searchpaths[j] != NULL) { - /* Set explicit search path if entry is not empty string */ - if (strcmp("", searchpaths[j])) { - opal_lt_dlsetsearchpath(searchpaths[j]); - } - i = 0; - while (cudalibs[i] != NULL) { - const char *str; - libcuda_handle = opal_lt_dlopen(cudalibs[i]); - if (NULL == libcuda_handle) { - str = opal_lt_dlerror(); - if (NULL != str) { - opal_argv_append(&errsize, &errmsgs, str); - } else { - opal_argv_append(&errsize, &errmsgs, "lt_dlerror() returned NULL."); - } - - opal_output_verbose(10, mca_common_cuda_output, - "CUDA: Library open error: %s", - errmsgs[errsize-1]); - - } else { - opal_output_verbose(10, mca_common_cuda_output, - "CUDA: Library successfully opened %s", - cudalibs[i]); - stage_one_init_passed = true; - break; - } - i++; - } - if (true == stage_one_init_passed) break; /* Break out of outer loop */ - j++; + if (true == stage_one_init_passed) { + break; /* Break out of outer loop */ } + j++; } if (true != stage_one_init_passed) { diff --git a/opal/mca/common/cuda/help-mpi-common-cuda.txt b/opal/mca/common/cuda/help-mpi-common-cuda.txt index f91826dee40..b4b5b96b0dc 100644 --- a/opal/mca/common/cuda/help-mpi-common-cuda.txt +++ b/opal/mca/common/cuda/help-mpi-common-cuda.txt @@ -1,6 +1,7 @@ # -*- text -*- # # Copyright (c) 
2011-2014 NVIDIA. All rights reserved. +# Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -158,14 +159,6 @@ Open MPI was compiled without dynamic library support (e.g., with the If you need CUDA support, reconfigure Open MPI with dynamic library support enabled. # -[unknown ltdl error] -While attempting to load the supporting libcuda.so library, an error -occurred. This really should rarely happen. Please notify the Open -MPI developers. - Function: %s - Return Value: %d - Error string: %s -# [dlopen failed] The library attempted to open the following supporting CUDA libraries, but each of them failed. CUDA-aware support is disabled.