Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

"add nccl cmake enforce" #4818

Closed
wants to merge 13 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ include(external/warpctc) # download, build, install warpctc
include(external/any) # download libn::any
include(external/eigen) # download eigen3
include(external/pybind11) # download pybind11
include(external/nccl) # download nccl1

include(cudnn) # set cudnn libraries, must before configure
include(configure) # add paddle env configuration
Expand Down
67 changes: 67 additions & 0 deletions cmake/external/nccl.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Copyright (c) 2017 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# NCCL requires CUDA; without GPU support we simply skip the external
# project (nothing is "forced" -- the old warning text was misleading).
if(NOT WITH_GPU)
  message(WARNING
      "NCCL is only supported when WITH_GPU=ON. "
      "Skipping the nccl external project.")
  return()
endif()

include(ExternalProject)

set(NCCL_SOURCES_DIR ${THIRD_PARTY_PATH}/nccl)
set(NCCL_INSTALL_DIR ${THIRD_PARTY_PATH}/install/nccl)
set(NCCL_INCLUDE_DIR "${NCCL_INSTALL_DIR}/include" CACHE PATH "nccl include directory." FORCE)

include_directories(${NCCL_SOURCES_DIR}/src/extern_nccl/build/include)
include_directories(${NCCL_INCLUDE_DIR})

# Platform-specific library name.  The original unconditionally overwrote
# the WIN32 value with the .a path; guard both settings with if/else.
if(WIN32)
  set(NCCL_LIBRARIES "${NCCL_INSTALL_DIR}/lib/libnccl.lib" CACHE FILEPATH "nccl library." FORCE)
else()
  set(NCCL_LIBRARIES "${NCCL_INSTALL_DIR}/lib/libnccl.a" CACHE FILEPATH "nccl library." FORCE)
endif()

# Currently, nccl2 is not supported in docker, so we use nccl1.
# The progress of nccl2 can be tracked in https://gitlab.com/nvidia/cuda/issues/10
ExternalProject_Add(
    extern_nccl
    ${EXTERNAL_PROJECT_LOG_ARGS}
    PREFIX            ${NCCL_SOURCES_DIR}
    DOWNLOAD_DIR      ${NCCL_SOURCES_DIR}
    GIT_REPOSITORY    "https://github.com/NVIDIA/nccl.git"
    GIT_TAG           "v1.3.4-1"
    # nccl v1 has a plain Makefile, no configure step; CONFIGURE_COMMAND ""
    # disables it (the previous CMAKE_COMMAND "" is not a valid option here).
    CONFIGURE_COMMAND ""
    UPDATE_COMMAND    ""
    BUILD_IN_SOURCE   1
    BUILD_COMMAND     make -j 8
    # nccl's Makefile installs to the PREFIX make variable; INSTALL_DIR alone
    # does not redirect it, so pass PREFIX explicitly.
    INSTALL_COMMAND   make install PREFIX=${NCCL_INSTALL_DIR}
    INSTALL_DIR       ${NCCL_INSTALL_DIR}
    TEST_COMMAND      ""
)

message(STATUS "nccl include: ${NCCL_INCLUDE_DIR}")
message(STATUS "nccl source: ${NCCL_SOURCES_DIR}")
message(STATUS "nccl library: ${NCCL_LIBRARIES}")

# Interface target so dependents can say `DEPS nccl` and get the build
# ordering against the external project (add_dependencies orders the build;
# linking still goes through NCCL_LIBRARIES).
add_library(nccl INTERFACE)
add_dependencies(nccl extern_nccl)

list(APPEND external_project_dependencies nccl)
2 changes: 1 addition & 1 deletion paddle/platform/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
cc_library(cpu_info SRCS cpu_info.cc DEPS gflags glog)
cc_test(cpu_info_test SRCS cpu_info_test.cc DEPS cpu_info)

nv_library(gpu_info SRCS gpu_info.cc DEPS gflags glog)
nv_library(gpu_info SRCS gpu_info.cc DEPS gflags glog nccl)

cc_library(place SRCS place.cc)
cc_test(place_test SRCS place_test.cc DEPS place glog gflags)
Expand Down
9 changes: 7 additions & 2 deletions paddle/platform/dynload/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,7 @@
cc_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags)
nv_library(dynload_cuda SRCS cublas.cc cudnn.cc curand.cc DEPS dynamic_loader)
# dynamic_loader implements the dlopen/dlsym helpers.  In GPU builds it
# also depends on the nccl target so the external project is fetched and
# built before anything tries to dlopen libnccl at runtime.
IF(WITH_GPU)
nv_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags nccl)
ELSE()
cc_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags)
ENDIF()

# nccl.cc defines the dynload wrapper objects for the nccl routines.
nv_library(dynload_cuda SRCS cublas.cc cudnn.cc curand.cc nccl.cc DEPS dynamic_loader)
13 changes: 13 additions & 0 deletions paddle/platform/dynload/dynamic_loader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,11 @@ DEFINE_string(warpctc_dir, "", "Specify path for loading libwarpctc.so.");

DEFINE_string(lapack_dir, "", "Specify path for loading liblapack.so.");

// Command-line flag giving an explicit directory for libnccl.  The help
// text previously referred to libcublas/libcurand and "search cuda" -- a
// copy-paste from the cublas flag; it now describes nccl.
DEFINE_string(nccl_dir, "",
              "Specify path for loading nccl library, such as libnccl.so. "
              "For instance, /usr/local/cuda/lib64. If default, "
              "dlopen will search nccl from LD_LIBRARY_PATH");

namespace paddle {
namespace platform {
namespace dynload {
Expand Down Expand Up @@ -157,6 +162,14 @@ void GetLapackDsoHandle(void** dso_handle) {
#endif
}

// Loads the nccl shared library: searches FLAGS_nccl_dir first, then the
// system search path (e.g. LD_LIBRARY_PATH).  The handle is returned via
// the out-parameter.
void GetNcclDsoHandle(void** dso_handle) {
#if defined(__APPLE__) || defined(__OSX__)
  const char* nccl_lib_name = "libnccl.dylib";
#else
  const char* nccl_lib_name = "libnccl.so";
#endif
  GetDsoHandleFromSearchPath(FLAGS_nccl_dir, nccl_lib_name, dso_handle);
}

} // namespace dynload
} // namespace platform
} // namespace paddle
8 changes: 8 additions & 0 deletions paddle/platform/dynload/dynamic_loader.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,14 @@ void GetWarpCTCDsoHandle(void** dso_handle);
*/
void GetLapackDsoHandle(void** dso_handle);

/**
 * @brief Load the dynamic shared object of NVIDIA nccl.
 *
 * Searches FLAGS_nccl_dir first, falling back to the system library
 * search path (libnccl.so on Linux, libnccl.dylib on macOS).
 *
 * @param **dso_handle out-parameter that receives the dlopen handle
 *
 */
void GetNcclDsoHandle(void** dso_handle);

} // namespace dynload
} // namespace platform
} // namespace paddle
30 changes: 30 additions & 0 deletions paddle/platform/dynload/nccl.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/platform/dynload/nccl.h"

namespace paddle {
namespace platform {
namespace dynload {

// One-time guard and dlopen handle for libnccl; declared extern in
// nccl.h and shared by every wrapper defined below.
std::once_flag nccl_dso_flag;
void *nccl_dso_handle;

// Define the singleton wrapper object that nccl.h declared extern for
// each routine (DynLoad__ncclAllReduce ncclAllReduce; etc.).
#define DEFINE_WRAP(__name) DynLoad__##__name __name

NCCL_RAND_ROUTINE_EACH(DEFINE_WRAP);

} // namespace dynload
} // namespace platform
} // namespace paddle
72 changes: 72 additions & 0 deletions paddle/platform/dynload/nccl.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once

#include <dlfcn.h>
#include <nccl.h>
#include <mutex>
#include "paddle/platform/dynload/dynamic_loader.h"

namespace paddle {
namespace platform {
namespace dynload {

// One-time guard and dlopen handle for libnccl (defined in nccl.cc).
extern std::once_flag nccl_dso_flag;
extern void* nccl_dso_handle;

#ifdef PADDLE_USE_DSO
// Dynamic-loading wrapper: on first use, dlopen libnccl via
// GetNcclDsoHandle, then resolve the routine with dlsym and forward the
// call.  The trailing decltype keeps the wrapper's return type identical
// to the wrapped routine's.
#define DECLARE_DYNAMIC_LOAD_NCCL_WRAP(__name)                     \
  struct DynLoad__##__name {                                       \
    template <typename... Args>                                    \
    auto operator()(Args... args) -> decltype(__name(args...)) {   \
      using nccl_func = decltype(__name(args...)) (*)(Args...);    \
      std::call_once(nccl_dso_flag,                                \
                     paddle::platform::dynload::GetNcclDsoHandle,  \
                     &nccl_dso_handle);                            \
      void* p_##__name = dlsym(nccl_dso_handle, #__name);          \
      return reinterpret_cast<nccl_func>(p_##__name)(args...);     \
    }                                                              \
  };                                                               \
  extern DynLoad__##__name __name
#else
// Statically-linked wrapper: forward directly to the real symbol.
// NOTE: the return type must be decltype(__name(args...)), not a
// hard-coded ncclResult_t -- ncclGetErrorString returns const char*, so
// forcing ncclResult_t breaks its wrapper.
#define DECLARE_DYNAMIC_LOAD_NCCL_WRAP(__name)                     \
  struct DynLoad__##__name {                                       \
    template <typename... Args>                                    \
    auto operator()(Args... args) -> decltype(__name(args...)) {   \
      return __name(args...);                                      \
    }                                                              \
  };                                                               \
  extern DynLoad__##__name __name
#endif

// Every nccl routine exposed through the dynload layer.
#define NCCL_RAND_ROUTINE_EACH(__macro) \
  __macro(ncclCommInitAll);             \
  __macro(ncclGetUniqueId);             \
  __macro(ncclCommInitRank);            \
  __macro(ncclCommDestroy);             \
  __macro(ncclCommCount);               \
  __macro(ncclCommCuDevice);            \
  __macro(ncclCommUserRank);            \
  __macro(ncclAllReduce);               \
  __macro(ncclBcast);                   \
  __macro(ncclAllGather);               \
  __macro(ncclReduce);                  \
  __macro(ncclGetErrorString);

NCCL_RAND_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_NCCL_WRAP)

} // namespace dynload
} // namespace platform
} // namespace paddle
14 changes: 14 additions & 0 deletions paddle/platform/enforce.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,14 @@ limitations under the License. */
#include <cxxabi.h> // for __cxa_demangle
#endif

#include <glog/logging.h>

#ifdef PADDLE_WITH_CUDA

#include "paddle/platform/dynload/cublas.h"
#include "paddle/platform/dynload/cudnn.h"
#include "paddle/platform/dynload/curand.h"
#include "paddle/platform/dynload/nccl.h"

#include <cublas_v2.h>
#include <cudnn.h>
Expand Down Expand Up @@ -172,6 +175,17 @@ inline typename std::enable_if<sizeof...(Args) != 0, void>::type throw_on_error(
throw std::runtime_error(err + string::Sprintf(args...));
}

// Overload of throw_on_error for nccl status codes: no-op on ncclSuccess,
// otherwise throws std::runtime_error carrying nccl's own error message
// followed by the caller-formatted context.
template <typename... Args>
inline typename std::enable_if<sizeof...(Args) != 0, void>::type throw_on_error(
    ncclResult_t stat, const Args&... args) {
  if (stat == ncclSuccess) return;
  std::string msg = platform::dynload::ncclGetErrorString(stat);
  throw std::runtime_error(msg + string::Sprintf(args...));
}

#endif // PADDLE_ONLY_CPU

template <typename T>
Expand Down
1 change: 1 addition & 0 deletions paddle/platform/place.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ struct GPUPlace {
GPUPlace() : GPUPlace(0) {}
explicit GPUPlace(int d) : device(d) {}

// Accessor for the device ordinal this GPUPlace refers to.
inline int GetDeviceId() const { return device; }
// needed for variant equality comparison
inline bool operator==(const GPUPlace &o) const { return device == o.device; }
inline bool operator!=(const GPUPlace &o) const { return !(*this == o); }
Expand Down