-
Notifications
You must be signed in to change notification settings - Fork 40
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #66 from microsoft/olli/api-extension
Olli/api extension
- Loading branch information
Showing
45 changed files
with
2,600 additions
and
1,224 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
# Top-level build script: configures the mscclpp shared library and then
# descends into the tests directory.
cmake_minimum_required(VERSION 3.26)

project(mscclpp LANGUAGES CUDA CXX)

# Build host and device code as C++17. The *_STANDARD_REQUIRED switches make
# configuration fail instead of silently decaying to an older standard when a
# compiler cannot provide C++17.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)

# Let find_package() see the Find*.cmake modules shipped with the project.
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/modules")

find_package(CUDAToolkit REQUIRED)
find_package(IBVerbs REQUIRED)
find_package(NUMA REQUIRED)
# GDRCopy is optional: it is only linked below when it was found.
find_package(GDRCopy)

option(USE_MPI_FOR_TESTS "Use MPI for tests" ON)
if(USE_MPI_FOR_TESTS)
  find_package(MPI REQUIRED)
  # Directory-scoped on purpose: the definition also applies to targets created
  # by the add_subdirectory() calls further down.
  add_definitions(-DMSCCLPP_USE_MPI_FOR_TESTS)
endif()

# Directory-scoped as well, so every target defined below inherits these paths.
include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})

# The library target is declared without sources here; src/CMakeLists.txt
# attaches them to the mscclpp target.
add_library(mscclpp SHARED)
add_subdirectory(src) # This adds the sources to the mscclpp target
target_include_directories(mscclpp PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/src/include)
set_target_properties(mscclpp PROPERTIES LINKER_LANGUAGE CXX)
target_link_libraries(mscclpp PRIVATE MSCCLPP::ibverbs MSCCLPP::numa CUDA::cudart CUDA::cuda_driver)
if(GDRCOPY_FOUND)
  target_link_libraries(mscclpp PRIVATE MSCCLPP::gdrcopy)
endif()

add_subdirectory(tests)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# Core API extraction | ||
|
||
- Add a test for host side Communicator/RegisteredMemory/Connection use. | ||
- Implement a standalone "epoch" synchronization construct that can be used as a component in custom proxies. epoch.hpp/cc has the beginnings of this. | ||
- Reimplement the "standard" proxy service + DeviceConnection on top of the new Communicator/RegisteredMemory/Connection core API. Remnants of the old code are in channel.hpp, basic_proxy_handler.hpp/cc and host_connection.hpp/cc. Probably need a manager class to wrap all of this. | ||
- Change the new IBConnection and Communicator to use the new C++ IbCtx and IbQp classes. | ||
- Implement IbQp::~IbQp() | ||
- Fix RegisteredMemory::Impl::Impl to get the IPC handle from the base pointer, not the derived pointer. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
# Find the GDRCopy libraries
#
# The following variables are optional search hints:
#  GDRCOPY_ROOT_DIR: Base directory where all GDRCopy components are found
#  GDRCOPY_INCLUDE_DIR: Directory where GDRCopy headers are found
#  GDRCOPY_LIB_DIR: Directory where GDRCopy libraries are found
#
# The following are set after configuration is done:
#  GDRCOPY_FOUND
#  GDRCOPY_INCLUDE_DIRS: Directory that contains gdrapi.h
#  GDRCOPY_LIBRARIES: Full path of the gdrapi library
#
# An imported target MSCCLPP::gdrcopy is created if the library is found.

find_path(GDRCOPY_INCLUDE_DIRS
  NAMES gdrapi.h
  HINTS
  ${GDRCOPY_INCLUDE_DIR}
  ${GDRCOPY_ROOT_DIR}
  ${GDRCOPY_ROOT_DIR}/include)

find_library(GDRCOPY_LIBRARIES
  NAMES gdrapi
  HINTS
  ${GDRCOPY_LIB_DIR}
  ${GDRCOPY_ROOT_DIR}
  ${GDRCOPY_ROOT_DIR}/lib)

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(GDRCopy DEFAULT_MSG GDRCOPY_INCLUDE_DIRS GDRCOPY_LIBRARIES)
# Hide the cache entries created by find_path()/find_library() above.
mark_as_advanced(GDRCOPY_INCLUDE_DIRS GDRCOPY_LIBRARIES)

if(GDRCOPY_FOUND)
  if(NOT TARGET MSCCLPP::gdrcopy)
    add_library(MSCCLPP::gdrcopy UNKNOWN IMPORTED)
  endif()
  # Use the find_path() result (GDRCOPY_INCLUDE_DIRS), not the optional
  # GDRCOPY_INCLUDE_DIR hint, which is empty unless the user provided it.
  set_target_properties(MSCCLPP::gdrcopy PROPERTIES
    INTERFACE_INCLUDE_DIRECTORIES "${GDRCOPY_INCLUDE_DIRS}"
    IMPORTED_LINK_INTERFACE_LANGUAGES "C"
    IMPORTED_LOCATION "${GDRCOPY_LIBRARIES}")
endif()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
# Find the IB Verbs libraries
#
# The following variables are optional search hints:
#  IBVERBS_ROOT_DIR: Base directory where all ibverbs components are found
#  IBVERBS_INCLUDE_DIR: Directory where ibverbs headers are found
#  IBVERBS_LIB_DIR: Directory where ibverbs libraries are found
#
# The following are set after configuration is done:
#  IBVERBS_FOUND
#  IBVERBS_INCLUDE_DIRS: Directory that contains infiniband/verbs.h
#  IBVERBS_LIBRARIES: Full path of the ibverbs library
#
# An imported target MSCCLPP::ibverbs is created if the library is found.

find_path(IBVERBS_INCLUDE_DIRS
  NAMES infiniband/verbs.h
  HINTS
  ${IBVERBS_INCLUDE_DIR}
  ${IBVERBS_ROOT_DIR}
  ${IBVERBS_ROOT_DIR}/include)

find_library(IBVERBS_LIBRARIES
  NAMES ibverbs
  HINTS
  ${IBVERBS_LIB_DIR}
  ${IBVERBS_ROOT_DIR}
  ${IBVERBS_ROOT_DIR}/lib)

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(IBVerbs DEFAULT_MSG IBVERBS_INCLUDE_DIRS IBVERBS_LIBRARIES)
# Hide the cache entries created by find_path()/find_library() above.
mark_as_advanced(IBVERBS_INCLUDE_DIRS IBVERBS_LIBRARIES)

if(IBVERBS_FOUND)
  if(NOT TARGET MSCCLPP::ibverbs)
    add_library(MSCCLPP::ibverbs UNKNOWN IMPORTED)
  endif()
  # Use the find_path() result (IBVERBS_INCLUDE_DIRS), not the optional
  # IBVERBS_INCLUDE_DIR hint, which is empty unless the user provided it.
  set_target_properties(MSCCLPP::ibverbs PROPERTIES
    INTERFACE_INCLUDE_DIRECTORIES "${IBVERBS_INCLUDE_DIRS}"
    IMPORTED_LINK_INTERFACE_LANGUAGES "C"
    IMPORTED_LOCATION "${IBVERBS_LIBRARIES}")
endif()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
# Find the numa libraries
#
# The following variables are optional search hints:
#  NUMA_ROOT_DIR: Base directory where all numa components are found
#  NUMA_INCLUDE_DIR: Directory where numa headers are found
#  NUMA_LIB_DIR: Directory where numa libraries are found
#
# The following are set after configuration is done:
#  NUMA_FOUND
#  NUMA_INCLUDE_DIRS: Directory that contains numa.h
#  NUMA_LIBRARIES: Full path of the numa library
#
# An imported target MSCCLPP::numa is created if the library is found.

find_path(NUMA_INCLUDE_DIRS
  NAMES numa.h
  HINTS
  ${NUMA_INCLUDE_DIR}
  ${NUMA_ROOT_DIR}
  ${NUMA_ROOT_DIR}/include)

find_library(NUMA_LIBRARIES
  NAMES numa
  HINTS
  ${NUMA_LIB_DIR}
  ${NUMA_ROOT_DIR}
  ${NUMA_ROOT_DIR}/lib)

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(NUMA DEFAULT_MSG NUMA_INCLUDE_DIRS NUMA_LIBRARIES)
# Hide the cache entries created by find_path()/find_library() above.
mark_as_advanced(NUMA_INCLUDE_DIRS NUMA_LIBRARIES)

if(NUMA_FOUND)
  if(NOT TARGET MSCCLPP::numa)
    add_library(MSCCLPP::numa UNKNOWN IMPORTED)
  endif()
  # Use the find_path() result (NUMA_INCLUDE_DIRS), not the optional
  # NUMA_INCLUDE_DIR hint, which is empty unless the user provided it.
  set_target_properties(MSCCLPP::numa PROPERTIES
    INTERFACE_INCLUDE_DIRECTORIES "${NUMA_INCLUDE_DIRS}"
    IMPORTED_LINK_INTERFACE_LANGUAGES "C"
    IMPORTED_LOCATION "${NUMA_LIBRARIES}")
endif()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# Collect every .cc and .h file below this directory. CONFIGURE_DEPENDS asks
# the build to re-check the glob so added or removed files are picked up.
file(GLOB_RECURSE SOURCES CONFIGURE_DEPENDS
  "${CMAKE_CURRENT_SOURCE_DIR}/*.cc"
  "${CMAKE_CURRENT_SOURCE_DIR}/*.h")

# gdr.cc in this directory is left out of the build. Globbing for it yields an
# empty list when the file is absent.
file(GLOB excluded_sources "${CMAKE_CURRENT_SOURCE_DIR}/gdr.cc")
list(REMOVE_ITEM SOURCES ${excluded_sources})

# Attach the collected files to the mscclpp target; that target is not created
# in this script, so it must already exist when this runs.
target_sources(mscclpp PRIVATE ${SOURCES})
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
#include "channel.hpp" | ||
#include "utils.h" | ||
#include "checks.hpp" | ||
#include "api.h" | ||
#include "debug.h" | ||
|
||
namespace mscclpp { | ||
namespace channel { | ||
|
||
// Constructs the service around `communicator`.
//
// The proxy is handed two callbacks that forward to this object: one passes
// each trigger to handleTrigger(), the other calls bindThread(). The body then
// queries the current CUDA device and asks getDeviceNumaNode() to fill in
// deviceNumaNode for it; a failure in either call is raised through the
// CUDATHROW / MSCCLPPTHROW macros.
//
// NOTE(review): deviceNumaNode is assigned only after proxy_ has been
// constructed; this presumably relies on the proxy not invoking bindThread()
// during its own construction -- confirm.
MSCCLPP_API_CPP DeviceChannelService::DeviceChannelService(Communicator& communicator)
  : communicator_(communicator),
    proxy_([this](ProxyTrigger triggerRaw) { return handleTrigger(triggerRaw); },
           [this]() { bindThread(); })
{
  int currentDevice;
  CUDATHROW(cudaGetDevice(&currentDevice));
  MSCCLPPTHROW(getDeviceNumaNode(currentDevice, &deviceNumaNode));
}
|
||
// Binds the calling thread to the NUMA node recorded in deviceNumaNode and
// logs the chosen node. Does nothing when deviceNumaNode is negative
// (presumably meaning that no NUMA node is known -- confirm).
MSCCLPP_API_CPP void DeviceChannelService::bindThread()
{
  if (deviceNumaNode < 0) {
    return;
  }
  MSCCLPPTHROW(numaBind(deviceNumaNode));
  INFO(MSCCLPP_INIT, "NUMA node of DeviceChannelService proxy thread is set to %d", deviceNumaNode);
}
|
||
} // namespace channel | ||
} // namespace mscclpp |
Oops, something went wrong.