Skip to content

Commit

Permalink
Merge pull request #66 from microsoft/olli/api-extension
Browse files Browse the repository at this point in the history
Olli/api extension
  • Loading branch information
saeedmaleki authored May 4, 2023
2 parents 8fc822c + ddc9e68 commit 9ecf1f9
Show file tree
Hide file tree
Showing 45 changed files with 2,600 additions and 1,224 deletions.
32 changes: 32 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Top-level build description for the mscclpp shared library and its tests.
cmake_minimum_required(VERSION 3.26)

project(mscclpp LANGUAGES CUDA CXX)

# Build host and device code as C++17. Marking the standard as REQUIRED makes
# an unsupported toolchain fail at configure time instead of silently decaying
# to an older standard.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)

# Make the project-local Find<Pkg>.cmake modules visible to find_package().
list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/modules)

find_package(CUDAToolkit REQUIRED)
find_package(IBVerbs REQUIRED)
find_package(NUMA REQUIRED)
# GDRCopy is optional; it is linked below only when it was found.
find_package(GDRCopy)

option(USE_MPI_FOR_TESTS "Use MPI for tests" ON)
if(USE_MPI_FOR_TESTS)
  find_package(MPI REQUIRED)
  # Kept at directory scope on purpose: the definition has to reach the targets
  # declared in the tests/ subdirectory added at the end of this file.
  add_compile_definitions(MSCCLPP_USE_MPI_FOR_TESTS)
endif()

# Directory-scoped so that it also applies to targets created in the
# subdirectories added below.
include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})

add_library(mscclpp SHARED)
add_subdirectory(src) # This adds the sources to the mscclpp target
target_include_directories(mscclpp PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/src/include)
set_target_properties(mscclpp PROPERTIES LINKER_LANGUAGE CXX)
target_link_libraries(mscclpp PRIVATE MSCCLPP::ibverbs MSCCLPP::numa CUDA::cudart CUDA::cuda_driver)
# Test for the imported target itself rather than a result variable, so the
# link line can never reference a target that does not exist.
if(TARGET MSCCLPP::gdrcopy)
  target_link_libraries(mscclpp PRIVATE MSCCLPP::gdrcopy)
endif()

add_subdirectory(tests)
9 changes: 5 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ endif

NVCUFLAGS := -ccbin $(CXX) $(NVCC_GENCODE) -std=c++11 --expt-extended-lambda -Xfatbin -compress-all
# Use addprefix so that we can specify more than one path
NVLDFLAGS := -L$(CUDA_LIB) -lcudart -lrt
NVLDFLAGS := -L$(CUDA_LIB) -lcudart -lrt -lcuda

ifeq ($(DEBUG), 0)
NVCUFLAGS += -O3
Expand Down Expand Up @@ -120,7 +120,8 @@ LDFLAGS := $(NVLDFLAGS) $(GDRCOPY_LDFLAGS) -libverbs -lnuma

LIBSRCS := $(addprefix src/,debug.cc utils.cc init.cc proxy.cc ib.cc config.cc)
LIBSRCS += $(addprefix src/bootstrap/,bootstrap.cc socket.cc)
LIBSRCS += $(addprefix src/,communicator.cc fifo.cc host_connection.cc proxy_cpp.cc basic_proxy_handler.cc)
LIBSRCS += $(addprefix src/,communicator.cc connection.cc registered_memory.cc)
LIBSRCS += $(addprefix src/,epoch.cc proxy_cpp.cc fifo.cc channel.cc)
ifneq ($(NPKIT), 0)
LIBSRCS += $(addprefix src/misc/,npkit.cc)
endif
Expand All @@ -134,7 +135,7 @@ HEADERS := $(wildcard src/include/*.h)
CPPSOURCES := $(shell find ./ -regextype posix-extended -regex '.*\.(c|cpp|h|hpp|cc|cxx|cu)' -not -path "./build/*" -not -path "./python/*")
PYTHONCPPSOURCES := $(shell find ./python/src/ -regextype posix-extended -regex '.*\.(c|cpp|h|hpp|cc|cxx|cu)')

INCEXPORTS := mscclpp.h mscclppfifo.h mscclpp.hpp mscclppfifo.hpp
INCEXPORTS := mscclpp.h mscclppfifo.h mscclpp.hpp mscclppfifo.hpp epoch.hpp
INCTARGETS := $(INCEXPORTS:%=$(BUILDDIR)/$(INCDIR)/%)

LIBNAME := libmscclpp.so
Expand All @@ -148,7 +149,7 @@ UTOBJTARGETS := $(UTOBJS:%=$(BUILDDIR)/$(OBJDIR)/%)
UTBINS := $(patsubst %.o,$(BUILDDIR)/$(BINDIR)/%,$(UTOBJS))

TESTSDIR := tests
TESTSSRCS := $(addprefix $(TESTSDIR)/,bootstrap_test.cc allgather_test_standalone.cu allgather_test_cpp.cu bootstrap_test_cpp.cc)
TESTSSRCS := $(addprefix $(TESTSDIR)/,bootstrap_test.cc allgather_test_standalone.cu communicator_test_cpp.cu bootstrap_test_cpp.cc allgather_test_cpp.cu)
TESTSOBJS := $(patsubst %.cc,%.o,$(TESTSSRCS)) $(patsubst %.cu,%.o,$(TESTSSRCS))
TESTSOBJTARGETS := $(TESTSOBJS:%=$(BUILDDIR)/$(OBJDIR)/%)
TESTSBINS := $(patsubst %.o,$(BUILDDIR)/$(BINDIR)/%,$(TESTSOBJS))
Expand Down
8 changes: 8 additions & 0 deletions TODO.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Core API extraction

- Add a test for host side Communicator/RegisteredMemory/Connection use.
- Implement a standalone "epoch" synchronization construct that can be used as a component in custom proxies. epoch.hpp/cc has the beginnings of this.
- Reimplement the "standard" proxy service + DeviceConnection on top of the new Communicator/RegisteredMemory/Connection core API. Remnants of the old code are in channel.hpp, basic_proxy_handler.hpp/cc and host_connection.hpp/cc. Probably need a manager class to wrap all of this.
- Change the new IBConnection and Communicator to use the new C++ IbCtx and IbQp classes.
- Implement IbQp::~IbQp()
- Fix RegisteredMemory::Impl::Impl to get the IPC handle from the base pointer, not the derived pointer.
41 changes: 41 additions & 0 deletions cmake/modules/FindGDRCopy.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Find the GDRCopy libraries
#
# The following variables are optionally searched for defaults
#  GDRCOPY_ROOT_DIR: Base directory where all GDRCopy components are found
#  GDRCOPY_INCLUDE_DIR: Directory where GDRCopy headers are found
#  GDRCOPY_LIB_DIR: Directory where GDRCopy libraries are found

# The following are set after configuration is done:
#  GDRCOPY_FOUND
#  GDRCOPY_INCLUDE_DIRS
#  GDRCOPY_LIBRARIES

# An imported target MSCCLPP::gdrcopy is created if the library is found.

# Locate the directory that contains gdrapi.h; the user-supplied hints are
# tried before the default search locations.
find_path(GDRCOPY_INCLUDE_DIRS
  NAMES gdrapi.h
  HINTS
    ${GDRCOPY_INCLUDE_DIR}
    ${GDRCOPY_ROOT_DIR}
    ${GDRCOPY_ROOT_DIR}/include)

# Locate the gdrapi library itself.
find_library(GDRCOPY_LIBRARIES
  NAMES gdrapi
  HINTS
    ${GDRCOPY_LIB_DIR}
    ${GDRCOPY_ROOT_DIR}
    ${GDRCOPY_ROOT_DIR}/lib)

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(GDRCopy DEFAULT_MSG GDRCOPY_INCLUDE_DIRS GDRCOPY_LIBRARIES)
# Hide the cache entries created by find_path()/find_library() above.
mark_as_advanced(GDRCOPY_INCLUDE_DIRS GDRCOPY_LIBRARIES)

if(GDRCOPY_FOUND)
  if(NOT TARGET MSCCLPP::gdrcopy)
    add_library(MSCCLPP::gdrcopy UNKNOWN IMPORTED)
  endif()
  # Use the find_path() result (GDRCOPY_INCLUDE_DIRS), not the optional
  # GDRCOPY_INCLUDE_DIR hint, which is normally unset and would leave the
  # imported target without an include path.
  set_target_properties(MSCCLPP::gdrcopy PROPERTIES
    INTERFACE_INCLUDE_DIRECTORIES "${GDRCOPY_INCLUDE_DIRS}"
    IMPORTED_LINK_INTERFACE_LANGUAGES "C"
    IMPORTED_LOCATION "${GDRCOPY_LIBRARIES}")
endif()
41 changes: 41 additions & 0 deletions cmake/modules/FindIBVerbs.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Find the IB Verbs libraries
#
# The following variables are optionally searched for defaults
#  IBVERBS_ROOT_DIR: Base directory where all ibverbs components are found
#  IBVERBS_INCLUDE_DIR: Directory where ibverbs headers are found
#  IBVERBS_LIB_DIR: Directory where ibverbs libraries are found

# The following are set after configuration is done:
#  IBVERBS_FOUND
#  IBVERBS_INCLUDE_DIRS
#  IBVERBS_LIBRARIES

# An imported target MSCCLPP::ibverbs is created if the library is found.

# Locate the directory that contains infiniband/verbs.h; the user-supplied
# hints are tried before the default search locations.
find_path(IBVERBS_INCLUDE_DIRS
  NAMES infiniband/verbs.h
  HINTS
    ${IBVERBS_INCLUDE_DIR}
    ${IBVERBS_ROOT_DIR}
    ${IBVERBS_ROOT_DIR}/include)

# Locate the ibverbs library itself.
find_library(IBVERBS_LIBRARIES
  NAMES ibverbs
  HINTS
    ${IBVERBS_LIB_DIR}
    ${IBVERBS_ROOT_DIR}
    ${IBVERBS_ROOT_DIR}/lib)

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(IBVerbs DEFAULT_MSG IBVERBS_INCLUDE_DIRS IBVERBS_LIBRARIES)
# Hide the cache entries created by find_path()/find_library() above.
mark_as_advanced(IBVERBS_INCLUDE_DIRS IBVERBS_LIBRARIES)

if(IBVERBS_FOUND)
  if(NOT TARGET MSCCLPP::ibverbs)
    add_library(MSCCLPP::ibverbs UNKNOWN IMPORTED)
  endif()
  # Use the find_path() result (IBVERBS_INCLUDE_DIRS), not the optional
  # IBVERBS_INCLUDE_DIR hint, which is normally unset and would leave the
  # imported target without an include path.
  set_target_properties(MSCCLPP::ibverbs PROPERTIES
    INTERFACE_INCLUDE_DIRECTORIES "${IBVERBS_INCLUDE_DIRS}"
    IMPORTED_LINK_INTERFACE_LANGUAGES "C"
    IMPORTED_LOCATION "${IBVERBS_LIBRARIES}")
endif()
41 changes: 41 additions & 0 deletions cmake/modules/FindNUMA.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Find the numa libraries
#
# The following variables are optionally searched for defaults
#  NUMA_ROOT_DIR: Base directory where all numa components are found
#  NUMA_INCLUDE_DIR: Directory where numa headers are found
#  NUMA_LIB_DIR: Directory where numa libraries are found

# The following are set after configuration is done:
#  NUMA_FOUND
#  NUMA_INCLUDE_DIRS
#  NUMA_LIBRARIES

# An imported target MSCCLPP::numa is created if the library is found.

# Locate the directory that contains numa.h; the user-supplied hints are tried
# before the default search locations.
find_path(NUMA_INCLUDE_DIRS
  NAMES numa.h
  HINTS
    ${NUMA_INCLUDE_DIR}
    ${NUMA_ROOT_DIR}
    ${NUMA_ROOT_DIR}/include)

# Locate the numa library itself.
find_library(NUMA_LIBRARIES
  NAMES numa
  HINTS
    ${NUMA_LIB_DIR}
    ${NUMA_ROOT_DIR}
    ${NUMA_ROOT_DIR}/lib)

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(NUMA DEFAULT_MSG NUMA_INCLUDE_DIRS NUMA_LIBRARIES)
# Hide the cache entries created by find_path()/find_library() above.
mark_as_advanced(NUMA_INCLUDE_DIRS NUMA_LIBRARIES)

if(NUMA_FOUND)
  if(NOT TARGET MSCCLPP::numa)
    add_library(MSCCLPP::numa UNKNOWN IMPORTED)
  endif()
  # Use the find_path() result (NUMA_INCLUDE_DIRS), not the optional
  # NUMA_INCLUDE_DIR hint, which is normally unset and would leave the
  # imported target without an include path.
  set_target_properties(MSCCLPP::numa PROPERTIES
    INTERFACE_INCLUDE_DIRECTORIES "${NUMA_INCLUDE_DIRS}"
    IMPORTED_LINK_INTERFACE_LANGUAGES "C"
    IMPORTED_LOCATION "${NUMA_LIBRARIES}")
endif()
5 changes: 5 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Collect every .cc and .h file at or below this directory. CONFIGURE_DEPENDS
# asks the generated build system to re-check the glob when building.
file(GLOB_RECURSE SOURCES CONFIGURE_DEPENDS
  "${CMAKE_CURRENT_SOURCE_DIR}/*.cc"
  "${CMAKE_CURRENT_SOURCE_DIR}/*.h")

# Leave this directory's gdr.cc (if there is one) out of the library.
list(REMOVE_ITEM SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/gdr.cc")

# Attach what is left to the mscclpp target declared by the parent directory.
target_sources(mscclpp PRIVATE ${SOURCES})
29 changes: 0 additions & 29 deletions src/basic_proxy_handler.cc

This file was deleted.

10 changes: 5 additions & 5 deletions src/bootstrap/bootstrap.cc
Original file line number Diff line number Diff line change
Expand Up @@ -180,9 +180,8 @@ Bootstrap::Impl::~Impl()
}
}

void Bootstrap::Impl::getRemoteAddresses(mscclppSocket* listenSock,
std::vector<mscclppSocketAddress>& rankAddresses,
std::vector<mscclppSocketAddress>& rankAddressesRoot, int& rank)
void Bootstrap::Impl::getRemoteAddresses(mscclppSocket* listenSock, std::vector<mscclppSocketAddress>& rankAddresses,
std::vector<mscclppSocketAddress>& rankAddressesRoot, int& rank)
{
mscclppSocket sock;
ExtInfo info;
Expand Down Expand Up @@ -211,7 +210,7 @@ void Bootstrap::Impl::getRemoteAddresses(mscclppSocket* listenSock,
}

void Bootstrap::Impl::sendHandleToPeer(int peer, const std::vector<mscclppSocketAddress>& rankAddresses,
const std::vector<mscclppSocketAddress>& rankAddressesRoot)
const std::vector<mscclppSocketAddress>& rankAddressesRoot)
{
mscclppSocket sock;
int next = (peer + 1) % this->nRanks_;
Expand All @@ -226,7 +225,8 @@ void Bootstrap::Impl::bootstrapCreateRoot()
mscclppSocket listenSock;

// mscclppSocket* listenSock = new mscclppSocket(); // TODO(saemal) make this a shared ptr
MSCCLPPTHROW(mscclppSocketInit(&listenSock, &uniqueId_.addr, uniqueId_.magic, mscclppSocketTypeBootstrap, nullptr, 0));
MSCCLPPTHROW(
mscclppSocketInit(&listenSock, &uniqueId_.addr, uniqueId_.magic, mscclppSocketTypeBootstrap, nullptr, 0));
MSCCLPPTHROW(mscclppSocketListen(&listenSock));
MSCCLPPTHROW(mscclppSocketGetAddr(&listenSock, &uniqueId_.addr));
auto lambda = [this, listenSock]() { this->bootstrapRoot(listenSock); };
Expand Down
26 changes: 26 additions & 0 deletions src/channel.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#include "channel.hpp"
#include "utils.h"
#include "checks.hpp"
#include "api.h"
#include "debug.h"

namespace mscclpp {
namespace channel {

// Constructs the service around an existing Communicator.
//
// The proxy member is given two callbacks: the first forwards each
// ProxyTrigger to handleTrigger(), the second invokes bindThread(). After the
// members are initialized, the current CUDA device is queried and its NUMA
// node is stored in deviceNumaNode. CUDATHROW / MSCCLPPTHROW wrap the two
// calls that can fail.
MSCCLPP_API_CPP DeviceChannelService::DeviceChannelService(Communicator& communicator)
  : communicator_(communicator),
    proxy_([this](ProxyTrigger triggerRaw) { return handleTrigger(triggerRaw); }, [this]() { bindThread(); })
{
  int currentDevice;
  CUDATHROW(cudaGetDevice(&currentDevice));
  MSCCLPPTHROW(getDeviceNumaNode(currentDevice, &deviceNumaNode));
}

// Binds to the NUMA node recorded in deviceNumaNode and logs the choice.
// Does nothing when deviceNumaNode is negative.
// NOTE(review): the log text suggests this runs on the proxy thread (it is
// passed to proxy_ as a callback in the constructor) -- confirm against Proxy.
MSCCLPP_API_CPP void DeviceChannelService::bindThread()
{
  if (deviceNumaNode < 0) {
    return;
  }
  MSCCLPPTHROW(numaBind(deviceNumaNode));
  INFO(MSCCLPP_INIT, "NUMA node of DeviceChannelService proxy thread is set to %d", deviceNumaNode);
}

} // namespace channel
} // namespace mscclpp
Loading

0 comments on commit 9ecf1f9

Please sign in to comment.