Skip to content

Commit

Permalink
BUG: Remove CudaContextManager class and use cudaSetDevice
Browse files Browse the repository at this point in the history
Use the primary context with cudaSetDevice() introduced by Cuda 7 (https://developer.download.nvidia.com/compute/cuda/7_0/Prod/doc/CUDA_Toolkit_Release_Notes.pdf) instead of a new one. cudaSetDevice is called before every memory transfer between the CPU and GPU to be sure that the context is set for the current thread, see https://developer.nvidia.com/blog/cuda-pro-tip-always-set-current-device-avoid-multithreading-bugs/.
  • Loading branch information
LAURENDEAU Matthieu authored and SimonRit committed Feb 14, 2024
1 parent 9d3fe9b commit 09a9645
Show file tree
Hide file tree
Showing 7 changed files with 9 additions and 209 deletions.
66 changes: 0 additions & 66 deletions include/itkCudaContextManager.h

This file was deleted.

5 changes: 2 additions & 3 deletions include/itkCudaDataManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
#include "itkDataObject.h"
#include "itkObjectFactory.h"
#include "itkCudaUtil.h"
#include "itkCudaContextManager.h"
#include "CudaCommonExport.h"

#include <mutex>
Expand Down Expand Up @@ -232,6 +231,8 @@ class CudaCommon_EXPORT CudaDataManager : public Object
void
PrintSelf(std::ostream & os, Indent indent) const override;

int m_Device;

private:
CudaDataManager(const Self &) = delete; // purposely not implemented
void
Expand All @@ -240,8 +241,6 @@ class CudaCommon_EXPORT CudaDataManager : public Object
protected:
size_t m_BufferSize; // # of bytes

CudaContextManager * m_ContextManager;

/** buffer type */
int m_MemFlags;

Expand Down
1 change: 0 additions & 1 deletion include/itkCudaImageDataManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
#include <itkObjectFactory.h>
#include "itkCudaUtil.h"
#include "itkCudaDataManager.h"
#include "itkCudaContextManager.h"

namespace itk
{
Expand Down
8 changes: 2 additions & 6 deletions include/itkCudaImageDataManager.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,7 @@ CudaImageDataManager<ImageType>::MakeCPUBufferUpToDate()
std::cout << this << ": GPU->CPU data copy" << std::endl;
#endif

CUDA_CHECK(cuCtxSetCurrent(
*(this->m_ContextManager->GetCurrentContext()))); // This is necessary when running multithread to bind the host
// CPU thread to the right context
CUDA_CHECK(cudaSetDevice(m_Device));
errid = cudaMemcpy(m_CPUBuffer, m_GPUBuffer->GetPointer(), m_BufferSize, cudaMemcpyDeviceToHost);
CudaCheckError(errid, __FILE__, __LINE__, ITK_LOCATION);

Expand Down Expand Up @@ -117,9 +115,7 @@ CudaImageDataManager<ImageType>::MakeGPUBufferUpToDate()
std::cout << "CPU->GPU data copy" << std::endl;
#endif

CUDA_CHECK(cuCtxSetCurrent(
*(this->m_ContextManager->GetCurrentContext()))); // This is necessary when running multithread to bind the host
// CPU thread to the right context
CUDA_CHECK(cudaSetDevice(m_Device));
errid = cudaMemcpy(m_GPUBuffer->GetPointer(), m_CPUBuffer, m_BufferSize, cudaMemcpyHostToDevice);
CudaCheckError(errid, __FILE__, __LINE__, ITK_LOCATION);

Expand Down
1 change: 0 additions & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
set(CudaCommon_SRCS
itkCudaContextManager.cxx
itkCudaDataManager.cxx
itkCudaUtil.cxx
itkCudaMemoryProbe.cxx
Expand Down
109 changes: 0 additions & 109 deletions src/itkCudaContextManager.cxx

This file was deleted.

28 changes: 5 additions & 23 deletions src/itkCudaDataManager.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,8 @@ namespace itk
// constructor
CudaDataManager::CudaDataManager()
{
m_ContextManager = CudaContextManager::GetInstance();

// Creating the context in the constructor allows avoiding a memory leak.
// However, the cuda data manager is created even if there is no use of CUDA
// software and sometimes one compiles RTK with CUDA but wants to use it
// without CUDA. So if the context pointer is nullptr, which indicates that there
// is no CUDA device available, we just do not set the context (SR). This fixes
// the problem reported here:
// https://www.creatis.insa-lyon.fr/pipermail/rtk-users/2015-July/000570.html
CUcontext * ctx = m_ContextManager->GetCurrentContext();
if (ctx)
CUDA_CHECK(cuCtxSetCurrent(*ctx));
m_Device = itk::CudaGetMaxFlopsDev();
CUDA_CHECK(cudaSetDevice(m_Device));

m_CPUBuffer = nullptr;
m_GPUBuffer = GPUMemPointer::New();
Expand All @@ -56,7 +46,6 @@ CudaDataManager::CudaDataManager()
CudaDataManager::~CudaDataManager()
{
m_GPUBuffer = nullptr;
CudaContextManager::DestroyInstance();
}

void
Expand Down Expand Up @@ -91,9 +80,7 @@ CudaDataManager::Free()
{
try
{
CUDA_CHECK(cuCtxSetCurrent(
*(this->m_ContextManager->GetCurrentContext()))); // This is necessary when running multithread to bind the host
// CPU thread to the right context
CUDA_CHECK(cudaSetDevice(m_Device));
m_GPUBuffer->Free();
}
catch (itk::ExceptionObject & e)
Expand Down Expand Up @@ -171,9 +158,7 @@ CudaDataManager::UpdateCPUBuffer()
std::cout << this << "::UpdateCPUBuffer GPU->CPU data copy " << m_GPUBuffer->GetPointer() << "->" << m_CPUBuffer
<< " : " << m_BufferSize << std::endl;
#endif
CUDA_CHECK(cuCtxSetCurrent(
*(this->m_ContextManager->GetCurrentContext()))); // This is necessary when running multithread to bind the host
// CPU thread to the right context
CUDA_CHECK(cudaSetDevice(m_Device));
CUDA_CHECK(cudaMemcpy(m_CPUBuffer, m_GPUBuffer->GetPointer(), m_BufferSize, cudaMemcpyDeviceToHost));
m_IsCPUBufferDirty = false;
}
Expand Down Expand Up @@ -212,9 +197,7 @@ CudaDataManager::UpdateGPUBuffer()
std::cout << this << "::UpdateGPUBuffer CPU->GPU data copy " << m_CPUBuffer << "->" << m_GPUBuffer->GetPointer()
<< " : " << m_BufferSize << std::endl;
#endif
CUDA_CHECK(cuCtxSetCurrent(
*(this->m_ContextManager->GetCurrentContext()))); // This is necessary when running multithread to bind the host
// CPU thread to the right context
CUDA_CHECK(cudaSetDevice(m_Device));
CUDA_CHECK(cudaMemcpy(m_GPUBuffer->GetPointer(), m_CPUBuffer, m_BufferSize, cudaMemcpyHostToDevice));
}
m_IsGPUBufferDirty = false;
Expand Down Expand Up @@ -259,7 +242,6 @@ CudaDataManager::Graft(const CudaDataManager * data)
if (data)
{
m_BufferSize = data->m_BufferSize;
m_ContextManager = data->m_ContextManager;
m_GPUBuffer = data->m_GPUBuffer;
m_CPUBuffer = data->m_CPUBuffer;
m_IsCPUBufferDirty = data->m_IsCPUBufferDirty;
Expand Down

2 comments on commit 09a9645

@theysp
Copy link

@theysp theysp commented on 09a9645 Feb 16, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

itkCudaContextManager causes some synchronization problems with multiple thread programming. It's quite nice to make this revision. Would you please clarify the reason that pushes you to do so?

@SimonRit
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The motivation was that @laurendeaumatthieu had troubles combining CudaCommon and CuPy codes, as explained in #32. He pointed out the simplicity of the PrimaryContext which was not there when CudaCommon was created (I believe).
Do you see any drawback with this change that we might have missed?

Please sign in to comment.