drive from service
VinInn committed May 15, 2022
1 parent 4743b8e commit e2d5802
Showing 6 changed files with 62 additions and 34 deletions.
9 changes: 9 additions & 0 deletions HeterogeneousCore/CUDAServices/src/CUDAService.cc
@@ -16,6 +16,7 @@
#include "HeterogeneousCore/CUDAUtilities/interface/StreamCache.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cachingAllocators.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaMemoryPool.h"
#include "HeterogeneousCore/CUDAUtilities/interface/currentDevice.h"
#include "HeterogeneousCore/CUDAUtilities/interface/device_unique_ptr.h"
#include "HeterogeneousCore/CUDAUtilities/interface/host_unique_ptr.h"
@@ -135,6 +136,8 @@ CUDAService::CUDAService(edm::ParameterSet const& config) : verbose_(config.getU
bool configEnabled = config.getUntrackedParameter<bool>("enabled");
if (not configEnabled) {
edm::LogInfo("CUDAService") << "CUDAService disabled by configuration";
+ // enable cpu memory pool
+ memoryPool::cuda::init(true);
return;
}

@@ -361,6 +364,9 @@ CUDAService::CUDAService(edm::ParameterSet const& config) : verbose_(config.getU
cms::cuda::getEventCache().clear();
cms::cuda::getStreamCache().clear();

+ // enable memory pool
+ memoryPool::cuda::init(false);

if (verbose_) {
log << '\n' << "CUDAService fully initialized";
}
@@ -381,6 +387,9 @@ CUDAService::~CUDAService() {
cms::cuda::getEventCache().clear();
cms::cuda::getStreamCache().clear();

+ // destroy cpu memory pool
+ memoryPool::cuda::shutdown();

for (int i = 0; i < numberOfDevices_; ++i) {
cudaCheck(cudaSetDevice(i));
cudaCheck(cudaDeviceSynchronize());
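Taken together, the CUDAService changes drive the pool lifetime from the service: the constructor creates a CPU-only pool when CUDA is disabled by configuration and the full set of pools otherwise, and the destructor shuts everything down after the event and stream caches are cleared. A minimal sketch of that lifecycle, using only the init()/shutdown() calls added here; the surrounding OwningService class is a hypothetical stand-in for illustration, not code from this commit.

#include "HeterogeneousCore/CUDAUtilities/interface/cudaMemoryPool.h"

class OwningService {
public:
  explicit OwningService(bool cudaEnabled) {
    if (!cudaEnabled) {
      // No usable GPU: still provide the CPU pool for host-side clients.
      memoryPool::cuda::init(/*onlyCPU=*/true);
      return;
    }
    // GPU available: create the CPU, per-device and pinned-host pools.
    memoryPool::cuda::init(/*onlyCPU=*/false);
  }

  ~OwningService() {
    // Release all pooled memory before the CUDA runtime is torn down.
    memoryPool::cuda::shutdown();
  }
};
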
@@ -129,10 +129,10 @@ struct SimplePoolAllocatorImpl final : public SimplePoolAllocator {
SimplePoolAllocatorImpl(int maxSlots) : SimplePoolAllocator(maxSlots) {}

~SimplePoolAllocatorImpl() override {
- // garbageCollect();
- #ifdef MEMORY_POOL_DEBUG
+ garbageCollect();
+ //#ifdef MEMORY_POOL_DEBUG
dumpStat();
- #endif
+ //#endif
}

Pointer doAlloc(size_t size) override { return Traits::alloc(size); }
3 changes: 3 additions & 0 deletions HeterogeneousCore/CUDAUtilities/interface/cudaMemoryPool.h
@@ -10,6 +10,9 @@
namespace memoryPool {
namespace cuda {

+ void init(bool onlyCPU = false);
+ void shutdown();

void dumpStat();

SimplePoolAllocator *getPool(Where where);
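The header now exposes the explicit lifecycle calls next to getPool(). A hedged caller-side sketch follows; it assumes the Where enumerators onCPU and onDevice used in cudaMemoryPool.cc are reachable as memoryPool::onCPU and memoryPool::onDevice, and the inspectPools() wrapper is purely illustrative.

#include "HeterogeneousCore/CUDAUtilities/interface/cudaMemoryPool.h"

#include <cassert>

void inspectPools(bool haveGpu) {
  memoryPool::cuda::init(/*onlyCPU=*/!haveGpu);

  // After init() the pools exist; getPool() returns the allocator for a location.
  SimplePoolAllocator* cpu = memoryPool::cuda::getPool(memoryPool::onCPU);
  assert(cpu != nullptr);

  if (haveGpu) {
    SimplePoolAllocator* dev = memoryPool::cuda::getPool(memoryPool::onDevice);
    assert(dev != nullptr);
    memoryPool::cuda::dumpStat();  // prints device and host pool statistics
  }

  memoryPool::cuda::shutdown();
}
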
72 changes: 41 additions & 31 deletions HeterogeneousCore/CUDAUtilities/src/cudaMemoryPool.cc
@@ -74,51 +74,61 @@ struct CudaHostAlloc : public CudaAlloc {

namespace {

- constexpr int poolSize = 128 * 1024;

- SimplePoolAllocatorImpl<PosixAlloc> cpuPool(poolSize);

- SimplePoolAllocatorImpl<CudaHostAlloc> hostPool(poolSize);

- struct DevicePools {
- using Pool = SimplePoolAllocatorImpl<CudaDeviceAlloc>;
- DevicePools(int size) {
- int devices = 0;
- auto status = cudaGetDeviceCount(&devices);
- if (status == cudaSuccess && devices > 0) {
- m_devicePools.reserve(devices);
- for (int i = 0; i < devices; ++i)
- m_devicePools.emplace_back(new Pool(size));
- }
- }
- //return pool for current device
- Pool &operator()() {
- int dev = -1;
- cudaGetDevice(&dev);
- return *m_devicePools[dev];
- }
+ // FIXME : move it in its own place
+ std::unique_ptr<SimplePoolAllocatorImpl<PosixAlloc>> cpuPool;

+ std::unique_ptr<SimplePoolAllocatorImpl<CudaHostAlloc>> hostPool;

- std::vector<std::unique_ptr<Pool>> m_devicePools;
- };
+ using DevicePool = SimplePoolAllocatorImpl<CudaDeviceAlloc>;
+ std::vector<std::unique_ptr<DevicePool>> devicePools;

- DevicePools devicePool(poolSize);
+ void initDevicePools(int size) {
+ int devices = 0;
+ auto status = cudaGetDeviceCount(&devices);
+ if (status == cudaSuccess && devices > 0) {
+ devicePools.reserve(devices);
+ for (int i = 0; i < devices; ++i)
+ devicePools.emplace_back(new DevicePool(size));
+ }
+ }

+ DevicePool *getDevicePool() {
+ int dev = -1;
+ cudaGetDevice(&dev);
+ return devicePools[dev].get();
+ }

} // namespace

namespace memoryPool {
namespace cuda {

+ void init(bool onlyCPU) {
+ constexpr int poolSize = 128 * 1024;
+ cpuPool = std::make_unique<SimplePoolAllocatorImpl<PosixAlloc>>(poolSize);
+ if (onlyCPU)
+ return;
+ initDevicePools(poolSize);
+ hostPool = std::make_unique<SimplePoolAllocatorImpl<CudaHostAlloc>>(poolSize);
+ }

+ void shutdown() {
+ cpuPool.reset();
+ devicePools.clear();
+ hostPool.reset();
+ }

void dumpStat() {
std::cout << "device pool" << std::endl;
- devicePool().dumpStat();
+ getDevicePool()->dumpStat();
std::cout << "host pool" << std::endl;
- hostPool.dumpStat();
+ hostPool->dumpStat();
}

SimplePoolAllocator *getPool(Where where) {
- return onCPU == where
- ? (SimplePoolAllocator *)(&cpuPool)
- : (onDevice == where ? (SimplePoolAllocator *)(&devicePool()) : (SimplePoolAllocator *)(&hostPool));
+ return onCPU == where ? (SimplePoolAllocator *)(cpuPool.get())
+ : (onDevice == where ? (SimplePoolAllocator *)(getDevicePool())
+ : (SimplePoolAllocator *)(hostPool.get()));
}

// allocate either on current device or on host (actually anywhere, not cuda specific)
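The heart of the refactor is replacing the eagerly constructed namespace-scope pools (and the DevicePools helper) with std::unique_ptr instances that init() fills and shutdown() destroys, so the owning service decides when pools are built and torn down instead of relying on static initialization and exit-time destruction order. A stand-alone illustration of that pattern under stated assumptions: FakePool, kSlots and the free init()/shutdown() functions below are placeholders, not the real allocator.

#include <memory>
#include <vector>

namespace {

  struct FakePool {
    explicit FakePool(int slots) : slots_(slots) {}
    int slots_;
  };

  // Before: FakePool cpuPool(kSlots); constructed at static-initialization time.
  // After: empty handles, filled in explicitly by init().
  std::unique_ptr<FakePool> cpuPool;
  std::vector<std::unique_ptr<FakePool>> devicePools;  // one per device

}  // namespace

void init(int nDevices) {
  constexpr int kSlots = 128 * 1024;
  cpuPool = std::make_unique<FakePool>(kSlots);
  devicePools.reserve(nDevices);
  for (int i = 0; i < nDevices; ++i)
    devicePools.emplace_back(std::make_unique<FakePool>(kSlots));
}

void shutdown() {
  // Explicit, ordered teardown instead of unspecified exit-time destruction.
  cpuPool.reset();
  devicePools.clear();
}
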
2 changes: 2 additions & 0 deletions HeterogeneousCore/CUDAUtilities/test/testPoolUI.cu
@@ -29,6 +29,7 @@ int main() {
cudaStreamCreate(&(streams[i]));
}

+ memoryPool::cuda::init(false);
memoryPool::cuda::dumpStat();

auto& stream = streams[0];
@@ -94,6 +95,7 @@

cudaStreamSynchronize(stream);
memoryPool::cuda::dumpStat();
+ memoryPool::cuda::shutdown();

return 0;
}
4 changes: 4 additions & 0 deletions HeterogeneousCore/CUDAUtilities/test/testPoolUImt.cu
@@ -81,6 +81,8 @@ void go() {

#endif

+ memoryPool::cuda::init(false);

bool stop = false;
bool bin24 = false;
Thread monitor([&] {
@@ -216,6 +218,8 @@
cudaDeviceSynchronize();
std::cout << "\nfinished\n" << std::endl;
memoryPool::cuda::dumpStat();
+ std::cout << "\nshutdown\n" << std::endl;
+ memoryPool::cuda::shutdown();
}

#ifdef __CUDACC__
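The multi-threaded test now follows the same bracket: init(false) before any worker or monitor thread starts, shutdown() only after everything has joined and the device has synchronized. A hedged sketch of that ordering with a background statistics monitor; the monitor loop, its period and runWithMonitor() are illustrative and do not reproduce the test's Thread helper or workload.

#include "HeterogeneousCore/CUDAUtilities/interface/cudaMemoryPool.h"

#include <cuda_runtime.h>

#include <atomic>
#include <chrono>
#include <thread>

void runWithMonitor() {
  memoryPool::cuda::init(false);

  std::atomic<bool> stop{false};
  std::thread monitor([&] {
    // Periodically dump pool statistics while the workers run.
    while (!stop.load()) {
      memoryPool::cuda::dumpStat();
      std::this_thread::sleep_for(std::chrono::seconds(5));
    }
  });

  // ... worker threads allocating and freeing through the pools go here ...

  stop = true;
  monitor.join();
  cudaDeviceSynchronize();       // all device-side users are done
  memoryPool::cuda::dumpStat();
  memoryPool::cuda::shutdown();  // tear the pools down last
}
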

0 comments on commit e2d5802
