make code checks happy

cms-sw · May 10, 2022 · 1487b88 · 1487b88
1 parent 5866970
commit 1487b88
Show file tree

Hide file tree

Showing 3 changed files with 73 additions and 96 deletions.
diff --git a/HeterogeneousCore/CUDAUtilities/interface/cudaMemoryPool.h b/HeterogeneousCore/CUDAUtilities/interface/cudaMemoryPool.h
@@ -15,10 +15,10 @@ namespace memoryPool {
     SimplePoolAllocator *getPool(Where where);
 
     // allocate either on current device or on host
-    /* inline */ std::pair<void *, int> alloc(uint64_t size, SimplePoolAllocator &pool);
+    std::pair<void *, int> alloc(uint64_t size, SimplePoolAllocator &pool);
 
     // schedule free
-    /* inline */ void free(cudaStream_t stream, std::vector<int> buckets, SimplePoolAllocator &pool);
+    void free(cudaStream_t stream, std::vector<int> buckets, SimplePoolAllocator &pool);
 
     template <typename T>
     auto copy(Buffer<T> &dst, Buffer<T> const &src, uint64_t size, cudaStream_t stream) {
@@ -80,5 +80,3 @@ namespace memoryPool {
 
   }  // namespace cuda
 }  // namespace memoryPool
-
-// #include "cudaMemoryPoolImpl.h"
diff --git a/HeterogeneousCore/CUDAUtilities/interface/cudaMemoryPoolImpl.h b/HeterogeneousCore/CUDAUtilities/interface/cudaMemoryPoolImpl.h
diff --git a/HeterogeneousCore/CUDAUtilities/src/cudaMemoryPool.cc b/HeterogeneousCore/CUDAUtilities/src/cudaMemoryPool.cc
@@ -1,15 +1,72 @@
 #include "HeterogeneousCore/CUDAUtilities/interface/cudaMemoryPool.h"
-#include "HeterogeneousCore/CUDAUtilities/interface/cudaMemoryPoolImpl.h"
 
 #include "HeterogeneousCore/CUDAUtilities/interface/SimplePoolAllocator.h"
 
 #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
 
+#include <cuda.h>
 #include <cuda_runtime.h>
 #include <cuda_runtime_api.h>
 
 #include <iostream>
 
+namespace {
+
+  // Stream callback that hands a pending free request over to the memory pool.
+  //
+  // Registered with cudaStreamAddCallback by CudaAlloc::scheduleFree.
+  //   streamId : stream the callback was enqueued on (used only for the diagnostic)
+  //   status   : cudaSuccess, or the error code reported for the stream
+  //   p        : opaque user data; it is interpreted as a memoryPool::Payload *
+  //
+  // A failing status is only reported on std::cout: the payload is forwarded to
+  // memoryPool::scheduleFree in every case.
+  // NOTE(review): the alternative registration through cudaLaunchHostFunc would need
+  // the signature `void CUDART_CB freeCallback(void *p)` instead.
+  void CUDART_CB freeCallback(cudaStream_t streamId, cudaError_t status, void *p) {
+    if (status != cudaSuccess) {
+      std::cout << "Error in free callback in stream " << streamId << std::endl;
+      std::cout << " error " << cudaGetErrorName(status) << ": " << cudaGetErrorString(status) << std::endl;
+    }
+    memoryPool::scheduleFree(static_cast<memoryPool::Payload *>(p));
+  }
+
+}  // namespace
+
+// Common base of the CUDA allocation policies: provides the stream-ordered
+// scheduling of a free request.
+struct CudaAlloc {
+  // Enqueues freeCallback on `stream` with `payload` as its user data.
+  //   payload : request handed to freeCallback when the callback runs
+  //   stream  : type-erased cudaStream_t
+  // A null stream is reported on std::cout but the callback is registered anyway.
+  // The result of cudaStreamAddCallback goes through cudaCheck
+  // (NOTE(review): its behaviour on failure is defined elsewhere - confirm).
+  static void scheduleFree(memoryPool::Payload *payload, void *stream) {
+    if (nullptr == stream) {
+      std::cout << "???? schedule free for stream " << stream << std::endl;
+    }
+    auto const cudaStream = static_cast<cudaStream_t>(stream);
+    // last argument: flags, passed as 0
+    cudaCheck(cudaStreamAddCallback(cudaStream, freeCallback, payload, 0));
+  }
+};
+
+// Allocation policy backed by cudaMalloc / cudaFree.
+// scheduleFree is inherited from CudaAlloc.
+struct CudaDeviceAlloc : public CudaAlloc {
+  using Pointer = void *;
+
+  // Returns `size` bytes obtained from cudaMalloc, or nullptr if the call fails.
+  static Pointer alloc(size_t size) {
+    Pointer block = nullptr;
+    if (cudaMalloc(&block, size) != cudaSuccess) {
+      return nullptr;
+    }
+    return block;
+  }
+
+  // Releases memory obtained from alloc(); a failing cudaFree is reported on std::cout.
+  static void free(Pointer ptr) {
+    auto const status = cudaFree(ptr);
+    if (cudaSuccess != status) {
+      std::cout << " error in cudaFree??" << std::endl;
+    }
+  }
+};
+
+// Allocation policy backed by page-locked host memory (cudaMallocHost / cudaFreeHost).
+// scheduleFree is inherited from CudaAlloc.
+struct CudaHostAlloc : public CudaAlloc {
+  using Pointer = void *;
+
+  // Returns `size` bytes obtained from cudaMallocHost, or nullptr if the call fails.
+  static Pointer alloc(size_t size) {
+    Pointer p = nullptr;
+    auto err = cudaMallocHost(&p, size);
+    return err == cudaSuccess ? p : nullptr;
+  }
+
+  // Releases memory obtained from alloc(). The return code is no longer discarded:
+  // a failure is reported on std::cout, in the same way CudaDeviceAlloc::free
+  // reports a failing cudaFree.
+  static void free(Pointer ptr) {
+    auto err = cudaFreeHost(ptr);
+    if (err != cudaSuccess)
+      std::cout << " error in cudaFreeHost??" << std::endl;
+  }
+};
+
 namespace {
 
   constexpr int poolSize = 128 * 1024;
@@ -59,5 +116,18 @@ namespace memoryPool {
                  : (onDevice == where ? (SimplePoolAllocator *)(&devicePool()) : (SimplePoolAllocator *)(&hostPool));
     }
 
+    // Obtains `size` bytes from `pool`, which may be the device or the host pool
+    // (nothing in here is CUDA specific).
+    // Returns the pair (address of the block, index of the block inside the pool).
+    // NOTE(review): the index returned by pool.alloc is passed to pool.pointer
+    // unchecked - confirm how SimplePoolAllocator signals a failed allocation.
+    std::pair<void *, int> alloc(uint64_t size, SimplePoolAllocator &pool) {
+      const int bucket = pool.alloc(size);
+      void *address = pool.pointer(bucket);
+      return {address, bucket};
+    }
+
+    // Schedules the release of `buckets` (indices inside `pool`) on `stream`.
+    // The request is packed in a heap-allocated Payload and handed to
+    // pool.scheduleFree; no delete is visible here, so the payload is presumably
+    // released by whoever consumes it (TODO confirm).
+    void free(cudaStream_t stream, std::vector<int> buckets, SimplePoolAllocator &pool) {
+      pool.scheduleFree(new Payload{&pool, std::move(buckets)}, stream);
+    }
+
   }  // namespace cuda
 }  // namespace memoryPool