diff --git a/CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h b/CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h index f5e493edf3f9b..542c111d992bb 100644 --- a/CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h +++ b/CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h @@ -8,14 +8,15 @@ class BeamSpotCUDA { public: - using buffer = memoryPool::buffer; // default constructor, required by cms::cuda::Product BeamSpotCUDA() = default; // constructor that allocates cached device memory on the given CUDA stream - BeamSpotCUDA(cudaStream_t stream) { data_d_ = memoryPool::cuda::make_buffer(1,stream, memoryPool::onDevice); } + BeamSpotCUDA(cudaStream_t stream) { + data_d_ = memoryPool::cuda::make_buffer(1, stream, memoryPool::onDevice); + } // movable, non-copiable BeamSpotCUDA(BeamSpotCUDA const&) = delete; @@ -26,7 +27,7 @@ class BeamSpotCUDA { BeamSpotPOD* data() { return data_d_.get(); } BeamSpotPOD const* data() const { return data_d_.get(); } - buffer & ptr() { return data_d_; } + buffer& ptr() { return data_d_; } buffer const& ptr() const { return data_d_; } private: diff --git a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h index 9f33bf076418d..8314ba3511f55 100644 --- a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h +++ b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h @@ -8,7 +8,6 @@ #include "HeterogeneousCore/CUDAUtilities/interface/memoryPool.h" #include "HeterogeneousCore/CUDAUtilities/interface/SimpleVector.h" - class SiPixelDigiErrorsCUDA { public: using SiPixelErrorCompactVector = cms::cuda::SimpleVector; @@ -41,6 +40,4 @@ class SiPixelDigiErrorsCUDA { int nErrorWords_ = 0; }; - - #endif // CUDADataFormats_SiPixelDigi_interface_SiPixelDigiErrorsCUDA_h diff --git a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h index b3126097e15a4..9ae6d585b145c 100644 --- a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h +++ b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h @@ -41,7 +41,6 @@ class SiPixelDigisCUDA { uint32_t nDigis_h = 0; }; - // #include "SiPixelDigisCUDAImpl.h" #endif // CUDADataFormats_SiPixelDigi_interface_SiPixelDigisCUDA_h diff --git a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDAImpl.h b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDAImpl.h index 36b1f62a2eee9..94eda26217329 100644 --- a/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDAImpl.h +++ b/CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDAImpl.h @@ -4,21 +4,21 @@ #include "HeterogeneousCore/CUDAUtilities/interface/cudaMemoryPool.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" - SiPixelDigisCUDA::SiPixelDigisCUDA(size_t maxFedWords, cudaStream_t stream) : m_store(memoryPool::cuda::make_buffer( SiPixelDigisCUDASOAView::roundFor128ByteAlignment(maxFedWords) * static_cast(SiPixelDigisCUDASOAView::StorageLocation::kMAX), - stream,memoryPool::onDevice)), + stream, + memoryPool::onDevice)), m_view(m_store, maxFedWords, SiPixelDigisCUDASOAView::StorageLocation::kMAX) { assert(maxFedWords != 0); } -memoryPool::buffer SiPixelDigisCUDA::copyAllToHostAsync( - cudaStream_t stream) const { +memoryPool::buffer SiPixelDigisCUDA::copyAllToHostAsync(cudaStream_t stream) const { auto ret = memoryPool::cuda::make_buffer( m_view.roundFor128ByteAlignment(nDigis()) * static_cast(SiPixelDigisCUDASOAView::StorageLocationHost::kMAX), - stream,memoryPool::onHost); + stream, + memoryPool::onHost); cudaCheck(cudaMemcpyAsync(ret.get(), m_view.clus(), m_view.roundFor128ByteAlignment(nDigis()) * sizeof(SiPixelDigisCUDA::StoreType) * diff --git a/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc b/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc index d9432c94edb5e..92cdd082d2067 100644 --- a/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc +++ b/CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc @@ -3,19 +3,17 @@ #include "HeterogeneousCore/CUDAUtilities/interface/cudaMemoryPool.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" - -SiPixelDigiErrorsCUDA::SiPixelDigiErrorsCUDA(size_t maxFedWords, SiPixelFormatterErrors errors, cudaStream_t stream) : - formatterErrors_h(std::move(errors)), - nErrorWords_(maxFedWords) { +SiPixelDigiErrorsCUDA::SiPixelDigiErrorsCUDA(size_t maxFedWords, SiPixelFormatterErrors errors, cudaStream_t stream) + : formatterErrors_h(std::move(errors)), nErrorWords_(maxFedWords) { assert(maxFedWords != 0); - memoryPool::Deleter deleter = memoryPool::Deleter(std::make_shared(stream, memoryPool::onDevice)); + memoryPool::Deleter deleter = + memoryPool::Deleter(std::make_shared(stream, memoryPool::onDevice)); assert(deleter.pool()); - data_d = memoryPool::cuda::make_buffer(maxFedWords, deleter); - error_d = memoryPool::cuda::make_buffer(1,deleter); - error_h = memoryPool::cuda::make_buffer(1,stream,memoryPool::onHost); - + data_d = memoryPool::cuda::make_buffer(maxFedWords, deleter); + error_d = memoryPool::cuda::make_buffer(1, deleter); + error_h = memoryPool::cuda::make_buffer(1, stream, memoryPool::onHost); cudaMemsetAsync(data_d.get(), 0x00, maxFedWords, stream); @@ -23,11 +21,11 @@ SiPixelDigiErrorsCUDA::SiPixelDigiErrorsCUDA(size_t maxFedWords, SiPixelFormatte assert(error_h->empty()); assert(error_h->capacity() == static_cast(maxFedWords)); - cudaCheck(memoryPool::cuda::copy(error_d, error_h, 1,stream)); + cudaCheck(memoryPool::cuda::copy(error_d, error_h, 1, stream)); } void SiPixelDigiErrorsCUDA::copyErrorToHostAsync(cudaStream_t stream) { - cudaCheck(memoryPool::cuda::copy(error_h, error_d, 1,stream)); + cudaCheck(memoryPool::cuda::copy(error_h, error_d, 1, stream)); } SiPixelDigiErrorsCUDA::HostDataError SiPixelDigiErrorsCUDA::dataErrorToHostAsync(cudaStream_t stream) const { @@ -44,4 +42,3 @@ SiPixelDigiErrorsCUDA::HostDataError SiPixelDigiErrorsCUDA::dataErrorToHostAsync err.set_data(data.get()); return HostDataError(err, std::move(data)); } - diff --git a/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc b/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc index 700a4198bb293..28ef7ffd9c2f3 100644 --- a/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc +++ b/CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc @@ -1,3 +1,2 @@ #include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h" #include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDAImpl.h" - diff --git a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h index 8b106ed8df960..d4f3f48efb48b 100644 --- a/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h +++ b/CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h @@ -4,7 +4,6 @@ #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h" #include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h" - #include "HeterogeneousCore/CUDAUtilities/interface/memoryPool.h" class TrackingRecHit2DHeterogeneous { @@ -38,21 +37,21 @@ class TrackingRecHit2DHeterogeneous { TrackingRecHit2DHeterogeneous() = default; /*inline*/ TrackingRecHit2DHeterogeneous(uint32_t nHits, - bool isPhase2, - int32_t offsetBPIX2, - pixelCPEforGPU::ParamsOnGPU const* cpeParams, - uint32_t const* hitsModuleStart, - memoryPool::Where where, - cudaStream_t stream, - TrackingRecHit2DHeterogeneous const* input = nullptr); + bool isPhase2, + int32_t offsetBPIX2, + pixelCPEforGPU::ParamsOnGPU const* cpeParams, + uint32_t const* hitsModuleStart, + memoryPool::Where where, + cudaStream_t stream, + TrackingRecHit2DHeterogeneous const* input = nullptr); // used on CPU only /*inline*/ TrackingRecHit2DHeterogeneous(float* store32, - uint16_t* store16, - uint32_t* modules, - int nHits, - memoryPool::Where where = memoryPool::onCPU, - cudaStream_t stream = nullptr); + uint16_t* store16, + uint32_t* modules, + int nHits, + memoryPool::Where where = memoryPool::onCPU, + cudaStream_t stream = nullptr); ~TrackingRecHit2DHeterogeneous() = default; TrackingRecHit2DHeterogeneous(const TrackingRecHit2DHeterogeneous&) = delete; diff --git a/CUDADataFormats/TrackingRecHit/src/TrackingRecHit2DHeterogeneous.cc b/CUDADataFormats/TrackingRecHit/src/TrackingRecHit2DHeterogeneous.cc index 115b80db650b4..8959668ec9932 100644 --- a/CUDADataFormats/TrackingRecHit/src/TrackingRecHit2DHeterogeneous.cc +++ b/CUDADataFormats/TrackingRecHit/src/TrackingRecHit2DHeterogeneous.cc @@ -1,2 +1 @@ #include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneousImpl.h" - diff --git a/HeterogeneousCore/CUDACore/src/ScopedContext.cc b/HeterogeneousCore/CUDACore/src/ScopedContext.cc index 5f809dcc506db..0b547c4baecce 100644 --- a/HeterogeneousCore/CUDACore/src/ScopedContext.cc +++ b/HeterogeneousCore/CUDACore/src/ScopedContext.cc @@ -27,8 +27,8 @@ namespace { try { auto error = cudaGetErrorName(status); auto message = cudaGetErrorString(status); - std::cout << "Callback of CUDA stream " << streamId << " in device " << device - << " error " << error << ": " << message << std::endl; + std::cout << "Callback of CUDA stream " << streamId << " in device " << device << " error " << error << ": " + << message << std::endl; throw cms::Exception("CUDAError") << "Callback of CUDA stream " << streamId << " in device " << device << " error " << error << ": " << message; } catch (cms::Exception&) { diff --git a/HeterogeneousCore/CUDAUtilities/interface/cudaMemoryPool.h b/HeterogeneousCore/CUDAUtilities/interface/cudaMemoryPool.h index 897e5077e0659..b0c875a89121e 100644 --- a/HeterogeneousCore/CUDAUtilities/interface/cudaMemoryPool.h +++ b/HeterogeneousCore/CUDAUtilities/interface/cudaMemoryPool.h @@ -30,15 +30,15 @@ namespace memoryPool { struct CudaDeleterBase : public DeleterBase { CudaDeleterBase(cudaStream_t const &stream, Where where) : DeleterBase(getPool(where)), m_stream(stream) { -// if (stream) return; -// std::cout << "0 stream???" << std::endl; -// throw std::bad_alloc(); + // if (stream) return; + // std::cout << "0 stream???" << std::endl; + // throw std::bad_alloc(); } CudaDeleterBase(cudaStream_t const &stream, SimplePoolAllocator *pool) : DeleterBase(pool), m_stream(stream) { -// if (stream) return; -// std::cout << "0 stream???" << std::endl; -// throw std::bad_alloc(); + // if (stream) return; + // std::cout << "0 stream???" << std::endl; + // throw std::bad_alloc(); } ~CudaDeleterBase() override = default; diff --git a/HeterogeneousCore/CUDAUtilities/interface/cudaMemoryPoolImpl.h b/HeterogeneousCore/CUDAUtilities/interface/cudaMemoryPoolImpl.h index 32e86d1e15268..5c0a00a905067 100644 --- a/HeterogeneousCore/CUDAUtilities/interface/cudaMemoryPoolImpl.h +++ b/HeterogeneousCore/CUDAUtilities/interface/cudaMemoryPoolImpl.h @@ -13,13 +13,13 @@ namespace { // free callback - void CUDART_CB freeCallback(cudaStream_t streamId, cudaError_t status, void* p) { - //void CUDART_CB freeCallback(void *p) { + void CUDART_CB freeCallback(cudaStream_t streamId, cudaError_t status, void *p) { + //void CUDART_CB freeCallback(void *p) { if (status != cudaSuccess) { - std::cout << "Error in free callaback in stream " << streamId << std::endl; - auto error = cudaGetErrorName(status); - auto message = cudaGetErrorString(status); - std::cout << " error " << error << ": " << message << std::endl; + std::cout << "Error in free callaback in stream " << streamId << std::endl; + auto error = cudaGetErrorName(status); + auto message = cudaGetErrorString(status); + std::cout << " error " << error << ": " << message << std::endl; } // std::cout << "free callaback for stream " << streamId << std::endl; auto payload = (memoryPool::Payload *)(p); @@ -32,8 +32,8 @@ struct CudaAlloc { static void scheduleFree(memoryPool::Payload *payload, cudaStream_t stream) { // std::cout << "schedule free for stream " << stream < #include -#include +#include class SimplePoolAllocator; namespace memoryPool { @@ -31,10 +31,14 @@ namespace memoryPool { std::shared_ptr getDeleter() const { return me; } void operator()(void* p) { - if (!me) + if (!me) { + std::cout << "deleter w/o implementation!!!" << m_bucket << std::endl; throw std::bad_alloc(); - if(!p) std::cout << "delete null pointer!!! " << m_bucket << std::endl; - if (m_bucket<0) std::cout << "delete with negative bucket!!!" << std::endl; + } + if (!p) + std::cout << "delete null pointer!!! " << m_bucket << std::endl; + if (m_bucket < 0) + std::cout << "delete with negative bucket!!!" << std::endl; // assert(p == pool()->pointer(m_bucket)); (*me)(m_bucket); } diff --git a/HeterogeneousCore/CUDAUtilities/test/testPoolUImt.cu b/HeterogeneousCore/CUDAUtilities/test/testPoolUImt.cu index 0c2dbc2fb91ac..70f8669752861 100644 --- a/HeterogeneousCore/CUDAUtilities/test/testPoolUImt.cu +++ b/HeterogeneousCore/CUDAUtilities/test/testPoolUImt.cu @@ -34,7 +34,7 @@ void CUDART_CB myCallback(void *fun) { (*(F *)(fun))(); } -__global__ void kernel_set(int s, Node ** p, int me) { +__global__ void kernel_set(int s, Node **p, int me) { int first = blockIdx.x * blockDim.x + threadIdx.x; for (int i = first; i < s; i += gridDim.x * blockDim.x) { assert(p[i]); @@ -81,7 +81,6 @@ void go() { #endif - bool stop = false; bool bin24 = false; Thread monitor([&] { @@ -98,18 +97,17 @@ void go() { int s = 40; { - std::cout << "try to allocate " << s << std::endl; - auto stream = streams[0]; - { - auto pd = memoryPool::cuda::make_buffer(s, stream, where); - assert(pd.get()); - memoryPool::cuda::dumpStat(); - pd = memoryPool::cuda::make_buffer(s, stream, where); + std::cout << "try to allocate " << s << std::endl; + auto stream = streams[0]; + { + auto pd = memoryPool::cuda::make_buffer(s, stream, where); + assert(pd.get()); + memoryPool::cuda::dumpStat(); + pd = memoryPool::cuda::make_buffer(s, stream, where); + memoryPool::cuda::dumpStat(); + } + cudaStreamSynchronize(stream); memoryPool::cuda::dumpStat(); - } - cudaStreamSynchronize(stream); - memoryPool::cuda::dumpStat(); - } std::atomic nt = 0; @@ -140,7 +138,7 @@ void go() { iter++; auto &stream = streams[me]; - memoryPool::Deleter devDeleter(std::make_shared(stream,where)); + memoryPool::Deleter devDeleter(std::make_shared(stream, where)); auto n = rgen1(eng); bool large = 0 == (iter % (128 + me)); for (int k = 0; k < n; ++k) { @@ -152,19 +150,20 @@ void go() { } uint64_t s = 1LL << b; assert(s > 0); - auto p0 = memoryPool::cuda::make_buffer(s/sizeof(Node) + sizeof(Node), devDeleter); - if (!p0.get()) { + try { + auto p0 = memoryPool::cuda::make_buffer(s / sizeof(Node) + sizeof(Node), devDeleter); + auto p = p0.get(); + if (nullptr == p) { + std::cout << "error not detected??? " << b << ' ' << std::endl; + memoryPool::cuda::dumpStat(); + } + assert(p); + hp[k] = p; + } catch (...) { std::cout << "\n\n!!!Failed " << me << " at " << iter << std::endl; memoryPool::cuda::dumpStat(); return; } - auto p = p0.get(); - if (nullptr == p) { - std::cout << "error not detected??? " << b << ' ' << std::endl; - memoryPool::cuda::dumpStat(); - } - assert(p); - hp[k] = p; } #ifdef __CUDACC__ assert(n <= 100); diff --git a/RecoPixelVertexing/PixelTriplets/plugins/BrokenLineFitOnGPU.cu b/RecoPixelVertexing/PixelTriplets/plugins/BrokenLineFitOnGPU.cu index 3bcda8004fd8e..bb1fdc9367187 100644 --- a/RecoPixelVertexing/PixelTriplets/plugins/BrokenLineFitOnGPU.cu +++ b/RecoPixelVertexing/PixelTriplets/plugins/BrokenLineFitOnGPU.cu @@ -12,11 +12,15 @@ void HelixFitOnGPU::launchBrokenLineKernels(HitsView const *hv, auto numberOfBlocks = (maxNumberOfConcurrentFits_ + blockSize - 1) / blockSize; // Fit internals - memoryPool::Deleter deleter = memoryPool::Deleter(std::make_shared(stream, memoryPool::onDevice)); - auto tkidGPU = memoryPool::cuda::make_buffer(maxNumberOfConcurrentFits_,deleter); - auto hitsGPU = memoryPool::cuda::make_buffer(maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix3xNd<6>) / sizeof(double), deleter); - auto hits_geGPU = memoryPool::cuda::make_buffer(maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix6xNf<6>) / sizeof(float), deleter); - auto fast_fit_resultsGPU = memoryPool::cuda::make_buffer(maxNumberOfConcurrentFits_ * sizeof(riemannFit::Vector4d) / sizeof(double), deleter); + memoryPool::Deleter deleter = + memoryPool::Deleter(std::make_shared(stream, memoryPool::onDevice)); + auto tkidGPU = memoryPool::cuda::make_buffer(maxNumberOfConcurrentFits_, deleter); + auto hitsGPU = memoryPool::cuda::make_buffer( + maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix3xNd<6>) / sizeof(double), deleter); + auto hits_geGPU = memoryPool::cuda::make_buffer( + maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix6xNf<6>) / sizeof(float), deleter); + auto fast_fit_resultsGPU = memoryPool::cuda::make_buffer( + maxNumberOfConcurrentFits_ * sizeof(riemannFit::Vector4d) / sizeof(double), deleter); for (uint32_t offset = 0; offset < maxNumberOfTuples; offset += maxNumberOfConcurrentFits_) { // fit triplets @@ -127,8 +131,8 @@ void HelixFitOnGPU::launchBrokenLineKernels(HitsView const *hv, cudaCheck(cudaGetLastError()); } #ifdef GPU_DEBUG - cudaDeviceSynchronize(); - cudaCheck(cudaGetLastError()); + cudaDeviceSynchronize(); + cudaCheck(cudaGetLastError()); #endif } // loop on concurrent fits } diff --git a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernels.cc b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernels.cc index 30fde94a1b1d6..aa5b446d62a90 100644 --- a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernels.cc +++ b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernels.cc @@ -22,15 +22,17 @@ void CAHitNtupletGeneratorKernelsCPU::buildDoublets(HitsOnCPU const &hh, cudaStr #endif // use "nhits" to heuristically dimension the workspace - memoryPool::Deleter deleter = memoryPool::Deleter(std::make_shared(nullptr, memoryPool::onCPU)); - device_isOuterHitOfCell_ = memoryPool::cuda::make_buffer(std::max(1U, nhits),deleter); + memoryPool::Deleter deleter = + memoryPool::Deleter(std::make_shared(nullptr, memoryPool::onCPU)); + device_isOuterHitOfCell_ = + memoryPool::cuda::make_buffer(std::max(1U, nhits), deleter); assert(device_isOuterHitOfCell_.get()); isOuterHitOfCell_ = GPUCACell::OuterHitOfCell{device_isOuterHitOfCell_.get(), hh.offsetBPIX2()}; auto cellStorageSize = caConstants::maxNumOfActiveDoublets * sizeof(GPUCACell::CellNeighbors) + caConstants::maxNumOfActiveDoublets * sizeof(GPUCACell::CellTracks); // no need to use the Traits allocations, since we know this is being compiled for the CPU - cellStorage_ = memoryPool::cuda::make_buffer(cellStorageSize,deleter); + cellStorage_ = memoryPool::cuda::make_buffer(cellStorageSize, deleter); device_theCellNeighborsContainer_ = (GPUCACell::CellNeighbors *)cellStorage_.get(); device_theCellTracksContainer_ = (GPUCACell::CellTracks *)(cellStorage_.get() + caConstants::maxNumOfActiveDoublets * sizeof(GPUCACell::CellNeighbors)); @@ -42,7 +44,7 @@ void CAHitNtupletGeneratorKernelsCPU::buildDoublets(HitsOnCPU const &hh, cudaStr device_theCellTracks_.get(), device_theCellTracksContainer_); - device_theCells_ = memoryPool::cuda::make_buffer(params_.maxNumberOfDoublets_,deleter); + device_theCells_ = memoryPool::cuda::make_buffer(params_.maxNumberOfDoublets_, deleter); if (0 == nhits) return; // protect against empty events diff --git a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernels.cu b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernels.cu index 38719acfee87d..f7aeb829d18fa 100644 --- a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernels.cu +++ b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernels.cu @@ -145,8 +145,10 @@ void CAHitNtupletGeneratorKernelsGPU::buildDoublets(HitsOnCPU const &hh, cudaStr #endif // in principle we can use "nhits" to heuristically dimension the workspace... - memoryPool::Deleter deleter = memoryPool::Deleter(std::make_shared(stream, memoryPool::onDevice)); - device_isOuterHitOfCell_ = memoryPool::cuda::make_buffer(std::max(1, nhits),deleter); + memoryPool::Deleter deleter = + memoryPool::Deleter(std::make_shared(stream, memoryPool::onDevice)); + device_isOuterHitOfCell_ = + memoryPool::cuda::make_buffer(std::max(1, nhits), deleter); assert(device_isOuterHitOfCell_.get()); isOuterHitOfCell_ = GPUCACell::OuterHitOfCell{device_isOuterHitOfCell_.get(), hh.offsetBPIX2()}; diff --git a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernels.h b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernels.h index f5f3a0803484a..aebfae19acdee 100644 --- a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernels.h +++ b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernels.h @@ -8,7 +8,6 @@ #include "HeterogeneousCore/CUDAUtilities/interface/cudaMemoryPool.h" - // #define DUMP_GPU_TK_TUPLES namespace cAHitNtupletGenerator { @@ -165,7 +164,6 @@ class CAHitNtupletGeneratorKernels { using Params = cAHitNtupletGenerator::Params; using Counters = cAHitNtupletGenerator::Counters; - template using buffer = memoryPool::buffer; diff --git a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernelsAlloc.cc b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernelsAlloc.cc index ce40f0feba94f..9edd3815375c5 100644 --- a/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernelsAlloc.cc +++ b/RecoPixelVertexing/PixelTriplets/plugins/CAHitNtupletGeneratorKernelsAlloc.cc @@ -12,26 +12,27 @@ void CAHitNtupletGeneratorKernelsCPU::allocateOnGPU(int32_t nHits, cudaStream_t // ALLOCATIONS FOR THE INTERMEDIATE RESULTS (STAYS ON WORKER) ////////////////////////////////////////////////////////// - memoryPool::Where where = std::is_same::value ? memoryPool::onDevice : memoryPool::onCPU; + memoryPool::Where where = + std::is_same::value ? memoryPool::onDevice : memoryPool::onCPU; memoryPool::Deleter deleter = memoryPool::Deleter(std::make_shared(stream, where)); - device_theCellNeighbors_ = memoryPool::cuda::make_buffer(1,deleter); - device_theCellTracks_ = memoryPool::cuda::make_buffer(1,deleter); + device_theCellNeighbors_ = memoryPool::cuda::make_buffer(1, deleter); + device_theCellTracks_ = memoryPool::cuda::make_buffer(1, deleter); #ifdef GPU_DEBUG - std::cout << "Allocation for tuple building. N hits " << nHits - << ((where==memoryPool::onDevice) ? " on GPU" : " on CPU")< 0); - device_hitToTuple_ = memoryPool::cuda::make_buffer(1,deleter); + device_hitToTuple_ = memoryPool::cuda::make_buffer(1, deleter); device_hitToTupleStorage_ = memoryPool::cuda::make_buffer(nHits, deleter); hitToTupleView_.assoc = device_hitToTuple_.get(); hitToTupleView_.offStorage = device_hitToTupleStorage_.get(); hitToTupleView_.offSize = nHits; - device_tupleMultiplicity_ = memoryPool::cuda::make_buffer(1,deleter); + device_tupleMultiplicity_ = memoryPool::cuda::make_buffer(1, deleter); device_storage_ = memoryPool::cuda::make_buffer(3, deleter); diff --git a/RecoVertex/BeamSpotProducer/plugins/BeamSpotToCUDA.cc b/RecoVertex/BeamSpotProducer/plugins/BeamSpotToCUDA.cc index 116c51f0c2f3e..3b0c42a78fe65 100644 --- a/RecoVertex/BeamSpotProducer/plugins/BeamSpotToCUDA.cc +++ b/RecoVertex/BeamSpotProducer/plugins/BeamSpotToCUDA.cc @@ -31,7 +31,7 @@ namespace { BeamSpotPOD* data() { return data_h_.get(); } BeamSpotPOD const* data() const { return data_h_.get(); } - auto & ptr() { return data_h_; } + auto& ptr() { return data_h_; } cms::cuda::host::noncached::unique_ptr const& ptr() const { return data_h_; } private: