code format
VinInn committed May 8, 2022
1 parent e7d8632 commit b4f4d46
Showing 19 changed files with 111 additions and 109 deletions.
7 changes: 4 additions & 3 deletions CUDADataFormats/BeamSpot/interface/BeamSpotCUDA.h
@@ -8,14 +8,15 @@

class BeamSpotCUDA {
public:

using buffer = memoryPool::buffer<BeamSpotPOD>;

// default constructor, required by cms::cuda::Product<BeamSpotCUDA>
BeamSpotCUDA() = default;

// constructor that allocates cached device memory on the given CUDA stream
BeamSpotCUDA(cudaStream_t stream) { data_d_ = memoryPool::cuda::make_buffer<BeamSpotPOD>(1,stream, memoryPool::onDevice); }
BeamSpotCUDA(cudaStream_t stream) {
data_d_ = memoryPool::cuda::make_buffer<BeamSpotPOD>(1, stream, memoryPool::onDevice);
}

// movable, non-copiable
BeamSpotCUDA(BeamSpotCUDA const&) = delete;
@@ -26,7 +27,7 @@ class BeamSpotCUDA {
BeamSpotPOD* data() { return data_d_.get(); }
BeamSpotPOD const* data() const { return data_d_.get(); }

buffer & ptr() { return data_d_; }
buffer& ptr() { return data_d_; }
buffer const& ptr() const { return data_d_; }

private:
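Note for readers skimming the reformatting: the class above is the basic ownership pattern this branch uses everywhere, a data format owning one pooled device buffer tied to a CUDA stream. Below is a minimal sketch of that pattern, assuming only the memoryPool::buffer and memoryPool::cuda::make_buffer signatures visible in this diff; PayloadPOD and PayloadCUDA are placeholder names, not part of the repository.

#include <cuda_runtime.h>
#include "HeterogeneousCore/CUDAUtilities/interface/memoryPool.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaMemoryPool.h"

// Placeholder payload; the real class stores a BeamSpotPOD.
struct PayloadPOD {
  float x, y, z;
};

class PayloadCUDA {
public:
  using buffer = memoryPool::buffer<PayloadPOD>;

  PayloadCUDA() = default;  // default-constructible, as product wrappers require

  explicit PayloadCUDA(cudaStream_t stream) {
    // one element, taken from the device-side pool and associated with this stream
    data_d_ = memoryPool::cuda::make_buffer<PayloadPOD>(1, stream, memoryPool::onDevice);
  }

  PayloadPOD* data() { return data_d_.get(); }
  PayloadPOD const* data() const { return data_d_.get(); }

private:
  buffer data_d_;
};
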
3 changes: 0 additions & 3 deletions CUDADataFormats/SiPixelDigi/interface/SiPixelDigiErrorsCUDA.h
@@ -8,7 +8,6 @@
#include "HeterogeneousCore/CUDAUtilities/interface/memoryPool.h"
#include "HeterogeneousCore/CUDAUtilities/interface/SimpleVector.h"


class SiPixelDigiErrorsCUDA {
public:
using SiPixelErrorCompactVector = cms::cuda::SimpleVector<SiPixelErrorCompact>;
@@ -41,6 +40,4 @@ class SiPixelDigiErrorsCUDA {
int nErrorWords_ = 0;
};



#endif // CUDADataFormats_SiPixelDigi_interface_SiPixelDigiErrorsCUDA_h
1 change: 0 additions & 1 deletion CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h
@@ -41,7 +41,6 @@ class SiPixelDigisCUDA {
uint32_t nDigis_h = 0;
};


// #include "SiPixelDigisCUDAImpl.h"

#endif // CUDADataFormats_SiPixelDigi_interface_SiPixelDigisCUDA_h
10 changes: 5 additions & 5 deletions CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDAImpl.h
@@ -4,21 +4,21 @@
#include "HeterogeneousCore/CUDAUtilities/interface/cudaMemoryPool.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"


SiPixelDigisCUDA::SiPixelDigisCUDA(size_t maxFedWords, cudaStream_t stream)
: m_store(memoryPool::cuda::make_buffer<SiPixelDigisCUDA::StoreType>(
SiPixelDigisCUDASOAView::roundFor128ByteAlignment(maxFedWords) *
static_cast<int>(SiPixelDigisCUDASOAView::StorageLocation::kMAX),
stream,memoryPool::onDevice)),
stream,
memoryPool::onDevice)),
m_view(m_store, maxFedWords, SiPixelDigisCUDASOAView::StorageLocation::kMAX) {
assert(maxFedWords != 0);
}

memoryPool::buffer<SiPixelDigisCUDA::StoreType> SiPixelDigisCUDA::copyAllToHostAsync(
cudaStream_t stream) const {
memoryPool::buffer<SiPixelDigisCUDA::StoreType> SiPixelDigisCUDA::copyAllToHostAsync(cudaStream_t stream) const {
auto ret = memoryPool::cuda::make_buffer<StoreType>(
m_view.roundFor128ByteAlignment(nDigis()) * static_cast<int>(SiPixelDigisCUDASOAView::StorageLocationHost::kMAX),
stream,memoryPool::onHost);
stream,
memoryPool::onHost);
cudaCheck(cudaMemcpyAsync(ret.get(),
m_view.clus(),
m_view.roundFor128ByteAlignment(nDigis()) * sizeof(SiPixelDigisCUDA::StoreType) *
21 changes: 9 additions & 12 deletions CUDADataFormats/SiPixelDigi/src/SiPixelDigiErrorsCUDA.cc
@@ -3,31 +3,29 @@
#include "HeterogeneousCore/CUDAUtilities/interface/cudaMemoryPool.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"


SiPixelDigiErrorsCUDA::SiPixelDigiErrorsCUDA(size_t maxFedWords, SiPixelFormatterErrors errors, cudaStream_t stream) :
formatterErrors_h(std::move(errors)),
nErrorWords_(maxFedWords) {
SiPixelDigiErrorsCUDA::SiPixelDigiErrorsCUDA(size_t maxFedWords, SiPixelFormatterErrors errors, cudaStream_t stream)
: formatterErrors_h(std::move(errors)), nErrorWords_(maxFedWords) {
assert(maxFedWords != 0);

memoryPool::Deleter deleter = memoryPool::Deleter(std::make_shared<memoryPool::cuda::BundleDelete>(stream, memoryPool::onDevice));
memoryPool::Deleter deleter =
memoryPool::Deleter(std::make_shared<memoryPool::cuda::BundleDelete>(stream, memoryPool::onDevice));
assert(deleter.pool());

data_d = memoryPool::cuda::make_buffer<SiPixelErrorCompact>(maxFedWords, deleter);
error_d = memoryPool::cuda::make_buffer<SiPixelErrorCompactVector>(1,deleter);
error_h = memoryPool::cuda::make_buffer<SiPixelErrorCompactVector>(1,stream,memoryPool::onHost);

data_d = memoryPool::cuda::make_buffer<SiPixelErrorCompact>(maxFedWords, deleter);
error_d = memoryPool::cuda::make_buffer<SiPixelErrorCompactVector>(1, deleter);
error_h = memoryPool::cuda::make_buffer<SiPixelErrorCompactVector>(1, stream, memoryPool::onHost);

cudaMemsetAsync(data_d.get(), 0x00, maxFedWords, stream);

cms::cuda::make_SimpleVector(error_h.get(), maxFedWords, data_d.get());
assert(error_h->empty());
assert(error_h->capacity() == static_cast<int>(maxFedWords));

cudaCheck(memoryPool::cuda::copy(error_d, error_h, 1,stream));
cudaCheck(memoryPool::cuda::copy(error_d, error_h, 1, stream));
}

void SiPixelDigiErrorsCUDA::copyErrorToHostAsync(cudaStream_t stream) {
cudaCheck(memoryPool::cuda::copy(error_h, error_d, 1,stream));
cudaCheck(memoryPool::cuda::copy(error_h, error_d, 1, stream));
}

SiPixelDigiErrorsCUDA::HostDataError SiPixelDigiErrorsCUDA::dataErrorToHostAsync(cudaStream_t stream) const {
@@ -44,4 +42,3 @@ SiPixelDigiErrorsCUDA::HostDataError SiPixelDigiErrorsCUDA::dataErrorToHostAsync
err.set_data(data.get());
return HostDataError(err, std::move(data));
}
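
Orientation for the reformatted constructor above: the error storage, the device copy of the SimpleVector header and its host mirror are all drawn from the memory pool, with the two device allocations sharing one bundled deleter. The condensed sketch below assumes the memoryPool::cuda signatures exactly as they appear in this commit; ErrorWord is a placeholder for SiPixelErrorCompact, and the buffers are function-locals purely for illustration (in the real class they are members).

#include <cassert>
#include <cstdint>
#include <memory>
#include <cuda_runtime.h>
#include "HeterogeneousCore/CUDAUtilities/interface/cudaMemoryPool.h"
#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h"
#include "HeterogeneousCore/CUDAUtilities/interface/SimpleVector.h"

// Placeholder element type; the real code stores SiPixelErrorCompact.
struct ErrorWord {
  uint32_t rawId, word;
  uint8_t errorType, fedId;
};
using ErrorVector = cms::cuda::SimpleVector<ErrorWord>;

void initErrorBuffers(size_t maxWords, cudaStream_t stream) {
  // the two device allocations share one bundled deleter: they are returned to
  // the pool together, once the stream has drained the work that uses them
  memoryPool::Deleter deleter(std::make_shared<memoryPool::cuda::BundleDelete>(stream, memoryPool::onDevice));

  auto data_d = memoryPool::cuda::make_buffer<ErrorWord>(maxWords, deleter);
  auto error_d = memoryPool::cuda::make_buffer<ErrorVector>(1, deleter);
  auto error_h = memoryPool::cuda::make_buffer<ErrorVector>(1, stream, memoryPool::onHost);

  cudaMemsetAsync(data_d.get(), 0x00, maxWords * sizeof(ErrorWord), stream);  // zero the error storage
  cms::cuda::make_SimpleVector(error_h.get(), maxWords, data_d.get());        // host header points at device storage
  assert(error_h->empty());
  cudaCheck(memoryPool::cuda::copy(error_d, error_h, 1, stream));             // ship the header to the device
}
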

1 change: 0 additions & 1 deletion CUDADataFormats/SiPixelDigi/src/SiPixelDigisCUDA.cc
@@ -1,3 +1,2 @@
#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDA.h"
#include "CUDADataFormats/SiPixelDigi/interface/SiPixelDigisCUDAImpl.h"

CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneous.h
@@ -4,7 +4,6 @@
#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DSOAView.h"
#include "RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h"


#include "HeterogeneousCore/CUDAUtilities/interface/memoryPool.h"

class TrackingRecHit2DHeterogeneous {
@@ -38,21 +37,21 @@ class TrackingRecHit2DHeterogeneous {
TrackingRecHit2DHeterogeneous() = default;

/*inline*/ TrackingRecHit2DHeterogeneous(uint32_t nHits,
bool isPhase2,
int32_t offsetBPIX2,
pixelCPEforGPU::ParamsOnGPU const* cpeParams,
uint32_t const* hitsModuleStart,
memoryPool::Where where,
cudaStream_t stream,
TrackingRecHit2DHeterogeneous const* input = nullptr);
bool isPhase2,
int32_t offsetBPIX2,
pixelCPEforGPU::ParamsOnGPU const* cpeParams,
uint32_t const* hitsModuleStart,
memoryPool::Where where,
cudaStream_t stream,
TrackingRecHit2DHeterogeneous const* input = nullptr);

// used on CPU only
/*inline*/ TrackingRecHit2DHeterogeneous(float* store32,
uint16_t* store16,
uint32_t* modules,
int nHits,
memoryPool::Where where = memoryPool::onCPU,
cudaStream_t stream = nullptr);
uint16_t* store16,
uint32_t* modules,
int nHits,
memoryPool::Where where = memoryPool::onCPU,
cudaStream_t stream = nullptr);
~TrackingRecHit2DHeterogeneous() = default;

TrackingRecHit2DHeterogeneous(const TrackingRecHit2DHeterogeneous&) = delete;
@@ -1,2 +1 @@
#include "CUDADataFormats/TrackingRecHit/interface/TrackingRecHit2DHeterogeneousImpl.h"

4 changes: 2 additions & 2 deletions HeterogeneousCore/CUDACore/src/ScopedContext.cc
@@ -27,8 +27,8 @@ namespace {
try {
auto error = cudaGetErrorName(status);
auto message = cudaGetErrorString(status);
std::cout << "Callback of CUDA stream " << streamId << " in device " << device
<< " error " << error << ": " << message << std::endl;
std::cout << "Callback of CUDA stream " << streamId << " in device " << device << " error " << error << ": "
<< message << std::endl;
throw cms::Exception("CUDAError") << "Callback of CUDA stream " << streamId << " in device " << device
<< " error " << error << ": " << message;
} catch (cms::Exception&) {
12 changes: 6 additions & 6 deletions HeterogeneousCore/CUDAUtilities/interface/cudaMemoryPool.h
@@ -30,15 +30,15 @@ namespace memoryPool {

struct CudaDeleterBase : public DeleterBase {
CudaDeleterBase(cudaStream_t const &stream, Where where) : DeleterBase(getPool(where)), m_stream(stream) {
// if (stream) return;
// std::cout << "0 stream???" << std::endl;
// throw std::bad_alloc();
// if (stream) return;
// std::cout << "0 stream???" << std::endl;
// throw std::bad_alloc();
}

CudaDeleterBase(cudaStream_t const &stream, SimplePoolAllocator *pool) : DeleterBase(pool), m_stream(stream) {
// if (stream) return;
// std::cout << "0 stream???" << std::endl;
// throw std::bad_alloc();
// if (stream) return;
// std::cout << "0 stream???" << std::endl;
// throw std::bad_alloc();
}

~CudaDeleterBase() override = default;
25 changes: 13 additions & 12 deletions HeterogeneousCore/CUDAUtilities/interface/cudaMemoryPoolImpl.h
@@ -13,13 +13,13 @@
namespace {

// free callback
void CUDART_CB freeCallback(cudaStream_t streamId, cudaError_t status, void* p) {
//void CUDART_CB freeCallback(void *p) {
void CUDART_CB freeCallback(cudaStream_t streamId, cudaError_t status, void *p) {
//void CUDART_CB freeCallback(void *p) {
if (status != cudaSuccess) {
std::cout << "Error in free callaback in stream " << streamId << std::endl;
auto error = cudaGetErrorName(status);
auto message = cudaGetErrorString(status);
std::cout << " error " << error << ": " << message << std::endl;
std::cout << "Error in free callaback in stream " << streamId << std::endl;
auto error = cudaGetErrorName(status);
auto message = cudaGetErrorString(status);
std::cout << " error " << error << ": " << message << std::endl;
}
// std::cout << "free callaback for stream " << streamId << std::endl;
auto payload = (memoryPool::Payload *)(p);
@@ -32,8 +32,8 @@ struct CudaAlloc {
static void scheduleFree(memoryPool::Payload *payload, cudaStream_t stream) {
// std::cout << "schedule free for stream " << stream <<std::endl;
if (stream)
cudaCheck(cudaStreamAddCallback(stream, freeCallback, payload,0));
// cudaCheck(cudaLaunchHostFunc(stream, freeCallback, payload));
cudaCheck(cudaStreamAddCallback(stream, freeCallback, payload, 0));
// cudaCheck(cudaLaunchHostFunc(stream, freeCallback, payload));
else
memoryPool::scheduleFree(payload);
}
@@ -48,10 +48,11 @@ struct CudaDeviceAlloc : public CudaAlloc {
// std::cout << "alloc " << size << ((err == cudaSuccess) ? " ok" : " err") << std::endl;
return err == cudaSuccess ? p : nullptr;
}
static void free(Pointer ptr) {
auto err = cudaFree(ptr);
// std::cout << "free" << ((err == cudaSuccess) ? " ok" : " err") <<std::endl;
if (err != cudaSuccess) std::cout << " error in cudaFree??" << std::endl;
static void free(Pointer ptr) {
auto err = cudaFree(ptr);
// std::cout << "free" << ((err == cudaSuccess) ? " ok" : " err") <<std::endl;
if (err != cudaSuccess)
std::cout << " error in cudaFree??" << std::endl;
}
};

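The freeCallback/scheduleFree pair reformatted above is the heart of the pool's lifetime management: a buffer is only handed back once the stream has finished everything enqueued before the release was scheduled. Below is a generic, pool-independent sketch of that technique; all names are illustrative, and since CUDA forbids CUDA API calls inside stream callbacks, the callback only marks the slot for later recycling (the real code does the same via memoryPool::scheduleFree).

#include <cstdint>
#include <cstdio>
#include <mutex>
#include <vector>
#include <cuda_runtime.h>

std::mutex pendingMutex;
std::vector<int> pendingSlots;  // slots that the pool owner may now recycle

// Runs on a CUDA runtime thread once all work previously enqueued on the stream has completed.
void CUDART_CB markSlotReusable(cudaStream_t stream, cudaError_t status, void* userData) {
  if (status != cudaSuccess)
    std::printf("stream %p hit an error before release: %s\n", (void*)stream, cudaGetErrorString(status));
  int slot = static_cast<int>(reinterpret_cast<std::intptr_t>(userData));
  std::lock_guard<std::mutex> lock(pendingMutex);
  pendingSlots.push_back(slot);  // actual recycling happens later, outside the callback
}

void scheduleRelease(int slot, cudaStream_t stream) {
  void* payload = reinterpret_cast<void*>(static_cast<std::intptr_t>(slot));
  // callbacks fire in enqueue order, so everything submitted before this call has
  // completed by the time the slot is marked reusable
  cudaStreamAddCallback(stream, markSlotReusable, payload, 0 /* flags must be 0 */);
}
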
12 changes: 8 additions & 4 deletions HeterogeneousCore/CUDAUtilities/interface/memoryPool.h
@@ -2,7 +2,7 @@
#include <memory>
#include <new>

#include<iostream>
#include <iostream>
class SimplePoolAllocator;

namespace memoryPool {
@@ -31,10 +31,14 @@ namespace memoryPool {
std::shared_ptr<DeleterBase> getDeleter() const { return me; }

void operator()(void* p) {
if (!me)
if (!me) {
std::cout << "deleter w/o implementation!!!" << m_bucket << std::endl;
throw std::bad_alloc();
if(!p) std::cout << "delete null pointer!!! " << m_bucket << std::endl;
if (m_bucket<0) std::cout << "delete with negative bucket!!!" << std::endl;
}
if (!p)
std::cout << "delete null pointer!!! " << m_bucket << std::endl;
if (m_bucket < 0)
std::cout << "delete with negative bucket!!!" << std::endl;
// assert(p == pool()->pointer(m_bucket));
(*me)(m_bucket);
}
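The Deleter being tidied up above is a stateful deleter: it carries the bucket it must return plus a handle to the object that knows how to return it. The sketch below shows the same shape with std::unique_ptr; every name here is invented for the example, only the structure matches memoryPool::Deleter.

#include <memory>

// Anything that can take a slot back; memoryPool's DeleterBase plays this role.
struct PoolLike {
  virtual ~PoolLike() = default;
  virtual void release(int bucket) = 0;
};

class BucketDeleter {
public:
  BucketDeleter() = default;
  BucketDeleter(std::shared_ptr<PoolLike> pool, int bucket) : pool_(std::move(pool)), bucket_(bucket) {}

  void operator()(void* p) const {
    // the real operator() also logs a missing implementation, a null pointer and a
    // negative bucket; this sketch simply does nothing in those cases
    if (!pool_ || !p || bucket_ < 0)
      return;
    pool_->release(bucket_);  // the pool, not delete, reclaims the storage
  }

private:
  std::shared_ptr<PoolLike> pool_;
  int bucket_ = -1;
};

// Analogous to memoryPool::buffer<T>: ownership expressed through the deleter's state.
template <typename T>
using pooled_ptr = std::unique_ptr<T, BucketDeleter>;
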
45 changes: 22 additions & 23 deletions HeterogeneousCore/CUDAUtilities/test/testPoolUImt.cu
@@ -34,7 +34,7 @@ void CUDART_CB myCallback(void *fun) {
(*(F *)(fun))();
}

__global__ void kernel_set(int s, Node ** p, int me) {
__global__ void kernel_set(int s, Node **p, int me) {
int first = blockIdx.x * blockDim.x + threadIdx.x;
for (int i = first; i < s; i += gridDim.x * blockDim.x) {
assert(p[i]);
@@ -81,7 +81,6 @@ void go() {

#endif


bool stop = false;
bool bin24 = false;
Thread monitor([&] {
@@ -98,18 +97,17 @@

int s = 40;
{
std::cout << "try to allocate " << s << std::endl;
auto stream = streams[0];
{
auto pd = memoryPool::cuda::make_buffer<int>(s, stream, where);
assert(pd.get());
memoryPool::cuda::dumpStat();
pd = memoryPool::cuda::make_buffer<int>(s, stream, where);
std::cout << "try to allocate " << s << std::endl;
auto stream = streams[0];
{
auto pd = memoryPool::cuda::make_buffer<int>(s, stream, where);
assert(pd.get());
memoryPool::cuda::dumpStat();
pd = memoryPool::cuda::make_buffer<int>(s, stream, where);
memoryPool::cuda::dumpStat();
}
cudaStreamSynchronize(stream);
memoryPool::cuda::dumpStat();
}
cudaStreamSynchronize(stream);
memoryPool::cuda::dumpStat();

}
std::atomic<int> nt = 0;

@@ -140,7 +138,7 @@ void go() {
iter++;
auto &stream = streams[me];

memoryPool::Deleter devDeleter(std::make_shared<memoryPool::cuda::BundleDelete>(stream,where));
memoryPool::Deleter devDeleter(std::make_shared<memoryPool::cuda::BundleDelete>(stream, where));
auto n = rgen1(eng);
bool large = 0 == (iter % (128 + me));
for (int k = 0; k < n; ++k) {
@@ -152,19 +150,20 @@ void go() {
}
uint64_t s = 1LL << b;
assert(s > 0);
auto p0 = memoryPool::cuda::make_buffer<Node>(s/sizeof(Node) + sizeof(Node), devDeleter);
if (!p0.get()) {
try {
auto p0 = memoryPool::cuda::make_buffer<Node>(s / sizeof(Node) + sizeof(Node), devDeleter);
auto p = p0.get();
if (nullptr == p) {
std::cout << "error not detected??? " << b << ' ' << std::endl;
memoryPool::cuda::dumpStat();
}
assert(p);
hp[k] = p;
} catch (...) {
std::cout << "\n\n!!!Failed " << me << " at " << iter << std::endl;
memoryPool::cuda::dumpStat();
return;
}
auto p = p0.get();
if (nullptr == p) {
std::cout << "error not detected??? " << b << ' ' << std::endl;
memoryPool::cuda::dumpStat();
}
assert(p);
hp[k] = p;
}
#ifdef __CUDACC__
assert(n <= 100);
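The reworked allocation loop above now treats pool exhaustion as an exception rather than only checking for a null buffer. A tiny usage sketch of that idea, assuming make_buffer throws (for example std::bad_alloc) when the pool cannot satisfy the request, as the catch block above implies; tryAllocate is a hypothetical helper, while make_buffer and dumpStat are used as shown in the test.

#include <cstddef>
#include <iostream>
#include "HeterogeneousCore/CUDAUtilities/interface/cudaMemoryPool.h"

// Attempt one pooled allocation and report failures; mirrors the test loop above.
template <typename T>
bool tryAllocate(std::size_t nElements, memoryPool::Deleter deleter) {
  try {
    auto buf = memoryPool::cuda::make_buffer<T>(nElements, deleter);
    if (!buf.get()) {
      std::cout << "allocation returned a null buffer without throwing" << std::endl;
      return false;
    }
    // ... use buf.get() ...
    return true;
  } catch (...) {
    std::cout << "pool allocation failed for " << nElements << " elements" << std::endl;
    memoryPool::cuda::dumpStat();  // same diagnostic the test prints on failure
    return false;
  }
}
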
18 changes: 11 additions & 7 deletions RecoPixelVertexing/PixelTriplets/plugins/BrokenLineFitOnGPU.cu
@@ -12,11 +12,15 @@ void HelixFitOnGPU::launchBrokenLineKernels(HitsView const *hv,
auto numberOfBlocks = (maxNumberOfConcurrentFits_ + blockSize - 1) / blockSize;

// Fit internals
memoryPool::Deleter deleter = memoryPool::Deleter(std::make_shared<memoryPool::cuda::BundleDelete>(stream, memoryPool::onDevice));
auto tkidGPU = memoryPool::cuda::make_buffer<caConstants::tindex_type>(maxNumberOfConcurrentFits_,deleter);
auto hitsGPU = memoryPool::cuda::make_buffer<double>(maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix3xNd<6>) / sizeof(double), deleter);
auto hits_geGPU = memoryPool::cuda::make_buffer<float>(maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix6xNf<6>) / sizeof(float), deleter);
auto fast_fit_resultsGPU = memoryPool::cuda::make_buffer<double>(maxNumberOfConcurrentFits_ * sizeof(riemannFit::Vector4d) / sizeof(double), deleter);
memoryPool::Deleter deleter =
memoryPool::Deleter(std::make_shared<memoryPool::cuda::BundleDelete>(stream, memoryPool::onDevice));
auto tkidGPU = memoryPool::cuda::make_buffer<caConstants::tindex_type>(maxNumberOfConcurrentFits_, deleter);
auto hitsGPU = memoryPool::cuda::make_buffer<double>(
maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix3xNd<6>) / sizeof(double), deleter);
auto hits_geGPU = memoryPool::cuda::make_buffer<float>(
maxNumberOfConcurrentFits_ * sizeof(riemannFit::Matrix6xNf<6>) / sizeof(float), deleter);
auto fast_fit_resultsGPU = memoryPool::cuda::make_buffer<double>(
maxNumberOfConcurrentFits_ * sizeof(riemannFit::Vector4d) / sizeof(double), deleter);

for (uint32_t offset = 0; offset < maxNumberOfTuples; offset += maxNumberOfConcurrentFits_) {
// fit triplets
@@ -127,8 +131,8 @@ void HelixFitOnGPU::launchBrokenLineKernels(HitsView const *hv,
cudaCheck(cudaGetLastError());
}
#ifdef GPU_DEBUG
cudaDeviceSynchronize();
cudaCheck(cudaGetLastError());
cudaDeviceSynchronize();
cudaCheck(cudaGetLastError());
#endif
} // loop on concurrent fits
}
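
Context for the reflowed allocations above: the fit scratch buffers are typed on a scalar, so each element count is the byte size of the aggregate divided by the scalar size, and all of them share one bundled deleter so the whole set is released back to the pool as a group. The sketch below assumes the memoryPool::cuda signatures shown in this commit; FitMatrix and allocateFitScratch are placeholders standing in for the riemannFit types and the real launch function.

#include <cstdint>
#include <memory>
#include <cuda_runtime.h>
#include "HeterogeneousCore/CUDAUtilities/interface/cudaMemoryPool.h"

// Placeholder for the riemannFit matrix types used above; only its size matters here.
struct FitMatrix {
  double m[3][6];
};

void allocateFitScratch(uint32_t maxConcurrentFits, cudaStream_t stream) {
  // one deleter for the whole bundle: every buffer below is handed back to the
  // pool as a group through the shared BundleDelete
  memoryPool::Deleter deleter(
      std::make_shared<memoryPool::cuda::BundleDelete>(stream, memoryPool::onDevice));

  // same sizeof ratio as in the code above: bytes of the aggregate over bytes of the scalar
  auto hitsGPU = memoryPool::cuda::make_buffer<double>(
      maxConcurrentFits * sizeof(FitMatrix) / sizeof(double), deleter);
  auto resultsGPU = memoryPool::cuda::make_buffer<double>(maxConcurrentFits * 4, deleter);  // 4 doubles per fast-fit result

  // ... enqueue the fit kernels on `stream` using hitsGPU.get() and resultsGPU.get() ...
  // when these locals go out of scope, the shared bundle takes care of the deferred release
}
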