Skip to content

Commit

Permalink
Fix the initialisation of the modules to unpack in SiPixelRawToCluster…
Browse files Browse the repository at this point in the history
…Heterogeneous (cms-sw#208)

As an optimisation, move the default non-regional case to the EventSetup, and allocate, fill and transfer event-by-event only for the regional case.
  • Loading branch information
makortel authored and fwyzard committed Dec 7, 2018
1 parent 4684349 commit b4843b4
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 41 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -28,33 +28,20 @@ class SiPixelFedCablingMapGPUWrapper {
// returns pointer to GPU memory
const SiPixelFedCablingMapGPU *getGPUProductAsync(cuda::stream_t<>& cudaStream) const;

// returns pointer to GPU memory
const unsigned char *getModToUnpAllAsync(cuda::stream_t<>& cudaStream) const;
edm::cuda::device::unique_ptr<unsigned char[]> getModToUnpRegionalAsync(std::set<unsigned int> const& modules, cuda::stream_t<>& cudaStream) const;

// Allocates host and device memory, converts data to host memory,
// copies host memory to device memory asynchronously. It is the
// caller's responsibility to have this object to live until all
// operations on the device memory have completed.
class ModulesToUnpack {
public:
ModulesToUnpack(cuda::stream_t<>& cudaStream);
~ModulesToUnpack() = default;

void fillAsync(SiPixelFedCablingMap const& cablingMap, std::set<unsigned int> const& modules, cuda::stream_t<>& cudaStream);

const unsigned char *get() const { return modToUnpDevice.get(); }

private:
edm::cuda::device::unique_ptr<unsigned char[]> modToUnpDevice;
edm::cuda::host::unique_ptr<unsigned char[]> modToUnpHost;
};

private:
const SiPixelFedCablingMap *cablingMap_;
std::vector<unsigned int, CUDAHostAllocator<unsigned int>> fedMap;
std::vector<unsigned int, CUDAHostAllocator<unsigned int>> linkMap;
std::vector<unsigned int, CUDAHostAllocator<unsigned int>> rocMap;
std::vector<unsigned int, CUDAHostAllocator<unsigned int>> RawId;
std::vector<unsigned int, CUDAHostAllocator<unsigned int>> rocInDet;
std::vector<unsigned int, CUDAHostAllocator<unsigned int>> moduleId;
std::vector<unsigned char, CUDAHostAllocator<unsigned char>> badRocs;
std::vector<unsigned char, CUDAHostAllocator<unsigned char>> modToUnpDefault;
unsigned int size;
bool hasQuality_;

Expand All @@ -64,6 +51,12 @@ class SiPixelFedCablingMapGPUWrapper {
SiPixelFedCablingMapGPU *cablingMapDevice = nullptr; // same internal pointers as above, struct itself is on GPU
};
CUDAESProduct<GPUData> gpuData_;

// Per-device cache entry for the default (non-regional) modules-to-unpack
// mask; owned GPU memory is released in the destructor (see ~ModulesToUnpack).
struct ModulesToUnpack {
~ModulesToUnpack();
unsigned char *modToUnpDefault = nullptr; // device (GPU) pointer; allocated lazily in getModToUnpAllAsync
};
// NOTE(review): presumably CUDAESProduct keeps one ModulesToUnpack per CUDA
// device with thread-safe lazy filling — confirm against CUDAESProduct docs.
CUDAESProduct<ModulesToUnpack> modToUnp_;
};


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,6 @@ std::unique_ptr<PixelUnpackingRegions> regions_;

edm::ESWatcher<SiPixelFedCablingMapRcd> recordWatcher;
edm::ESWatcher<SiPixelQualityRcd> qualityWatcher;
bool recordWatcherUpdatedSinceLastTransfer_ = false;

bool usePilotBlade;
bool usePhase1;
Expand Down Expand Up @@ -276,7 +275,6 @@ const FEDRawDataCollection *SiPixelRawToClusterHeterogeneous::initialize(const e
fedIds = cablingMap->fedIds();
cabling_ = cablingMap->cablingTree();
LogDebug("map version:")<< cabling_->version();
recordWatcherUpdatedSinceLastTransfer_ = true;
}
// initialize quality record or update if necessary
if (qualityWatcher.check( es )&&useQuality) {
Expand Down Expand Up @@ -462,17 +460,6 @@ void SiPixelRawToClusterHeterogeneous::produceCPU(edm::HeterogeneousEvent& ev, c
void SiPixelRawToClusterHeterogeneous::acquireGPUCuda(const edm::HeterogeneousEvent& ev, const edm::EventSetup& es, cuda::stream_t<>& cudaStream) {
const auto buffers = initialize(ev.event(), es);

auto gpuModulesToUnpack = SiPixelFedCablingMapGPUWrapper::ModulesToUnpack(cudaStream);
if (regions_) {
std::set<unsigned int> modules = *(regions_->modulesToUnpack());
gpuModulesToUnpack.fillAsync(*cablingMap_, modules, cudaStream);
}
else if(recordWatcherUpdatedSinceLastTransfer_) {
// If regions_ are disabled, it is enough to fill and transfer only if cablingMap has changed
gpuModulesToUnpack.fillAsync(*cablingMap_, std::set<unsigned int>(), cudaStream);
recordWatcherUpdatedSinceLastTransfer_ = false;
}

edm::ESHandle<SiPixelFedCablingMapGPUWrapper> hgpuMap;
es.get<CkfComponentsRecord>().get(hgpuMap);
if(hgpuMap->hasQuality() != useQuality) {
Expand All @@ -481,6 +468,17 @@ void SiPixelRawToClusterHeterogeneous::acquireGPUCuda(const edm::HeterogeneousEv
// get the GPU product already here so that the async transfer can begin
const auto *gpuMap = hgpuMap->getGPUProductAsync(cudaStream);

edm::cuda::device::unique_ptr<unsigned char[]> modulesToUnpackRegional;
const unsigned char *gpuModulesToUnpack;
if (regions_) {
modulesToUnpackRegional = hgpuMap->getModToUnpRegionalAsync(*(regions_->modulesToUnpack()), cudaStream);
gpuModulesToUnpack = modulesToUnpackRegional.get();
}
else {
gpuModulesToUnpack = hgpuMap->getModToUnpAllAsync(cudaStream);
}


edm::ESHandle<SiPixelGainCalibrationForHLTGPU> hgains;
es.get<SiPixelGainCalibrationForHLTGPURcd>().get(hgains);

Expand Down Expand Up @@ -548,7 +546,7 @@ void SiPixelRawToClusterHeterogeneous::acquireGPUCuda(const edm::HeterogeneousEv

} // end of for loop

gpuAlgo_.makeClustersAsync(gpuMap, gpuModulesToUnpack.get(), hgains->getGPUProductAsync(cudaStream),
gpuAlgo_.makeClustersAsync(gpuMap, gpuModulesToUnpack, hgains->getGPUProductAsync(cudaStream),
wordFedAppender,
wordCounterGPU, fedCounter, convertADCtoElectrons,
useQuality, includeErrors, enableTransfer_, debug, cudaStream);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,10 @@
SiPixelFedCablingMapGPUWrapper::SiPixelFedCablingMapGPUWrapper(SiPixelFedCablingMap const& cablingMap,
TrackerGeometry const& trackerGeom,
SiPixelQuality const *badPixelInfo):
cablingMap_(&cablingMap),
fedMap(pixelgpudetails::MAX_SIZE), linkMap(pixelgpudetails::MAX_SIZE), rocMap(pixelgpudetails::MAX_SIZE),
RawId(pixelgpudetails::MAX_SIZE), rocInDet(pixelgpudetails::MAX_SIZE), moduleId(pixelgpudetails::MAX_SIZE),
badRocs(pixelgpudetails::MAX_SIZE),
badRocs(pixelgpudetails::MAX_SIZE), modToUnpDefault(pixelgpudetails::MAX_SIZE),
hasQuality_(badPixelInfo != nullptr)
{
std::vector<unsigned int> const& fedIds = cablingMap.fedIds();
Expand All @@ -46,6 +47,7 @@ SiPixelFedCablingMapGPUWrapper::SiPixelFedCablingMapGPUWrapper(SiPixelFedCabling
if (pixelRoc != nullptr) {
RawId[index] = pixelRoc->rawId();
rocInDet[index] = pixelRoc->idInDetUnit();
modToUnpDefault[index] = false;
if (badPixelInfo != nullptr)
badRocs[index] = badPixelInfo->IsRocBad(pixelRoc->rawId(), pixelRoc->idInDetUnit());
else
Expand All @@ -54,6 +56,7 @@ SiPixelFedCablingMapGPUWrapper::SiPixelFedCablingMapGPUWrapper(SiPixelFedCabling
RawId[index] = 9999;
rocInDet[index] = 9999;
badRocs[index] = true;
modToUnpDefault[index] = true;
}
index++;
}
Expand Down Expand Up @@ -124,16 +127,21 @@ const SiPixelFedCablingMapGPU *SiPixelFedCablingMapGPUWrapper::getGPUProductAsyn
return data.cablingMapDevice;
}

SiPixelFedCablingMapGPUWrapper::ModulesToUnpack::ModulesToUnpack(cuda::stream_t<>& cudaStream)
{
edm::Service<CUDAService> cs;
modToUnpDevice = cs->make_device_unique<unsigned char[]>(pixelgpudetails::MAX_SIZE, cudaStream);
modToUnpHost = cs->make_host_unique<unsigned char[]>(pixelgpudetails::MAX_SIZE, cudaStream);
// Returns a device pointer to the default (all-modules) unpack mask.
// On first call for the current device the lambda allocates device memory and
// asynchronously copies the host-side modToUnpDefault vector to it on `stream`;
// subsequent calls return the cached pointer (caching is delegated to
// CUDAESProduct::dataForCurrentDeviceAsync — presumably per-device, confirm).
// The caller must keep this wrapper alive while the returned memory is in use.
const unsigned char *SiPixelFedCablingMapGPUWrapper::getModToUnpAllAsync(cuda::stream_t<>& cudaStream) const {
const auto& data = modToUnp_.dataForCurrentDeviceAsync(cudaStream, [this](ModulesToUnpack& data, cuda::stream_t<>& stream) {
cudaCheck(cudaMalloc((void**) & data.modToUnpDefault, pixelgpudetails::MAX_SIZE_BYTE_BOOL));
cudaCheck(cudaMemcpyAsync(data.modToUnpDefault, this->modToUnpDefault.data(), this->modToUnpDefault.size() * sizeof(unsigned char), cudaMemcpyDefault, stream.id()));
});
return data.modToUnpDefault;
}

void SiPixelFedCablingMapGPUWrapper::ModulesToUnpack::fillAsync(SiPixelFedCablingMap const& cablingMap, std::set<unsigned int> const& modules, cuda::stream_t<>& cudaStream) {
std::vector<unsigned int> const& fedIds = cablingMap.fedIds();
std::unique_ptr<SiPixelFedCablingTree> const& cabling = cablingMap.cablingTree();
edm::cuda::device::unique_ptr<unsigned char[]> SiPixelFedCablingMapGPUWrapper::getModToUnpRegionalAsync(std::set<unsigned int> const& modules, cuda::stream_t<>& cudaStream) const {
edm::Service<CUDAService> cs;
auto modToUnpDevice = cs->make_device_unique<unsigned char[]>(pixelgpudetails::MAX_SIZE, cudaStream);
auto modToUnpHost = cs->make_host_unique<unsigned char[]>(pixelgpudetails::MAX_SIZE, cudaStream);

std::vector<unsigned int> const& fedIds = cablingMap_->fedIds();
std::unique_ptr<SiPixelFedCablingTree> const& cabling = cablingMap_->cablingTree();

unsigned int startFed = *(fedIds.begin());
unsigned int endFed = *(fedIds.end() - 1);
Expand All @@ -157,6 +165,7 @@ void SiPixelFedCablingMapGPUWrapper::ModulesToUnpack::fillAsync(SiPixelFedCablin
}

cuda::memory::async::copy(modToUnpDevice.get(), modToUnpHost.get(), pixelgpudetails::MAX_SIZE * sizeof(unsigned char), cudaStream.id());
return modToUnpDevice;
}


Expand All @@ -173,3 +182,7 @@ SiPixelFedCablingMapGPUWrapper::GPUData::~GPUData() {
}
cudaCheck(cudaFree(cablingMapDevice));
}

// Releases the device-side default unpack mask allocated in
// getModToUnpAllAsync; cudaFree(nullptr) is a no-op, so an entry that was
// never filled is safe to destroy.
SiPixelFedCablingMapGPUWrapper::ModulesToUnpack::~ModulesToUnpack() {
cudaCheck(cudaFree(modToUnpDefault));
}

0 comments on commit b4843b4

Please sign in to comment.