Skip to content

Commit

Permalink
ITS-Tracking: introduce multi-ROF seeding vertexer (AliceO2Group#13323)
Browse files Browse the repository at this point in the history
* Add multi rof vertexer idea

* More vertices is better than less

* Improve tracklet validation and manage late vertices

* Fix GPU compilation

* Fix leak and bugs

* Add vertices in both rofs

* Fix rebasing

* Fix non-deltaRof behaviour

* Fix multiple iterations

* Refactor tracklets and fix second iteration with multirof
  • Loading branch information
mconcas authored Aug 21, 2024
1 parent 56ac000 commit ba9e426
Show file tree
Hide file tree
Showing 22 changed files with 524 additions and 343 deletions.
2 changes: 1 addition & 1 deletion DataFormats/Detectors/Common/src/CTFHeader.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ using DetID = o2::detectors::DetID;
/// describe itsel as a string
std::string CTFHeader::describe() const
{
return fmt::format("Run:{:07d} TF:{} Orbit:{:08d} CteationTime:{} Detectors: {}", run, tfCounter, firstTForbit, creationTime, DetID::getNames(detectors));
return fmt::format("Run:{:07d} TF:{} Orbit:{:08d} CreationTime:{} Detectors: {}", run, tfCounter, firstTForbit, creationTime, DetID::getNames(detectors));
}

void CTFHeader::print() const
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,8 +146,8 @@ class Vertex : public VertexBase
GPUd() void setChi2(float v) { mChi2 = v; }
GPUd() float getChi2() const { return mChi2; }

GPUd() const Stamp& getTimeStamp() const { return mTimeStamp; }
GPUd() Stamp& getTimeStamp() { return mTimeStamp; }
GPUhd() const Stamp& getTimeStamp() const { return mTimeStamp; }
GPUhd() Stamp& getTimeStamp() { return mTimeStamp; }
GPUd() void setTimeStamp(const Stamp& v) { mTimeStamp = v; }

protected:
Expand Down
2 changes: 1 addition & 1 deletion DataFormats/common/include/CommonDataFormat/TimeStamp.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ class TimeStamp
GPUhdDefault() TimeStamp() CON_DEFAULT;
GPUhdDefault() ~TimeStamp() CON_DEFAULT;
GPUdi() TimeStamp(T time) { mTimeStamp = time; }
GPUdi() T getTimeStamp() const { return mTimeStamp; }
GPUhdi() T getTimeStamp() const { return mTimeStamp; }
GPUdi() void setTimeStamp(T t) { mTimeStamp = t; }
GPUdi() bool operator==(const TimeStamp<T>& t) const { return mTimeStamp == t.mTimeStamp; }

Expand Down
10 changes: 5 additions & 5 deletions Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -210,15 +210,15 @@ class TimeFrameGPU : public TimeFrame
/// interface
int getNClustersInRofSpan(const int, const int, const int) const;
IndexTableUtils* getDeviceIndexTableUtils() { return mIndexTableUtilsDevice; }
int* getDeviceROframesClusters(const int layer) { return mROframesClustersDevice[layer]; }
int* getDeviceROFramesClusters(const int layer) { return mROFramesClustersDevice[layer]; }
std::vector<std::vector<Vertex>>& getVerticesInChunks() { return mVerticesInChunks; }
std::vector<std::vector<int>>& getNVerticesInChunks() { return mNVerticesInChunks; }
std::vector<o2::its::TrackITSExt>& getTrackITSExt() { return mTrackITSExt; }
std::vector<std::vector<o2::MCCompLabel>>& getLabelsInChunks() { return mLabelsInChunks; }
int getNAllocatedROFs() const { return mNrof; } // Allocated means maximum nROF for each chunk while populated is the number of loaded ones.
StaticTrackingParameters<nLayers>* getDeviceTrackingParameters() { return mTrackingParamsDevice; }
Vertex* getDeviceVertices() { return mVerticesDevice; }
int* getDeviceROframesPV() { return mROframesPVDevice; }
int* getDeviceROFramesPV() { return mROFramesPVDevice; }
unsigned char* getDeviceUsedClusters(const int);
const o2::base::Propagator* getChainPropagator();

Expand Down Expand Up @@ -251,10 +251,10 @@ class TimeFrameGPU : public TimeFrame
// Device pointers
StaticTrackingParameters<nLayers>* mTrackingParamsDevice;
IndexTableUtils* mIndexTableUtilsDevice;
std::array<int*, nLayers> mROframesClustersDevice;
std::array<int*, nLayers> mROFramesClustersDevice;
std::array<unsigned char*, nLayers> mUsedClustersDevice;
Vertex* mVerticesDevice;
int* mROframesPVDevice;
int* mROFramesPVDevice;

// Hybrid pref
std::array<Cluster*, nLayers> mClustersDevice;
Expand Down Expand Up @@ -314,7 +314,7 @@ size_t TimeFrameGPU<nLayers>::loadChunkData(const size_t chunk, const size_t off
template <int nLayers>
inline int TimeFrameGPU<nLayers>::getNClustersInRofSpan(const int rofIdstart, const int rofSpanSize, const int layerId) const
{
return static_cast<int>(mROframesClusters[layerId][(rofIdstart + rofSpanSize) < mROframesClusters.size() ? rofIdstart + rofSpanSize : mROframesClusters.size() - 1] - mROframesClusters[layerId][rofIdstart]);
return static_cast<int>(mROFramesClusters[layerId][(rofIdstart + rofSpanSize) < mROFramesClusters.size() ? rofIdstart + rofSpanSize : mROFramesClusters.size() - 1] - mROFramesClusters[layerId][rofIdstart]);
}
} // namespace gpu
} // namespace its
Expand Down
8 changes: 4 additions & 4 deletions Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu
Original file line number Diff line number Diff line change
Expand Up @@ -443,21 +443,21 @@ void TimeFrameGPU<nLayers>::initDevice(const int chunks,
mMemChunks[iChunk].allocate(GpuTimeFrameChunk<nLayers>::computeRofPerChunk(mGpuParams, mAvailMemGB), mGpuStreams[iChunk]);
}
for (auto iLayer{0}; iLayer < nLayers; ++iLayer) {
checkGPUError(cudaMalloc(reinterpret_cast<void**>(&mROframesClustersDevice[iLayer]), mROframesClusters[iLayer].size() * sizeof(int)));
checkGPUError(cudaMalloc(reinterpret_cast<void**>(&mROFramesClustersDevice[iLayer]), mROFramesClusters[iLayer].size() * sizeof(int)));
checkGPUError(cudaMalloc(reinterpret_cast<void**>(&(mUsedClustersDevice[iLayer])), sizeof(unsigned char) * mGpuParams.clustersPerROfCapacity * mNrof));
}
checkGPUError(cudaMalloc(reinterpret_cast<void**>(&mVerticesDevice), sizeof(Vertex) * mGpuParams.maxVerticesCapacity));
checkGPUError(cudaMalloc(reinterpret_cast<void**>(&mROframesPVDevice), sizeof(int) * (mNrof + 1)));
checkGPUError(cudaMalloc(reinterpret_cast<void**>(&mROFramesPVDevice), sizeof(int) * (mNrof + 1)));

mFirstInit = false;
}
if (maxLayers < nLayers) { // Vertexer
for (auto iLayer{0}; iLayer < nLayers; ++iLayer) {
checkGPUError(cudaMemcpy(mROframesClustersDevice[iLayer], mROframesClusters[iLayer].data(), mROframesClusters[iLayer].size() * sizeof(int), cudaMemcpyHostToDevice));
checkGPUError(cudaMemcpy(mROFramesClustersDevice[iLayer], mROFramesClusters[iLayer].data(), mROFramesClusters[iLayer].size() * sizeof(int), cudaMemcpyHostToDevice));
}
} else { // Tracker
checkGPUError(cudaMemcpy(mVerticesDevice, mPrimaryVertices.data(), sizeof(Vertex) * mPrimaryVertices.size(), cudaMemcpyHostToDevice));
checkGPUError(cudaMemcpy(mROframesPVDevice, mROframesPV.data(), sizeof(int) * mROframesPV.size(), cudaMemcpyHostToDevice));
checkGPUError(cudaMemcpy(mROFramesPVDevice, mROFramesPV.data(), sizeof(int) * mROFramesPV.size(), cudaMemcpyHostToDevice));
if (!iteration) {
for (auto iLayer{0}; iLayer < nLayers; ++iLayer) {
checkGPUError(cudaMemset(mUsedClustersDevice[iLayer], 0, sizeof(unsigned char) * mGpuParams.clustersPerROfCapacity * mNrof));
Expand Down
12 changes: 6 additions & 6 deletions Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -353,8 +353,8 @@ GPUg() void printTrackletsNotStrided(const Tracklet* t,
// Compute the tracklets for a given layer
template <int nLayers = 7>
GPUg() void computeLayerTrackletsKernelSingleRof(
const int rof0,
const int maxRofs,
const short rof0,
const short maxRofs,
const int layerIndex,
const Cluster* clustersCurrentLayer, // input data rof0
const Cluster* clustersNextLayer, // input data rof0-delta <rof0< rof0+delta (up to 3 rofs)
Expand Down Expand Up @@ -385,8 +385,8 @@ GPUg() void computeLayerTrackletsKernelSingleRof(
if (usedClustersLayer[currentSortedIndex]) {
continue;
}
int minRof = (rof0 >= trkPars->DeltaROF) ? rof0 - trkPars->DeltaROF : 0;
int maxRof = (rof0 == maxRofs - trkPars->DeltaROF) ? rof0 : rof0 + trkPars->DeltaROF;
short minRof = (rof0 >= trkPars->DeltaROF) ? rof0 - trkPars->DeltaROF : 0;
short maxRof = (rof0 == static_cast<short>(maxRofs - trkPars->DeltaROF)) ? rof0 : rof0 + trkPars->DeltaROF;
const float inverseR0{1.f / currentCluster.radius};
for (int iPrimaryVertex{0}; iPrimaryVertex < nVertices; iPrimaryVertex++) {
const auto& primaryVertex{vertices[iPrimaryVertex]};
Expand All @@ -410,7 +410,7 @@ GPUg() void computeLayerTrackletsKernelSingleRof(
}
constexpr int tableSize{256 * 128 + 1}; // hardcoded for the time being

for (int rof1{minRof}; rof1 <= maxRof; ++rof1) {
for (short rof1{minRof}; rof1 <= maxRof; ++rof1) {
if (!(roFrameClustersNext[rof1 + 1] - roFrameClustersNext[rof1])) { // number of clusters on next layer > 0
continue;
}
Expand Down Expand Up @@ -561,7 +561,7 @@ GPUg() void computeLayerTrackletsKernelMultipleRof(
const float tanL{(currentCluster.zCoordinate - nextCluster.zCoordinate) / (currentCluster.radius - nextCluster.radius)};
const size_t stride{currentClusterIndex * maxTrackletsPerCluster};
if (storedTracklets < maxTrackletsPerCluster) {
new (trackletsRof0 + stride + storedTracklets) Tracklet{currentSortedIndexChunk, nextClusterIndex, tanL, phi, static_cast<ushort>(rof0), static_cast<ushort>(rof1)};
new (trackletsRof0 + stride + storedTracklets) Tracklet{currentSortedIndexChunk, nextClusterIndex, tanL, phi, static_cast<short>(rof0), static_cast<short>(rof1)};
}
// else {
// printf("its-gpu-tracklet-finder: on rof %d layer: %d: found more tracklets (%d) than maximum allowed per cluster. This is lossy!\n", rof0, layerIndex, storedTracklets);
Expand Down
36 changes: 18 additions & 18 deletions Detectors/ITSMFT/ITS/tracking/GPU/cuda/VertexerTraitsGPU.cu
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ GPUg() void trackleterKernelSingleRof(
Tracklet* Tracklets,
int* foundTracklets,
const IndexTableUtils* utils,
const int rofId,
const short rofId,
const size_t maxTrackletsPerCluster = 1e2)
{
const int phiBins{utils->getNphiBins()};
Expand Down Expand Up @@ -234,15 +234,15 @@ GPUg() void trackleterKernelMultipleRof(
Tracklet* Tracklets,
int* foundTracklets,
const IndexTableUtils* utils,
const unsigned int startRofId,
const unsigned int rofSize,
const short startRofId,
const short rofSize,
const float phiCut,
const size_t maxTrackletsPerCluster = 1e2)
{
const int phiBins{utils->getNphiBins()};
const int zBins{utils->getNzBins()};
for (unsigned int iRof{blockIdx.x}; iRof < rofSize; iRof += gridDim.x) {
auto rof = iRof + startRofId;
for (auto iRof{blockIdx.x}; iRof < rofSize; iRof += gridDim.x) {
short rof = static_cast<short>(iRof) + startRofId;
auto* clustersNextLayerRof = clustersNextLayer + (sizeNextLClusters[rof] - sizeNextLClusters[startRofId]);
auto* clustersCurrentLayerRof = clustersCurrentLayer + (sizeCurrentLClusters[rof] - sizeCurrentLClusters[startRofId]);
auto nClustersNextLayerRof = sizeNextLClusters[rof + 1] - sizeNextLClusters[rof];
Expand Down Expand Up @@ -273,9 +273,9 @@ GPUg() void trackleterKernelMultipleRof(
if (o2::gpu::GPUCommonMath::Abs(smallestAngleDifference(currentCluster.phi, nextCluster.phi)) < phiCut) {
if (storedTracklets < maxTrackletsPerCluster) {
if constexpr (Mode == TrackletMode::Layer0Layer1) {
new (TrackletsRof + stride + storedTracklets) Tracklet{iNextLayerClusterIndex, iCurrentLayerClusterIndex, nextCluster, currentCluster, static_cast<int>(rof), static_cast<int>(rof)};
new (TrackletsRof + stride + storedTracklets) Tracklet{iNextLayerClusterIndex, iCurrentLayerClusterIndex, nextCluster, currentCluster, rof, rof};
} else {
new (TrackletsRof + stride + storedTracklets) Tracklet{iCurrentLayerClusterIndex, iNextLayerClusterIndex, currentCluster, nextCluster, static_cast<int>(rof), static_cast<int>(rof)};
new (TrackletsRof + stride + storedTracklets) Tracklet{iCurrentLayerClusterIndex, iNextLayerClusterIndex, currentCluster, nextCluster, rof, rof};
}
++storedTracklets;
}
Expand Down Expand Up @@ -625,8 +625,8 @@ void VertexerTraitsGPU::computeTracklets(const int iteration)
gpu::trackleterKernelMultipleRof<TrackletMode::Layer0Layer1><<<rofs, 1024, 0, mTimeFrameGPU->getStream(chunkId).get()>>>(
mTimeFrameGPU->getChunk(chunkId).getDeviceClusters(0), // const Cluster* clustersNextLayer, // 0 2
mTimeFrameGPU->getChunk(chunkId).getDeviceClusters(1), // const Cluster* clustersCurrentLayer, // 1 1
mTimeFrameGPU->getDeviceROframesClusters(0), // const int* sizeNextLClusters,
mTimeFrameGPU->getDeviceROframesClusters(1), // const int* sizeCurrentLClusters,
mTimeFrameGPU->getDeviceROFramesClusters(0), // const int* sizeNextLClusters,
mTimeFrameGPU->getDeviceROFramesClusters(1), // const int* sizeCurrentLClusters,
mTimeFrameGPU->getChunk(chunkId).getDeviceIndexTables(0), // const int* nextIndexTables,
mTimeFrameGPU->getChunk(chunkId).getDeviceTracklets(0), // Tracklet* Tracklets,
mTimeFrameGPU->getChunk(chunkId).getDeviceNTrackletCluster(0), // int* foundTracklets,
Expand All @@ -639,8 +639,8 @@ void VertexerTraitsGPU::computeTracklets(const int iteration)
gpu::trackleterKernelMultipleRof<TrackletMode::Layer1Layer2><<<rofs, 1024, 0, mTimeFrameGPU->getStream(chunkId).get()>>>(
mTimeFrameGPU->getChunk(chunkId).getDeviceClusters(2), // const Cluster* clustersNextLayer, // 0 2
mTimeFrameGPU->getChunk(chunkId).getDeviceClusters(1), // const Cluster* clustersCurrentLayer, // 1 1
mTimeFrameGPU->getDeviceROframesClusters(2), // const int* sizeNextLClusters,
mTimeFrameGPU->getDeviceROframesClusters(1), // const int* sizeCurrentLClusters,
mTimeFrameGPU->getDeviceROFramesClusters(2), // const int* sizeNextLClusters,
mTimeFrameGPU->getDeviceROFramesClusters(1), // const int* sizeCurrentLClusters,
mTimeFrameGPU->getChunk(chunkId).getDeviceIndexTables(2), // const int* nextIndexTables,
mTimeFrameGPU->getChunk(chunkId).getDeviceTracklets(1), // Tracklet* Tracklets,
mTimeFrameGPU->getChunk(chunkId).getDeviceNTrackletCluster(1), // int* foundTracklets,
Expand All @@ -653,8 +653,8 @@ void VertexerTraitsGPU::computeTracklets(const int iteration)
gpu::trackletSelectionKernelMultipleRof<true><<<rofs, 1024, 0, mTimeFrameGPU->getStream(chunkId).get()>>>(
mTimeFrameGPU->getChunk(chunkId).getDeviceClusters(0), // const Cluster* clusters0, // Clusters on layer 0
mTimeFrameGPU->getChunk(chunkId).getDeviceClusters(1), // const Cluster* clusters1, // Clusters on layer 1
mTimeFrameGPU->getDeviceROframesClusters(0), // const int* sizeClustersL0, // Number of clusters on layer 0 per ROF
mTimeFrameGPU->getDeviceROframesClusters(1), // const int* sizeClustersL1, // Number of clusters on layer 1 per ROF
mTimeFrameGPU->getDeviceROFramesClusters(0), // const int* sizeClustersL0, // Number of clusters on layer 0 per ROF
mTimeFrameGPU->getDeviceROFramesClusters(1), // const int* sizeClustersL1, // Number of clusters on layer 1 per ROF
mTimeFrameGPU->getChunk(chunkId).getDeviceTracklets(0), // Tracklet* tracklets01, // Tracklets on layer 0-1
mTimeFrameGPU->getChunk(chunkId).getDeviceTracklets(1), // Tracklet* tracklets12, // Tracklets on layer 1-2
mTimeFrameGPU->getChunk(chunkId).getDeviceNTrackletCluster(0), // const int* nFoundTracklets01, // Number of tracklets found on layers 0-1
Expand Down Expand Up @@ -686,8 +686,8 @@ void VertexerTraitsGPU::computeTracklets(const int iteration)
gpu::trackletSelectionKernelMultipleRof<false><<<rofs, 1024, 0, mTimeFrameGPU->getStream(chunkId).get()>>>(
mTimeFrameGPU->getChunk(chunkId).getDeviceClusters(0), // const Cluster* clusters0, // Clusters on layer 0
mTimeFrameGPU->getChunk(chunkId).getDeviceClusters(1), // const Cluster* clusters1, // Clusters on layer 1
mTimeFrameGPU->getDeviceROframesClusters(0), // const int* sizeClustersL0, // Number of clusters on layer 0 per ROF
mTimeFrameGPU->getDeviceROframesClusters(1), // const int* sizeClustersL1, // Number of clusters on layer 1 per ROF
mTimeFrameGPU->getDeviceROFramesClusters(0), // const int* sizeClustersL0, // Number of clusters on layer 0 per ROF
mTimeFrameGPU->getDeviceROFramesClusters(1), // const int* sizeClustersL1, // Number of clusters on layer 1 per ROF
mTimeFrameGPU->getChunk(chunkId).getDeviceTracklets(0), // Tracklet* tracklets01, // Tracklets on layer 0-1
mTimeFrameGPU->getChunk(chunkId).getDeviceTracklets(1), // Tracklet* tracklets12, // Tracklets on layer 1-2
mTimeFrameGPU->getChunk(chunkId).getDeviceNTrackletCluster(0), // const int* nFoundTracklets01, // Number of tracklets found on layers 0-1
Expand Down Expand Up @@ -721,8 +721,8 @@ void VertexerTraitsGPU::computeTracklets(const int iteration)
std::vector<bool> usedLines;
for (int rofId{0}; rofId < rofs; ++rofId) {
auto rof = offset + rofId;
auto clustersL1offsetRof = mTimeFrameGPU->getROframeClusters(1)[rof] - mTimeFrameGPU->getROframeClusters(1)[offset]; // starting cluster offset for this ROF
auto nClustersL1Rof = mTimeFrameGPU->getROframeClusters(1)[rof + 1] - mTimeFrameGPU->getROframeClusters(1)[rof]; // number of clusters for this ROF
auto clustersL1offsetRof = mTimeFrameGPU->getROFrameClusters(1)[rof] - mTimeFrameGPU->getROFrameClusters(1)[offset]; // starting cluster offset for this ROF
auto nClustersL1Rof = mTimeFrameGPU->getROFrameClusters(1)[rof + 1] - mTimeFrameGPU->getROFrameClusters(1)[rof]; // number of clusters for this ROF
auto linesOffsetRof = exclusiveFoundLinesHost[clustersL1offsetRof]; // starting line offset for this ROF
auto nLinesRof = exclusiveFoundLinesHost[clustersL1offsetRof + nClustersL1Rof] - linesOffsetRof;
gsl::span<const o2::its::Line> linesInRof(lines.data() + linesOffsetRof, static_cast<gsl::span<o2::its::Line>::size_type>(nLinesRof));
Expand Down Expand Up @@ -754,7 +754,7 @@ void VertexerTraitsGPU::computeTracklets(const int iteration)
int start{0};
for (int rofId{0}; rofId < mTimeFrameGPU->getNVerticesInChunks()[chunkId].size(); ++rofId) {
gsl::span<const Vertex> rofVerts{mTimeFrameGPU->getVerticesInChunks()[chunkId].data() + start, static_cast<gsl::span<Vertex>::size_type>(mTimeFrameGPU->getNVerticesInChunks()[chunkId][rofId])};
mTimeFrameGPU->addPrimaryVertices(rofVerts);
mTimeFrameGPU->addPrimaryVertices(rofVerts, rofId, 0);
if (mTimeFrameGPU->hasMCinformation()) {
// mTimeFrameGPU->getVerticesLabels().emplace_back();
// TODO: add MC labels
Expand Down
Loading

0 comments on commit ba9e426

Please sign in to comment.