Skip to content

Commit

Permalink
Create TPC occupancy map for whole TPC not for sector/row
Browse files Browse the repository at this point in the history
  • Loading branch information
davidrohr committed Mar 4, 2024
1 parent f2c709b commit ee61a88
Show file tree
Hide file tree
Showing 13 changed files with 37 additions and 46 deletions.
5 changes: 2 additions & 3 deletions GPU/GPUTracking/Base/GPUParam.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ namespace gpu
struct GPUSettingsRec;
struct GPUSettingsGTP;
struct GPURecoStepConfiguration;
struct GPUTPCClusterOccupancyMapBin;

struct GPUParamSlice {
float Alpha; // slice angle
Expand All @@ -60,7 +59,7 @@ struct GPUParam_t {

GPUTPCGeometry tpcGeometry; // TPC Geometry
GPUTPCGMPolynomialField polynomialField; // Polynomial approx. of magnetic field for TPC GM
const GPUTPCClusterOccupancyMapBin* occupancyMap; // Ptr to TPC occupancy map
const unsigned int* occupancyMap; // Ptr to TPC occupancy map

GPUParamSlice SliceParam[GPUCA_NSLICES];

Expand Down Expand Up @@ -104,7 +103,7 @@ struct GPUParam : public internal::GPUParam_t<GPUSettingsRec, GPUSettingsParam>
GPUd() float GetClusterError2(int yz, int type, float z, float angle2, float scaledMult, float scaledAvgCharge) const;
GPUd() void GetClusterErrors2(char sector, int row, float z, float sinPhi, float DzDs, float time, float avgCharge, float& ErrY2, float& ErrZ2) const;
GPUd() void UpdateClusterError2ByState(short clusterState, float& ErrY2, float& ErrZ2) const;
GPUd() float GetScaledMult(int iSlice, int iRow, float time) const;
GPUd() float GetScaledMult(float time) const;

GPUd() void Slice2Global(int iSlice, float x, float y, float z, float* X, float* Y, float* Z) const;
GPUd() void Global2Slice(int iSlice, float x, float y, float z, float* X, float* Y, float* Z) const;
Expand Down
6 changes: 3 additions & 3 deletions GPU/GPUTracking/Base/GPUParam.inc
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ GPUdi() void MEM_LG(GPUParam)::GetClusterErrors2(char sector, int iRow, float z,
float angleY2 = s2 * sec2; // dy/dx
float angleZ2 = DzDs * DzDs * sec2; // dz/dx

float mult = time >= 0.f ? GetScaledMult(sector, iRow, time) / tpcGeometry.Row2X(iRow) : 0.f;
float mult = time >= 0.f ? GetScaledMult(time) / tpcGeometry.Row2X(iRow) : 0.f;

ErrY2 = GetClusterError2(0, rowType, z, angleY2, mult, avgCharge);
ErrZ2 = GetClusterError2(1, rowType, z, angleZ2, mult, avgCharge);
Expand Down Expand Up @@ -191,14 +191,14 @@ GPUdi() void MEM_LG(GPUParam)::UpdateClusterError2ByState(short clusterState, fl
}

MEM_CLASS_PRE()
GPUdi() float MEM_LG(GPUParam)::GetScaledMult(int iSlice, int iRow, float time) const
GPUdi() float MEM_LG(GPUParam)::GetScaledMult(float time) const
{
#if !defined(__OPENCL__) || defined(__OPENCLCPP__)
if (!occupancyMap) {
return 0.f;
}
const unsigned int bin = CAMath::Max(0.f, time / rec.tpc.occupancyMapTimeBins);
return occupancyMap[bin].bin[iSlice][iRow] * rec.tpc.clusterErrorOccupancyScaler;
return occupancyMap[bin] * rec.tpc.clusterErrorOccupancyScaler;
#else
return 0.f;
#endif
Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUTracking/Base/GPUReconstructionCPU.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,7 @@ unsigned int GPUReconstructionCPU::SetAndGetNestedLoopOmpFactor(bool condition,
return mNestedLoopOmpFactor;
}

void GPUReconstructionCPU::UpdateParamOccupancyMap(const GPUTPCClusterOccupancyMapBin* mapHost, const GPUTPCClusterOccupancyMapBin* mapGPU, int stream)
void GPUReconstructionCPU::UpdateParamOccupancyMap(const unsigned int* mapHost, const unsigned int* mapGPU, int stream)
{
param().occupancyMap = mapHost;
if (IsGPU()) {
Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUTracking/Base/GPUReconstructionCPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ class GPUReconstructionCPU : public GPUReconstructionKernels<GPUReconstructionCP
void SetNestedLoopOmpFactor(unsigned int f) { mNestedLoopOmpFactor = f; }
unsigned int SetAndGetNestedLoopOmpFactor(bool condition, unsigned int max);

void UpdateParamOccupancyMap(const GPUTPCClusterOccupancyMapBin* mapHost, const GPUTPCClusterOccupancyMapBin* mapGPU, int stream = -1);
void UpdateParamOccupancyMap(const unsigned int* mapHost, const unsigned int* mapGPU, int stream = -1);

protected:
struct GPUProcessorProcessors : public GPUProcessor {
Expand Down
1 change: 1 addition & 0 deletions GPU/GPUTracking/Definitions/GPUSettingsList.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ AddOptionRTC(maxTimeBinAboveThresholdIn1000Bin, unsigned short, 500, "", 0, "Exc
AddOptionRTC(maxConsecTimeBinAboveThreshold, unsigned short, 200, "", 0, "Except pad from cluster finding if number of consecutive charges in a fragment is above this baseline (disable = 0)")
AddOptionRTC(noisyPadSaturationThreshold, unsigned short, 700, "", 0, "Threshold where a timebin is considered saturated, disabling the noisy pad check for that pad")
AddOptionRTC(occupancyMapTimeBins, unsigned short, 100, "", 0, "Number of timebins per histogram bin of occupancy map (0 = disable occupancy map)")
AddOptionRTC(occupancyMapTimeBinsAverage, unsigned short, 2, "", 0, "Number of timebins +/- to use for the averaging")
AddOptionRTC(trackFitCovLimit, unsigned short, 1000, "", 0, "Abort fit when y/z cov exceed the limit")
AddOptionRTC(addErrorsCECrossing, unsigned char, 0, "", 0, "Add additional custom track errors when crossing CE, 0 = no custom errors but att 0.5 to sigma_z^2, 1 = only to cov diagonal, 2 = preserve correlations")
AddOptionRTC(trackMergerMinPartHits, unsigned char, 10, "", 0, "Minimum hits of track part during track merging")
Expand Down
8 changes: 5 additions & 3 deletions GPU/GPUTracking/Global/GPUChainTrackingSliceTracker.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -149,10 +149,12 @@ int GPUChainTracking::RunTPCTrackingSlices_internal()
AllocateRegisteredMemory(mInputsHost->mResourceOccupancyMap, mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::tpcOccupancyMap)]);
ReleaseEvent(mEvents->init);
auto* ptr = doGPU ? mInputsShadow->mTPCClusterOccupancyMap : mInputsHost->mTPCClusterOccupancyMap;
auto* ptrTmp = (GPUTPCClusterOccupancyMapBin*)mRec->AllocateVolatileMemory(GPUTPCClusterOccupancyMapBin::getTotalSize(param()), doGPU);
int streamOccMap = mRec->NStreams() - 1;
runKernel<GPUMemClean16>(GetGridAutoStep(streamOccMap, RecoStep::TPCSliceTracking), krnlRunRangeNone, {}, ptr, GPUTPCClusterOccupancyMapBin::getTotalSize(param()));
runKernel<GPUTPCCreateOccupancyMap, GPUTPCCreateOccupancyMap::fill>(GetGridBlk(GPUCA_NSLICES * GPUCA_ROW_COUNT, streamOccMap), krnlRunRangeNone, krnlEventNone, ptr);
runKernel<GPUTPCCreateOccupancyMap, GPUTPCCreateOccupancyMap::fold>(GetGridBlk(GPUCA_NSLICES * GPUCA_ROW_COUNT, streamOccMap), krnlRunRangeNone, krnlEventNone, ptr);
runKernel<GPUMemClean16>(GetGridAutoStep(streamOccMap, RecoStep::TPCSliceTracking), krnlRunRangeNone, {}, ptrTmp, GPUTPCClusterOccupancyMapBin::getTotalSize(param()));
runKernel<GPUTPCCreateOccupancyMap, GPUTPCCreateOccupancyMap::fill>(GetGridBlk(GPUCA_NSLICES * GPUCA_ROW_COUNT, streamOccMap), krnlRunRangeNone, krnlEventNone, ptrTmp);
runKernel<GPUTPCCreateOccupancyMap, GPUTPCCreateOccupancyMap::fold>(GetGridBlk(GPUTPCClusterOccupancyMapBin::getNBins(param()), streamOccMap), krnlRunRangeNone, krnlEventNone, ptrTmp, ptr);
mRec->ReturnVolatileMemory();
if (doGPU) {
TransferMemoryResourceLinkToHost(RecoStep::TPCSliceTracking, mInputsHost->mResourceOccupancyMap, streamOccMap, &mEvents->init);
} else {
Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUTracking/Global/GPUTrackingInputProvider.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ class GPUTrackingInputProvider : public GPUProcessor
o2::tpc::ClusterNative* mPclusterNativeBuffer = nullptr;
o2::tpc::ClusterNative* mPclusterNativeOutput = nullptr;

GPUTPCClusterOccupancyMapBin* mTPCClusterOccupancyMap = nullptr;
unsigned int* mTPCClusterOccupancyMap = nullptr;

unsigned int* mErrorCodes = nullptr;

Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUTracking/Merger/GPUTPCGMMerger.h
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ class GPUTPCGMMerger : public GPUProcessor
std::vector<float> StreamerUncorrectedZY(int iSlice, int iRow, const GPUTPCGMTrackParam& track, const GPUTPCGMPropagator& prop) const;

void DebugStreamerUpdate(int iTrk, int ihit, float xx, float yy, float zz, const GPUTPCGMMergedTrackHit& cluster, const o2::tpc::ClusterNative& clusterNative, const GPUTPCGMTrackParam& track, const GPUTPCGMPropagator& prop, const gputpcgmmergertypes::InterpolationErrorHit& interpolation, char rejectChi2, bool refit, int retVal) const;
static void DebugStreamerReject(float mAlpha, int iRow, float posY, float posZ, short clusterState, char rejectChi2, const gputpcgmmergertypes::InterpolationErrorHit& inter, bool refit, int retVal, float err2Y, float err2Z, const GPUTPCGMTrackParam& track, char sector, const GPUParam& param, float time, float avgCharge);
static void DebugStreamerReject(float mAlpha, int iRow, float posY, float posZ, short clusterState, char rejectChi2, const gputpcgmmergertypes::InterpolationErrorHit& inter, bool refit, int retVal, float err2Y, float err2Z, const GPUTPCGMTrackParam& track, const GPUParam& param, float time, float avgCharge);
#endif

GPUdi() int SliceTrackInfoFirst(int iSlice) const { return mSliceTrackInfoIndex[iSlice]; }
Expand Down
11 changes: 6 additions & 5 deletions GPU/GPUTracking/Merger/GPUTPCGMMergerDump.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -331,11 +331,12 @@ void GPUTPCGMMerger::DebugRefitMergedTrack(const GPUTPCGMMergedTrack& track) con

std::vector<unsigned short> GPUTPCGMMerger::StreamerOccupancyBin(int iSlice, int iRow, float time) const
{
std::vector<unsigned short> retVal(5);
std::vector<unsigned short> retVal(1 + 2 * Param().rec.tpc.occupancyMapTimeBinsAverage);
#ifdef DEBUG_STREAMER
const int bin = CAMath::Max(0.f, time / Param().rec.tpc.occupancyMapTimeBins);
for (int i = 0; i < 5; i++) {
retVal[i] = (bin - 2 + i >= 0 && bin - 2 + i < GPUTPCClusterOccupancyMapBin::getNBins(Param())) ? Param().occupancyMap[bin - 2 + i].bin[iSlice][iRow] : 0;
// Dump the occupancy values of the averaging window centred on `bin`:
// slot i of retVal corresponds to absolute map bin (bin - average + i).
for (int i = 0; i < 1 + 2 * Param().rec.tpc.occupancyMapTimeBinsAverage; i++) {
const int mybin = bin + i - Param().rec.tpc.occupancyMapTimeBinsAverage;
// Read the map at the absolute bin index (mybin), not at the window slot i;
// bins outside [0, getNBins) are reported as 0.
// NOTE(review): retVal stores unsigned short while occupancyMap holds unsigned int,
// so values are narrowed here; widening requires changing the function's return type.
retVal[i] = (mybin >= 0 && mybin < GPUTPCClusterOccupancyMapBin::getNBins(Param())) ? Param().occupancyMap[mybin] : 0;
}
#endif
return retVal;
Expand Down Expand Up @@ -375,10 +376,10 @@ void GPUTPCGMMerger::DebugStreamerUpdate(int iTrk, int ihit, float xx, float yy,
#endif
}

void GPUTPCGMMerger::DebugStreamerReject(float mAlpha, int iRow, float posY, float posZ, short clusterState, char rejectChi2, const gputpcgmmergertypes::InterpolationErrorHit& inter, bool refit, int retVal, float err2Y, float err2Z, const GPUTPCGMTrackParam& track, char sector, const GPUParam& param, float time, float avgCharge)
void GPUTPCGMMerger::DebugStreamerReject(float mAlpha, int iRow, float posY, float posZ, short clusterState, char rejectChi2, const gputpcgmmergertypes::InterpolationErrorHit& inter, bool refit, int retVal, float err2Y, float err2Z, const GPUTPCGMTrackParam& track, const GPUParam& param, float time, float avgCharge)
{
#ifdef DEBUG_STREAMER
float scaledMult = (time >= 0.f ? param.GetScaledMult(sector, iRow, time) / param.tpcGeometry.Row2X(iRow) : 0.f);
float scaledMult = (time >= 0.f ? param.GetScaledMult(time) / param.tpcGeometry.Row2X(iRow) : 0.f);
o2::utils::DebugStreamer::instance()->getStreamer("debug_InterpolateReject", "UPDATE") << o2::utils::DebugStreamer::instance()->getUniqueTreeName("tree_InterpolateReject").data()
<< "mAlpha=" << mAlpha
<< "iRow=" << iRow
Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUTracking/Merger/GPUTPCGMPropagator.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -664,7 +664,7 @@ GPUd() int GPUTPCGMPropagator::Update(float posY, float posZ, int iRow, const GP
} else {
int retVal = InterpolateReject(param, posY, posZ, clusterState, rejectChi2, inter, err2Y, err2Z);
GPUCA_DEBUG_STREAMER_CHECK(if (o2::utils::DebugStreamer::checkStream(o2::utils::StreamFlags::streamRejectCluster, iTrk)) {
GPUTPCGMMerger::DebugStreamerReject(mAlpha, iRow, posY, posZ, clusterState, rejectChi2, *inter, refit, retVal, err2Y, err2Z, *mT, sector, param, time, avgCharge);
GPUTPCGMMerger::DebugStreamerReject(mAlpha, iRow, posY, posZ, clusterState, rejectChi2, *inter, refit, retVal, err2Y, err2Z, *mT, param, time, avgCharge);
});
if (retVal) {
return retVal;
Expand Down
36 changes: 12 additions & 24 deletions GPU/GPUTracking/SliceTracker/GPUTPCCreateOccupancyMap.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -36,33 +36,21 @@ GPUdii() void GPUTPCCreateOccupancyMap::Thread<GPUTPCCreateOccupancyMap::fill>(i
}

template <>
GPUdii() void GPUTPCCreateOccupancyMap::Thread<GPUTPCCreateOccupancyMap::fold>(int nBlocks, int nThreads, int iBlock, int iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() processors, GPUTPCClusterOccupancyMapBin* GPUrestrict() map)
GPUdii() void GPUTPCCreateOccupancyMap::Thread<GPUTPCCreateOccupancyMap::fold>(int nBlocks, int nThreads, int iBlock, int iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() processors, GPUTPCClusterOccupancyMapBin* GPUrestrict() map, unsigned int* GPUrestrict() output)
{
GPUParam& GPUrestrict() param = processors.param;
const int iSliceRow = iBlock * nThreads + iThread;
if (iSliceRow > GPUCA_ROW_COUNT * GPUCA_NSLICES) {
return;
}
static constexpr unsigned int FOLD_BINS_BEEFORE_AFTER = 2;
static constexpr unsigned int FOLD_BINS = FOLD_BINS_BEEFORE_AFTER * 2 + 1;
const unsigned int iSlice = iSliceRow / GPUCA_ROW_COUNT;
const unsigned int iRow = iSliceRow % GPUCA_ROW_COUNT;
const unsigned int nBins = GPUTPCClusterOccupancyMapBin::getNBins(param);
if (nBins < FOLD_BINS) {
const unsigned int bin = iBlock * nThreads + iThread;
if (bin >= GPUTPCClusterOccupancyMapBin::getNBins(param)) {
return;
}
unsigned short lastVal[FOLD_BINS_BEEFORE_AFTER];
unsigned int sum = (FOLD_BINS_BEEFORE_AFTER + 1) * map[0].bin[iSlice][iRow];
for (unsigned int i = 0; i < FOLD_BINS_BEEFORE_AFTER; i++) {
sum += map[i + 1].bin[iSlice][iRow];
lastVal[i] = map[0].bin[iSlice][iRow];
}
unsigned int lastValIndex = 0;
for (unsigned int i = 0; i < nBins; i++) {
unsigned short useLastVal = lastVal[lastValIndex];
lastVal[lastValIndex] = map[i].bin[iSlice][iRow];
map[i].bin[iSlice][iRow] = sum / FOLD_BINS;
sum += map[CAMath::Min(i + FOLD_BINS_BEEFORE_AFTER + 1, nBins - 1)].bin[iSlice][iRow] - useLastVal;
lastValIndex = lastValIndex < FOLD_BINS_BEEFORE_AFTER - 1 ? lastValIndex + 1 : 0;
// Averaging window around this thread's bin: +/- occupancyMapTimeBinsAverage bins,
// clamped to the valid range [0, getNBins). binmax is exclusive.
int binmin = CAMath::Max<int>(0, bin - param.rec.tpc.occupancyMapTimeBinsAverage);
int binmax = CAMath::Min<int>(GPUTPCClusterOccupancyMapBin::getNBins(param), bin + param.rec.tpc.occupancyMapTimeBinsAverage + 1);
unsigned int sum = 0;
for (int i = binmin; i < binmax; i++) {
// Accumulate every per-slice/per-row counter of time bin i by walking the
// bin[][] member as one flat array of GPUCA_NSLICES * GPUCA_ROW_COUNT entries
// (assumes bin is a contiguous [slice][row] array - TODO confirm against the struct definition).
for (int iSliceRow = 0; iSliceRow < GPUCA_NSLICES * GPUCA_ROW_COUNT; iSliceRow++) {
sum += (&map[i].bin[0][0])[iSliceRow];
}
}
// Integer average over the bins actually inside the clamped window, so bins near
// the edges are averaged over fewer neighbours. The window always contains `bin`
// itself (the early return above rejects bin >= getNBins), so the divisor is >= 1.
sum /= binmax - binmin;
// One whole-TPC occupancy value per time bin.
output[bin] = sum;
}
4 changes: 2 additions & 2 deletions GPU/GPUTracking/SliceTracker/GPUTPCCreateOccupancyMap.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ class GPUTPCCreateOccupancyMap : public GPUKernelTemplate
fill = 0,
fold = 1 };
GPUhdi() CONSTEXPR static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::TPCSliceTracking; }
template <int iKernel = defaultKernel>
GPUd() static void Thread(int nBlocks, int nThreads, int iBlock, int iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, GPUTPCClusterOccupancyMapBin* map);
template <int iKernel = defaultKernel, typename... Args>
GPUd() static void Thread(int nBlocks, int nThreads, int iBlock, int iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, Args... args);
};

} // namespace GPUCA_NAMESPACE::gpu
Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUTracking/kernels.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, globalTracks1" "= TPCMERG
o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, globalTracks2" "= TPCMERGER" NO single char parameter)
o2_gpu_add_kernel("GPUTPCGlobalDebugSortKernels, borderTracks" "= TPCMERGER" NO single char parameter)
o2_gpu_add_kernel("GPUTPCCreateOccupancyMap, fill" "= TPCOCCUPANCY" LB simple GPUTPCClusterOccupancyMapBin* map)
o2_gpu_add_kernel("GPUTPCCreateOccupancyMap, fold" "= TPCOCCUPANCY" LB simple GPUTPCClusterOccupancyMapBin* map)
o2_gpu_add_kernel("GPUTPCCreateOccupancyMap, fold" "= TPCOCCUPANCY" LB simple GPUTPCClusterOccupancyMapBin* map "unsigned int*" output)
o2_gpu_add_kernel("GPUTPCGMMergerTrackFit" "GPUTPCGMMergerGPU TPCMERGER TPCTRACKER MATLUT TPCDEDX" LB simple int mode)
o2_gpu_add_kernel("GPUTPCGMMergerFollowLoopers" "GPUTPCGMMergerGPU TPCMERGER TPCTRACKER MATLUT" LB simple)
o2_gpu_add_kernel("GPUTPCGMMergerUnpackResetIds" "GPUTPCGMMergerGPU TPCMERGER" LB simple int iSlice)
Expand Down

0 comments on commit ee61a88

Please sign in to comment.