Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ITS-tracking::Async: optionally discard whole TF instead of breaking #13791

Merged
merged 1 commit into from
Dec 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,8 @@ struct TrackingParameters {
bool PerPrimaryVertexProcessing = false;
bool SaveTimeBenchmarks = false;
bool DoUPCIteration = false;
bool FataliseUponFailure = true;
bool DropTFUponFailure = false;
/// Cluster attachment
bool UseTrackFollower = false;
bool UseTrackFollowerTop = false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ class TimeFrame
void printCellLUTonLayer(int i);
void printTrackletLUTs();
void printCellLUTs();
void printROFInfo(const int rofId);
void printSliceInfo(const int, const int);

IndexTableUtils mIndexTableUtils;

Expand Down Expand Up @@ -297,6 +297,13 @@ class TimeFrame
std::vector<uint8_t> mMultiplicityCutMask;

const o2::base::PropagatorImpl<float>* mPropagatorDevice = nullptr; // Needed only for GPU
void dropTracks()
{
for (auto& v : mTracks) {
deepVectorClear(v);
}
}

protected:
template <typename T>
void deepVectorClear(std::vector<T>& vec)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,12 @@ namespace its

struct VertexerParamConfig : public o2::conf::ConfigurableParamHelper<VertexerParamConfig> {

int nIterations = 1; // Number of vertexing passes to perform
int vertPerRofThreshold = 0; // Maximum number of vertices per ROF to trigger second a round
bool allowSingleContribClusters = false;
// Number of ROFs to be considered for the vertexing
int deltaRof = 0;
int nIterations = 1; // Number of vertexing passes to perform.
int vertPerRofThreshold = 0; // Maximum number of vertices per ROF to trigger a second iteration.
bool allowSingleContribClusters = false; // attempt to find vertices in case of a single tracklet found.
int deltaRof = 0; // Number of ROFs to be considered for the vertexing.

// geometrical cuts
// geometrical cuts for tracklet selection
float zCut = 0.002f;
float phiCut = 0.005f;
float pairCut = 0.04f;
Expand All @@ -42,12 +41,12 @@ struct VertexerParamConfig : public o2::conf::ConfigurableParamHelper<VertexerPa
float maxZPositionAllowed = 25.f; // 4x sZ of the beam

// Artefacts selections
int clusterContributorsCut = 16;
int clusterContributorsCut = 16; // minimum number of contributors for the second vertex found in the same ROF (pileup cut)
int maxTrackletsPerCluster = 1e2;
int phiSpan = -1;
int zSpan = -1;
int ZBins = 1;
int PhiBins = 128;
int ZBins = 1; // z-phi index table configuration: number of z bins
int PhiBins = 128; // z-phi index table configuration: number of phi bins

int nThreads = 1;

Expand All @@ -56,11 +55,11 @@ struct VertexerParamConfig : public o2::conf::ConfigurableParamHelper<VertexerPa

struct TrackerParamConfig : public o2::conf::ConfigurableParamHelper<TrackerParamConfig> {
// Use TGeo for mat. budget
bool useMatCorrTGeo = false;
bool useFastMaterial = false;
int deltaRof = 0;
float sysErrY2[7] = {0}; // systematic error^2 in Y per layer
float sysErrZ2[7] = {0}; // systematic error^2 in Z per layer
bool useMatCorrTGeo = false; // use full geometry to correct for material budget accounting in the fits. Default is to use the material budget LUT.
bool useFastMaterial = false; // use faster material approximation for material budget accounting in the fits.
int deltaRof = 0; // configure the width of the window in ROFs to be considered for the tracking.
float sysErrY2[7] = {0}; // systematic error^2 in Y per layer
float sysErrZ2[7] = {0}; // systematic error^2 in Z per layer
float maxChi2ClusterAttachment = -1.f;
float maxChi2NDF = -1.f;
float nSigmaCut = -1.f;
Expand All @@ -69,23 +68,25 @@ struct TrackerParamConfig : public o2::conf::ConfigurableParamHelper<TrackerPara
float pvRes = -1.f;
int LUTbinsPhi = -1;
int LUTbinsZ = -1;
float diamondPos[3] = {0.f, 0.f, 0.f};
bool useDiamond = false;
unsigned long maxMemory = 0;
int useTrackFollower = -1; // bit 0: allow mixing implies bits 1&2; bit 1: topwards; bit2: downwards; => 0 off
float trackFollowerNSigmaZ = 1.f; // sigma in z-cut for track-following search rectangle
float trackFollowerNSigmaPhi = 1.f; // sigma in phi-cut for track-following search rectangle
float diamondPos[3] = {0.f, 0.f, 0.f}; // override the position of the vertex
bool useDiamond = false; // enable overriding the vertex position
unsigned long maxMemory = 0; // override default protections on the maximum memory to be used by the tracking
int useTrackFollower = -1; // bit 0: allow mixing implies bits 1&2; bit 1: topwards; bit2: downwards; => 0 off
float trackFollowerNSigmaZ = 1.f; // sigma in z-cut for track-following search rectangle
float trackFollowerNSigmaPhi = 1.f; // sigma in phi-cut for track-following search rectangle
float cellsPerClusterLimit = -1.f;
float trackletsPerClusterLimit = -1.f;
int findShortTracks = -1;
int nThreads = 1;
int nOrbitsPerIterations = 0;
int nROFsPerIterations = 0;
bool perPrimaryVertexProcessing = false;
bool saveTimeBenchmarks = false;
bool overrideBeamEstimation = false; // used by gpuwf only
int trackingMode = -1; // -1: unset, 0=sync, 1=async, 2=cosmics used by gpuwf only
bool doUPCIteration = false;
int nThreads = 1; // number of threads to perform the operations in parallel.
int nROFsPerIterations = 0; // size of the slice of ROFs to be processed at a time, preferably integer divisors of nROFs per TF, to balance the iterations.
int nOrbitsPerIterations = 0; // not implemented: size of the slice of ROFs to be processed at a time, computed using the number of ROFs per orbit.
bool perPrimaryVertexProcessing = false; // perform the full tracking considering the vertex hypotheses one at the time.
bool saveTimeBenchmarks = false; // dump metrics on file
bool overrideBeamEstimation = false; // use beam position from meanVertex CCDB object
int trackingMode = -1; // -1: unset, 0=sync, 1=async, 2=cosmics used by gpuwf only
bool doUPCIteration = false; // Perform an additional iteration for UPC events on tagged vertices. You want to combine this config with VertexerParamConfig.nIterations=2
bool fataliseUponFailure = true; // granular management of the fatalisation in async mode
bool dropTFUponFailure = false;

O2ParamDef(TrackerParamConfig, "ITSCATrackerParam");
};
Expand Down
21 changes: 12 additions & 9 deletions Detectors/ITSMFT/ITS/tracking/src/TimeFrame.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -609,16 +609,19 @@ void TimeFrame::printNClsPerROF()
}
}

void TimeFrame::printROFInfo(const int rofId)
void TimeFrame::printSliceInfo(const int startROF, const int sliceSize)
{
std::cout << "ROF " << rofId << " dump:" << std::endl;
for (int iLayer{0}; iLayer < mClusters.size(); ++iLayer) {
std::cout << "Layer " << iLayer << " has: " << getClustersOnLayer(rofId, iLayer).size() << " clusters." << std::endl;
}
std::cout << "Number of seeding vertices: " << getPrimaryVertices(rofId).size() << std::endl;
int iVertex{0};
for (auto& v : getPrimaryVertices(rofId)) {
std::cout << "\t vertex " << iVertex++ << ": x=" << v.getX() << " " << " y=" << v.getY() << " z=" << v.getZ() << " has " << v.getNContributors() << " contributors." << std::endl;
std::cout << "Dumping slice of " << sliceSize << " rofs:" << std::endl;
for (int iROF{startROF}; iROF < startROF + sliceSize; ++iROF) {
std::cout << "ROF " << iROF << " dump:" << std::endl;
for (unsigned int iLayer{0}; iLayer < mClusters.size(); ++iLayer) {
std::cout << "Layer " << iLayer << " has: " << getClustersOnLayer(iROF, iLayer).size() << " clusters." << std::endl;
}
std::cout << "Number of seeding vertices: " << getPrimaryVertices(iROF).size() << std::endl;
int iVertex{0};
for (auto& v : getPrimaryVertices(iROF)) {
std::cout << "\t vertex " << iVertex++ << ": x=" << v.getX() << " " << " y=" << v.getY() << " z=" << v.getZ() << " has " << v.getNContributors() << " contributors." << std::endl;
}
}
}

Expand Down
20 changes: 17 additions & 3 deletions Detectors/ITSMFT/ITS/tracking/src/Tracker.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ void Tracker::clustersToTracks(std::function<void(std::string s)> logger, std::f
}
}

bool dropTF = false;
for (int iteration = 0; iteration < (int)mTrkParams.size(); ++iteration) {
if (iteration == 3 && mTrkParams[0].DoUPCIteration) {
mTimeFrame->swapMasks();
Expand All @@ -75,9 +76,12 @@ void Tracker::clustersToTracks(std::function<void(std::string s)> logger, std::f
&Tracker::computeTracklets, "Tracklet finding", [](std::string) {}, iteration, iROFs, iVertex);
nTracklets += mTraits->getTFNumberOfTracklets();
if (!mTimeFrame->checkMemory(mTrkParams[iteration].MaxMemory)) {
mTimeFrame->printROFInfo(iROFs);
mTimeFrame->printSliceInfo(iROFs, mTrkParams[iteration].nROFsPerIterations);
error(fmt::format("Too much memory used during trackleting in iteration {} in ROF span {}-{}: {:.2f} GB. Current limit is {:.2f} GB, check the detector status and/or the selections.",
iteration, iROFs, iROFs + mTrkParams[iteration].nROFsPerIterations, mTimeFrame->getArtefactsMemory() / GB, mTrkParams[iteration].MaxMemory / GB));
if (mTrkParams[iteration].DropTFUponFailure) {
dropTF = true;
}
break;
}
float trackletsPerCluster = mTraits->getTFNumberOfClusters() > 0 ? float(mTraits->getTFNumberOfTracklets()) / mTraits->getTFNumberOfClusters() : 0.f;
Expand All @@ -91,9 +95,12 @@ void Tracker::clustersToTracks(std::function<void(std::string s)> logger, std::f
&Tracker::computeCells, "Cell finding", [](std::string) {}, iteration);
nCells += mTraits->getTFNumberOfCells();
if (!mTimeFrame->checkMemory(mTrkParams[iteration].MaxMemory)) {
mTimeFrame->printROFInfo(iROFs);
mTimeFrame->printSliceInfo(iROFs, mTrkParams[iteration].nROFsPerIterations);
error(fmt::format("Too much memory used during cell finding in iteration {} in ROF span {}-{}: {:.2f} GB. Current limit is {:.2f} GB, check the detector status and/or the selections.",
iteration, iROFs, iROFs + mTrkParams[iteration].nROFsPerIterations, mTimeFrame->getArtefactsMemory() / GB, mTrkParams[iteration].MaxMemory / GB));
if (mTrkParams[iteration].DropTFUponFailure) {
dropTF = true;
}
break;
}
float cellsPerCluster = mTraits->getTFNumberOfClusters() > 0 ? float(mTraits->getTFNumberOfCells()) / mTraits->getTFNumberOfClusters() : 0.f;
Expand All @@ -110,7 +117,7 @@ void Tracker::clustersToTracks(std::function<void(std::string s)> logger, std::f
&Tracker::findRoads, "Road finding", [](std::string) {}, iteration);
}
iVertex++;
} while (iVertex < maxNvertices);
} while (iVertex < maxNvertices && !dropTF);
logger(fmt::format(" - Tracklet finding: {} tracklets found in {:.2f} ms", nTracklets, timeTracklets));
logger(fmt::format(" - Cell finding: {} cells found in {:.2f} ms", nCells, timeCells));
logger(fmt::format(" - Neighbours finding: {} neighbours found in {:.2f} ms", nNeighbours, timeNeighbours));
Expand All @@ -122,6 +129,11 @@ void Tracker::clustersToTracks(std::function<void(std::string s)> logger, std::f
total += timeExtending;
logger(fmt::format(" - Extending Tracks: {} extended tracks using {} clusters found in {:.2f} ms", nExtendedTracks + mTimeFrame->mNExtendedTracks, nExtendedClusters + mTimeFrame->mNExtendedUsedClusters, timeExtending));
}
if (dropTF) {
error(fmt::format("...Dropping Timeframe..."));
mTimeFrame->dropTracks();
break; // breaking out the iterations loop
}
}

total += evaluateTask(&Tracker::findShortPrimaries, "Short primaries finding", logger);
Expand Down Expand Up @@ -502,6 +514,8 @@ void Tracker::getGlobalConfiguration()
params.nROFsPerIterations = nROFsPerIterations;
params.PerPrimaryVertexProcessing = tc.perPrimaryVertexProcessing;
params.SaveTimeBenchmarks = tc.saveTimeBenchmarks;
params.FataliseUponFailure = tc.fataliseUponFailure;
params.DropTFUponFailure = tc.dropTFUponFailure;
for (int iD{0}; iD < 3; ++iD) {
params.Diamond[iD] = tc.diamondPos[iD];
}
Expand Down
4 changes: 2 additions & 2 deletions Detectors/ITSMFT/ITS/tracking/src/TrackingInterface.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -271,13 +271,13 @@ void ITSTrackingInterface::run(framework::ProcessingContext& pc)
mTimeFrame->setROFMask(processUPCMask);
// Run CA tracker
if constexpr (isGPU) {
if (mMode == o2::its::TrackingMode::Async) {
if (mMode == o2::its::TrackingMode::Async && o2::its::TrackerParamConfig::Instance().fataliseUponFailure) {
mTracker->clustersToTracksHybrid(logger, fatalLogger);
} else {
mTracker->clustersToTracksHybrid(logger, errorLogger);
}
} else {
if (mMode == o2::its::TrackingMode::Async) {
if (mMode == o2::its::TrackingMode::Async && o2::its::TrackerParamConfig::Instance().fataliseUponFailure) {
mTracker->clustersToTracks(logger, fatalLogger);
} else {
mTracker->clustersToTracks(logger, errorLogger);
Expand Down
Loading