Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CPV: mute raw decoder error reporting for 10 minutes if it reports mo… #8627

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,7 @@ namespace o2
namespace cpv
{

class RawDecoderError
{
public:
struct RawDecoderError {
RawDecoderError() = default; //Constructors for vector::emplace_back methods
RawDecoderError(short c, short d, short g, short p, RawErrorType_t e) : ccId(c), dil(d), gas(g), pad(p), errortype(e) {}
RawDecoderError(const RawDecoderError& e) = default;
Expand All @@ -37,7 +35,6 @@ class RawDecoderError
short gas;
short pad;
RawErrorType_t errortype;
ClassDefNV(RawDecoderError, 1);
};

union AddressCharge {
Expand Down Expand Up @@ -99,6 +96,9 @@ class RawDecoder
/// \return Reference to the list of decoding errors
const std::vector<o2::cpv::RawDecoderError>& getErrors() const { return mErrors; }

/// \brief Silence the log messages this decoder prints for decoding problems
///
/// Raises the mIsMuteErrors flag. In the parts of readChannels() visible in
/// this file the flag only guards the LOG calls; the entries appended to
/// mErrors are still recorded, so getErrors() keeps reporting them.
void muteErrors()
{
  mIsMuteErrors = true;
}

protected:
/// \brief Read channels for the current event in the raw buffer
RawErrorType_t readChannels();
Expand All @@ -111,9 +111,10 @@ class RawDecoder
std::vector<uint32_t> mDigits; ///< vector of channels and BCs in the raw stream
std::vector<o2::cpv::BCRecord> mBCRecords; ///< vector of bc references to digits
std::vector<RawDecoderError> mErrors; ///< vector of decoding errors
bool mChannelsInitialized = false; ///< check whether the channels are initialized
bool mChannelsInitialized; ///< check whether the channels are initialized
bool mIsMuteErrors; ///< mute errors

ClassDefNV(RawDecoder, 2);
ClassDefNV(RawDecoder, 3);
};

} // namespace cpv
Expand Down
58 changes: 38 additions & 20 deletions Detectors/CPV/reconstruction/src/RawDecoder.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,13 @@
using namespace o2::cpv;

RawDecoder::RawDecoder(RawReaderMemory& reader) : mRawReader(reader),
mChannelsInitialized(false)
mChannelsInitialized(false),
mIsMuteErrors(false)
{
}

RawErrorType_t RawDecoder::decode()
{

auto& rdh = mRawReader.getRawHeader();
short linkID = o2::raw::RDHUtils::getLinkID(rdh);
mDigits.clear();
Expand All @@ -42,6 +42,12 @@ RawErrorType_t RawDecoder::decode()
RawErrorType_t RawDecoder::readChannels()
{
mChannelsInitialized = false;
// // test error
// if (!mIsMuteErrors) {
// LOG(error) << "RawDecoder::readChannels() : "
// << "test error";
// }
// mErrors.emplace_back(-1, 0, 0, 0, kOK); //-1 is non-existing link with general errors

auto& payloadWords = mRawReader.getPayload();
uint32_t wordCountFromLastHeader = 1; //header word is included
Expand All @@ -59,20 +65,24 @@ RawErrorType_t RawDecoder::readChannels()
<< "I read cpv header for orbit = " << header.orbit()
<< " and BC = " << header.bc();
if (!isHeaderExpected) { //actually, header was not expected
LOG(error) << "RawDecoder::readChannels() : "
<< "header was not expected";
if (!mIsMuteErrors) {
LOG(error) << "RawDecoder::readChannels() : "
<< "header was not expected";
}
removeLastNDigits(nDigitsAddedFromLastHeader); //remove previously added digits as they are bad
mErrors.emplace_back(5, 0, 0, 0, kNO_CPVTRAILER);
mErrors.emplace_back(-1, 0, 0, 0, kNO_CPVTRAILER);
}
skipUntilNextHeader = false;
currentBC = header.bc();
wordCountFromLastHeader = 0;
nDigitsAddedFromLastHeader = 0;
if (currentOrbit != header.orbit()) { //bad cpvheader
LOG(error) << "RawDecoder::readChannels() : "
<< "currentOrbit(=" << currentOrbit
<< ") != header.orbit()(=" << header.orbit() << ")";
mErrors.emplace_back(5, 0, 0, 0, kCPVHEADER_INVALID); //5 is non-existing link with general errors
if (!mIsMuteErrors) {
LOG(error) << "RawDecoder::readChannels() : "
<< "currentOrbit(=" << currentOrbit
<< ") != header.orbit()(=" << header.orbit() << ")";
}
mErrors.emplace_back(-1, 0, 0, 0, kCPVHEADER_INVALID); //-1 is non-existing link with general errors
skipUntilNextHeader = true;
}
} else {
Expand All @@ -89,8 +99,10 @@ RawErrorType_t RawDecoder::readChannels()
if (addDigit(pw.mDataWord, word.ccId(), currentBC)) {
nDigitsAddedFromLastHeader++;
} else {
LOG(debug) << "RawDecoder::readChannels() : "
<< "read pad word with non-valid pad address";
if (!mIsMuteErrors) {
LOG(debug) << "RawDecoder::readChannels() : "
<< "read pad word with non-valid pad address";
}
unsigned int dil = pw.dil, gas = pw.gas, address = pw.address;
mErrors.emplace_back(word.ccId(), dil, gas, address, kPadAddress);
}
Expand All @@ -103,28 +115,34 @@ RawErrorType_t RawDecoder::readChannels()
if (diffInCount > 1 ||
diffInCount < -1) {
//some words lost?
LOG(error) << "RawDecoder::readChannels() : "
<< "Read " << wordCountFromLastHeader << " words, expected " << trailer.wordCounter();
mErrors.emplace_back(5, 0, 0, 0, kCPVTRAILER_INVALID);
if (!mIsMuteErrors) {
LOG(error) << "RawDecoder::readChannels() : "
<< "Read " << wordCountFromLastHeader << " words, expected " << trailer.wordCounter();
}
mErrors.emplace_back(-1, 0, 0, 0, kCPVTRAILER_INVALID);
//throw all previous data and go to next header
removeLastNDigits(nDigitsAddedFromLastHeader);
skipUntilNextHeader = true;
}
if (trailer.bc() != currentBC) {
//trailer does not fit header
LOG(error) << "RawDecoder::readChannels() : "
<< "CPVHeader BC is " << currentBC << " but CPVTrailer BC is " << trailer.bc();
mErrors.emplace_back(5, 0, 0, 0, kCPVTRAILER_INVALID);
if (!mIsMuteErrors) {
LOG(error) << "RawDecoder::readChannels() : "
<< "CPVHeader BC(" << currentBC << ") != CPVTrailer BC(" << trailer.bc() << ")";
}
mErrors.emplace_back(-1, 0, 0, 0, kCPVTRAILER_INVALID);
removeLastNDigits(nDigitsAddedFromLastHeader);
skipUntilNextHeader = true;
}
isHeaderExpected = true;
} else {
wordCountFromLastHeader++;
//error
LOG(error) << "RawDecoder::readChannels() : "
<< "Read unknown word";
mErrors.emplace_back(5, 0, 0, 0, kUNKNOWN_WORD); //add error for non-existing row
if (!mIsMuteErrors) {
LOG(error) << "RawDecoder::readChannels() : "
<< "Read unknown word";
}
mErrors.emplace_back(-1, 0, 0, 0, kUNKNOWN_WORD); //add error for non-existing row
//what to do?
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
// or submit itself to any jurisdiction.

#include <vector>

#include <chrono>
#include "Framework/DataProcessorSpec.h"
#include "Framework/Task.h"
#include "Framework/ConcreteDataMatcher.h"
Expand Down Expand Up @@ -68,12 +68,18 @@ class RawToDigitConverterSpec : public framework::Task
char CheckHWAddress(short ddl, short hwAddress, short& fee);

private:
bool mIsUsingGainCalibration; ///< Use gain calibration from CCDB
bool mIsUsingBadMap; ///< Use BadChannelMap to mask bad channels
bool mIsPedestalData; ///< Do not subtract pedestals if true
std::vector<Digit> mOutputDigits; ///< Container with output cells
std::vector<TriggerRecord> mOutputTriggerRecords; ///< Container with output cells
std::vector<RawDecoderError> mOutputHWErrors; ///< Errors occured in reading data
bool mIsUsingGainCalibration; ///< Use gain calibration from CCDB
bool mIsUsingBadMap; ///< Use BadChannelMap to mask bad channels
bool mIsPedestalData; ///< Do not subtract pedestals if true
std::vector<Digit> mOutputDigits; ///< Container with output cells
std::vector<TriggerRecord> mOutputTriggerRecords; ///< Container with output cells
std::vector<RawDecoderError> mOutputHWErrors; ///< Errors occurred in reading data
bool mIsMuteDecoderErrors = false; ///< mute errors for 10 minutes
int mDecoderErrorsCounterWhenMuted = 0; ///< errors counter while errors are muted
int mDecoderErrorsPerMinute = 0; ///< errors per minute counter
int mMinutesPassed = 0; ///< runtime duration in minutes
std::chrono::time_point<std::chrono::system_clock> mStartTime; ///< Time of start of decoding
std::chrono::time_point<std::chrono::system_clock> mTimeWhenMuted; ///< Time when muted errors
};

/// \brief Creating DataProcessorSpec for the CPV Digit Converter Spec
Expand Down
58 changes: 52 additions & 6 deletions Detectors/CPV/workflow/src/RawToDigitConverterSpec.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ using Lifetime = o2::framework::Lifetime;

void RawToDigitConverterSpec::init(framework::InitContext& ctx)
{
mStartTime = std::chrono::system_clock::now();
mDecoderErrorsPerMinute = 0;
mIsMuteDecoderErrors = false;

LOG(debug) << "Initializing RawToDigitConverterSpec...";
// Pedestal flag true/false
LOG(info) << "Pedestal run: " << (mIsPedestalData ? "YES" : "NO");
Expand All @@ -56,6 +60,23 @@ void RawToDigitConverterSpec::init(framework::InitContext& ctx)

void RawToDigitConverterSpec::run(framework::ProcessingContext& ctx)
{
// check timers if we need mute/unmute error reporting
auto now = std::chrono::system_clock::now();
if (mIsMuteDecoderErrors) { // check if 10-minutes muting period passed
if (((now - mTimeWhenMuted) / std::chrono::minutes(1)) >= 10) {
mIsMuteDecoderErrors = false; //unmute
if (mDecoderErrorsCounterWhenMuted) {
LOG(error) << "RawToDigitConverterSpec::run() : " << mDecoderErrorsCounterWhenMuted << " errors happened while it was muted ((";
}
mDecoderErrorsCounterWhenMuted = 0;
}
}
if (((now - mStartTime) / std::chrono::minutes(1)) > mMinutesPassed) {
mMinutesPassed = (now - mStartTime) / std::chrono::minutes(1);
LOG(debug) << "minutes passed: " << mMinutesPassed;
mDecoderErrorsPerMinute = 0;
}

// Cache digits from bunch crossings as the component reads timeframes from many links consecutively
std::map<o2::InteractionRecord, std::shared_ptr<std::vector<o2::cpv::Digit>>> digitBuffer; // Internal digit buffer
int firstEntry = 0;
Expand Down Expand Up @@ -120,11 +141,13 @@ void RawToDigitConverterSpec::run(framework::ProcessingContext& ctx)
try {
rawreader.next();
} catch (RawErrorType_t e) {
LOG(error) << "Raw decoding error " << (int)e;
if (!mIsMuteDecoderErrors) {
LOG(error) << "Raw decoding error " << (int)e;
}
//add error list
//RawErrorType_t is defined in O2/Detectors/CPV/reconstruction/include/CPVReconstruction/RawReaderMemory.h
//RawDecoderError(short c, short d, short g, short p, RawErrorType_t e)
mOutputHWErrors.emplace_back(25, 0, 0, 0, e); //Put general errors to non-existing ccId 25
mOutputHWErrors.emplace_back(-1, 0, 0, 0, e); //Put general errors to non-existing ccId -1
//if problem in header, abandon this page
if (e == RawErrorType_t::kRDH_DECODING) {
LOG(error) << "RDH decoding error. Skipping this TF";
Expand All @@ -139,17 +162,40 @@ void RawToDigitConverterSpec::run(framework::ProcessingContext& ctx)
auto mod = o2::raw::RDHUtils::getLinkID(rdh) + 2; //link=0,1,2 -> mod=2,3,4
//for now all modules are written to one LinkID
if (mod > o2::cpv::Geometry::kNMod || mod < 2) { //only 3 correct modules:2,3,4
LOG(error) << "module=" << mod << "do not exist";
mOutputHWErrors.emplace_back(25, mod, 0, 0, kRDH_INVALID); //Add non-existing modules to non-existing ccId 25 and dilogic = mod
continue; //skip STU mod
if (!mIsMuteDecoderErrors) {
LOG(error) << "RDH linkId corresponds to module " << mod << " which does not exist";
}
mOutputHWErrors.emplace_back(-1, mod, 0, 0, kRDH_INVALID); //Add non-existing modules to non-existing ccId -1 and dilogic = mod
continue;
}
o2::cpv::RawDecoder decoder(rawreader);
if (mIsMuteDecoderErrors) {
decoder.muteErrors();
}
RawErrorType_t err = decoder.decode();
int decoderErrors = 0;
for (auto errs : decoder.getErrors()) {
if (errs.ccId == -1) { // error related to wrong data format
decoderErrors++;
}
}
mDecoderErrorsPerMinute += decoderErrors;
// LOG(debug) << "RawDecoder found " << decoderErrors << " raw format errors";
// LOG(debug) << "Now I have " << mDecoderErrorsPerMinute << " errors for current minute";
if (mIsMuteDecoderErrors) {
mDecoderErrorsCounterWhenMuted += decoder.getErrors().size();
} else {
if (mDecoderErrorsPerMinute > 10) { // mute error reporting for 10 minutes
LOG(warning) << "> 10 raw decoder error messages per minute, muting it for 10 minutes";
mIsMuteDecoderErrors = true;
mTimeWhenMuted = std::chrono::system_clock::now();
}
}

if (!(err == kOK || err == kOK_NO_PAYLOAD)) {
//TODO handle severe errors
//TODO: probably careful conversion of decoder errors to Fitter errors?
mOutputHWErrors.emplace_back(25, mod, 0, 0, err); //assign general RDH errors to non-existing ccId 25 and dilogic = mod
mOutputHWErrors.emplace_back(-1, mod, 0, 0, err); //assign general RDH errors to non-existing ccId -1 and dilogic = mod
}

std::shared_ptr<std::vector<o2::cpv::Digit>> currentDigitContainer;
Expand Down