Skip to content

Commit

Permalink
New RooAbsData::split function for smaller datasets in categories
Browse files Browse the repository at this point in the history
  • Loading branch information
gartrog committed Dec 10, 2021
1 parent 30c8781 commit ceb4559
Show file tree
Hide file tree
Showing 6 changed files with 122 additions and 2 deletions.
4 changes: 4 additions & 0 deletions roofit/roofitcore/inc/RooAbsData.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ class RooAbsReal ;
class RooRealVar;
class RooAbsRealLValue;
class RooAbsCategory ;
class RooSimultaneous ;
class RooAbsCategoryLValue;
class Roo1DTable ;
class RooPlot;
Expand Down Expand Up @@ -225,6 +226,9 @@ class RooAbsData : public TNamed, public RooPrintable {
// Split a dataset by a category
virtual TList* split(const RooAbsCategory& splitCat, Bool_t createEmptyDataSets=kFALSE) const ;

// Split a dataset by categories of a RooSimultaneous
virtual TList* split(const RooSimultaneous& simpdf, Bool_t createEmptyDataSets=kFALSE) const ;

// Fast splitting for SimMaster setData
Bool_t canSplitFast() const ;
RooAbsData* getSimData(const char* idxstate) ;
Expand Down
111 changes: 111 additions & 0 deletions roofit/roofitcore/src/RooAbsData.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ observable snapshots are stored in the dataset.
#include "RooCategory.h"
#include "RooTrace.h"
#include "RooUniformBinning.h"
#include "RooSimultaneous.h"

#include "RooRealVar.h"
#include "RooGlobalFunc.h"
Expand Down Expand Up @@ -1596,6 +1597,116 @@ TList* RooAbsData::split(const RooAbsCategory& splitCat, Bool_t createEmptyDataS
return dsetList;
}

////////////////////////////////////////////////////////////////////////////////
/// Split this dataset into subsets according to the index category of a
/// RooSimultaneous.
///
/// A TList of datasets is returned in which each dataset is named after the
/// index-category state whose entries it contains. Each sub-dataset holds:
///  - the variables of this dataset that are neither the split category (or,
///    for a derived split category, the variables it depends on) nor an
///    observable of any of the per-state pdfs, plus
///  - the observables of the pdf that `simpdf` associates with that state.
///
/// A state for which `simpdf` has no pdf gets a sub-dataset without any
/// pdf-specific observables.
///
/// \param[in] simpdf Simultaneous pdf providing the index category and the
///            per-state pdfs.
/// \param[in] createEmptyDataSets If kFALSE (default), datasets are only
///            created for states that have at least one entry. If kTRUE, an
///            (initially empty) dataset is created for every state.
/// \return List of sub-datasets, or nullptr if the index category cannot be
///         evaluated on this dataset. The caller takes ownership of the
///         returned list and its contents.

TList* RooAbsData::split(const RooSimultaneous& simpdf, Bool_t createEmptyDataSets) const
{
  RooAbsCategoryLValue& splitCat = const_cast<RooAbsCategoryLValue&>(simpdf.indexCat());

  // Sanity check
  if (!splitCat.dependsOn(*get())) {
    coutE(InputArguments) << "RooTreeData::split(" << GetName() << ") ERROR category " << splitCat.GetName()
                          << " doesn't depend on any variable in this dataset" << endl ;
    return nullptr ;
  }

  // Clone splitting category and attach to self
  RooAbsCategory* cloneCat = nullptr;
  RooArgSet* cloneSet = nullptr;
  if (splitCat.isDerived()) {
    cloneSet = static_cast<RooArgSet*>(RooArgSet(splitCat).snapshot(kTRUE)) ;
    if (!cloneSet) {
      coutE(InputArguments) << "RooTreeData::split(" << GetName() << ") Couldn't deep-clone splitting category, abort." << endl ;
      return nullptr ;
    }
    cloneCat = static_cast<RooAbsCategory*>(cloneSet->find(splitCat.GetName())) ;
    cloneCat->attachDataSet(*this) ;
  } else {
    cloneCat = dynamic_cast<RooAbsCategory*>(get()->find(splitCat.GetName())) ;
    if (!cloneCat) {
      coutE(InputArguments) << "RooTreeData::split(" << GetName() << ") ERROR category " << splitCat.GetName()
                            << " is fundamental and does not appear in this dataset" << endl ;
      return nullptr ;
    }
  }

  // Construct set of variables to be included in split sets = full set - split category
  RooArgSet subsetVars(*get()) ;
  if (splitCat.isDerived()) {
    RooArgSet* vars = splitCat.getVariables() ;
    subsetVars.remove(*vars,kTRUE,kTRUE) ;
    delete vars ;
  } else {
    subsetVars.remove(splitCat,kTRUE,kTRUE) ;
  }

  // Add weight variable explicitly if dataset has weights, but no top-level weight
  // variable exists (can happen with composite datastores)
  Bool_t addWV(kFALSE) ;
  RooRealVar newweight("weight","weight",-1e9,1e9) ;
  if (isWeighted() && !IsA()->InheritsFrom(RooDataHist::Class())) {
    subsetVars.add(newweight) ;
    addWV = kTRUE ;
  }

  // Append to `target` the observables of the pdf associated with state `label`.
  // Does nothing if the RooSimultaneous has no pdf for that state. The set
  // returned by getObservables() is heap-allocated, so it is released here.
  auto addPdfObservables = [this, &simpdf](const char* label, RooArgSet& target) {
    if (auto* catPdf = simpdf.getPdf(label)) {
      RooArgSet* pdfObs = catPdf->getObservables(this);
      target.add(*pdfObs);
      delete pdfObs;
    }
  };

  // By default, remove all category observables from the subdatasets
  RooArgSet allObservables;
  for (const auto& catPair : splitCat) {
    addPdfObservables(catPair.first.c_str(), allObservables);
  }
  subsetVars.remove(allObservables, kTRUE, kTRUE);

  // Split a dataset in a series of subsets, each corresponding
  // to a state of splitCat
  TList* dsetList = new TList ;

  // Create the (empty) sub-dataset for state `label`, containing the common
  // variables plus only the observables of that state's pdf, and register it
  // in the output list.
  auto makeSubset = [&](const char* label) {
    RooArgSet subsetVarsCat(subsetVars);
    addPdfObservables(label, subsetVarsCat);
    RooAbsData* subset = emptyClone(label, label, &subsetVarsCat, addWV ? "weight" : nullptr);
    dsetList->Add(subset);
    return subset;
  };

  // If createEmptyDataSets is true, prepopulate with empty sets corresponding to all states
  if (createEmptyDataSets) {
    for (const auto& nameIdx : *cloneCat) {
      makeSubset(nameIdx.first.c_str());
    }
  }

  // Loop over dataset and copy event to matching subset
  const bool propWeightSquared = isWeighted();
  for (Int_t i = 0; i < numEntries(); ++i) {
    // get(i) loads row i, which also updates the state of cloneCat
    const RooArgSet* row = get(i);
    const char* label = cloneCat->getCurrentLabel();

    RooAbsData* subset = static_cast<RooAbsData*>(dsetList->FindObject(label));
    if (!subset) {
      subset = makeSubset(label);
    }

    if (!propWeightSquared) {
      subset->add(*row, weight());
    } else {
      subset->add(*row, weight(), weightSquared());
    }
  }

  delete cloneSet;
  return dsetList;
}

////////////////////////////////////////////////////////////////////////////////
/// Plot dataset on specified frame.
///
Expand Down
2 changes: 1 addition & 1 deletion roofit/roofitcore/src/RooAbsProxy.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ RooAbsProxy::RooAbsProxy(const char* /*name*/, const RooAbsProxy& other) :

/// Make `_nset` point at `newNormSet`.
/// Only the pointer is stored (no copy is made); constness is cast away so the
/// value can be assigned to `_nset`.
void RooAbsProxy::changeNormSet(const RooArgSet* newNormSet)
{
// NOTE(review): the two assignments below store the same pointer (C-style cast
// vs. const_cast). They look like the removed/added sides of a diff hunk shown
// together -- confirm which single line the real file keeps.
_nset = (RooArgSet*) newNormSet ;
_nset = const_cast<RooArgSet*>(newNormSet) ;
}


Expand Down
2 changes: 1 addition & 1 deletion roofit/roofitcore/src/RooAbsTestStatistic.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,7 @@ void RooAbsTestStatistic::initSimMode(RooSimultaneous* simpdf, RooAbsData* data,
RooAbsCategoryLValue& simCat = const_cast<RooAbsCategoryLValue&>(simpdf->indexCat());

TString simCatName(simCat.GetName());
TList* dsetList = const_cast<RooAbsData*>(data)->split(simCat,processEmptyDataSets());
TList* dsetList = const_cast<RooAbsData*>(data)->split(*simpdf,processEmptyDataSets());
if (!dsetList) {
coutE(Fitting) << "RooAbsTestStatistic::initSimMode(" << GetName() << ") ERROR: index category of simultaneous pdf is missing in dataset, aborting" << endl;
throw std::runtime_error("RooAbsTestStatistic::initSimMode() ERROR, index category of simultaneous pdf is missing in dataset, aborting");
Expand Down
4 changes: 4 additions & 0 deletions roofit/roofitcore/src/RooNLLVar.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,10 @@ Double_t RooNLLVar::evaluatePartition(std::size_t firstEvent, std::size_t lastEv

_dataClone->store()->recalculateCache( _projDeps, firstEvent, lastEvent, stepSize, (_binnedPdf?kFALSE:kTRUE) ) ;

//_dataClone->Print();
//_dataClone->Print("V");
//_dataClone->store()->dump();



// If pdf is marked as binned - do a binned likelihood calculation here (sum of log-Poisson for each bin)
Expand Down
1 change: 1 addition & 0 deletions roofit/roofitcore/src/RooVectorDataStore.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,7 @@ const RooArgSet* RooVectorDataStore::get(Int_t index) const
_currentWeightIndex = index;

if (_cache) {
//_cache->dump();
_cache->get(index) ;
}

Expand Down

0 comments on commit ceb4559

Please sign in to comment.