Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Heppy upstream bugfixes and crab cfg examples #264

Merged
merged 10 commits into from
Feb 18, 2015
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def declareVariables(self,setup):
tree = self.tree
self.declareCoreVariables(tree, isMC)

if not hasattr(self.cfg_ana,"ignoreAnalyzerBookings") or not self.cfg_ana.ignoreAnalyzerBooking :
if not hasattr(self.cfg_ana,"ignoreAnalyzerBookings") or not self.cfg_ana.ignoreAnalyzerBookings :
#import variables declared by the analyzers
if hasattr(setup,"globalVariables"):
self.globalVariables+=setup.globalVariables
Expand Down Expand Up @@ -137,6 +137,9 @@ def fillCoreVariables(self, tr, event, isMC):
tr.vfill('pdfWeight_%s' % pdf, event.pdfWeights[pdf])

def process(self, event):
if hasattr(self.cfg_ana,"filter") :
if not self.cfg_ana.filter(event) :
return True #do not stop processing, just filter myself
self.readCollections( event.input)
self.fillTree(event)

Expand Down
11 changes: 6 additions & 5 deletions PhysicsTools/Heppy/python/analyzers/core/TreeAnalyzerNumpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,12 @@ class TreeAnalyzerNumpy( Analyzer ):

def __init__(self, cfg_ana, cfg_comp, looperName):
super(TreeAnalyzerNumpy,self).__init__(cfg_ana, cfg_comp, looperName)
self.outservicename = "outputfile"
if hasattr(cfg_ana,"outservicename") :
self.outservicename = cfg_ana.outservicename
self.outservicename = getattr(cfg_ana,"outservicename","outputfile")
self.treename = getattr(cfg_ana,"treename","tree")


def beginLoop(self, setup) :
super(TreeAnalyzerNumpy, self).beginLoop(setup)
print setup.services
if self.outservicename in setup.services:
print "Using outputfile given in", self.outservicename
self.file = setup.services[self.outservicename].file
Expand All @@ -28,7 +26,10 @@ def beginLoop(self, setup) :
isCompressed = self.cfg_ana.isCompressed if hasattr(self.cfg_ana,'isCompressed') else 1
print 'Compression', isCompressed
self.file = TFile( fileName, 'recreate', '', isCompressed )
self.tree = Tree('tree', self.name)
self.file.cd()
if self.file.Get(self.treename) :
raise RuntimeError, "You are booking two Trees with the same name in the same file"
self.tree = Tree(self.treename, self.name)
self.tree.setDefaultFloatType(getattr(self.cfg_ana, 'defaultFloatType','D')); # or 'F'
self.declareVariables(setup)

Expand Down
1 change: 1 addition & 0 deletions PhysicsTools/Heppy/test/crab/heppy_config.py
30 changes: 30 additions & 0 deletions PhysicsTools/Heppy/test/crab/heppy_crab_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# CRAB configuration used to submit the Heppy example (heppy_config.py) as a
# grid task. Every value below is a plain attribute on the Configuration object.
from WMCore.Configuration import Configuration
config = Configuration()

# General: name of the request and of the local project directory.
config.section_("General")
config.General.requestName = 'CRAB_HEPPY_test_2'
config.General.workArea = 'crab_projects_test_2'

# JobType: heppy_crab_script.sh is the job executable; it runs
# heppy_crab_script.py, which in turn loads heppy_config.py, hence the two
# entries in inputFiles. tree.root is the file heppy_crab_script.py moves into
# the job's working directory when the loop is done.
# NOTE(review): heppy_crab_script.py takes its input file list from
# PSet.process.source.fileNames; presumably CRAB builds that PSet from
# psetName -- confirm.
config.section_("JobType")
config.JobType.pluginName = 'Analysis'
config.JobType.psetName = 'heppy_crab_fake_pset.py'
config.JobType.scriptExe = 'heppy_crab_script.sh'
config.JobType.inputFiles = ['heppy_config.py','heppy_crab_script.py']
config.JobType.outputFiles = ['tree.root']

# Data: input dataset, job splitting and where the output is stored/published.
# NOTE(review): with 'FileBased' splitting, unitsPerJob = 1 presumably means
# one input file per job -- confirm against the CRAB documentation.
# NOTE(review): outLFN and publishDataName look user-specific (arizzi);
# presumably they have to be adapted before submitting.
config.section_("Data")
config.Data.inputDataset = '/WH_HToBB_WToLNu_M-125_13TeV_powheg-herwigpp/Phys14DR-PU40bx25_PHYS14_25_V1-v1/MINIAODSIM'
config.Data.inputDBS = 'global'
config.Data.splitting = 'FileBased'
config.Data.unitsPerJob = 1
config.Data.outLFN = '/store/user/arizzi/CRABHeppyTest1/'
config.Data.publication = True
config.Data.publishDataName = 'CRAB_HEPPY_Test1'

# Site: storage site for the job output.
config.section_("Site")
config.Site.storageSite = "T2_IT_Rome"

# If you uncomment the ignoreLocality line below you also have to edit
# heppy_crab_script.py and uncomment this line there:
# #crabFiles[i]="root://cms-xrd-global.cern.ch/"+crabFiles[i]
#config.Data.ignoreLocality = True
4 changes: 4 additions & 0 deletions PhysicsTools/Heppy/test/crab/heppy_crab_fake_pset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Placeholder parameter set referenced as psetName in heppy_crab_config.py.
# It only declares a process named 'FAKE' with an empty PoolSource file list
# and a maxEvents block; no modules or paths are defined here.
# NOTE(review): heppy_crab_script.py reads PSet.process.source.fileNames at run
# time, so presumably CRAB fills fileNames per job -- confirm.
import FWCore.ParameterSet.Config as cms
process = cms.Process('FAKE')
process.source = cms.Source("PoolSource", fileNames = cms.untracked.vstring())
process.maxEvents = cms.untracked.PSet(input = cms.untracked.int32(10))
63 changes: 63 additions & 0 deletions PhysicsTools/Heppy/test/crab/heppy_crab_script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#!/usr/bin/env python
"""Driver run inside a CRAB job (via heppy_crab_script.sh) to execute Heppy.

Steps, in order:
  1. read the input file names from ``PSet.process.source.fileNames`` and
     resolve each of them with ``edmFileUtil -d``;
  2. load ``heppy_config.py`` and point its first component at those files;
  3. run a Looper that writes into ``Output/`` and move ``Output/tree.root``
     into the current directory;
  4. write a bare-minimum ``FrameworkJobReport.xml``.

The print calls use a single pre-formatted argument so that the output is the
same whether the interpreter treats ``print`` as a statement or a function.
"""
import imp
import os
import subprocess
import sys

# probably easier to fetch everything without subdirs, but that's up to user preferences
#import PhysicsTools.HeppyCore.framework.config as cfg
#cfg.Analyzer.nosubdir=True

import PSet


def _decode_lfn(lfn):
    """Return the stdout of ``edmFileUtil -d <lfn>`` with newlines removed.

    The command is started from an argument list, so the file name is never
    interpreted by a shell. An empty string is returned when the tool cannot
    be started.
    """
    try:
        proc = subprocess.Popen(["edmFileUtil", "-d", lfn],
                                stdout=subprocess.PIPE,
                                universal_newlines=True)
    except OSError:
        return ""
    return proc.communicate()[0].replace("\n", "")


print("ARGV: %s" % (sys.argv,))
# Not used further below; reading it still fails early if the job number
# argument was not passed.
JobNumber = sys.argv[1]
crabFiles = PSet.process.source.fileNames
print(crabFiles)
print("--------------- using edmFileUtil to convert LFN to PFN -------------------------")
for i in range(len(crabFiles)):
    pfn = _decode_lfn(crabFiles[i])
    if pfn:
        print("%s -> %s" % (crabFiles[i], pfn))
        crabFiles[i] = pfn
    else:
        # Do not replace a usable name with an empty string.
        print("WARNING: edmFileUtil gave no result for %s, keeping it unchanged" % crabFiles[i])
    #crabFiles[i]="root://cms-xrd-global.cern.ch/"+crabFiles[i]

# Load the user's Heppy configuration; the handle is closed even if loading fails.
with open("heppy_config.py", 'r') as handle:
    cfo = imp.load_source("heppy_config", "heppy_config.py", handle)
config = cfo.config

#replace files with crab ones, no splitting beyond what crab give us
config.components[0].files = crabFiles

#Use a simple self configured looper so that we know where the output goes
from PhysicsTools.HeppyCore.framework.looper import Looper
looper = Looper('Output', config, nPrint=1)
looper.loop()
looper.write()

#place the file in the main folder
os.rename("Output/tree.root", "tree.root")

#create bare minimum FJR
fwkreport='''
<FrameworkJobReport>
<ReadBranches>
</ReadBranches>
<PerformanceReport>
<PerformanceSummary Metric="StorageStatistics">
<Metric Name="Parameter-untracked-bool-enabled" Value="true"/>
<Metric Name="Parameter-untracked-bool-stats" Value="true"/>
<Metric Name="Parameter-untracked-string-cacheHint" Value="application-only"/>
<Metric Name="Parameter-untracked-string-readHint" Value="auto-detect"/>
<Metric Name="ROOT-tfile-read-totalMegabytes" Value="0"/>
<Metric Name="ROOT-tfile-write-totalMegabytes" Value="0"/>
</PerformanceSummary>
</PerformanceReport>

<GeneratorInfo>
</GeneratorInfo>
</FrameworkJobReport>
'''

# Context manager so the report is flushed and closed before the job ends.
with open('./FrameworkJobReport.xml', 'w+') as f1:
    f1.write(fwkreport)
13 changes: 13 additions & 0 deletions PhysicsTools/Heppy/test/crab/heppy_crab_script.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Wrapper run as the CRAB job executable: installs the shipped lib/src/module/
# python directories into $CMSSW_BASE, then runs heppy_crab_script.py with the
# first argument it received.
echo "================== START OF HEPPY CRAB SCRIPT========================="

# Abort if CMSSW_BASE is unset or empty: otherwise the rm -rf calls below
# would expand to /lib/, /src/, /module/ and /python/.
: "${CMSSW_BASE:?CMSSW_BASE is not set, refusing to unpack libs}"

echo "Unpacking libs"
rm -rf "$CMSSW_BASE/lib/"
rm -rf "$CMSSW_BASE/src/"
rm -rf "$CMSSW_BASE/module/"
rm -rf "$CMSSW_BASE/python/"
mv lib "$CMSSW_BASE/lib"
mv src "$CMSSW_BASE/src"
mv module "$CMSSW_BASE/module"
mv python "$CMSSW_BASE/python"

echo "Running of Heppy"
# ${1+"$1"} forwards the first argument quoted when it was given and forwards
# nothing at all when it was not.
python heppy_crab_script.py ${1+"$1"}
status=$?
echo "============= END OF HEPPY CRAB SCRIPT ========================="
# Report the Python script's result instead of the status of the echo above.
exit $status
168 changes: 168 additions & 0 deletions PhysicsTools/Heppy/test/example_autofill_multipleTrees.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
#! /usr/bin/env python
# This example shows how to have multiple Trees, either in the same file or in different files.
# In particular, here we create a second tree producer containing only generator information and then,
# cloning it into two copies, we store it both in the same file as the main tree and in a separate file.

import ROOT
import PhysicsTools.HeppyCore.framework.config as cfg
# Avoid creating subdirs. If subdirs are wanted, each treeProducer should have
# a different name (set name="blabla" in its config).
cfg.Analyzer.nosubdir=True

# The content of the main output tree is defined here.
# The definitions of the NTupleObjects are located under
# PhysicsTools/Heppy/python/analyzers/objects/autophobj.py

from PhysicsTools.Heppy.analyzers.core.AutoFillTreeProducer import *
# Main tree producer. It sets neither treename nor outservicename, so
# TreeAnalyzerNumpy falls back to its getattr defaults for both.
treeProducer= cfg.Analyzer(
    class_object=AutoFillTreeProducer,
    verbose=False,
    vectorTree = True,
    # here the list of simple event variables (floats, int) can be specified
    globalVariables = [
        NTupleVariable("rho", lambda ev: ev.rho, float, help="jets rho"),
    ],
    # here one can specify compound objects
    globalObjects = {
        "met" : NTupleObject("met", metType, help="PF E_{T}^{miss}, after default type 1 corrections"),
    },
    collections = {
        # The following would just store the electrons and muons from miniaod without any selection or cleaning;
        # only the basic particle information is saved
        #"slimmedMuons" : ( AutoHandle( ("slimmedMuons",), "std::vector<pat::Muon>" ),
        # NTupleCollection("mu", particleType, 4, help="patMuons, directly from MINIAOD") ),
        #"slimmedElectron" : ( AutoHandle( ("slimmedElectrons",), "std::vector<pat::Electron>" ),
        # NTupleCollection("ele", particleType, 4, help="patElectron, directly from MINIAOD") ),

        # standard dumping of objects
        "selectedLeptons" : NTupleCollection("leptons", leptonType, 8, help="Leptons after the preselection"),
        "selectedTaus" : NTupleCollection("TauGood", tauType, 3, help="Taus after the preselection"),
        "cleanJets" : NTupleCollection("Jet", jetType, 8, help="Cental jets after full selection and cleaning, sorted by b-tag"),
        # dump of gen objects
        "gentopquarks" : NTupleCollection("GenTop", genParticleType, 2, help="Generated top quarks from hard scattering"),
        "genbquarks" : NTupleCollection("GenBQuark", genParticleType, 2, help="Generated bottom quarks from top quark decays"),
        "genwzquarks" : NTupleCollection("GenQuark", genParticleType, 6, help="Generated quarks from W/Z decays"),
        "genleps" : NTupleCollection("GenLep", genParticleType, 6, help="Generated leptons from W/Z decays"),
        "gentauleps" : NTupleCollection("GenLepFromTau", genParticleType, 6, help="Generated leptons from decays of taus from W/Z/h decays"),

    }
)

# Make a lightweight dump containing only generator information.
# treename gives this tree its own name ("genonly"), so it does not share a
# tree name with the main producer above, which sets none.
treeProducer2= cfg.Analyzer(
    treename="genonly",
    ignoreAnalyzerBookings=True, # we do not want trigger bits here or any other central booking
    class_object=AutoFillTreeProducer,
    verbose=False,
    vectorTree = True,
    collections = {
        # dump of gen objects
        "gentopquarks" : NTupleCollection("GenTop", genParticleType, 2, help="Generated top quarks from hard scattering"),
        "genbquarks" : NTupleCollection("GenBQuark", genParticleType, 2, help="Generated bottom quarks from top quark decays"),
        "genwzquarks" : NTupleCollection("GenQuark", genParticleType, 6, help="Generated quarks from W/Z decays"),
        "genleps" : NTupleCollection("GenLep", genParticleType, 6, help="Generated leptons from W/Z decays"),
        "gentauleps" : NTupleCollection("GenLepFromTau", genParticleType, 6, help="Generated leptons from decays of taus from W/Z/h decays"),

    }
)

# Create a copy of treeProducer2 that differs in two ways:
#  - filter: a callable taking the event; here it is true only for events
#    whose "genbquarks" attribute is a non-empty collection (a missing
#    attribute counts as empty);
#  - outservicename: the name of the output service to write to, here
#    "genonlyfile", so this copy ends up in a separate file.
from copy import deepcopy
treeProducer3 = deepcopy(treeProducer2)
treeProducer3.filter = lambda ev : len(getattr(ev,"genbquarks",[])) > 0 # select only events with b-quarks
treeProducer3.outservicename="genonlyfile"



# Import the standard analyzers and take their default configuration.
# Each *Ana name is bound directly to the class attribute (defaultConfig, or
# defaultEventFlagsConfig for the event-flag bits); no copy is made here.
from PhysicsTools.Heppy.analyzers.objects.LeptonAnalyzer import LeptonAnalyzer
LepAna = LeptonAnalyzer.defaultConfig
from PhysicsTools.Heppy.analyzers.objects.VertexAnalyzer import VertexAnalyzer
VertexAna = VertexAnalyzer.defaultConfig
from PhysicsTools.Heppy.analyzers.objects.PhotonAnalyzer import PhotonAnalyzer
PhoAna = PhotonAnalyzer.defaultConfig
from PhysicsTools.Heppy.analyzers.objects.TauAnalyzer import TauAnalyzer
TauAna = TauAnalyzer.defaultConfig
from PhysicsTools.Heppy.analyzers.objects.JetAnalyzer import JetAnalyzer
JetAna = JetAnalyzer.defaultConfig
from PhysicsTools.Heppy.analyzers.gen.LHEAnalyzer import LHEAnalyzer
LHEAna = LHEAnalyzer.defaultConfig
from PhysicsTools.Heppy.analyzers.gen.GeneratorAnalyzer import GeneratorAnalyzer
GenAna = GeneratorAnalyzer.defaultConfig
from PhysicsTools.Heppy.analyzers.objects.METAnalyzer import METAnalyzer
METAna = METAnalyzer.defaultConfig
from PhysicsTools.Heppy.analyzers.core.PileUpAnalyzer import PileUpAnalyzer
PUAna = PileUpAnalyzer.defaultConfig
from PhysicsTools.Heppy.analyzers.core.TriggerBitAnalyzer import TriggerBitAnalyzer
FlagsAna = TriggerBitAnalyzer.defaultEventFlagsConfig

# Configure the trigger bit analyzer.
from PhysicsTools.Heppy.analyzers.core.TriggerBitAnalyzer import TriggerBitAnalyzer
TrigAna = cfg.Analyzer(
    verbose=False,
    class_object=TriggerBitAnalyzer,
    # Grouping several paths into a single flag;
    # v* can be used to ignore the version of a path.
    # Each path is listed once per group: the 'ELE' group used to repeat
    # "HLT_Ele32_eta2p1_WP85_Gsf_v*", which adds nothing to the group's OR.
    triggerBits={
        'ELE': ["HLT_Ele23_Ele12_CaloId_TrackId_Iso_v*", "HLT_Ele32_eta2p1_WP85_Gsf_v*"],
        'MU': ["HLT_Mu17_TrkIsoVVL_TkMu8_TrkIsoVVL_v*", "HLT_Mu17_TrkIsoVVL_Mu8_TrkIsoVVL_v*", "HLT_IsoTkMu24_eta2p1_IterTrk02_v*", "HLT_IsoTkMu24_IterTrk02_v*"],
    },
    # processName='HLT',
    # outprefix='HLT'
    # Setting 'unrollbits' to true will not only store the OR for each set of
    # trigger bits but also the individual bits.
    # Caveat: this does not unroll the version numbers.
    unrollbits=True
)



# Replace some parameters of the default configurations.
# NOTE(review): LepAna is the very object stored in
# LeptonAnalyzer.defaultConfig (plain assignment, no copy), so unless that
# attribute hands out a fresh object on each access this changes the shared
# default -- confirm.
LepAna.loose_muon_pt = 10

# List of analyzer configurations passed to cfg.Config as 'sequence'; the
# three tree producers are listed last, after all other analyzers.
sequence = [LHEAna,FlagsAna, GenAna, PUAna,TrigAna,VertexAna,LepAna,TauAna,PhoAna,JetAna,METAna,treeProducer,treeProducer2,treeProducer3]

# Use the TFile service to provide a single TFile to all modules, where they
# can write any ROOT object. If the service name is 'outputfile', or the one
# specified in a treeProducer (outservicename), the treeProducer uses this
# file as well.
from PhysicsTools.HeppyCore.framework.services.tfile import TFileService
# Service named 'outputfile', writing tree.root.
output_service = cfg.Service(
    TFileService,
    'outputfile',
    name="outputfile",
    fname='tree.root',
    option='recreate'
)
# Second service, named 'genonlyfile' (the outservicename set on
# treeProducer3), writing treegen.root.
output_service2= cfg.Service(
    TFileService,
    'genonlyfile',
    name="genonlyfile",
    fname='treegen.root',
    option='recreate'
)


# The import and the miniAodFiles() call below exist only for automatic
# testing; they are not needed when running on your own samples.
from PhysicsTools.Heppy.utils.miniAodFiles import miniAodFiles
testfiles = miniAodFiles()
print("Running on test file %s" % testfiles)

# Single MC component to run on.
sample = cfg.MCComponent(
    # specify the file you want to run on
    # files = ["/scratch/arizzi/Hbb/CMSSW_7_2_2_patch2/src/VHbbAnalysis/Heppy/test/ZLL-8A345C56-6665-E411-9C25-1CC1DE04DF20.root"],
    files=testfiles,
    name="SingleSample",
    isMC=True,
    isEmbed=False
)

# Module-level 'config' object: declared so that this cfg can also be given
# as input to the heppy.py script.
from PhysicsTools.HeppyCore.framework.eventsfwlite import Events
selectedComponents = [sample]
config = cfg.Config(
    components=selectedComponents,
    sequence=sequence,
    services=[output_service, output_service2],
    events_class=Events
)

# When executed directly (python filename.py), run the loop right here.
if __name__ == '__main__':
    from PhysicsTools.HeppyCore.framework.looper import Looper
    looper = Looper('Loop', config, nPrint=5, nEvents=300)
    looper.loop()
    looper.write()
4 changes: 2 additions & 2 deletions PhysicsTools/Heppy/test/example_autofill_wCmsRun.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,8 @@

sample = cfg.Component(
#specify the file you want to run on
files = ["/scratch/arizzi/Hbb/CMSSW_7_2_2_patch2/src/VHbbAnalysis/Heppy/test/ZLL-8A345C56-6665-E411-9C25-1CC1DE04DF20.root"],
# files = testfiles,
# files = ["/scratch/arizzi/Hbb/CMSSW_7_2_2_patch2/src/VHbbAnalysis/Heppy/test/ZLL-8A345C56-6665-E411-9C25-1CC1DE04DF20.root"],
files = testfiles,
name="SingleSample", isMC=False,isEmbed=False
)

Expand Down
1 change: 1 addition & 0 deletions PhysicsTools/HeppyCore/python/framework/heppy.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@ def main( options, args ):

parser.add_option("-N", "--nevents",
dest="nevents",
type="int",
help="number of events to process",
default=None)
parser.add_option("-p", "--nprint",
Expand Down
8 changes: 7 additions & 1 deletion PhysicsTools/HeppyCore/python/framework/looper.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,13 @@ def __init__( self, name,
if len(self.cfg_comp.files)==0:
errmsg = 'please provide at least an input file in the files attribute of this component\n' + str(self.cfg_comp)
raise ValueError( errmsg )
self.events = config.events_class(self.cfg_comp.files, tree_name)
if hasattr(config,"preprocessor") and config.preprocessor is not None :
self.cfg_comp = config.preprocessor.run(self.cfg_comp,self.outDir,firstEvent,nEvents)
if hasattr(self.cfg_comp,"options"):
print self.cfg_comp.files,self.cfg_comp.options
self.events = config.events_class(self.cfg_comp.files, tree_name,options=self.cfg_comp.options)
else :
self.events = config.events_class(self.cfg_comp.files, tree_name)
if hasattr(self.cfg_comp, 'fineSplit'):
fineSplitIndex, fineSplitFactor = self.cfg_comp.fineSplit
if fineSplitFactor > 1:
Expand Down