Skip to content

Commit

Permalink
Merge pull request #264 from gpetruc/heppy_pr22
Browse files Browse the repository at this point in the history
Heppy upstream bugfixes and crab cfg examples
  • Loading branch information
gpetruc committed Feb 18, 2015
2 parents 8226035 + 90d8f6a commit 18328e1
Show file tree
Hide file tree
Showing 11 changed files with 299 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def declareVariables(self,setup):
tree = self.tree
self.declareCoreVariables(tree, isMC)

if not hasattr(self.cfg_ana,"ignoreAnalyzerBookings") or not self.cfg_ana.ignoreAnalyzerBooking :
if not hasattr(self.cfg_ana,"ignoreAnalyzerBookings") or not self.cfg_ana.ignoreAnalyzerBookings :
#import variables declared by the analyzers
if hasattr(setup,"globalVariables"):
self.globalVariables+=setup.globalVariables
Expand Down Expand Up @@ -137,6 +137,9 @@ def fillCoreVariables(self, tr, event, isMC):
tr.vfill('pdfWeight_%s' % pdf, event.pdfWeights[pdf])

def process(self, event):
if hasattr(self.cfg_ana,"filter") :
if not self.cfg_ana.filter(event) :
return True #do not stop processing, just filter myself
self.readCollections( event.input)
self.fillTree(event)

Expand Down
11 changes: 6 additions & 5 deletions PhysicsTools/Heppy/python/analyzers/core/TreeAnalyzerNumpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,12 @@ class TreeAnalyzerNumpy( Analyzer ):

def __init__(self, cfg_ana, cfg_comp, looperName):
super(TreeAnalyzerNumpy,self).__init__(cfg_ana, cfg_comp, looperName)
self.outservicename = "outputfile"
if hasattr(cfg_ana,"outservicename") :
self.outservicename = cfg_ana.outservicename
self.outservicename = getattr(cfg_ana,"outservicename","outputfile")
self.treename = getattr(cfg_ana,"treename","tree")


def beginLoop(self, setup) :
super(TreeAnalyzerNumpy, self).beginLoop(setup)
print setup.services
if self.outservicename in setup.services:
print "Using outputfile given in", self.outservicename
self.file = setup.services[self.outservicename].file
Expand All @@ -28,7 +26,10 @@ def beginLoop(self, setup) :
isCompressed = self.cfg_ana.isCompressed if hasattr(self.cfg_ana,'isCompressed') else 1
print 'Compression', isCompressed
self.file = TFile( fileName, 'recreate', '', isCompressed )
self.tree = Tree('tree', self.name)
self.file.cd()
if self.file.Get(self.treename) :
raise RuntimeError, "You are booking two Trees with the same name in the same file"
self.tree = Tree(self.treename, self.name)
self.tree.setDefaultFloatType(getattr(self.cfg_ana, 'defaultFloatType','D')); # or 'F'
self.declareVariables(setup)

Expand Down
1 change: 1 addition & 0 deletions PhysicsTools/Heppy/test/crab/heppy_config.py
30 changes: 30 additions & 0 deletions PhysicsTools/Heppy/test/crab/heppy_crab_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# CRAB configuration example: runs Heppy on the grid through a custom
# script executable (heppy_crab_script.sh) instead of a real cmsRun job.
from WMCore.Configuration import Configuration
config = Configuration()

# General: name of the request and local directory holding the CRAB project
config.section_("General")
config.General.requestName = 'CRAB_HEPPY_test_2'
config.General.workArea = 'crab_projects_test_2'

# JobType: the pset is only a placeholder (see heppy_crab_fake_pset.py);
# the actual work is done by the script, which runs heppy_crab_script.py.
config.section_("JobType")
config.JobType.pluginName = 'Analysis'
config.JobType.psetName = 'heppy_crab_fake_pset.py'
config.JobType.scriptExe = 'heppy_crab_script.sh'
# files shipped with the job: the Heppy configuration and the python driver
config.JobType.inputFiles = ['heppy_config.py','heppy_crab_script.py']
# heppy_crab_script.py moves Output/tree.root to ./tree.root before finishing
config.JobType.outputFiles = ['tree.root']

# Data: input dataset, splitting (one unit per job, file based) and output/publication settings
config.section_("Data")
config.Data.inputDataset = '/WH_HToBB_WToLNu_M-125_13TeV_powheg-herwigpp/Phys14DR-PU40bx25_PHYS14_25_V1-v1/MINIAODSIM'
config.Data.inputDBS = 'global'
config.Data.splitting = 'FileBased'
config.Data.unitsPerJob = 1
# NOTE(review): user-specific output path; presumably must be changed to your own area - confirm
config.Data.outLFN = '/store/user/arizzi/CRABHeppyTest1/'
config.Data.publication = True
config.Data.publishDataName = 'CRAB_HEPPY_Test1'

# Site: storage site for the job output
config.section_("Site")
config.Site.storageSite = "T2_IT_Rome"

# If you uncomment the last line below, you must also edit
# heppy_crab_script.py and uncomment this line there:
# #crabFiles[i]="root://cms-xrd-global.cern.ch/"+crabFiles[i]
#config.Data.ignoreLocality = True
4 changes: 4 additions & 0 deletions PhysicsTools/Heppy/test/crab/heppy_crab_fake_pset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Placeholder cmsRun configuration, referenced as JobType.psetName in
# heppy_crab_config.py. It defines only an empty PoolSource and a maxEvents
# block; heppy_crab_script.py later reads PSet.process.source.fileNames,
# presumably from the job-specific version of this pset that CRAB provides
# on the worker node (TODO confirm).
import FWCore.ParameterSet.Config as cms
process = cms.Process('FAKE')
process.source = cms.Source("PoolSource", fileNames = cms.untracked.vstring())
process.maxEvents = cms.untracked.PSet(input = cms.untracked.int32(10))
63 changes: 63 additions & 0 deletions PhysicsTools/Heppy/test/crab/heppy_crab_script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#!/usr/bin/env python
import os
# probably easier to fetch everything without subdirs, but that's up to user preferences
#import PhysicsTools.HeppyCore.framework.config as cfg
#cfg.Analyzer.nosubdir=True

import PSet
import sys
import re
print "ARGV:",sys.argv
JobNumber=sys.argv[1]
crabFiles=PSet.process.source.fileNames
print crabFiles
firstInput = crabFiles[0]
print "--------------- using edmFileUtil to convert PFN to LFN -------------------------"
for i in xrange(0,len(crabFiles)) :
pfn=os.popen("edmFileUtil -d %s"%(crabFiles[i])).read()
pfn=re.sub("\n","",pfn)
print crabFiles[i],"->",pfn
crabFiles[i]=pfn
#crabFiles[i]="root://cms-xrd-global.cern.ch/"+crabFiles[i]

import imp
handle = open("heppy_config.py", 'r')
cfo = imp.load_source("heppy_config", "heppy_config.py", handle)
config = cfo.config
handle.close()

#replace files with crab ones, no splitting beyond what crab give us
config.components[0].files=crabFiles

#Use a simple self configured looper so that we know where the output goes
from PhysicsTools.HeppyCore.framework.looper import Looper
looper = Looper( 'Output', config, nPrint = 1)
looper.loop()
looper.write()

#place the file in the main folder
os.rename("Output/tree.root", "tree.root")

#create bare minimum FJR
fwkreport='''
<FrameworkJobReport>
<ReadBranches>
</ReadBranches>
<PerformanceReport>
<PerformanceSummary Metric="StorageStatistics">
<Metric Name="Parameter-untracked-bool-enabled" Value="true"/>
<Metric Name="Parameter-untracked-bool-stats" Value="true"/>
<Metric Name="Parameter-untracked-string-cacheHint" Value="application-only"/>
<Metric Name="Parameter-untracked-string-readHint" Value="auto-detect"/>
<Metric Name="ROOT-tfile-read-totalMegabytes" Value="0"/>
<Metric Name="ROOT-tfile-write-totalMegabytes" Value="0"/>
</PerformanceSummary>
</PerformanceReport>
<GeneratorInfo>
</GeneratorInfo>
</FrameworkJobReport>
'''

f1=open('./FrameworkJobReport.xml', 'w+')
f1.write(fwkreport)
13 changes: 13 additions & 0 deletions PhysicsTools/Heppy/test/crab/heppy_crab_script.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Script executable for the Heppy CRAB job: replaces the lib, src, module and
# python areas under $CMSSW_BASE with the directories of the same name found
# in the current directory, then runs heppy_crab_script.py with the first
# argument given to this script.
echo "================== START OF HEPPY CRAB SCRIPT========================="
echo "Unpacking libs"
# Abort if CMSSW_BASE is unset or empty: otherwise the rm -rf below would
# expand to /lib/, /src/, /module/ and /python/.
: "${CMSSW_BASE:?CMSSW_BASE is not set, refusing to remove directories}"
# remove all the old areas first, then move the shipped ones in place
for area in lib src module python; do
    rm -rf "$CMSSW_BASE/$area/"
done
for area in lib src module python; do
    mv "$area" "$CMSSW_BASE/$area"
done
echo "Running of Heppy"
python heppy_crab_script.py "$1"
echo "============= END OF HEPPY CRAB SCRIPT ========================="
168 changes: 168 additions & 0 deletions PhysicsTools/Heppy/test/example_autofill_multipleTrees.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
#! /usr/bin/env python
# This example shows how to have multiple Tree either in the same file on in different file.
# In particular here we create a second tree producer containing only information and then,
# cloning it in two copies, we store it both in the same file as the main tree and in separate file

import ROOT
import PhysicsTools.HeppyCore.framework.config as cfg
# avoid creating subdirs, in case subdirs are wanted the treeProducer should have different names (set name="blabla" in the config)
cfg.Analyzer.nosubdir=True

# The content of the output tree is defined here
# the definitions of the NtupleObjects are located under PhysicsTools/Heppy/pythonanalyzers/objects/autophobj.py

from PhysicsTools.Heppy.analyzers.core.AutoFillTreeProducer import *
treeProducer= cfg.Analyzer(
class_object=AutoFillTreeProducer,
verbose=False,
vectorTree = True,
#here the list of simple event variables (floats, int) can be specified
globalVariables = [
NTupleVariable("rho", lambda ev: ev.rho, float, help="jets rho"),
],
#here one can specify compound objects
globalObjects = {
"met" : NTupleObject("met", metType, help="PF E_{T}^{miss}, after default type 1 corrections"),
},
collections = {
#The following would just store the electrons and muons from miniaod without any selection or cleaning
# only the basice particle information is saved
#"slimmedMuons" : ( AutoHandle( ("slimmedMuons",), "std::vector<pat::Muon>" ),
# NTupleCollection("mu", particleType, 4, help="patMuons, directly from MINIAOD") ),
#"slimmedElectron" : ( AutoHandle( ("slimmedElectrons",), "std::vector<pat::Electron>" ),
# NTupleCollection("ele", particleType, 4, help="patElectron, directly from MINIAOD") ),

#standard dumping of objects
"selectedLeptons" : NTupleCollection("leptons", leptonType, 8, help="Leptons after the preselection"),
"selectedTaus" : NTupleCollection("TauGood", tauType, 3, help="Taus after the preselection"),
"cleanJets" : NTupleCollection("Jet", jetType, 8, help="Cental jets after full selection and cleaning, sorted by b-tag"),
#dump of gen objects
"gentopquarks" : NTupleCollection("GenTop", genParticleType, 2, help="Generated top quarks from hard scattering"),
"genbquarks" : NTupleCollection("GenBQuark", genParticleType, 2, help="Generated bottom quarks from top quark decays"),
"genwzquarks" : NTupleCollection("GenQuark", genParticleType, 6, help="Generated quarks from W/Z decays"),
"genleps" : NTupleCollection("GenLep", genParticleType, 6, help="Generated leptons from W/Z decays"),
"gentauleps" : NTupleCollection("GenLepFromTau", genParticleType, 6, help="Generated leptons from decays of taus from W/Z/h decays"),

}
)

#make a light weight dump containing only generator information
treeProducer2= cfg.Analyzer(
treename="genonly",
ignoreAnalyzerBookings=True, #we do not want trigger bits here or any other central booking
class_object=AutoFillTreeProducer,
verbose=False,
vectorTree = True,
collections = {
#dump of gen objects
"gentopquarks" : NTupleCollection("GenTop", genParticleType, 2, help="Generated top quarks from hard scattering"),
"genbquarks" : NTupleCollection("GenBQuark", genParticleType, 2, help="Generated bottom quarks from top quark decays"),
"genwzquarks" : NTupleCollection("GenQuark", genParticleType, 6, help="Generated quarks from W/Z decays"),
"genleps" : NTupleCollection("GenLep", genParticleType, 6, help="Generated leptons from W/Z decays"),
"gentauleps" : NTupleCollection("GenLepFromTau", genParticleType, 6, help="Generated leptons from decays of taus from W/Z/h decays"),

}
)

#create a copy of tree producer with the difference that it stores it in a separate file
from copy import deepcopy
treeProducer3 = deepcopy(treeProducer2)
treeProducer3.filter = lambda ev : len(getattr(ev,"genbquarks",[])) > 0 # select only events with b-quarks
treeProducer3.outservicename="genonlyfile"



# Import standard analyzers and take their default config
from PhysicsTools.Heppy.analyzers.objects.LeptonAnalyzer import LeptonAnalyzer
LepAna = LeptonAnalyzer.defaultConfig
from PhysicsTools.Heppy.analyzers.objects.VertexAnalyzer import VertexAnalyzer
VertexAna = VertexAnalyzer.defaultConfig
from PhysicsTools.Heppy.analyzers.objects.PhotonAnalyzer import PhotonAnalyzer
PhoAna = PhotonAnalyzer.defaultConfig
from PhysicsTools.Heppy.analyzers.objects.TauAnalyzer import TauAnalyzer
TauAna = TauAnalyzer.defaultConfig
from PhysicsTools.Heppy.analyzers.objects.JetAnalyzer import JetAnalyzer
JetAna = JetAnalyzer.defaultConfig
from PhysicsTools.Heppy.analyzers.gen.LHEAnalyzer import LHEAnalyzer
LHEAna = LHEAnalyzer.defaultConfig
from PhysicsTools.Heppy.analyzers.gen.GeneratorAnalyzer import GeneratorAnalyzer
GenAna = GeneratorAnalyzer.defaultConfig
from PhysicsTools.Heppy.analyzers.objects.METAnalyzer import METAnalyzer
METAna = METAnalyzer.defaultConfig
from PhysicsTools.Heppy.analyzers.core.PileUpAnalyzer import PileUpAnalyzer
PUAna = PileUpAnalyzer.defaultConfig
from PhysicsTools.Heppy.analyzers.core.TriggerBitAnalyzer import TriggerBitAnalyzer
FlagsAna = TriggerBitAnalyzer.defaultEventFlagsConfig

# Configure trigger bit analyzer
from PhysicsTools.Heppy.analyzers.core.TriggerBitAnalyzer import TriggerBitAnalyzer
TrigAna= cfg.Analyzer(
verbose=False,
class_object=TriggerBitAnalyzer,
#grouping several paths into a single flag
# v* can be used to ignore the version of a path
triggerBits={
'ELE':["HLT_Ele23_Ele12_CaloId_TrackId_Iso_v*","HLT_Ele32_eta2p1_WP85_Gsf_v*","HLT_Ele32_eta2p1_WP85_Gsf_v*"],
'MU': ["HLT_Mu17_TrkIsoVVL_TkMu8_TrkIsoVVL_v*","HLT_Mu17_TrkIsoVVL_Mu8_TrkIsoVVL_v*","HLT_IsoTkMu24_eta2p1_IterTrk02_v*","HLT_IsoTkMu24_IterTrk02_v*"],
},
# processName='HLT',
# outprefix='HLT'
#setting 'unrollbits' to true will not only store the OR for each set of trigger bits but also the individual bits
#caveat: this does not unroll the version numbers
unrollbits=True
)



#replace some parameters
LepAna.loose_muon_pt = 10

sequence = [LHEAna,FlagsAna, GenAna, PUAna,TrigAna,VertexAna,LepAna,TauAna,PhoAna,JetAna,METAna,treeProducer,treeProducer2,treeProducer3]

#use tfile service to provide a single TFile to all modules where they
#can write any root object. If the name is 'outputfile' or the one specified in treeProducer
#also the treeProducer uses this file
from PhysicsTools.HeppyCore.framework.services.tfile import TFileService
output_service = cfg.Service(
TFileService,
'outputfile',
name="outputfile",
fname='tree.root',
option='recreate'
)
output_service2= cfg.Service(
TFileService,
'genonlyfile',
name="genonlyfile",
fname='treegen.root',
option='recreate'
)


# the following two lines are just for automatic testing
# they are not needed for running on your own samples
from PhysicsTools.Heppy.utils.miniAodFiles import miniAodFiles
testfiles=miniAodFiles()
print "Running on test file %s" % testfiles

sample = cfg.MCComponent(
#specify the file you want to run on
# files = ["/scratch/arizzi/Hbb/CMSSW_7_2_2_patch2/src/VHbbAnalysis/Heppy/test/ZLL-8A345C56-6665-E411-9C25-1CC1DE04DF20.root"],
files = testfiles,
name="SingleSample", isMC=True,isEmbed=False
)

# the following is declared in case this cfg is used in input to the heppy.py script
from PhysicsTools.HeppyCore.framework.eventsfwlite import Events
selectedComponents = [sample]
config = cfg.Config( components = selectedComponents,
sequence = sequence,
services = [output_service,output_service2],
events_class = Events)

# and the following runs the process directly if running as with python filename.py
if __name__ == '__main__':
from PhysicsTools.HeppyCore.framework.looper import Looper
looper = Looper( 'Loop', config, nPrint = 5,nEvents=300)
looper.loop()
looper.write()
4 changes: 2 additions & 2 deletions PhysicsTools/Heppy/test/example_autofill_wCmsRun.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,8 @@

sample = cfg.Component(
#specify the file you want to run on
files = ["/scratch/arizzi/Hbb/CMSSW_7_2_2_patch2/src/VHbbAnalysis/Heppy/test/ZLL-8A345C56-6665-E411-9C25-1CC1DE04DF20.root"],
# files = testfiles,
# files = ["/scratch/arizzi/Hbb/CMSSW_7_2_2_patch2/src/VHbbAnalysis/Heppy/test/ZLL-8A345C56-6665-E411-9C25-1CC1DE04DF20.root"],
files = testfiles,
name="SingleSample", isMC=False,isEmbed=False
)

Expand Down
1 change: 1 addition & 0 deletions PhysicsTools/HeppyCore/python/framework/heppy.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@ def main( options, args ):

parser.add_option("-N", "--nevents",
dest="nevents",
type="int",
help="number of events to process",
default=None)
parser.add_option("-p", "--nprint",
Expand Down
8 changes: 7 additions & 1 deletion PhysicsTools/HeppyCore/python/framework/looper.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,13 @@ def __init__( self, name,
if len(self.cfg_comp.files)==0:
errmsg = 'please provide at least an input file in the files attribute of this component\n' + str(self.cfg_comp)
raise ValueError( errmsg )
self.events = config.events_class(self.cfg_comp.files, tree_name)
if hasattr(config,"preprocessor") and config.preprocessor is not None :
self.cfg_comp = config.preprocessor.run(self.cfg_comp,self.outDir,firstEvent,nEvents)
if hasattr(self.cfg_comp,"options"):
print self.cfg_comp.files,self.cfg_comp.options
self.events = config.events_class(self.cfg_comp.files, tree_name,options=self.cfg_comp.options)
else :
self.events = config.events_class(self.cfg_comp.files, tree_name)
if hasattr(self.cfg_comp, 'fineSplit'):
fineSplitIndex, fineSplitFactor = self.cfg_comp.fineSplit
if fineSplitFactor > 1:
Expand Down

0 comments on commit 18328e1

Please sign in to comment.