Merge branch 'ImprovedParsing' into ImprovedParsingMerge

cms-sw · Aug 1, 2020 · 35b32c4 · 35b32c4
2 parents 21e251a + 34306be
commit 35b32c4
Show file tree

Hide file tree

Showing 3 changed files with 170 additions and 10 deletions.
diff --git a/GeneratorInterface/LHEInterface/test/createLHEFormatFromROOTFile.py b/GeneratorInterface/LHEInterface/test/createLHEFormatFromROOTFile.py
@@ -0,0 +1,107 @@
+import ROOT
+import sys
+
+
+"""
+Usage 
+python createLHEFormatFromROOTFile.py inputfile outputfile pdgId_particle_to_undo_decay1 pdgId_particle_to_undo_decay2 pdgId_particle_to_undo_decay3 ...
+"""
+
+args = sys.argv[:]
+
+class HEPPart(object):
+  def __init__(self,event, idx):
+    """
+    Class to organize the description of a particle in the LHE event
+    event : whole event information (usually a entry in the input TTree)
+    idx : the index of the particle inside the LHE file
+    """
+    for att in ["pt","eta","phi", "mass","lifetime","pdgId","status","spin","color1", "color2","mother1","mother2","incomingpz"]:
+      setattr(self, att, getattr(event, "LHEPart_"+att)[idx])
+    self.setP4()
+
+  def setP4(self):
+    self.p4 = ROOT.TLorentzVector()
+    if self.status != -1:
+      self.p4.SetPtEtaPhiM(self.pt, self.eta, self.phi, self.mass)
+    else:
+      self.p4.SetPxPyPzE(0.,0.,self.incomingpz, abs(self.incomingpz))
+  def printPart(self):
+    """ Just to print it pretty """
+    return "       {pdg:d} {status:d}    {mother1:d}    {mother2:d}  {color1:d}  {color2:d} {px:e} {py:e} {pz:e} {energy:e} {mass:e} {time:e} {spin:e}\n".format(pdg=self.pdgId,status=self.status, mother1=self.mother1, mother2=self.mother2, color1=self.color1, color2=self.color2, px=self.p4.Px(), py=self.p4.Py(), pz=self.p4.Pz(), energy=self.p4.E(), mass=self.mass, time=self.lifetime, spin=self.spin)
+
+
+class LHEPrinter(object):
+  def __init__(self,theFile,theTree,outputLHE,undoDecays=[], chunkers=-1, prDict={}):
+    """
+    theFile : path to input root file with the whole LHEinformation
+    theTree : number of ttree inside the file, usually "Events"
+    outputLHE: name of output .lhe file
+    undoDecays: pdgId of particles whose decays we want to undo (i.e. W that are decayed with madspin, as the reweighting is called before madspin)
+    chunkers: process by chunks in case we want to later multithread the jobs. Argument is max size (in events) of the chunks
+    prDict  : a dictionary indicating possible mismatchings between the ProcessID in the new gridpack and the old (matching the numbers at generate p p > x y z  @0 in the run card)
+    """
+
+    self.prDict = prDict
+    self.outputLHE = outputLHE
+    self.fil  = ROOT.TFile(theFile,"OPEN")
+    self.tree = self.fil.Get(theTree)
+    self.undoDecays = undoDecays
+    self.baseheader =  "<event nplo=\" {nplo} \" npnlo=\" {npnlo} \">\n"
+    self.baseline1 = " {nparts}      {prid} {weight} {scale} {aqed} {aqcd}\n"
+    self.baseender = "<rwgt>\n</rwgt>\n</event>\n"
+    self.chunkers = chunkers
+
+  def insideLoop(self):
+    """ Loop over all events and process the root file into a plain text LHE file"""
+    totalEvents = self.tree.GetEntries()
+    if self.chunkers == -1 or self.chunkers > totalEvents:
+      self.output = open(self.outputLHE,"w")
+      self.chunkers = totalEvents + 1
+    else:
+      self.output = open(self.outputLHE+"chunk0","w")
+      chunk = 0
+
+    print "Processing %i events, please wait..."%totalEvents
+    iEv = 0
+    pEv = 0
+    chunk = 0
+    for ev in self.tree:
+      iEv += 1
+      pEv += 1
+      if pEv >= self.chunkers:
+        pEv = 0
+        chunk += 1 
+        self.output.close()
+        self.output = open(self.outputLHE+"chunk%i"%chunk,"w")
+      print "...Event %i/%i"%(iEv, totalEvents)
+      self.process(ev)
+
+  def process(self, ev):
+    """First produce the global line like <event nplo=" -1 " npnlo=" 1 ">"""
+    self.output.write(self.baseheader.format(nplo = ord(str(ev.LHE_NpLO)) if ord(str(ev.LHE_NpLO)) != 255  else  -1, npnlo = ord(str(ev.LHE_NpNLO)) if ord(str(ev.LHE_NpNLO)) != 255  else  -1))
+
+    """Then we need to treat the whole thing to undo the madspin decays, update statuses and rewrite particle order"""
+    lhepart = []
+    deletedIndexes = []
+    for i in range(getattr(ev, "nLHEPart")):
+      testPart = HEPPart(ev,i)
+      testPart.mother1 = testPart.mother1 - sum([1*(testPart.mother1 > d) for d in deletedIndexes])
+      testPart.mother2 = testPart.mother2 - sum([1*(testPart.mother2 > d) for d in deletedIndexes])
+      if testPart.mother1 != 0:
+        if abs(lhepart[testPart.mother1-1].pdgId) in self.undoDecays: #If from something that decays after weighting just skip it and update particle indexes
+          deletedIndexes.append(i)
+          continue
+      if abs(testPart.pdgId) in self.undoDecays:
+        testPart.status = 1 
+      lhepart.append(testPart)
+
+    """ Now we can compute properly the number of particles at LHE """
+    self.output.write(self.baseline1.format(nparts=len(lhepart), prid=self.prDict[str(ord(str(ev.LHE_ProcessID)))], weight=ev.LHEWeight_originalXWGTUP, scale=ev.LHE_Scale,aqed=ev.LHE_AlphaQED,aqcd=ev.LHE_AlphaS))
+    """ And save each particle information """
+    for part in lhepart:
+      self.output.write(part.printPart())   
+    self.output.write(self.baseender)
+
+theP = LHEPrinter(args[1],"Events",args[2],undoDecays=[int(i) for i in args[4:]],chunkers=int(args[3]), prDict={str(i):i for i in range(1000)}) #PRDict by default set to not change anything as it is rare to use it 
+theP.insideLoop()
diff --git a/PhysicsTools/NanoAOD/plugins/LHETablesProducer.cc b/PhysicsTools/NanoAOD/plugins/LHETablesProducer.cc
@@ -17,7 +17,8 @@ class LHETablesProducer : public edm::global::EDProducer<> {
       : lheTag_(edm::vector_transform(params.getParameter<std::vector<edm::InputTag>>("lheInfo"),
                                       [this](const edm::InputTag& tag) { return mayConsume<LHEEventProduct>(tag); })),
         precision_(params.getParameter<int>("precision")),
-        storeLHEParticles_(params.getParameter<bool>("storeLHEParticles")) {
+        storeLHEParticles_(params.getParameter<bool>("storeLHEParticles")),
+        storeAllLHEInfo_(params.getParameter<bool>("storeAllLHEInfo")){
     produces<nanoaod::FlatTable>("LHE");
     if (storeLHEParticles_)
       produces<nanoaod::FlatTable>("LHEPart");
@@ -53,25 +54,58 @@ class LHETablesProducer : public edm::global::EDProducer<> {
     double lheHT = 0, lheHTIncoming = 0;
     unsigned int lheNj = 0, lheNb = 0, lheNc = 0, lheNuds = 0, lheNglu = 0;
     double lheVpt = 0;
-
+    double alphaS = 0;
+    double alphaQED=0;
+    double scale  = 0;
+    int    idproc = 0;
     const auto& hepeup = lheProd.hepeup();
     const auto& pup = hepeup.PUP;
     int lep = -1, lepBar = -1, nu = -1, nuBar = -1;
     std::vector<float> vals_pt;
     std::vector<float> vals_eta;
     std::vector<float> vals_phi;
     std::vector<float> vals_mass;
+    std::vector<float> vals_pz;
+    std::vector<float> vals_time;
     std::vector<int> vals_pid;
+    std::vector<int> vals_status;
+    std::vector<int> vals_spin;
+    std::vector<int> vals_col1;
+    std::vector<int> vals_col2;
+    std::vector<int> vals_mother1;
+    std::vector<int> vals_mother2;
+    alphaS = hepeup.AQCDUP;
+    alphaQED = hepeup.AQEDUP; 
+    scale  = hepeup.SCALUP;
+    idproc = hepeup.IDPRUP;
     for (unsigned int i = 0, n = pup.size(); i < n; ++i) {
       int status = hepeup.ISTUP[i];
       int idabs = std::abs(hepeup.IDUP[i]);
-      if (status == 1) {
+      if (status == 1 || status == -1 || storeAllLHEInfo_) {
         TLorentzVector p4(pup[i][0], pup[i][1], pup[i][2], pup[i][3]);  // x,y,z,t
-        vals_pt.push_back(p4.Pt());
-        vals_eta.push_back(p4.Eta());
-        vals_phi.push_back(p4.Phi());
-        vals_mass.push_back(p4.M());
         vals_pid.push_back(hepeup.IDUP[i]);
+        vals_spin.push_back(hepeup.SPINUP[i]);
+        vals_status.push_back(status);
+        if (storeAllLHEInfo_){
+            vals_col1.push_back(hepeup.ICOLUP[i].first);
+            vals_col2.push_back(hepeup.ICOLUP[i].second);
+            vals_mother1.push_back(hepeup.MOTHUP[i].first);
+            vals_mother2.push_back(hepeup.MOTHUP[i].second);
+            vals_time.push_back(hepeup.VTIMUP[i]);
+        }
+        if (status == -1) {
+          vals_pt.push_back(0);
+          vals_eta.push_back(0);
+          vals_phi.push_back(0);
+          vals_mass.push_back(0);
+          vals_pz.push_back(p4.Pz());
+        } else {
+          vals_pt.push_back(p4.Pt());
+          vals_eta.push_back(p4.Eta());
+          vals_phi.push_back(p4.Phi());
+          vals_mass.push_back(p4.M());
+          vals_pz.push_back(0);
+        }
       }
       if ((status == 1) && ((idabs == 21) || (idabs > 0 && idabs < 7))) {  //# gluons and quarks
         // object counters
@@ -131,7 +165,12 @@ class LHETablesProducer : public edm::global::EDProducer<> {
     out.addColumnValue<float>("Vpt", lheVpt, "pT of the W or Z boson at LHE step", nanoaod::FlatTable::FloatColumn);
     out.addColumnValue<uint8_t>("NpNLO", lheProd.npNLO(), "number of partons at NLO", nanoaod::FlatTable::UInt8Column);
     out.addColumnValue<uint8_t>("NpLO", lheProd.npLO(), "number of partons at LO", nanoaod::FlatTable::UInt8Column);
-
+    out.addColumnValue<float>("AlphaS", alphaS, "Per-event alphaS", nanoaod::FlatTable::FloatColumn);
+    if (storeAllLHEInfo_){
+        out.addColumnValue<float>("AlphaQED", alphaQED, "Per-event alphaQED", nanoaod::FlatTable::FloatColumn);
+        out.addColumnValue<float>("Scale", scale, "Per-event scale", nanoaod::FlatTable::FloatColumn);
+        out.addColumnValue<uint8_t>("ProcessID", idproc, "Process id (as in the card ordering)", nanoaod::FlatTable::UInt8Column);
+    }
     auto outPart = std::make_unique<nanoaod::FlatTable>(vals_pt.size(), "LHEPart", false);
     outPart->addColumn<float>("pt", vals_pt, "Pt of LHE particles", nanoaod::FlatTable::FloatColumn, this->precision_);
     outPart->addColumn<float>(
@@ -140,8 +179,19 @@ class LHETablesProducer : public edm::global::EDProducer<> {
         "phi", vals_phi, "Phi of LHE particles", nanoaod::FlatTable::FloatColumn, this->precision_);
     outPart->addColumn<float>(
         "mass", vals_mass, "Mass of LHE particles", nanoaod::FlatTable::FloatColumn, this->precision_);
+    outPart->addColumn<float>(
+        "incomingpz", vals_pz, "Pz of incoming LHE particles", nanoaod::FlatTable::FloatColumn, this->precision_);
     outPart->addColumn<int>("pdgId", vals_pid, "PDG ID of LHE particles", nanoaod::FlatTable::IntColumn);
-
+    outPart->addColumn<int>(
+        "status", vals_status, "LHE particle status; -1:incoming, 1:outgoing", nanoaod::FlatTable::IntColumn);
+    outPart->addColumn<int>("spin", vals_spin, "Spin of LHE particles", nanoaod::FlatTable::IntColumn);
+    if (storeAllLHEInfo_){
+        outPart->addColumn<int>("color1", vals_col1, "First color index of LHE particles", nanoaod::FlatTable::IntColumn);
+        outPart->addColumn<int>("color2", vals_col2, "Second color index of LHE particles", nanoaod::FlatTable::IntColumn);
+        outPart->addColumn<int>("mother1", vals_mother1, "First mother index of LHE particles", nanoaod::FlatTable::IntColumn);
+        outPart->addColumn<int>("mother2", vals_mother2, "Second mother index of LHE particles", nanoaod::FlatTable::IntColumn);
+        outPart->addColumn<float>("lifetime", vals_time, "Own lifetime of LHE particles", nanoaod::FlatTable::FloatColumn, this->precision_);
+    }
     return outPart;
   }
 
@@ -152,13 +202,15 @@ class LHETablesProducer : public edm::global::EDProducer<> {
     desc.add<int>("precision", -1)->setComment("precision on the 4-momenta of the LHE particles");
     desc.add<bool>("storeLHEParticles", false)
         ->setComment("Whether we want to store the 4-momenta of the status 1 particles at LHE level");
+    desc.add<bool>("storeAllLHEInfo", false)->setComment("Whether to store the whole set of intermediate LHE particles, not only the status +/-1 ones");
     descriptions.add("lheInfoTable", desc);
   }
 
 protected:
   const std::vector<edm::EDGetTokenT<LHEEventProduct>> lheTag_;
   const unsigned int precision_;
   const bool storeLHEParticles_;
+  const bool storeAllLHEInfo_;
 };
 
 #include "FWCore/Framework/interface/MakerMacros.h"

diff --git a/PhysicsTools/NanoAOD/python/nano_cff.py b/PhysicsTools/NanoAOD/python/nano_cff.py
@@ -115,7 +115,8 @@
 lheInfoTable = cms.EDProducer("LHETablesProducer",
     lheInfo = cms.VInputTag(cms.InputTag("externalLHEProducer"), cms.InputTag("source")),
     precision = cms.int32(14),
-    storeLHEParticles = cms.bool(True) 
+    storeLHEParticles = cms.bool(True),
+    storeAllLHEInfo = cms.bool(False),  
 )
 
 l1bits=cms.EDProducer("L1TriggerResultsConverter", src=cms.InputTag("gtStage2Digis"), legacyL1=cms.bool(False),