Skip to content

Commit

Permalink
Dylan's improvements of weight parsing
Browse files Browse the repository at this point in the history
changed LHEsource so it compiles, running tests now

added scientific notation for numbers so values are correct

added a few more test statements. Need to think of future needs

Add testing suite (will remove/squash commits right before merge)

Change pdfSetsInfo to be read in from LHAPDF path
Still lingering question of how to decide pdf type
This can be resolved in the new function "setupPdfSetsInfo"

Add check if orphan weight is pdf and fix xml tag swap (needs review)

Allow reading '&gt;' as '>' and fix bug with first weight w/o group
As for the first weight error: if the first weight doesn't have a
weight group, the code would not add it to the list of weights, because
currentGroupName and the first weight's group name would both be the
trivial "". That's the reason currentGroupName is initialized to
"None".

Fix space issue in regex and move unchanging variables outside the for loop

Add pdfgroup splitting. To do this, needed LHAPDF requirements

clang format
  • Loading branch information
dteague authored and kdlong committed May 20, 2020
1 parent 8c2927e commit 6d6b77f
Show file tree
Hide file tree
Showing 31 changed files with 15,589 additions and 163 deletions.
4 changes: 3 additions & 1 deletion GeneratorInterface/Core/interface/LHEWeightHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ namespace gen {
void setHeaderLines(std::vector<std::string> headerLines);
void parseWeights();
void buildGroups();
std::unique_ptr<WeightGroupInfo> buildGroup(const ParsedWeight& weight);
bool isConsistent();
void swapHeaders();
std::unique_ptr<WeightGroupInfo> buildGroup(ParsedWeight& weight);

private:
std::vector<std::string> headerLines_;
Expand Down
16 changes: 13 additions & 3 deletions GeneratorInterface/Core/interface/WeightHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@
#include "SimDataFormats/GeneratorProducts/interface/PdfWeightGroupInfo.h"
#include "SimDataFormats/GeneratorProducts/interface/ScaleWeightGroupInfo.h"
#include "SimDataFormats/GeneratorProducts/interface/MEParamWeightGroupInfo.h"
#include "LHAPDF/LHAPDF.h"
#include <boost/algorithm/string.hpp>
#include <bits/stdc++.h>
#include <fstream>

namespace gen {
struct PdfSetInfo {
Expand All @@ -25,6 +28,7 @@ namespace gen {
std::string groupname;
std::string content;
std::unordered_map<std::string, std::string> attributes;
size_t wgtGroup_idx;
};

class WeightHelper {
Expand All @@ -48,18 +52,24 @@ namespace gen {
bool isScaleWeightGroup(const ParsedWeight& weight);
bool isMEParamWeightGroup(const ParsedWeight& weight);
bool isPdfWeightGroup(const ParsedWeight& weight);
bool isOrphanPdfWeightGroup(ParsedWeight& weight);
bool isMultiSetPdfGroup(WeightGroupInfo& group);
void updateScaleInfo(const ParsedWeight& weight);
void updatePdfInfo(const ParsedWeight& weight);
void updatePdfInfo(int lhaid, int index);
void splitPdfGroups();
std::vector<PdfSetInfo> setupPdfSetsInfo();
std::string searchAttributes(const std::string& label, const ParsedWeight& weight) const;
std::string searchAttributesByTag(const std::string& label, const ParsedWeight& weight) const;
std::string searchAttributesByRegex(const std::string& label, const ParsedWeight& weight) const;

// Possible names for the same thing
const std::unordered_map<std::string, std::vector<std::string>> attributeNames_ = {
{"muf", {"muR", "MUR", "muf", "facscfact"}},
{"mur", {"muF", "MUF", "mur", "renscfact"}},
{"pdf", {"PDF", "PDF set", "lhapdf", "pdf", "pdf set", "pdfset"}}};
{"muf", {"muF", "MUF", "muf", "facscfact"}},
{"mur", {"muR", "MUR", "mur", "renscfact"}},
{"pdf", {"PDF", "PDF set", "lhapdf", "pdf", "pdf set", "pdfset"}},
//{"dyn", {"DYN_SCALE", "dyn_scale_choice"}},
};
};
} // namespace gen

Expand Down
138 changes: 122 additions & 16 deletions GeneratorInterface/Core/src/LHEWeightHelper.cc
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "GeneratorInterface/Core/interface/LHEWeightHelper.h"
#include <boost/algorithm/string/join.hpp>
#include <boost/algorithm/string/replace.hpp>
#include <iostream>

using namespace tinyxml2;
Expand All @@ -9,22 +10,56 @@ namespace gen {

// Parses the weight declarations in headerLines_ into parsedWeights_ and then
// calls buildGroups().
//
// Steps visible below:
//  - parsedWeights_ is cleared; if isConsistent() is false, swapHeaders() is
//    called before parsing.
//  - The header lines are joined into one string and parsed with tinyxml2.
//    If that fails, "&lt;"/"&gt;" are replaced by "<"/">" and the parse is
//    retried; if it still fails, the error is printed and the process exits.
//  - Every "weight" element found outside a group is recorded with an empty
//    group name; for a "weightgroup" element each child "weight" is recorded
//    with the group's name. A std::runtime_error is thrown when a weightgroup
//    carries none of the attributes listed in nameAlts_.
//
// NOTE(review): this listing is taken from a diff view without +/- markers, so
// superseded lines (e.g. the "<root>"-wrapped parse) appear next to their
// replacements, and a few lines are hidden at the hunk boundary below.
void LHEWeightHelper::parseWeights() {
parsedWeights_.clear();

if (!isConsistent()) {
swapHeaders();
}

tinyxml2::XMLDocument xmlDoc;
xmlDoc.Parse(("<root>" + boost::algorithm::join(headerLines_, "") + "</root>").c_str());
tinyxml2::XMLElement* root = xmlDoc.FirstChildElement("root");
std::string fullHeader = boost::algorithm::join(headerLines_, "");

int xmlError = xmlDoc.Parse(fullHeader.c_str());
// in case the header uses the entities "&lt;" / "&gt;" instead of literal "<" / ">"
if (xmlError != 0) {
boost::replace_all(fullHeader, "&lt;", "<");
boost::replace_all(fullHeader, "&gt;", ">");
xmlError = xmlDoc.Parse(fullHeader.c_str());
}
// error persists after the entity replacement: report it and terminate
// (TODO: decide how this error should really be handled)
if (xmlError != 0) {
std::cerr << "Error in lhe xml file" << std::endl;
xmlDoc.PrintError();
exit(1);
}

// attribute names tried, in this order, to obtain a weightgroup's name
std::vector<std::string> nameAlts_ = {"name", "type"};

size_t weightIndex = 0;
for (auto* e = root->FirstChildElement(); e != nullptr; e = e->NextSiblingElement()) {
size_t groupIndex = 0;
//for (auto* e = root->FirstChildElement(); e != nullptr; e = e->NextSiblingElement()) {
for (auto* e = xmlDoc.RootElement(); e != nullptr; e = e->NextSiblingElement()) {
std::string groupName = "";
if (strcmp(e->Name(), "weight") == 0) {
// we are here if there is a weight that does not belong to any group
std::string text = "";
if (e->GetText())
if (e->GetText()) {
text = e->GetText();
parsedWeights_.push_back({e->Attribute("id"), weightIndex++, groupName, text});
}
if (strcmp(e->Name(), "weightgroup") == 0) {
groupName = e->Attribute("name");
}
std::unordered_map<std::string, std::string> attributes;
for (auto* att = e->FirstAttribute(); att != nullptr; att = att->Next())
attributes[att->Name()] = att->Value();
parsedWeights_.push_back({e->Attribute("id"), weightIndex++, groupName, text, attributes, groupIndex});
} else if (strcmp(e->Name(), "weightgroup") == 0) {
// to deal with files that label the group with "type" instead of "name"
for (auto nameAtt : nameAlts_) {
if (e->Attribute(nameAtt.c_str())) {
groupName = e->Attribute(nameAtt.c_str());
break;
}
}
if (groupName.empty()) {
throw std::runtime_error("couldn't find groupname");
}
for (auto* inner = e->FirstChildElement("weight"); inner != nullptr;
inner = inner->NextSiblingElement("weight")) {
// we are here if there is a weight in a weightgroup
Expand All @@ -34,39 +69,110 @@ namespace gen {
std::unordered_map<std::string, std::string> attributes;
for (auto* att = inner->FirstAttribute(); att != nullptr; att = att->Next())
attributes[att->Name()] = att->Value();
parsedWeights_.push_back({inner->Attribute("id"), weightIndex++, groupName, text, attributes});
parsedWeights_.push_back({inner->Attribute("id"), weightIndex++, groupName, text, attributes, groupIndex});
}
}
// one group index per visited element, whether it was a lone weight or a weightgroup
groupIndex++;
}
buildGroups();
}

// Checks that the <weightgroup> tags in headerLines_ are properly paired.
//
// The header lines are scanned in order while tracking the nesting depth:
// a line containing "<weightgroup" must bring the depth to exactly 1, and a
// line containing "</weightgroup>" must bring it back to exactly 0. A line is
// classified by at most one tag (the opening tag is tested first); lines with
// neither tag are ignored.
//
// Returns true if no tag violates those depth rules and the depth is 0 after
// the last line; false otherwise.
bool LHEWeightHelper::isConsistent() {
  int curLevel = 0;

  // The lines are only inspected, so bind by const reference instead of
  // copying every header string.
  for (const auto& line : headerLines_) {
    if (line.find("<weightgroup") != std::string::npos) {
      curLevel++;
      if (curLevel != 1) {
        return false;
      }
    } else if (line.find("</weightgroup>") != std::string::npos) {
      curLevel--;
      if (curLevel != 0) {
        return false;
      }
    }
  }
  return curLevel == 0;
}

// Swaps out-of-place <weightgroup> / </weightgroup> lines in headerLines_.
//
// The header lines are scanned in order with the same depth counter used by
// isConsistent(): an opening line that leaves the depth different from 1 is
// remembered as a misplaced opening tag, and a closing line that leaves the
// depth different from 0 is remembered as a misplaced closing tag. As soon as
// one of each is pending, the two lines are exchanged in headerLines_ and both
// markers are cleared, so several such pairs can be handled in one pass.
// The depth counter itself is not adjusted after a swap.
void LHEWeightHelper::swapHeaders() {
  // headerLines_.size() can never be a valid index, so it serves as the
  // "nothing pending" marker and keeps all indices unsigned.
  const size_t unset = headerLines_.size();
  int curLevel = 0;
  size_t open = unset;
  size_t close = unset;
  for (size_t idx = 0; idx < headerLines_.size(); idx++) {
    // Read-only view of the current line; it is not used after the swap below.
    const std::string& line = headerLines_[idx];
    if (line.find("<weightgroup") != std::string::npos) {
      curLevel++;
      if (curLevel != 1) {
        open = idx;
      }
    } else if (line.find("</weightgroup>") != std::string::npos) {
      curLevel--;
      if (curLevel != 0) {
        close = idx;
      }
    }
    if (open != unset && close != unset) {
      std::swap(headerLines_[open], headerLines_[close]);
      open = unset;
      close = unset;
    }
  }
}

// Rebuilds weightGroups_ from parsedWeights_.
//
// Walks the parsed weights in order, starts a new group (via buildGroup)
// whenever the weight's group marker changes, registers the weight with the
// most recently added group, and forwards it to updateScaleInfo() or
// updatePdfInfo() when the group's type is kScaleWeights or kPdfWeights.
// Afterwards splitPdfGroups() is called and a summary of every group is
// printed to std::cout.
//
// NOTE(review): this listing is taken from a diff view without +/- markers, so
// the superseded groupname-based lines appear next to the wgtGroup_idx-based
// ones that replace them.
void LHEWeightHelper::buildGroups() {
weightGroups_.clear();
std::string currentGroupName;
for (const auto& weight : parsedWeights_) {
if (weight.groupname != currentGroupName) {
// -1 converts to the largest size_t value, used here as the initial "no group yet" marker
size_t currentGroupIdx = -1;
for (auto& weight : parsedWeights_) {
if (currentGroupIdx != weight.wgtGroup_idx) {
weightGroups_.push_back(*buildGroup(weight));
currentGroupIdx = weight.wgtGroup_idx;
}
currentGroupName = weight.groupname;

WeightGroupInfo& group = weightGroups_.back();
group.addContainedId(weight.index, weight.id, weight.content);

group.addContainedId(weight.index, weight.id, weight.content);
if (group.weightType() == gen::WeightType::kScaleWeights)
updateScaleInfo(weight);
else if (group.weightType() == gen::WeightType::kPdfWeights)
updatePdfInfo(weight);
}
//splitPdfGroups();
splitPdfGroups();
// checks: print name, first/last id and isWellFormed() for every group;
// output for a group that is not well formed is wrapped in the
// "\033[1;31m" ... "\033[0m" escape sequences (bold red on ANSI terminals)
for (auto& wgt : weightGroups_) {
if (!wgt.isWellFormed())
std::cout << "\033[1;31m";
std::cout << std::boolalpha << wgt.name() << " (" << wgt.firstId() << "-" << wgt.lastId()
<< "): " << wgt.isWellFormed() << std::endl;
if (wgt.weightType() == gen::WeightType::kScaleWeights) {
// scale groups additionally print the index stored for each muR/muF combination
auto& wgtScale = dynamic_cast<gen::ScaleWeightGroupInfo&>(wgt);
std::cout << wgtScale.centralIndex() << " ";
std::cout << wgtScale.muR1muF2Index() << " ";
std::cout << wgtScale.muR1muF05Index() << " ";
std::cout << wgtScale.muR2muF1Index() << " ";
std::cout << wgtScale.muR2muF2Index() << " ";
std::cout << wgtScale.muR2muF05Index() << " ";
std::cout << wgtScale.muR05muF1Index() << " ";
std::cout << wgtScale.muR05muF2Index() << " ";
std::cout << wgtScale.muR05muF05Index() << " \n";
} else if (wgt.weightType() == gen::WeightType::kPdfWeights) {
// pdf groups additionally print their description
std::cout << wgt.description() << "\n";
}
if (!wgt.isWellFormed())
std::cout << "\033[0m";
}
}

// Creates the group object for the group that `weight` belongs to.
//
// The weight is tested, in this order, with isScaleWeightGroup,
// isPdfWeightGroup, isMEParamWeightGroup and isOrphanPdfWeightGroup; the
// first test that succeeds selects ScaleWeightGroupInfo, PdfWeightGroupInfo,
// MEParamWeightGroupInfo or PdfWeightGroupInfo respectively. If none
// succeeds, an UnknownWeightGroupInfo is returned. Every group is constructed
// from weight.groupname.
//
// NOTE(review): this listing is taken from a diff view without +/- markers;
// the first signature line below is the superseded const-reference version.
std::unique_ptr<WeightGroupInfo> LHEWeightHelper::buildGroup(const ParsedWeight& weight) {
std::unique_ptr<WeightGroupInfo> LHEWeightHelper::buildGroup(ParsedWeight& weight) {
if (isScaleWeightGroup(weight))
return std::make_unique<ScaleWeightGroupInfo>(weight.groupname);
else if (isPdfWeightGroup(weight))
return std::make_unique<PdfWeightGroupInfo>(weight.groupname);
else if (isMEParamWeightGroup(weight))
return std::make_unique<MEParamWeightGroupInfo>(weight.groupname);
else if (isOrphanPdfWeightGroup(weight))
return std::make_unique<PdfWeightGroupInfo>(weight.groupname);

// none of the predicates matched
return std::make_unique<UnknownWeightGroupInfo>(weight.groupname);
}
Expand Down
Loading

0 comments on commit 6d6b77f

Please sign in to comment.