Skip to content

Commit

Permalink
[DF][RDatasetSpec] Handle case when # trees != # files
Browse files Browse the repository at this point in the history
  • Loading branch information
ikabadzhov committed May 17, 2022
1 parent 9f71619 commit 11a7f9a
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 28 deletions.
7 changes: 5 additions & 2 deletions tree/dataframe/inc/ROOT/RDF/RDatasetSpec.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,11 @@ struct RDatasetSpec {

// Constructs a dataset specification from a list of tree names, a list of file names (stored as
// fFileNameGlobs) and an optional entry range (defaults to a value-initialized REntryRange).
//
// NOTE(review): this listing is a diff hunk rendered without +/- markers, so two alternative
// member-initializer lists appear back to back. The hunk header reports "5 additions & 2 deletions",
// which matches the 2-line list below being the removed version and the 5-line list after it being
// the added version.
RDatasetSpec(const std::vector<std::string> &treeNames, const std::vector<std::string> &fileNames,
REntryRange entryRange = {})
// Apparently the removed initializer list: copies treeNames and fileNames with no check on their sizes.
: fTreeNames(treeNames), fFileNameGlobs(fileNames), fStartEntry(entryRange.fStartEntry),
fEndEntry(entryRange.fEndEntry)
// Apparently the added initializer list: the size check is folded into the initializer of fTreeNames.
// When the number of files differs from the number of trees and there is not exactly one tree, the
// conditional expression throws std::runtime_error instead of yielding treeNames.
// The message text (including its spelling) is compared verbatim by the test changed in this commit.
: fTreeNames(
fileNames.size() != treeNames.size() && treeNames.size() != 1
? throw std::runtime_error("RDatasetSpec exepcts either N trees and N files, or 1 tree and N files.")
: treeNames),
fFileNameGlobs(fileNames), fStartEntry(entryRange.fStartEntry), fEndEntry(entryRange.fEndEntry)
{
// Intentionally empty: all work is done in the member-initializer list.
}
};
Expand Down
20 changes: 5 additions & 15 deletions tree/dataframe/src/RLoopManager.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -364,22 +364,12 @@ RLoopManager::RLoopManager(const ROOT::RDF::RDatasetSpec &spec)
fLoopType(ROOT::IsImplicitMTEnabled() ? ELoopType::kROOTFilesMT : ELoopType::kROOTFiles),
fNewSampleNotifier(fNSlots), fSampleInfos(fNSlots)
{
if (spec.fTreeNames.size() == 1) { // a single tree (might be multiple files)
auto chain = std::make_shared<TChain>(spec.fTreeNames[0].c_str());
for (const auto &f : spec.fFileNameGlobs)
chain->Add(f.c_str());
SetTree(chain);
} else {
// Some other times, each different file has its own tree name, we need to
// reconstruct the full path to the tree in each file and pass that to
// TChain::Add
auto chain = std::make_shared<TChain>();
for (auto i = 0u; i < spec.fFileNameGlobs.size(); ++i) {
const auto fullpath = spec.fFileNameGlobs[i] + "?#" + spec.fTreeNames[i];
chain->Add(fullpath.c_str());
}
SetTree(chain);
auto chain = std::make_shared<TChain>();
for (auto i = 0u; i < spec.fFileNameGlobs.size(); ++i) {
const auto fullpath = spec.fFileNameGlobs[i] + "?#" + spec.fTreeNames[spec.fTreeNames.size() == 1 ? 0 : i];
chain->Add(fullpath.c_str());
}
SetTree(chain);
}

struct RSlotRAII {
Expand Down
14 changes: 3 additions & 11 deletions tree/dataframe/test/dataframe_datasetspec.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -48,20 +48,12 @@ TEST(RDFDatasetSpec, SingleFileSingleColConstructor)
const auto dfRDS1 = RDataFrame(RDatasetSpec("tree", "file.root", {2, 4})).Display<int>({"x"})->AsString();
EXPECT_EQ(dfRDS1, dfRange0);

// specify 2 trees, second tree is irrelevant, this is correct
const auto dfRDS6 =
RDataFrame(RDatasetSpec({"tree"s, "nottree"s}, {"file.root"s}, {2, 4})).Display<int>({"x"})->AsString();
EXPECT_EQ(dfRDS6, dfRange0);

// specify 2 trees, first tree is irrelevant, this is wrong, emitting C++ error and ROOT error
// specify 2 trees, this is wrong, emitting C++ error
EXPECT_THROW(
try {
ROOT_EXPECT_ERROR(
RDataFrame(RDatasetSpec({"nottree"s, "tree"s}, {"file.root"s}, {2, 4})).Display<int>({"x"})->AsString(),
"TChain::LoadTree", "Cannot find tree with name nottree in file file.root");
RDatasetSpec({"tree"s, "anothertree"s}, {"file.root"s}, {2, 4});
} catch (const std::runtime_error &err) {
EXPECT_EQ(std::string(err.what()),
"Column \"x\" is not in a dataset and is not a custom column been defined.");
EXPECT_EQ(std::string(err.what()), "RDatasetSpec exepcts either N trees and N files, or 1 tree and N files.");
throw;
},
std::runtime_error);
Expand Down

0 comments on commit 11a7f9a

Please sign in to comment.