Skip to content

Commit

Permalink
[DF][RDatasetSpec] Initial version of friend trees handling
Browse files Browse the repository at this point in the history
  • Loading branch information
ikabadzhov committed May 17, 2022
1 parent 11a7f9a commit 828b25b
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 10 deletions.
45 changes: 37 additions & 8 deletions tree/dataframe/inc/ROOT/RDF/RDatasetSpec.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,31 @@ struct RDatasetSpec {
}
};

/**
 * Description of a single friend of the dataset: the tree name(s) and the file name glob(s) that make it up.
 *
 * fTreeNames and fFileNameGlobs follow the same convention as the corresponding members of the enclosing
 * dataset specification: either there is exactly one tree name shared by all files, or there is one tree
 * name per file (the two lists are then read in lockstep).
 */
struct RFriendInfo {
   std::vector<std::string> fTreeNames{};     ///< Tree name(s) of the friend: one in total, or one per file.
   std::vector<std::string> fFileNameGlobs{}; ///< File name globs of the friend.

   /// Construct an empty friend description (no trees, no files).
   RFriendInfo() = default;

   /// Construct a friend made of one tree stored in one file (or file glob).
   /// \param[in] treeName Name of the friend tree.
   /// \param[in] fileName File name (glob) in which the tree is stored.
   RFriendInfo(const std::string &treeName, const std::string &fileName)
      : fTreeNames{treeName}, fFileNameGlobs{fileName}
   {
   }

   /// Construct a friend made of one tree name that is shared by several files.
   /// \param[in] treeName Name of the friend tree, common to all files.
   /// \param[in] fileNames File names (globs) in which the tree is stored.
   RFriendInfo(const std::string &treeName, const std::vector<std::string> &fileNames)
      : fTreeNames{treeName}, fFileNameGlobs(fileNames)
   {
   }

   /// Construct a friend from a list of tree names and a list of file names.
   /// \param[in] treeNames Either exactly one tree name, or as many tree names as there are file names.
   /// \param[in] fileNames File names (globs) in which the trees are stored.
   /// \throws std::logic_error if the sizes of the two lists differ and treeNames does not have exactly one element.
   RFriendInfo(const std::vector<std::string> &treeNames, const std::vector<std::string> &fileNames)
      : fTreeNames(
           // Validate inside the initializer so that nothing is copied when the arguments are inconsistent.
           fileNames.size() != treeNames.size() && treeNames.size() != 1
              ? throw std::logic_error("RFriendInfo expects either N trees and N files, or 1 tree and N files.")
              : treeNames),
        fFileNameGlobs(fileNames)
   {
   }
};

/**
* A list of names of trees.
* This list should go in lockstep with fFileNameGlobs, only in case this dataset is a TChain where each file
Expand All @@ -49,31 +74,35 @@ struct RDatasetSpec {
* A list of file names.
* They can contain the globbing characters supported by TChain. See TChain::Add for more information.
*/

std::vector<std::string> fFileNameGlobs{};

ULong64_t fStartEntry{}; ///< The entry where the dataset processing should start (inclusive).
ULong64_t fEndEntry{}; ///< The entry where the dataset processing should end (exclusive).

RDatasetSpec(const std::string &treeName, const std::string &fileName, REntryRange entryRange = {})
std::vector<RFriendInfo> fFriendInfos{}; ///< List of friends

RDatasetSpec(const std::string &treeName, const std::string &fileName, REntryRange entryRange = {},
const std::vector<RFriendInfo> &friendInfos = {})
: fTreeNames(std::vector<std::string>{treeName}), fFileNameGlobs(std::vector<std::string>{fileName}),
fStartEntry(entryRange.fStartEntry), fEndEntry(entryRange.fEndEntry)
fStartEntry(entryRange.fStartEntry), fEndEntry(entryRange.fEndEntry), fFriendInfos(friendInfos)
{
}

RDatasetSpec(const std::string &treeName, const std::vector<std::string> &fileNames, REntryRange entryRange = {})
RDatasetSpec(const std::string &treeName, const std::vector<std::string> &fileNames, REntryRange entryRange = {},
const std::vector<RFriendInfo> &friendInfos = {})
: fTreeNames(std::vector<std::string>{treeName}), fFileNameGlobs(fileNames), fStartEntry(entryRange.fStartEntry),
fEndEntry(entryRange.fEndEntry)
fEndEntry(entryRange.fEndEntry), fFriendInfos(friendInfos)
{
}

RDatasetSpec(const std::vector<std::string> &treeNames, const std::vector<std::string> &fileNames,
REntryRange entryRange = {})
REntryRange entryRange = {}, const std::vector<RFriendInfo> &friendInfos = {})
: fTreeNames(
fileNames.size() != treeNames.size() && treeNames.size() != 1
? throw std::runtime_error("RDatasetSpec exepcts either N trees and N files, or 1 tree and N files.")
? throw std::logic_error("RDatasetSpec exepcts either N trees and N files, or 1 tree and N files.")
: treeNames),
fFileNameGlobs(fileNames), fStartEntry(entryRange.fStartEntry), fEndEntry(entryRange.fEndEntry)
fFileNameGlobs(fileNames), fStartEntry(entryRange.fStartEntry), fEndEntry(entryRange.fEndEntry),
fFriendInfos(friendInfos)
{
}
};
Expand Down
10 changes: 10 additions & 0 deletions tree/dataframe/src/RLoopManager.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,16 @@ RLoopManager::RLoopManager(const ROOT::RDF::RDatasetSpec &spec)
const auto fullpath = spec.fFileNameGlobs[i] + "?#" + spec.fTreeNames[spec.fTreeNames.size() == 1 ? 0 : i];
chain->Add(fullpath.c_str());
}
// Build one TChain per friend description and register it as a friend of the main chain.
for (auto i = 0u; i < spec.fFriendInfos.size(); ++i) {
   const auto &friendInfo = spec.fFriendInfos[i];
   // Give each friend chain a distinct name. Note that `"f" + i` would be pointer arithmetic on the string
   // literal rather than a concatenation, hence the explicit conversion of the index to a string.
   const std::string friendChainName = "f" + std::to_string(i);
   auto friendChain = std::make_shared<TChain>(friendChainName.c_str());
   // Iterate over the file globs of this friend (not over the characters of one glob).
   const auto nFriendFiles = friendInfo.fFileNameGlobs.size();
   for (auto j = 0u; j < nFriendFiles; ++j) {
      // A single tree name is shared by all files, otherwise tree names and files go in lockstep.
      const auto &friendTreeName = friendInfo.fTreeNames[friendInfo.fTreeNames.size() == 1 ? 0 : j];
      const auto fullpath = friendInfo.fFileNameGlobs[j] + "?#" + friendTreeName;
      friendChain->Add(fullpath.c_str());
   }
   // NOTE(review): the friend is registered by name only and friendChain is released at the end of this
   // iteration; presumably the friend chain has to outlive this scope for the main chain to resolve it.
   // TODO confirm and, if so, store the friend chains in an owner that lives as long as the main chain.
   chain->AddFriend(friendChainName.c_str());
}
SetTree(chain);
}

Expand Down
4 changes: 2 additions & 2 deletions tree/dataframe/test/dataframe_datasetspec.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,11 @@ TEST(RDFDatasetSpec, SingleFileSingleColConstructor)
EXPECT_THROW(
try {
RDatasetSpec({"tree"s, "anothertree"s}, {"file.root"s}, {2, 4});
} catch (const std::runtime_error &err) {
} catch (const std::logic_error &err) {
EXPECT_EQ(std::string(err.what()), "RDatasetSpec exepcts either N trees and N files, or 1 tree and N files.");
throw;
},
std::runtime_error);
std::logic_error);

// specify range [2, 2) (3 is a valid index) => range is disregarded
const auto dfRDS7 = RDataFrame(RDatasetSpec("tree", "file.root", {2, 2})).Display<int>({"x"})->AsString();
Expand Down

0 comments on commit 828b25b

Please sign in to comment.