From 0caecdfbf69550e6d8a72ee616757ad4b12795cf Mon Sep 17 00:00:00 2001 From: Juraj Smiesko Date: Fri, 3 May 2024 13:37:08 +0200 Subject: [PATCH 01/16] Moving RDataSource closer to Podio/EDM4hep --- CMakeLists.txt | 2 +- include/podio/ROOTDataSource.h | 117 ++++++ src/CMakeLists.txt | 9 +- src/ROOTDataSource.cc | 401 +++++++++++++++++++ tests/root_io/CMakeLists.txt | 1 + tests/root_io/read_with_rdatasource_root.cpp | 43 ++ 6 files changed, 571 insertions(+), 2 deletions(-) create mode 100644 include/podio/ROOTDataSource.h create mode 100644 src/ROOTDataSource.cc create mode 100644 tests/root_io/read_with_rdatasource_root.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index dfd6f98db..caa4e3c23 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -77,7 +77,7 @@ option(ENABLE_JULIA "Enable Julia support. When enabled, Julia datamodels w list(APPEND CMAKE_PREFIX_PATH $ENV{ROOTSYS}) set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake) if(NOT ENABLE_RNTUPLE) - find_package(ROOT REQUIRED COMPONENTS RIO Tree) + find_package(ROOT REQUIRED COMPONENTS RIO Tree ROOTDataFrame) else() find_package(ROOT REQUIRED COMPONENTS RIO Tree ROOTNTuple) if(${ROOT_VERSION} VERSION_LESS 6.28.02) diff --git a/include/podio/ROOTDataSource.h b/include/podio/ROOTDataSource.h new file mode 100644 index 000000000..9757df195 --- /dev/null +++ b/include/podio/ROOTDataSource.h @@ -0,0 +1,117 @@ +#ifndef PODIO_DATASOURCE_H__ +#define PODIO_DATASOURCE_H__ + +// STL +#include +#include + +// ROOT +#include +#include + +// Podio +#include +#include +#include + +namespace podio { + using Record_t = std::vector; + + class ROOTDataSource : public ROOT::RDF::RDataSource { + public: + explicit ROOTDataSource(const std::string& filePath, int nEvents = -1); + explicit ROOTDataSource(const std::vector& filePathList, + int nEvents = -1); + + void SetNSlots(unsigned int nSlots) override; + + template + std::vector GetColumnReaders(std::string_view columnName); + + void Initialize() override; + + std::vector> GetEntryRanges() override; + + void InitSlot(unsigned int slot, ULong64_t firstEntry) override; + + bool SetEntry(unsigned int slot, ULong64_t entry) override; + + void FinalizeSlot(unsigned int slot) override; + + void Finalize() override; + + const std::vector& GetColumnNames() const override; + + bool HasColumn(std::string_view columnName) const override; + + std::string GetTypeName(std::string_view columnName) const override; + + protected: + Record_t GetColumnReadersImpl ( + std::string_view name, + const std::type_info& typeInfo) override; + + std::string AsString() override { return "Podio data source"; }; + + private: + /// Number of slots/threads + unsigned int m_nSlots = 1; + /// Input filename + std::vector m_filePathList = {}; + /// Total number of events + unsigned int m_nEvents = 0; + /// Ranges of events available to be processed + std::vector> m_rangesAvailable = {}; + /// Ranges of events available ever created + std::vector> m_rangesAll = {}; + /// Column names + std::vector m_columnNames {}; + /// Column types + std::vector m_columnTypes = {}; + /// Collections, m_Collections[columnIndex][slotIndex] + std::vector> m_Collections = {}; + /// Active collections + std::vector m_activeCollections = {}; + /// Root podio readers + std::vector> m_podioReaders = {}; + /// Podio frames + std::vector> m_frames = {}; + /// Setup input + void SetupInput(int nEvents); + }; + + + /** + * \brief Retrieve from podio::ROOTDataSource per-thread readers for the + * desired columns. + */ + template + std::vector + ROOTDataSource::GetColumnReaders(std::string_view columnName) { + std::cout << "podio::ROOTDataSource: Getting column readers for column: " + << columnName << std::endl; + + std::vector readers; + + return readers; + } + + /** + * @brief Create RDataFrame from multiple Podio files. + * + * @param[in] filePathList List of file paths from which the RDataFrame + * will be created. + * @return RDataFrame created from input file list. + */ + ROOT::RDataFrame CreateDataFrame(const std::vector& filePathList); + + /** + * @brief Create RDataFrame from a Podio file. + * + * @param[in] filePath File path from which the RDataFrame will be created. + * @return RDataFrame created from input file list. + */ + ROOT::RDataFrame CreateDataFrame(const std::string& filePath); +} + +#endif /* PODIO_DATASOURCE_H__ */ diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 119de39ab..411408ac8 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -82,6 +82,7 @@ SET(root_sources ROOTLegacyReader.cc ROOTFrameData.cc RootHelpers.cc + ROOTDataSource.cc ) if(ENABLE_RNTUPLE) list(APPEND root_sources @@ -96,6 +97,7 @@ SET(root_headers ${PROJECT_SOURCE_DIR}/include/podio/ROOTWriter.h ${PROJECT_SOURCE_DIR}/include/podio/ROOTFrameData.h ${PROJECT_SOURCE_DIR}/include/podio/utilities/RootHelpers.h + ${PROJECT_SOURCE_DIR}/include/podio/ROOTDataSource.h ) if(ENABLE_RNTUPLE) list(APPEND root_headers @@ -105,7 +107,12 @@ if(ENABLE_RNTUPLE) endif() PODIO_ADD_LIB_AND_DICT(podioRootIO "${root_headers}" "${root_sources}" root_selection.xml) -target_link_libraries(podioRootIO PUBLIC podio::podio ROOT::Core ROOT::RIO ROOT::Tree ROOT::ROOTVecOps) +target_link_libraries(podioRootIO PUBLIC podio::podio + ROOT::Core + ROOT::RIO + ROOT::Tree + ROOT::ROOTVecOps + ROOT::ROOTDataFrame) if(ENABLE_RNTUPLE) target_link_libraries(podioRootIO PUBLIC ROOT::ROOTNTuple) target_compile_definitions(podioRootIO PUBLIC PODIO_ENABLE_RNTUPLE=1) diff --git a/src/ROOTDataSource.cc b/src/ROOTDataSource.cc new file mode 100644 index 000000000..7cd52dbb7 --- /dev/null +++ b/src/ROOTDataSource.cc @@ -0,0 +1,401 @@ +#include "podio/ROOTDataSource.h" + +// STL +#include +#include +#include +#include +#include +#include + +// ROOT +#include + +// podio +#include +#include + +bool loadROOTDataSource() { + return true; +} + +namespace podio { + /** + * \brief Construct the podio::ROOTDataSource from the provided file. + */ + ROOTDataSource::ROOTDataSource(const std::string& filePath, + int nEvents) : m_nSlots{1} { + m_filePathList.emplace_back(filePath); + SetupInput(nEvents); + } + + /** + * \brief Construct the podio::ROOTDataSource from the provided file list. + */ + ROOTDataSource::ROOTDataSource( + const std::vector& filePathList, + int nEvents) : m_nSlots{1}, + m_filePathList{filePathList} { + SetupInput(nEvents); + } + + /** + * \brief Setup input for the podio::ROOTDataSource. + */ + void ROOTDataSource::SetupInput(int nEvents) { + // std::cout << "podio::ROOTDataSource: Constructing the source ..." << std::endl; + + if (m_filePathList.empty()) { + throw std::runtime_error("podio::ROOTDataSource: No input files provided!"); + } + + for (const auto& filePath : m_filePathList) { + // Check if file exists + // Warning: file can be coming from web or eos + // if (!std::filesystem::exists(filePath)) { + // throw std::runtime_error("podio::ROOTDataSource: Provided file \"" + // + filePath + "\" does not exist!"); + // } + + // Check if the provided file contains required metadata + std::unique_ptr inFile(TFile::Open(filePath.data(), "READ")); + auto metadata = inFile->Get("podio_metadata"); + if (!metadata) { + throw std::runtime_error( + "podio::ROOTDataSource: Provided file is missing podio metadata!"); + } + } + + // Create probing frame + podio::Frame frame; + unsigned int nEventsInFiles = 0; + podio::ROOTReader podioReader; + podioReader.openFiles(m_filePathList); + nEventsInFiles = podioReader.getEntries("events"); + frame = podio::Frame(podioReader.readEntry("events", 0)); + + // Determine over how many events to run + if (nEventsInFiles > 0) { + /* + std::cout << "podio::ROOTDataSource: Found " << nEventsInFiles + << " events in files: \n"; + for (const auto& filePath : m_filePathList) { + std::cout << " - " << filePath << "\n"; + } + */ + } else { + throw std::runtime_error("podio::ROOTDataSource: No events found!"); + } + + if (nEvents < 0) { + m_nEvents = nEventsInFiles; + } else if (nEvents == 0) { + throw std::runtime_error( + "podio::ROOTDataSource: Requested to run over zero events!"); + } else { + m_nEvents = nEvents; + } + if (nEventsInFiles < m_nEvents) { + m_nEvents = nEventsInFiles; + } + + // std::cout << "podio::ROOTDataSource: Running over " << m_nEvents << " events." + // << std::endl; + + // Get collections stored in the files + std::vector collNames = frame.getAvailableCollections(); + // std::cout << "podio::ROOTDataSource: Found following collections:\n"; + for (auto& collName: collNames) { + const podio::CollectionBase* coll = frame.get(collName); + if (coll->isValid()) { + m_columnNames.emplace_back(collName); + m_columnTypes.emplace_back(coll->getValueTypeName()); + // std::cout << " - " << collName << "\n"; + } + } + } + + + /** + * \brief Inform the podio::ROOTDataSource of the desired level of parallelism. + */ + void + ROOTDataSource::SetNSlots(unsigned int nSlots) { + // std::cout << "podio::ROOTDataSource: Setting num. of slots to: " << nSlots + // << std::endl; + m_nSlots = nSlots; + + if (m_nSlots > m_nEvents) { + throw std::runtime_error("podio::ROOTDataSource: Number of events too small!"); + } + + int eventsPerSlot = m_nEvents / m_nSlots; + for (size_t i = 0; i < (m_nSlots - 1); ++i) { + m_rangesAll.emplace_back(eventsPerSlot * i, eventsPerSlot * (i + 1)); + } + m_rangesAll.emplace_back(eventsPerSlot * (m_nSlots - 1), m_nEvents); + m_rangesAvailable = m_rangesAll; + + // Initialize set of addresses needed + m_Collections.resize( + m_columnNames.size(), + std::vector(m_nSlots, nullptr)); + + // Initialize podio readers + for (size_t i = 0; i < m_nSlots; ++i) { + m_podioReaders.emplace_back(std::make_unique()); + } + + for (size_t i = 0; i < m_nSlots; ++i) { + m_podioReaders[i]->openFiles(m_filePathList); + } + + for (size_t i = 0; i < m_nSlots; ++i) { + m_frames.emplace_back( + std::make_unique( + podio::Frame(m_podioReaders[i]->readEntry("events", 0)))); + } + } + + + /** + * \brief Inform podio::ROOTDataSource that an event-loop is about to start. + */ + void + ROOTDataSource::Initialize() { + // std::cout << "podio::ROOTDataSource: Initializing the source ..." << std::endl; + } + + + /** + * \brief Retrieve from podio::ROOTDataSource a set of ranges of entries that can be + * processed concurrently. + */ + std::vector> + ROOTDataSource::GetEntryRanges() { + // std::cout << "podio::ROOTDataSource: Getting entry ranges ..." << std::endl; + + std::vector> rangesToBeProcessed; + for (auto& range: m_rangesAvailable) { + rangesToBeProcessed.emplace_back( + std::pair{range.first, range.second} + ); + if (rangesToBeProcessed.size() >= m_nSlots) { + break; + } + } + + if (m_rangesAvailable.size() > m_nSlots) { + m_rangesAvailable.erase(m_rangesAvailable.begin(), + m_rangesAvailable.begin() + m_nSlots); + } else { + m_rangesAvailable.erase(m_rangesAvailable.begin(), + m_rangesAvailable.end()); + } + + + /* + std::cout << "podio::ROOTDataSource: Ranges to be processed:\n"; + for (auto& range: rangesToBeProcessed) { + std::cout << " {" << range.first << ", " << range.second + << "}\n"; + } + + if (m_rangesAvailable.size() > 0) { + + std::cout << "podio::ROOTDataSource: Ranges remaining:\n"; + for (auto& range: m_rangesAvailable) { + std::cout << " {" << range.first << ", " << range.second + << "}\n"; + } + } else { + std::cout << "podio::ROOTDataSource: No more remaining ranges.\n"; + } + */ + + return rangesToBeProcessed; + } + + + /** + * \brief Inform podio::ROOTDataSource that a certain thread is about to start working + * on a certain range of entries. + */ + void + ROOTDataSource::InitSlot([[maybe_unused]] unsigned int slot, + [[maybe_unused]] ULong64_t firstEntry) { + // std::cout << "podio::ROOTDataSource: Initializing slot: " << slot + // << " with first entry " << firstEntry << std::endl; + } + + + /** + * \brief Inform podio::ROOTDataSource that a certain thread is about to start working + * on a certain entry. + */ + bool + ROOTDataSource::SetEntry(unsigned int slot, ULong64_t entry) { + // std::cout << "podio::ROOTDataSource: In slot: " << slot << ", setting entry: " + // << entry << std::endl; + + m_frames[slot] = std::make_unique( + podio::Frame(m_podioReaders[slot]->readEntry("events", entry))); + + for (auto& collectionIndex: m_activeCollections) { + m_Collections[collectionIndex][slot] = + m_frames[slot]->get(m_columnNames.at(collectionIndex)); + /* + std::cout << "CollName: " << m_columnNames.at(collectionIndex) << "\n"; + std::cout << "Address: " << m_Collections[collectionIndex][slot] << "\n"; + std::cout << "Coll size: " << m_Collections[collectionIndex][slot]->size() << "\n"; + if (m_Collections[collectionIndex][slot]->isValid()) { + std::cout << "Collection valid\n"; + } + */ + } + + return true; + } + + + /** + * \brief Inform podio::ROOTDataSource that a certain thread finished working on a + * certain range of entries. + */ + void + ROOTDataSource::FinalizeSlot([[maybe_unused]] unsigned int slot) { + // std::cout << "podio::ROOTDataSource: Finalizing slot: " << slot << std::endl; + // std::cout << "Reader: " << &m_podioReaderRefs[slot].get() << std::endl; + + // for (auto& collectionIndex: m_activeCollections) { + // std::cout << "CollName: " << m_columnNames.at(collectionIndex) << "\n"; + // std::cout << "Address: " << m_Collections[collectionIndex][slot] << "\n"; + // if (m_Collections[collectionIndex][slot]->isValid()) { + // std::cout << "Collection valid\n"; + // } + // std::cout << "Coll size: " << m_Collections[collectionIndex][slot]->size() << "\n"; + // } + } + + + /** + * \brief Inform podio::ROOTDataSource that an event-loop finished. + */ + void + ROOTDataSource::Finalize() { + // std::cout << "podio::ROOTDataSource: Finalizing ..." << std::endl; + } + + + /** + * \brief Type-erased vector of pointers to pointers to column values --- one + * per slot + */ + Record_t + ROOTDataSource::GetColumnReadersImpl( + std::string_view columnName, + [[maybe_unused]] const std::type_info& typeInfo) { + /* + std::cout << "podio::ROOTDataSource: Getting column reader implementation for column:\n" + << " " << columnName + << "\n with type: " << typeInfo.name() << std::endl; + */ + + auto itr = std::find(m_columnNames.begin(), m_columnNames.end(), + columnName); + if (itr == m_columnNames.end()) { + std::string errMsg = "podio::ROOTDataSource: Can't find requested column \""; + errMsg += columnName; + errMsg += "\"!"; + throw std::runtime_error(errMsg); + } + auto columnIndex = std::distance(m_columnNames.begin(), itr); + m_activeCollections.emplace_back(columnIndex); + /* + std::cout << "podio::ROOTDataSource: Active collections so far:\n" + << " "; + for (auto& i: m_activeCollections) { + std::cout << i << ", "; + } + std::cout << std::endl; + */ + + Record_t columnReaders(m_nSlots); + for (size_t slotIndex = 0; slotIndex < m_nSlots; ++slotIndex) { + // std::cout << " Column index: " << columnIndex << "\n"; + // std::cout << " Slot index: " << slotIndex << "\n"; + // std::cout << " Address: " + // << &m_Collections[columnIndex][slotIndex] + // << std::endl; + columnReaders[slotIndex] = (void*) &m_Collections[columnIndex][slotIndex]; + } + + return columnReaders; + } + + + /** + * \brief Returns a reference to the collection of the dataset's column names + */ + const std::vector& + ROOTDataSource::GetColumnNames() const { + // std::cout << "podio::ROOTDataSource: Looking for column names" << std::endl; + + return m_columnNames; + } + + /** + * \brief Checks if the dataset has a certain column. + */ + bool + ROOTDataSource::HasColumn(std::string_view columnName) const { + // std::cout << "podio::ROOTDataSource: Looking for column: " << columnName + // << std::endl; + + if (std::find(m_columnNames.begin(), + m_columnNames.end(), + columnName) != m_columnNames.end()) { + return true; + } + + return false; + } + + + /** + * \brief Type of a column as a string. Required for JITting. + */ + std::string + ROOTDataSource::GetTypeName(std::string_view columnName) const { + // std::cout << "podio::ROOTDataSource: Looking for type name of column: " + // << columnName << std::endl; + + auto itr = std::find(m_columnNames.begin(), m_columnNames.end(), + columnName); + if (itr != m_columnNames.end()) { + auto i = std::distance(m_columnNames.begin(), itr); + // std::cout << "podio::ROOTDataSource: Found type name: " + // << m_columnTypes.at(i) << std::endl; + + return m_columnTypes.at(i) + "Collection"; + } + + return "float"; + } + + ROOT::RDataFrame + CreateDataFrame(const std::vector& filePathList) { + ROOT::RDataFrame rdf(std::make_unique(filePathList)); + + return rdf; + } + + ROOT::RDataFrame + CreateDataFrame(const std::string& filePath) { + std::vector filePathList; + filePathList.emplace_back(filePath); + ROOT::RDataFrame rdf(std::make_unique(filePathList)); + + return rdf; + } +} diff --git a/tests/root_io/CMakeLists.txt b/tests/root_io/CMakeLists.txt index 7f79a000b..3936a6cc5 100644 --- a/tests/root_io/CMakeLists.txt +++ b/tests/root_io/CMakeLists.txt @@ -8,6 +8,7 @@ set(root_dependent_tests read_and_write_frame_root.cpp write_interface_root.cpp read_interface_root.cpp + read_with_rdatasource_root.cpp ) if(ENABLE_RNTUPLE) set(root_dependent_tests diff --git a/tests/root_io/read_with_rdatasource_root.cpp b/tests/root_io/read_with_rdatasource_root.cpp new file mode 100644 index 000000000..975767a6e --- /dev/null +++ b/tests/root_io/read_with_rdatasource_root.cpp @@ -0,0 +1,43 @@ +#include "read_frame.h" +#include "read_frame_auxiliary.h" + +#include "podio/ROOTDataSource.h" +#include "datamodel/ExampleClusterCollection.h" + +#include +#include + + +ROOT::VecOps::RVec +getEnergy(const ExampleClusterCollection& inColl) { + ROOT::VecOps::RVec result; + + for (const auto& cluster: inColl) { + result.push_back(cluster.energy()); + } + + return result; +} + + +int main(int argc, const char* argv[]) { + std::string inputFile = "example_frame.root"; + if (argc == 2) { + inputFile = argv[1]; + } else if (argc > 2) { + std::cout << "Wrong number of arguments" << std::endl; + std::cout << "Usage: " << argv[0] << " FILE" << std::endl; + return 1; + } + + auto dframe = podio::CreateDataFrame(inputFile); + dframe.Describe().Print(); + std::cout << std::endl; + + auto cluterEnergy = dframe.Define("cluster_energy", getEnergy, {"clusters"}) + .Histo1D("cluster_energy"); + + cluterEnergy->Print(); + + return EXIT_SUCCESS; +} From 8bb3c0850f5704ea0c5bb8896e95abfecbe307eb Mon Sep 17 00:00:00 2001 From: Juraj Smiesko Date: Fri, 3 May 2024 14:22:20 +0200 Subject: [PATCH 02/16] Using /// and @ for the doxygen docs --- include/podio/ROOTDataSource.h | 13 -- src/ROOTDataSource.cc | 145 +++++++++++-------- tests/root_io/read_with_rdatasource_root.cpp | 1 - 3 files changed, 81 insertions(+), 78 deletions(-) diff --git a/include/podio/ROOTDataSource.h b/include/podio/ROOTDataSource.h index 9757df195..c1fd5401a 100644 --- a/include/podio/ROOTDataSource.h +++ b/include/podio/ROOTDataSource.h @@ -96,21 +96,8 @@ namespace podio { return readers; } - /** - * @brief Create RDataFrame from multiple Podio files. - * - * @param[in] filePathList List of file paths from which the RDataFrame - * will be created. - * @return RDataFrame created from input file list. - */ ROOT::RDataFrame CreateDataFrame(const std::vector& filePathList); - /** - * @brief Create RDataFrame from a Podio file. - * - * @param[in] filePath File path from which the RDataFrame will be created. - * @return RDataFrame created from input file list. - */ ROOT::RDataFrame CreateDataFrame(const std::string& filePath); } diff --git a/src/ROOTDataSource.cc b/src/ROOTDataSource.cc index 7cd52dbb7..c7b90fef0 100644 --- a/src/ROOTDataSource.cc +++ b/src/ROOTDataSource.cc @@ -20,18 +20,18 @@ bool loadROOTDataSource() { } namespace podio { - /** - * \brief Construct the podio::ROOTDataSource from the provided file. - */ + /// + /// @brief Construct the podio::ROOTDataSource from the provided file. + /// ROOTDataSource::ROOTDataSource(const std::string& filePath, int nEvents) : m_nSlots{1} { m_filePathList.emplace_back(filePath); SetupInput(nEvents); } - /** - * \brief Construct the podio::ROOTDataSource from the provided file list. - */ + /// + /// @brief Construct the podio::ROOTDataSource from the provided file list. + /// ROOTDataSource::ROOTDataSource( const std::vector& filePathList, int nEvents) : m_nSlots{1}, @@ -39,9 +39,9 @@ namespace podio { SetupInput(nEvents); } - /** - * \brief Setup input for the podio::ROOTDataSource. - */ + /// + /// @brief Setup input for the podio::ROOTDataSource. + /// void ROOTDataSource::SetupInput(int nEvents) { // std::cout << "podio::ROOTDataSource: Constructing the source ..." << std::endl; @@ -51,7 +51,7 @@ namespace podio { for (const auto& filePath : m_filePathList) { // Check if file exists - // Warning: file can be coming from web or eos + // TODO: file can be coming from web or eos // if (!std::filesystem::exists(filePath)) { // throw std::runtime_error("podio::ROOTDataSource: Provided file \"" // + filePath + "\" does not exist!"); @@ -116,9 +116,9 @@ namespace podio { } - /** - * \brief Inform the podio::ROOTDataSource of the desired level of parallelism. - */ + /// + /// @brief Inform the podio::ROOTDataSource of the desired level of parallelism. + /// void ROOTDataSource::SetNSlots(unsigned int nSlots) { // std::cout << "podio::ROOTDataSource: Setting num. of slots to: " << nSlots @@ -158,19 +158,19 @@ namespace podio { } - /** - * \brief Inform podio::ROOTDataSource that an event-loop is about to start. - */ + /// + /// @brief Inform podio::ROOTDataSource that an event-loop is about to start. + /// void ROOTDataSource::Initialize() { // std::cout << "podio::ROOTDataSource: Initializing the source ..." << std::endl; } - /** - * \brief Retrieve from podio::ROOTDataSource a set of ranges of entries that can be - * processed concurrently. - */ + /// + /// @brief Retrieve from podio::ROOTDataSource a set of ranges of entries that can be + /// processed concurrently. + /// std::vector> ROOTDataSource::GetEntryRanges() { // std::cout << "podio::ROOTDataSource: Getting entry ranges ..." << std::endl; @@ -217,10 +217,10 @@ namespace podio { } - /** - * \brief Inform podio::ROOTDataSource that a certain thread is about to start working - * on a certain range of entries. - */ + /// + /// @brief Inform podio::ROOTDataSource that a certain thread is about to start working + /// on a certain range of entries. + /// void ROOTDataSource::InitSlot([[maybe_unused]] unsigned int slot, [[maybe_unused]] ULong64_t firstEntry) { @@ -229,10 +229,10 @@ namespace podio { } - /** - * \brief Inform podio::ROOTDataSource that a certain thread is about to start working - * on a certain entry. - */ + /// + /// @brief Inform podio::ROOTDataSource that a certain thread is about to start working + /// on a certain entry. + /// bool ROOTDataSource::SetEntry(unsigned int slot, ULong64_t entry) { // std::cout << "podio::ROOTDataSource: In slot: " << slot << ", setting entry: " @@ -258,39 +258,41 @@ namespace podio { } - /** - * \brief Inform podio::ROOTDataSource that a certain thread finished working on a - * certain range of entries. - */ + /// + /// @brief Inform podio::ROOTDataSource that a certain thread finished working on a + /// certain range of entries. + /// void ROOTDataSource::FinalizeSlot([[maybe_unused]] unsigned int slot) { - // std::cout << "podio::ROOTDataSource: Finalizing slot: " << slot << std::endl; - // std::cout << "Reader: " << &m_podioReaderRefs[slot].get() << std::endl; - - // for (auto& collectionIndex: m_activeCollections) { - // std::cout << "CollName: " << m_columnNames.at(collectionIndex) << "\n"; - // std::cout << "Address: " << m_Collections[collectionIndex][slot] << "\n"; - // if (m_Collections[collectionIndex][slot]->isValid()) { - // std::cout << "Collection valid\n"; - // } - // std::cout << "Coll size: " << m_Collections[collectionIndex][slot]->size() << "\n"; - // } + /* + std::cout << "podio::ROOTDataSource: Finalizing slot: " << slot << std::endl; + std::cout << "Reader: " << &m_podioReaderRefs[slot].get() << std::endl; + + for (auto& collectionIndex: m_activeCollections) { + std::cout << "CollName: " << m_columnNames.at(collectionIndex) << "\n"; + std::cout << "Address: " << m_Collections[collectionIndex][slot] << "\n"; + if (m_Collections[collectionIndex][slot]->isValid()) { + std::cout << "Collection valid\n"; + } + std::cout << "Coll size: " << m_Collections[collectionIndex][slot]->size() << "\n"; + } + */ } - /** - * \brief Inform podio::ROOTDataSource that an event-loop finished. - */ + /// + /// @brief Inform podio::ROOTDataSource that an event-loop finished. + /// void ROOTDataSource::Finalize() { // std::cout << "podio::ROOTDataSource: Finalizing ..." << std::endl; } - /** - * \brief Type-erased vector of pointers to pointers to column values --- one - * per slot - */ + /// + /// @brief Type-erased vector of pointers to pointers to column values --- one + /// per slot + /// Record_t ROOTDataSource::GetColumnReadersImpl( std::string_view columnName, @@ -322,11 +324,13 @@ namespace podio { Record_t columnReaders(m_nSlots); for (size_t slotIndex = 0; slotIndex < m_nSlots; ++slotIndex) { - // std::cout << " Column index: " << columnIndex << "\n"; - // std::cout << " Slot index: " << slotIndex << "\n"; - // std::cout << " Address: " - // << &m_Collections[columnIndex][slotIndex] - // << std::endl; + /* + std::cout << " Column index: " << columnIndex << "\n"; + std::cout << " Slot index: " << slotIndex << "\n"; + std::cout << " Address: " + << &m_Collections[columnIndex][slotIndex] + << std::endl; + */ columnReaders[slotIndex] = (void*) &m_Collections[columnIndex][slotIndex]; } @@ -334,9 +338,9 @@ namespace podio { } - /** - * \brief Returns a reference to the collection of the dataset's column names - */ + /// + /// @brief Returns a reference to the collection of the dataset's column names + /// const std::vector& ROOTDataSource::GetColumnNames() const { // std::cout << "podio::ROOTDataSource: Looking for column names" << std::endl; @@ -344,9 +348,9 @@ namespace podio { return m_columnNames; } - /** - * \brief Checks if the dataset has a certain column. - */ + /// + /// @brief Checks if the dataset has a certain column. + /// bool ROOTDataSource::HasColumn(std::string_view columnName) const { // std::cout << "podio::ROOTDataSource: Looking for column: " << columnName @@ -362,9 +366,9 @@ namespace podio { } - /** - * \brief Type of a column as a string. Required for JITting. - */ + /// + /// @brief Type of a column as a string. Required for JITting. + /// std::string ROOTDataSource::GetTypeName(std::string_view columnName) const { // std::cout << "podio::ROOTDataSource: Looking for type name of column: " @@ -383,6 +387,13 @@ namespace podio { return "float"; } + /// + /// @brief Create RDataFrame from multiple Podio files. + /// + /// @param[in] filePathList List of file paths from which the RDataFrame + /// will be created. + /// @return RDataFrame created from input file list. + /// ROOT::RDataFrame CreateDataFrame(const std::vector& filePathList) { ROOT::RDataFrame rdf(std::make_unique(filePathList)); @@ -390,6 +401,12 @@ namespace podio { return rdf; } + /// + /// @brief Create RDataFrame from a Podio file. + /// + /// @param[in] filePath File path from which the RDataFrame will be created. + /// @return RDataFrame created from input file list. + /// ROOT::RDataFrame CreateDataFrame(const std::string& filePath) { std::vector filePathList; diff --git a/tests/root_io/read_with_rdatasource_root.cpp b/tests/root_io/read_with_rdatasource_root.cpp index 975767a6e..1933e6c83 100644 --- a/tests/root_io/read_with_rdatasource_root.cpp +++ b/tests/root_io/read_with_rdatasource_root.cpp @@ -36,7 +36,6 @@ int main(int argc, const char* argv[]) { auto cluterEnergy = dframe.Define("cluster_energy", getEnergy, {"clusters"}) .Histo1D("cluster_energy"); - cluterEnergy->Print(); return EXIT_SUCCESS; From 684b2fba68ddd4d453cf3212f7404ae41455ce0a Mon Sep 17 00:00:00 2001 From: Juraj Smiesko Date: Fri, 3 May 2024 14:58:41 +0200 Subject: [PATCH 03/16] Moving doc strings to header file --- include/podio/ROOTDataSource.h | 89 +++++++++++++++++++++++++++++++--- src/ROOTDataSource.cc | 76 +++-------------------------- 2 files changed, 91 insertions(+), 74 deletions(-) diff --git a/include/podio/ROOTDataSource.h b/include/podio/ROOTDataSource.h index c1fd5401a..aaace54ca 100644 --- a/include/podio/ROOTDataSource.h +++ b/include/podio/ROOTDataSource.h @@ -19,34 +19,87 @@ namespace podio { class ROOTDataSource : public ROOT::RDF::RDataSource { public: + /// + /// @brief Construct the podio::ROOTDataSource from the provided file. + /// explicit ROOTDataSource(const std::string& filePath, int nEvents = -1); + + /// + /// @brief Construct the podio::ROOTDataSource from the provided file + /// list. + /// explicit ROOTDataSource(const std::vector& filePathList, int nEvents = -1); + /// + /// @brief Inform the podio::ROOTDataSource of the desired level of + /// parallelism. + /// void SetNSlots(unsigned int nSlots) override; + /// + /// @brief Retrieve from podio::ROOTDataSource per-thread readers for the + /// desired columns. + /// template std::vector GetColumnReaders(std::string_view columnName); + /// + /// @brief Inform podio::ROOTDataSource that an event-loop is about to + /// start. + /// void Initialize() override; + /// + /// @brief Retrieve from podio::ROOTDataSource a set of ranges of entries + /// that can be processed concurrently. + /// std::vector> GetEntryRanges() override; + /// + /// @brief Inform podio::ROOTDataSource that a certain thread is about to + /// start working on a certain range of entries. + /// void InitSlot(unsigned int slot, ULong64_t firstEntry) override; + /// + /// @brief Inform podio::ROOTDataSource that a certain thread is about to + /// start working on a certain entry. + /// bool SetEntry(unsigned int slot, ULong64_t entry) override; + /// + /// @brief Inform podio::ROOTDataSource that a certain thread finished + /// working on a certain range of entries. + /// void FinalizeSlot(unsigned int slot) override; + /// + /// @brief Inform podio::ROOTDataSource that an event-loop finished. + /// void Finalize() override; + /// + /// @brief Returns a reference to the collection of the dataset's column + /// names + /// const std::vector& GetColumnNames() const override; + /// + /// @brief Checks if the dataset has a certain column. + /// bool HasColumn(std::string_view columnName) const override; + /// + /// @brief Type of a column as a string. Required for JITting. + /// std::string GetTypeName(std::string_view columnName) const override; protected: + /// + /// @brief Type-erased vector of pointers to pointers to column + /// values --- one per slot. + /// Record_t GetColumnReadersImpl ( std::string_view name, const std::type_info& typeInfo) override; @@ -56,35 +109,46 @@ namespace podio { private: /// Number of slots/threads unsigned int m_nSlots = 1; + /// Input filename std::vector m_filePathList = {}; + /// Total number of events unsigned int m_nEvents = 0; + /// Ranges of events available to be processed std::vector> m_rangesAvailable = {}; + /// Ranges of events available ever created std::vector> m_rangesAll = {}; + /// Column names std::vector m_columnNames {}; + /// Column types std::vector m_columnTypes = {}; + /// Collections, m_Collections[columnIndex][slotIndex] std::vector> m_Collections = {}; + /// Active collections std::vector m_activeCollections = {}; + /// Root podio readers std::vector> m_podioReaders = {}; + /// Podio frames std::vector> m_frames = {}; - /// Setup input + + /// + /// @brief Setup input for the podio::ROOTDataSource. + /// + /// @param[in] Number of events. + /// @return void. + /// void SetupInput(int nEvents); }; - - /** - * \brief Retrieve from podio::ROOTDataSource per-thread readers for the - * desired columns. - */ template std::vector ROOTDataSource::GetColumnReaders(std::string_view columnName) { @@ -96,8 +160,21 @@ namespace podio { return readers; } + /// + /// @brief Create RDataFrame from multiple Podio files. + /// + /// @param[in] filePathList List of file paths from which the RDataFrame + /// will be created. + /// @return RDataFrame created from input file list. + /// ROOT::RDataFrame CreateDataFrame(const std::vector& filePathList); + /// + /// @brief Create RDataFrame from a Podio file. + /// + /// @param[in] filePath File path from which the RDataFrame will be created. + /// @return RDataFrame created from input file list. + /// ROOT::RDataFrame CreateDataFrame(const std::string& filePath); } diff --git a/src/ROOTDataSource.cc b/src/ROOTDataSource.cc index c7b90fef0..3bfaf5e3a 100644 --- a/src/ROOTDataSource.cc +++ b/src/ROOTDataSource.cc @@ -15,23 +15,14 @@ #include #include -bool loadROOTDataSource() { - return true; -} - namespace podio { - /// - /// @brief Construct the podio::ROOTDataSource from the provided file. - /// ROOTDataSource::ROOTDataSource(const std::string& filePath, int nEvents) : m_nSlots{1} { m_filePathList.emplace_back(filePath); SetupInput(nEvents); } - /// - /// @brief Construct the podio::ROOTDataSource from the provided file list. - /// + ROOTDataSource::ROOTDataSource( const std::vector& filePathList, int nEvents) : m_nSlots{1}, @@ -39,9 +30,9 @@ namespace podio { SetupInput(nEvents); } - /// - /// @brief Setup input for the podio::ROOTDataSource. - /// + + /// @TODO Check for the existence of the file, which might be coming from web + /// or EOS. void ROOTDataSource::SetupInput(int nEvents) { // std::cout << "podio::ROOTDataSource: Constructing the source ..." << std::endl; @@ -51,7 +42,6 @@ namespace podio { for (const auto& filePath : m_filePathList) { // Check if file exists - // TODO: file can be coming from web or eos // if (!std::filesystem::exists(filePath)) { // throw std::runtime_error("podio::ROOTDataSource: Provided file \"" // + filePath + "\" does not exist!"); @@ -116,9 +106,6 @@ namespace podio { } - /// - /// @brief Inform the podio::ROOTDataSource of the desired level of parallelism. - /// void ROOTDataSource::SetNSlots(unsigned int nSlots) { // std::cout << "podio::ROOTDataSource: Setting num. of slots to: " << nSlots @@ -158,19 +145,12 @@ namespace podio { } - /// - /// @brief Inform podio::ROOTDataSource that an event-loop is about to start. - /// void ROOTDataSource::Initialize() { // std::cout << "podio::ROOTDataSource: Initializing the source ..." << std::endl; } - /// - /// @brief Retrieve from podio::ROOTDataSource a set of ranges of entries that can be - /// processed concurrently. - /// std::vector> ROOTDataSource::GetEntryRanges() { // std::cout << "podio::ROOTDataSource: Getting entry ranges ..." << std::endl; @@ -217,10 +197,6 @@ namespace podio { } - /// - /// @brief Inform podio::ROOTDataSource that a certain thread is about to start working - /// on a certain range of entries. - /// void ROOTDataSource::InitSlot([[maybe_unused]] unsigned int slot, [[maybe_unused]] ULong64_t firstEntry) { @@ -229,10 +205,6 @@ namespace podio { } - /// - /// @brief Inform podio::ROOTDataSource that a certain thread is about to start working - /// on a certain entry. - /// bool ROOTDataSource::SetEntry(unsigned int slot, ULong64_t entry) { // std::cout << "podio::ROOTDataSource: In slot: " << slot << ", setting entry: " @@ -258,10 +230,6 @@ namespace podio { } - /// - /// @brief Inform podio::ROOTDataSource that a certain thread finished working on a - /// certain range of entries. - /// void ROOTDataSource::FinalizeSlot([[maybe_unused]] unsigned int slot) { /* @@ -280,19 +248,12 @@ namespace podio { } - /// - /// @brief Inform podio::ROOTDataSource that an event-loop finished. - /// void ROOTDataSource::Finalize() { // std::cout << "podio::ROOTDataSource: Finalizing ..." << std::endl; } - /// - /// @brief Type-erased vector of pointers to pointers to column values --- one - /// per slot - /// Record_t ROOTDataSource::GetColumnReadersImpl( std::string_view columnName, @@ -338,9 +299,6 @@ namespace podio { } - /// - /// @brief Returns a reference to the collection of the dataset's column names - /// const std::vector& ROOTDataSource::GetColumnNames() const { // std::cout << "podio::ROOTDataSource: Looking for column names" << std::endl; @@ -348,9 +306,7 @@ namespace podio { return m_columnNames; } - /// - /// @brief Checks if the dataset has a certain column. - /// + bool ROOTDataSource::HasColumn(std::string_view columnName) const { // std::cout << "podio::ROOTDataSource: Looking for column: " << columnName @@ -366,9 +322,6 @@ namespace podio { } - /// - /// @brief Type of a column as a string. Required for JITting. - /// std::string ROOTDataSource::GetTypeName(std::string_view columnName) const { // std::cout << "podio::ROOTDataSource: Looking for type name of column: " @@ -387,13 +340,7 @@ namespace podio { return "float"; } - /// - /// @brief Create RDataFrame from multiple Podio files. - /// - /// @param[in] filePathList List of file paths from which the RDataFrame - /// will be created. - /// @return RDataFrame created from input file list. - /// + ROOT::RDataFrame CreateDataFrame(const std::vector& filePathList) { ROOT::RDataFrame rdf(std::make_unique(filePathList)); @@ -401,17 +348,10 @@ namespace podio { return rdf; } - /// - /// @brief Create RDataFrame from a Podio file. - /// - /// @param[in] filePath File path from which the RDataFrame will be created. - /// @return RDataFrame created from input file list. - /// + ROOT::RDataFrame CreateDataFrame(const std::string& filePath) { - std::vector filePathList; - filePathList.emplace_back(filePath); - ROOT::RDataFrame rdf(std::make_unique(filePathList)); + ROOT::RDataFrame rdf(std::make_unique(filePath)); return rdf; } From f63db32a8ca0caa3975afc0a180d70329cff5cc0 Mon Sep 17 00:00:00 2001 From: Juraj Smiesko Date: Fri, 3 May 2024 14:59:19 +0200 Subject: [PATCH 04/16] Clang format --- include/podio/ROOTDataSource.h | 301 ++++++----- src/ROOTDataSource.cc | 498 +++++++++---------- tests/root_io/read_with_rdatasource_root.cpp | 12 +- 3 files changed, 377 insertions(+), 434 deletions(-) diff --git a/include/podio/ROOTDataSource.h b/include/podio/ROOTDataSource.h index aaace54ca..f336a078b 100644 --- a/include/podio/ROOTDataSource.h +++ b/include/podio/ROOTDataSource.h @@ -2,180 +2,177 @@ #define PODIO_DATASOURCE_H__ // STL -#include #include +#include // ROOT #include #include // Podio +#include #include #include -#include namespace podio { - using Record_t = std::vector; - - class ROOTDataSource : public ROOT::RDF::RDataSource { - public: - /// - /// @brief Construct the podio::ROOTDataSource from the provided file. - /// - explicit ROOTDataSource(const std::string& filePath, int nEvents = -1); - - /// - /// @brief Construct the podio::ROOTDataSource from the provided file - /// list. - /// - explicit ROOTDataSource(const std::vector& filePathList, - int nEvents = -1); - - /// - /// @brief Inform the podio::ROOTDataSource of the desired level of - /// parallelism. - /// - void SetNSlots(unsigned int nSlots) override; - - /// - /// @brief Retrieve from podio::ROOTDataSource per-thread readers for the - /// desired columns. - /// - template - std::vector GetColumnReaders(std::string_view columnName); - - /// - /// @brief Inform podio::ROOTDataSource that an event-loop is about to - /// start. - /// - void Initialize() override; - - /// - /// @brief Retrieve from podio::ROOTDataSource a set of ranges of entries - /// that can be processed concurrently. - /// - std::vector> GetEntryRanges() override; - - /// - /// @brief Inform podio::ROOTDataSource that a certain thread is about to - /// start working on a certain range of entries. - /// - void InitSlot(unsigned int slot, ULong64_t firstEntry) override; - - /// - /// @brief Inform podio::ROOTDataSource that a certain thread is about to - /// start working on a certain entry. - /// - bool SetEntry(unsigned int slot, ULong64_t entry) override; - - /// - /// @brief Inform podio::ROOTDataSource that a certain thread finished - /// working on a certain range of entries. - /// - void FinalizeSlot(unsigned int slot) override; - - /// - /// @brief Inform podio::ROOTDataSource that an event-loop finished. - /// - void Finalize() override; - - /// - /// @brief Returns a reference to the collection of the dataset's column - /// names - /// - const std::vector& GetColumnNames() const override; - - /// - /// @brief Checks if the dataset has a certain column. - /// - bool HasColumn(std::string_view columnName) const override; - - /// - /// @brief Type of a column as a string. Required for JITting. - /// - std::string GetTypeName(std::string_view columnName) const override; - - protected: - /// - /// @brief Type-erased vector of pointers to pointers to column - /// values --- one per slot. - /// - Record_t GetColumnReadersImpl ( - std::string_view name, - const std::type_info& typeInfo) override; - - std::string AsString() override { return "Podio data source"; }; - - private: - /// Number of slots/threads - unsigned int m_nSlots = 1; - - /// Input filename - std::vector m_filePathList = {}; - - /// Total number of events - unsigned int m_nEvents = 0; - - /// Ranges of events available to be processed - std::vector> m_rangesAvailable = {}; - - /// Ranges of events available ever created - std::vector> m_rangesAll = {}; - - /// Column names - std::vector m_columnNames {}; - - /// Column types - std::vector m_columnTypes = {}; - - /// Collections, m_Collections[columnIndex][slotIndex] - std::vector> m_Collections = {}; - - /// Active collections - std::vector m_activeCollections = {}; - - /// Root podio readers - std::vector> m_podioReaders = {}; - - /// Podio frames - std::vector> m_frames = {}; - - /// - /// @brief Setup input for the podio::ROOTDataSource. - /// - /// @param[in] Number of events. - /// @return void. - /// - void SetupInput(int nEvents); - }; +using Record_t = std::vector; - template - std::vector - ROOTDataSource::GetColumnReaders(std::string_view columnName) { - std::cout << "podio::ROOTDataSource: Getting column readers for column: " - << columnName << std::endl; +class ROOTDataSource : public ROOT::RDF::RDataSource { +public: + /// + /// @brief Construct the podio::ROOTDataSource from the provided file. + /// + explicit ROOTDataSource(const std::string& filePath, int nEvents = -1); - std::vector readers; + /// + /// @brief Construct the podio::ROOTDataSource from the provided file + /// list. + /// + explicit ROOTDataSource(const std::vector& filePathList, int nEvents = -1); - return readers; - } + /// + /// @brief Inform the podio::ROOTDataSource of the desired level of + /// parallelism. + /// + void SetNSlots(unsigned int nSlots) override; /// - /// @brief Create RDataFrame from multiple Podio files. + /// @brief Retrieve from podio::ROOTDataSource per-thread readers for the + /// desired columns. /// - /// @param[in] filePathList List of file paths from which the RDataFrame - /// will be created. - /// @return RDataFrame created from input file list. + template + std::vector GetColumnReaders(std::string_view columnName); + /// - ROOT::RDataFrame CreateDataFrame(const std::vector& filePathList); + /// @brief Inform podio::ROOTDataSource that an event-loop is about to + /// start. + /// + void Initialize() override; /// - /// @brief Create RDataFrame from a Podio file. + /// @brief Retrieve from podio::ROOTDataSource a set of ranges of entries + /// that can be processed concurrently. + /// + std::vector> GetEntryRanges() override; + /// - /// @param[in] filePath File path from which the RDataFrame will be created. - /// @return RDataFrame created from input file list. + /// @brief Inform podio::ROOTDataSource that a certain thread is about to + /// start working on a certain range of entries. + /// + void InitSlot(unsigned int slot, ULong64_t firstEntry) override; + + /// + /// @brief Inform podio::ROOTDataSource that a certain thread is about to + /// start working on a certain entry. + /// + bool SetEntry(unsigned int slot, ULong64_t entry) override; + + /// + /// @brief Inform podio::ROOTDataSource that a certain thread finished + /// working on a certain range of entries. + /// + void FinalizeSlot(unsigned int slot) override; + + /// + /// @brief Inform podio::ROOTDataSource that an event-loop finished. + /// + void Finalize() override; + + /// + /// @brief Returns a reference to the collection of the dataset's column + /// names + /// + const std::vector& GetColumnNames() const override; + /// - ROOT::RDataFrame CreateDataFrame(const std::string& filePath); + /// @brief Checks if the dataset has a certain column. + /// + bool HasColumn(std::string_view columnName) const override; + + /// + /// @brief Type of a column as a string. Required for JITting. + /// + std::string GetTypeName(std::string_view columnName) const override; + +protected: + /// + /// @brief Type-erased vector of pointers to pointers to column + /// values --- one per slot. + /// + Record_t GetColumnReadersImpl(std::string_view name, const std::type_info& typeInfo) override; + + std::string AsString() override { + return "Podio data source"; + }; + +private: + /// Number of slots/threads + unsigned int m_nSlots = 1; + + /// Input filename + std::vector m_filePathList = {}; + + /// Total number of events + unsigned int m_nEvents = 0; + + /// Ranges of events available to be processed + std::vector> m_rangesAvailable = {}; + + /// Ranges of events available ever created + std::vector> m_rangesAll = {}; + + /// Column names + std::vector m_columnNames{}; + + /// Column types + std::vector m_columnTypes = {}; + + /// Collections, m_Collections[columnIndex][slotIndex] + std::vector> m_Collections = {}; + + /// Active collections + std::vector m_activeCollections = {}; + + /// Root podio readers + std::vector> m_podioReaders = {}; + + /// Podio frames + std::vector> m_frames = {}; + + /// + /// @brief Setup input for the podio::ROOTDataSource. + /// + /// @param[in] Number of events. + /// @return void. + /// + void SetupInput(int nEvents); +}; + +template +std::vector ROOTDataSource::GetColumnReaders(std::string_view columnName) { + std::cout << "podio::ROOTDataSource: Getting column readers for column: " << columnName << std::endl; + + std::vector readers; + + return readers; } +/// +/// @brief Create RDataFrame from multiple Podio files. +/// +/// @param[in] filePathList List of file paths from which the RDataFrame +/// will be created. +/// @return RDataFrame created from input file list. +/// +ROOT::RDataFrame CreateDataFrame(const std::vector& filePathList); + +/// +/// @brief Create RDataFrame from a Podio file. +/// +/// @param[in] filePath File path from which the RDataFrame will be created. +/// @return RDataFrame created from input file list. +/// +ROOT::RDataFrame CreateDataFrame(const std::string& filePath); +} // namespace podio + #endif /* PODIO_DATASOURCE_H__ */ diff --git a/src/ROOTDataSource.cc b/src/ROOTDataSource.cc index 3bfaf5e3a..9ee09cc53 100644 --- a/src/ROOTDataSource.cc +++ b/src/ROOTDataSource.cc @@ -3,9 +3,9 @@ // STL #include #include -#include -#include #include +#include +#include #include // ROOT @@ -16,343 +16,293 @@ #include namespace podio { - ROOTDataSource::ROOTDataSource(const std::string& filePath, - int nEvents) : m_nSlots{1} { - m_filePathList.emplace_back(filePath); - SetupInput(nEvents); - } - +ROOTDataSource::ROOTDataSource(const std::string& filePath, int nEvents) : m_nSlots{1} { + m_filePathList.emplace_back(filePath); + SetupInput(nEvents); +} - ROOTDataSource::ROOTDataSource( - const std::vector& filePathList, - int nEvents) : m_nSlots{1}, - m_filePathList{filePathList} { - SetupInput(nEvents); - } +ROOTDataSource::ROOTDataSource(const std::vector& filePathList, int nEvents) : + m_nSlots{1}, m_filePathList{filePathList} { + SetupInput(nEvents); +} +/// @TODO Check for the existence of the file, which might be coming from web +/// or EOS. +void ROOTDataSource::SetupInput(int nEvents) { + // std::cout << "podio::ROOTDataSource: Constructing the source ..." << std::endl; - /// @TODO Check for the existence of the file, which might be coming from web - /// or EOS. - void ROOTDataSource::SetupInput(int nEvents) { - // std::cout << "podio::ROOTDataSource: Constructing the source ..." << std::endl; + if (m_filePathList.empty()) { + throw std::runtime_error("podio::ROOTDataSource: No input files provided!"); + } - if (m_filePathList.empty()) { - throw std::runtime_error("podio::ROOTDataSource: No input files provided!"); + for (const auto& filePath : m_filePathList) { + // Check if file exists + // if (!std::filesystem::exists(filePath)) { + // throw std::runtime_error("podio::ROOTDataSource: Provided file \"" + // + filePath + "\" does not exist!"); + // } + + // Check if the provided file contains required metadata + std::unique_ptr inFile(TFile::Open(filePath.data(), "READ")); + auto metadata = inFile->Get("podio_metadata"); + if (!metadata) { + throw std::runtime_error("podio::ROOTDataSource: Provided file is missing podio metadata!"); } + } + // Create probing frame + podio::Frame frame; + unsigned int nEventsInFiles = 0; + podio::ROOTReader podioReader; + podioReader.openFiles(m_filePathList); + nEventsInFiles = podioReader.getEntries("events"); + frame = podio::Frame(podioReader.readEntry("events", 0)); + + // Determine over how many events to run + if (nEventsInFiles > 0) { + /* + std::cout << "podio::ROOTDataSource: Found " << nEventsInFiles + << " events in files: \n"; for (const auto& filePath : m_filePathList) { - // Check if file exists - // if (!std::filesystem::exists(filePath)) { - // throw std::runtime_error("podio::ROOTDataSource: Provided file \"" - // + filePath + "\" does not exist!"); - // } - - // Check if the provided file contains required metadata - std::unique_ptr inFile(TFile::Open(filePath.data(), "READ")); - auto metadata = inFile->Get("podio_metadata"); - if (!metadata) { - throw std::runtime_error( - "podio::ROOTDataSource: Provided file is missing podio metadata!"); - } + std::cout << " - " << filePath << "\n"; } + */ + } else { + throw std::runtime_error("podio::ROOTDataSource: No events found!"); + } - // Create probing frame - podio::Frame frame; - unsigned int nEventsInFiles = 0; - podio::ROOTReader podioReader; - podioReader.openFiles(m_filePathList); - nEventsInFiles = podioReader.getEntries("events"); - frame = podio::Frame(podioReader.readEntry("events", 0)); - - // Determine over how many events to run - if (nEventsInFiles > 0) { - /* - std::cout << "podio::ROOTDataSource: Found " << nEventsInFiles - << " events in files: \n"; - for (const auto& filePath : m_filePathList) { - std::cout << " - " << filePath << "\n"; - } - */ - } else { - throw std::runtime_error("podio::ROOTDataSource: No events found!"); - } + if (nEvents < 0) { + m_nEvents = nEventsInFiles; + } else if (nEvents == 0) { + throw std::runtime_error("podio::ROOTDataSource: Requested to run over zero events!"); + } else { + m_nEvents = nEvents; + } + if (nEventsInFiles < m_nEvents) { + m_nEvents = nEventsInFiles; + } - if (nEvents < 0) { - m_nEvents = nEventsInFiles; - } else if (nEvents == 0) { - throw std::runtime_error( - "podio::ROOTDataSource: Requested to run over zero events!"); - } else { - m_nEvents = nEvents; - } - if (nEventsInFiles < m_nEvents) { - m_nEvents = nEventsInFiles; + // std::cout << "podio::ROOTDataSource: Running over " << m_nEvents << " events." + // << std::endl; + + // Get collections stored in the files + std::vector collNames = frame.getAvailableCollections(); + // std::cout << "podio::ROOTDataSource: Found following collections:\n"; + for (auto& collName : collNames) { + const podio::CollectionBase* coll = frame.get(collName); + if (coll->isValid()) { + m_columnNames.emplace_back(collName); + m_columnTypes.emplace_back(coll->getValueTypeName()); + // std::cout << " - " << collName << "\n"; } + } +} - // std::cout << "podio::ROOTDataSource: Running over " << m_nEvents << " events." - // << std::endl; - - // Get collections stored in the files - std::vector collNames = frame.getAvailableCollections(); - // std::cout << "podio::ROOTDataSource: Found following collections:\n"; - for (auto& collName: collNames) { - const podio::CollectionBase* coll = frame.get(collName); - if (coll->isValid()) { - m_columnNames.emplace_back(collName); - m_columnTypes.emplace_back(coll->getValueTypeName()); - // std::cout << " - " << collName << "\n"; - } - } +void ROOTDataSource::SetNSlots(unsigned int nSlots) { + // std::cout << "podio::ROOTDataSource: Setting num. of slots to: " << nSlots + // << std::endl; + m_nSlots = nSlots; + + if (m_nSlots > m_nEvents) { + throw std::runtime_error("podio::ROOTDataSource: Number of events too small!"); } + int eventsPerSlot = m_nEvents / m_nSlots; + for (size_t i = 0; i < (m_nSlots - 1); ++i) { + m_rangesAll.emplace_back(eventsPerSlot * i, eventsPerSlot * (i + 1)); + } + m_rangesAll.emplace_back(eventsPerSlot * (m_nSlots - 1), m_nEvents); + m_rangesAvailable = m_rangesAll; - void - ROOTDataSource::SetNSlots(unsigned int nSlots) { - // std::cout << "podio::ROOTDataSource: Setting num. of slots to: " << nSlots - // << std::endl; - m_nSlots = nSlots; + // Initialize set of addresses needed + m_Collections.resize(m_columnNames.size(), std::vector(m_nSlots, nullptr)); - if (m_nSlots > m_nEvents) { - throw std::runtime_error("podio::ROOTDataSource: Number of events too small!"); - } + // Initialize podio readers + for (size_t i = 0; i < m_nSlots; ++i) { + m_podioReaders.emplace_back(std::make_unique()); + } - int eventsPerSlot = m_nEvents / m_nSlots; - for (size_t i = 0; i < (m_nSlots - 1); ++i) { - m_rangesAll.emplace_back(eventsPerSlot * i, eventsPerSlot * (i + 1)); - } - m_rangesAll.emplace_back(eventsPerSlot * (m_nSlots - 1), m_nEvents); - m_rangesAvailable = m_rangesAll; + for (size_t i = 0; i < m_nSlots; ++i) { + m_podioReaders[i]->openFiles(m_filePathList); + } - // Initialize set of addresses needed - m_Collections.resize( - m_columnNames.size(), - std::vector(m_nSlots, nullptr)); + for (size_t i = 0; i < m_nSlots; ++i) { + m_frames.emplace_back(std::make_unique(podio::Frame(m_podioReaders[i]->readEntry("events", 0)))); + } +} - // Initialize podio readers - for (size_t i = 0; i < m_nSlots; ++i) { - m_podioReaders.emplace_back(std::make_unique()); - } +void ROOTDataSource::Initialize() { + // std::cout << "podio::ROOTDataSource: Initializing the source ..." << std::endl; +} - for (size_t i = 0; i < m_nSlots; ++i) { - m_podioReaders[i]->openFiles(m_filePathList); - } +std::vector> ROOTDataSource::GetEntryRanges() { + // std::cout << "podio::ROOTDataSource: Getting entry ranges ..." << std::endl; - for (size_t i = 0; i < m_nSlots; ++i) { - m_frames.emplace_back( - std::make_unique( - podio::Frame(m_podioReaders[i]->readEntry("events", 0)))); + std::vector> rangesToBeProcessed; + for (auto& range : m_rangesAvailable) { + rangesToBeProcessed.emplace_back(std::pair{range.first, range.second}); + if (rangesToBeProcessed.size() >= m_nSlots) { + break; } } - - void - ROOTDataSource::Initialize() { - // std::cout << "podio::ROOTDataSource: Initializing the source ..." << std::endl; + if (m_rangesAvailable.size() > m_nSlots) { + m_rangesAvailable.erase(m_rangesAvailable.begin(), m_rangesAvailable.begin() + m_nSlots); + } else { + m_rangesAvailable.erase(m_rangesAvailable.begin(), m_rangesAvailable.end()); } + /* + std::cout << "podio::ROOTDataSource: Ranges to be processed:\n"; + for (auto& range: rangesToBeProcessed) { + std::cout << " {" << range.first << ", " << range.second + << "}\n"; + } - std::vector> - ROOTDataSource::GetEntryRanges() { - // std::cout << "podio::ROOTDataSource: Getting entry ranges ..." << std::endl; + if (m_rangesAvailable.size() > 0) { - std::vector> rangesToBeProcessed; + std::cout << "podio::ROOTDataSource: Ranges remaining:\n"; for (auto& range: m_rangesAvailable) { - rangesToBeProcessed.emplace_back( - std::pair{range.first, range.second} - ); - if (rangesToBeProcessed.size() >= m_nSlots) { - break; - } - } - - if (m_rangesAvailable.size() > m_nSlots) { - m_rangesAvailable.erase(m_rangesAvailable.begin(), - m_rangesAvailable.begin() + m_nSlots); - } else { - m_rangesAvailable.erase(m_rangesAvailable.begin(), - m_rangesAvailable.end()); - } - - - /* - std::cout << "podio::ROOTDataSource: Ranges to be processed:\n"; - for (auto& range: rangesToBeProcessed) { std::cout << " {" << range.first << ", " << range.second << "}\n"; } - - if (m_rangesAvailable.size() > 0) { - - std::cout << "podio::ROOTDataSource: Ranges remaining:\n"; - for (auto& range: m_rangesAvailable) { - std::cout << " {" << range.first << ", " << range.second - << "}\n"; - } - } else { - std::cout << "podio::ROOTDataSource: No more remaining ranges.\n"; - } - */ - - return rangesToBeProcessed; + } else { + std::cout << "podio::ROOTDataSource: No more remaining ranges.\n"; } + */ + return rangesToBeProcessed; +} - void - ROOTDataSource::InitSlot([[maybe_unused]] unsigned int slot, - [[maybe_unused]] ULong64_t firstEntry) { - // std::cout << "podio::ROOTDataSource: Initializing slot: " << slot - // << " with first entry " << firstEntry << std::endl; - } - +void ROOTDataSource::InitSlot([[maybe_unused]] unsigned int slot, [[maybe_unused]] ULong64_t firstEntry) { + // std::cout << "podio::ROOTDataSource: Initializing slot: " << slot + // << " with first entry " << firstEntry << std::endl; +} - bool - ROOTDataSource::SetEntry(unsigned int slot, ULong64_t entry) { - // std::cout << "podio::ROOTDataSource: In slot: " << slot << ", setting entry: " - // << entry << std::endl; - - m_frames[slot] = std::make_unique( - podio::Frame(m_podioReaders[slot]->readEntry("events", entry))); - - for (auto& collectionIndex: m_activeCollections) { - m_Collections[collectionIndex][slot] = - m_frames[slot]->get(m_columnNames.at(collectionIndex)); - /* - std::cout << "CollName: " << m_columnNames.at(collectionIndex) << "\n"; - std::cout << "Address: " << m_Collections[collectionIndex][slot] << "\n"; - std::cout << "Coll size: " << m_Collections[collectionIndex][slot]->size() << "\n"; - if (m_Collections[collectionIndex][slot]->isValid()) { - std::cout << "Collection valid\n"; - } - */ - } +bool ROOTDataSource::SetEntry(unsigned int slot, ULong64_t entry) { + // std::cout << "podio::ROOTDataSource: In slot: " << slot << ", setting entry: " + // << entry << std::endl; - return true; - } + m_frames[slot] = std::make_unique(podio::Frame(m_podioReaders[slot]->readEntry("events", entry))); - - void - ROOTDataSource::FinalizeSlot([[maybe_unused]] unsigned int slot) { + for (auto& collectionIndex : m_activeCollections) { + m_Collections[collectionIndex][slot] = m_frames[slot]->get(m_columnNames.at(collectionIndex)); /* - std::cout << "podio::ROOTDataSource: Finalizing slot: " << slot << std::endl; - std::cout << "Reader: " << &m_podioReaderRefs[slot].get() << std::endl; - - for (auto& collectionIndex: m_activeCollections) { - std::cout << "CollName: " << m_columnNames.at(collectionIndex) << "\n"; - std::cout << "Address: " << m_Collections[collectionIndex][slot] << "\n"; - if (m_Collections[collectionIndex][slot]->isValid()) { - std::cout << "Collection valid\n"; - } - std::cout << "Coll size: " << m_Collections[collectionIndex][slot]->size() << "\n"; + std::cout << "CollName: " << m_columnNames.at(collectionIndex) << "\n"; + std::cout << "Address: " << m_Collections[collectionIndex][slot] << "\n"; + std::cout << "Coll size: " << m_Collections[collectionIndex][slot]->size() << "\n"; + if (m_Collections[collectionIndex][slot]->isValid()) { + std::cout << "Collection valid\n"; } */ } + return true; +} + +void ROOTDataSource::FinalizeSlot([[maybe_unused]] unsigned int slot) { + /* + std::cout << "podio::ROOTDataSource: Finalizing slot: " << slot << std::endl; + std::cout << "Reader: " << &m_podioReaderRefs[slot].get() << std::endl; - void - ROOTDataSource::Finalize() { - // std::cout << "podio::ROOTDataSource: Finalizing ..." << std::endl; + for (auto& collectionIndex: m_activeCollections) { + std::cout << "CollName: " << m_columnNames.at(collectionIndex) << "\n"; + std::cout << "Address: " << m_Collections[collectionIndex][slot] << "\n"; + if (m_Collections[collectionIndex][slot]->isValid()) { + std::cout << "Collection valid\n"; + } + std::cout << "Coll size: " << m_Collections[collectionIndex][slot]->size() << "\n"; } + */ +} +void ROOTDataSource::Finalize() { + // std::cout << "podio::ROOTDataSource: Finalizing ..." << std::endl; +} - Record_t - ROOTDataSource::GetColumnReadersImpl( - std::string_view columnName, - [[maybe_unused]] const std::type_info& typeInfo) { - /* - std::cout << "podio::ROOTDataSource: Getting column reader implementation for column:\n" - << " " << columnName - << "\n with type: " << typeInfo.name() << std::endl; - */ +Record_t ROOTDataSource::GetColumnReadersImpl(std::string_view columnName, + [[maybe_unused]] const std::type_info& typeInfo) { + /* + std::cout << "podio::ROOTDataSource: Getting column reader implementation for column:\n" + << " " << columnName + << "\n with type: " << typeInfo.name() << std::endl; + */ + + auto itr = std::find(m_columnNames.begin(), m_columnNames.end(), columnName); + if (itr == m_columnNames.end()) { + std::string errMsg = "podio::ROOTDataSource: Can't find requested column \""; + errMsg += columnName; + errMsg += "\"!"; + throw std::runtime_error(errMsg); + } + auto columnIndex = std::distance(m_columnNames.begin(), itr); + m_activeCollections.emplace_back(columnIndex); + /* + std::cout << "podio::ROOTDataSource: Active collections so far:\n" + << " "; + for (auto& i: m_activeCollections) { + std::cout << i << ", "; + } + std::cout << std::endl; + */ - auto itr = std::find(m_columnNames.begin(), m_columnNames.end(), - columnName); - if (itr == m_columnNames.end()) { - std::string errMsg = "podio::ROOTDataSource: Can't find requested column \""; - errMsg += columnName; - errMsg += "\"!"; - throw std::runtime_error(errMsg); - } - auto columnIndex = std::distance(m_columnNames.begin(), itr); - m_activeCollections.emplace_back(columnIndex); + Record_t columnReaders(m_nSlots); + for (size_t slotIndex = 0; slotIndex < m_nSlots; ++slotIndex) { /* - std::cout << "podio::ROOTDataSource: Active collections so far:\n" - << " "; - for (auto& i: m_activeCollections) { - std::cout << i << ", "; - } - std::cout << std::endl; + std::cout << " Column index: " << columnIndex << "\n"; + std::cout << " Slot index: " << slotIndex << "\n"; + std::cout << " Address: " + << &m_Collections[columnIndex][slotIndex] + << std::endl; */ - - Record_t columnReaders(m_nSlots); - for (size_t slotIndex = 0; slotIndex < m_nSlots; ++slotIndex) { - /* - std::cout << " Column index: " << columnIndex << "\n"; - std::cout << " Slot index: " << slotIndex << "\n"; - std::cout << " Address: " - << &m_Collections[columnIndex][slotIndex] - << std::endl; - */ - columnReaders[slotIndex] = (void*) &m_Collections[columnIndex][slotIndex]; - } - - return columnReaders; + columnReaders[slotIndex] = (void*)&m_Collections[columnIndex][slotIndex]; } + return columnReaders; +} - const std::vector& - ROOTDataSource::GetColumnNames() const { - // std::cout << "podio::ROOTDataSource: Looking for column names" << std::endl; - - return m_columnNames; - } - +const std::vector& ROOTDataSource::GetColumnNames() const { + // std::cout << "podio::ROOTDataSource: Looking for column names" << std::endl; - bool - ROOTDataSource::HasColumn(std::string_view columnName) const { - // std::cout << "podio::ROOTDataSource: Looking for column: " << columnName - // << std::endl; + return m_columnNames; +} - if (std::find(m_columnNames.begin(), - m_columnNames.end(), - columnName) != m_columnNames.end()) { - return true; - } +bool ROOTDataSource::HasColumn(std::string_view columnName) const { + // std::cout << "podio::ROOTDataSource: Looking for column: " << columnName + // << std::endl; - return false; + if (std::find(m_columnNames.begin(), m_columnNames.end(), columnName) != m_columnNames.end()) { + return true; } + return false; +} - std::string - ROOTDataSource::GetTypeName(std::string_view columnName) const { - // std::cout << "podio::ROOTDataSource: Looking for type name of column: " - // << columnName << std::endl; - - auto itr = std::find(m_columnNames.begin(), m_columnNames.end(), - columnName); - if (itr != m_columnNames.end()) { - auto i = std::distance(m_columnNames.begin(), itr); - // std::cout << "podio::ROOTDataSource: Found type name: " - // << m_columnTypes.at(i) << std::endl; +std::string ROOTDataSource::GetTypeName(std::string_view columnName) const { + // std::cout << "podio::ROOTDataSource: Looking for type name of column: " + // << columnName << std::endl; - return m_columnTypes.at(i) + "Collection"; - } + auto itr = std::find(m_columnNames.begin(), m_columnNames.end(), columnName); + if (itr != m_columnNames.end()) { + auto i = std::distance(m_columnNames.begin(), itr); + // std::cout << "podio::ROOTDataSource: Found type name: " + // << m_columnTypes.at(i) << std::endl; - return "float"; + return m_columnTypes.at(i) + "Collection"; } + return "float"; +} - ROOT::RDataFrame - CreateDataFrame(const std::vector& filePathList) { - ROOT::RDataFrame rdf(std::make_unique(filePathList)); - - return rdf; - } +ROOT::RDataFrame CreateDataFrame(const std::vector& filePathList) { + ROOT::RDataFrame rdf(std::make_unique(filePathList)); + return rdf; +} - ROOT::RDataFrame - CreateDataFrame(const std::string& filePath) { - ROOT::RDataFrame rdf(std::make_unique(filePath)); +ROOT::RDataFrame CreateDataFrame(const std::string& filePath) { + ROOT::RDataFrame rdf(std::make_unique(filePath)); - return rdf; - } + return rdf; } +} // namespace podio diff --git a/tests/root_io/read_with_rdatasource_root.cpp b/tests/root_io/read_with_rdatasource_root.cpp index 1933e6c83..bb7e220d6 100644 --- a/tests/root_io/read_with_rdatasource_root.cpp +++ b/tests/root_io/read_with_rdatasource_root.cpp @@ -1,25 +1,22 @@ #include "read_frame.h" #include "read_frame_auxiliary.h" -#include "podio/ROOTDataSource.h" #include "datamodel/ExampleClusterCollection.h" +#include "podio/ROOTDataSource.h" #include #include - -ROOT::VecOps::RVec -getEnergy(const ExampleClusterCollection& inColl) { +ROOT::VecOps::RVec getEnergy(const ExampleClusterCollection& inColl) { ROOT::VecOps::RVec result; - for (const auto& cluster: inColl) { + for (const auto& cluster : inColl) { result.push_back(cluster.energy()); } return result; } - int main(int argc, const char* argv[]) { std::string inputFile = "example_frame.root"; if (argc == 2) { @@ -34,8 +31,7 @@ int main(int argc, const char* argv[]) { dframe.Describe().Print(); std::cout << std::endl; - auto cluterEnergy = dframe.Define("cluster_energy", getEnergy, {"clusters"}) - .Histo1D("cluster_energy"); + auto cluterEnergy = dframe.Define("cluster_energy", getEnergy, {"clusters"}).Histo1D("cluster_energy"); cluterEnergy->Print(); return EXIT_SUCCESS; From de92a7b6db57d55041c95a667a310ec8f8d0af5a Mon Sep 17 00:00:00 2001 From: Juraj Smiesko Date: Mon, 6 May 2024 15:36:01 +0200 Subject: [PATCH 05/16] Adding podio::ROOTDataSource class to the rootmap --- src/ROOTDataSource.cc | 2 +- src/root_selection.xml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/ROOTDataSource.cc b/src/ROOTDataSource.cc index 9ee09cc53..9d00ad85a 100644 --- a/src/ROOTDataSource.cc +++ b/src/ROOTDataSource.cc @@ -127,7 +127,7 @@ void ROOTDataSource::SetNSlots(unsigned int nSlots) { } for (size_t i = 0; i < m_nSlots; ++i) { - m_frames.emplace_back(std::make_unique(podio::Frame(m_podioReaders[i]->readEntry("events", 0)))); + m_frames.emplace_back(std::make_unique(podio::Frame{})); } } diff --git a/src/root_selection.xml b/src/root_selection.xml index 38db949c0..4accb24a3 100644 --- a/src/root_selection.xml +++ b/src/root_selection.xml @@ -5,5 +5,6 @@ + From 6d5cfbaab884e6833537eeabe492e7395b6c0d68 Mon Sep 17 00:00:00 2001 From: Juraj Smiesko Date: Tue, 23 Jul 2024 15:39:01 +0200 Subject: [PATCH 06/16] Minor adjustments --- include/podio/ROOTDataSource.h | 23 +++++++++++------ src/ROOTDataSource.cc | 47 +++++++++++++--------------------- 2 files changed, 33 insertions(+), 37 deletions(-) diff --git a/include/podio/ROOTDataSource.h b/include/podio/ROOTDataSource.h index f336a078b..d9f753581 100644 --- a/include/podio/ROOTDataSource.h +++ b/include/podio/ROOTDataSource.h @@ -1,18 +1,22 @@ #ifndef PODIO_DATASOURCE_H__ #define PODIO_DATASOURCE_H__ -// STL -#include -#include +// Podio +#include +#include +#include // ROOT #include #include -// Podio -#include -#include -#include +// STL +#include +#include +#include +#include +#include + namespace podio { using Record_t = std::vector; @@ -148,9 +152,12 @@ class ROOTDataSource : public ROOT::RDF::RDataSource { void SetupInput(int nEvents); }; +/// +/// Not used. +/// template std::vector ROOTDataSource::GetColumnReaders(std::string_view columnName) { - std::cout << "podio::ROOTDataSource: Getting column readers for column: " << columnName << std::endl; + // std::cout << "podio::ROOTDataSource: Getting column readers for column: " << columnName << std::endl; std::vector readers; diff --git a/src/ROOTDataSource.cc b/src/ROOTDataSource.cc index 9d00ad85a..37aad6d67 100644 --- a/src/ROOTDataSource.cc +++ b/src/ROOTDataSource.cc @@ -35,20 +35,8 @@ void ROOTDataSource::SetupInput(int nEvents) { throw std::runtime_error("podio::ROOTDataSource: No input files provided!"); } - for (const auto& filePath : m_filePathList) { - // Check if file exists - // if (!std::filesystem::exists(filePath)) { - // throw std::runtime_error("podio::ROOTDataSource: Provided file \"" - // + filePath + "\" does not exist!"); - // } - - // Check if the provided file contains required metadata - std::unique_ptr inFile(TFile::Open(filePath.data(), "READ")); - auto metadata = inFile->Get("podio_metadata"); - if (!metadata) { - throw std::runtime_error("podio::ROOTDataSource: Provided file is missing podio metadata!"); - } - } + // Check if the provided file(s) exists and contains required metadata is done + // inside ROOTReader::openFile // Create probing frame podio::Frame frame; @@ -174,7 +162,7 @@ std::vector> ROOTDataSource::GetEntryRanges() { return rangesToBeProcessed; } -void ROOTDataSource::InitSlot([[maybe_unused]] unsigned int slot, [[maybe_unused]] ULong64_t firstEntry) { +void ROOTDataSource::InitSlot(unsigned int, ULong64_t) { // std::cout << "podio::ROOTDataSource: Initializing slot: " << slot // << " with first entry " << firstEntry << std::endl; } @@ -200,7 +188,7 @@ bool ROOTDataSource::SetEntry(unsigned int slot, ULong64_t entry) { return true; } -void ROOTDataSource::FinalizeSlot([[maybe_unused]] unsigned int slot) { +void ROOTDataSource::FinalizeSlot(unsigned int) { /* std::cout << "podio::ROOTDataSource: Finalizing slot: " << slot << std::endl; std::cout << "Reader: " << &m_podioReaderRefs[slot].get() << std::endl; @@ -221,7 +209,7 @@ void ROOTDataSource::Finalize() { } Record_t ROOTDataSource::GetColumnReadersImpl(std::string_view columnName, - [[maybe_unused]] const std::type_info& typeInfo) { + const std::type_info&) { /* std::cout << "podio::ROOTDataSource: Getting column reader implementation for column:\n" << " " << columnName @@ -271,29 +259,30 @@ bool ROOTDataSource::HasColumn(std::string_view columnName) const { // std::cout << "podio::ROOTDataSource: Looking for column: " << columnName // << std::endl; - if (std::find(m_columnNames.begin(), m_columnNames.end(), columnName) != m_columnNames.end()) { - return true; - } - - return false; + return std::find(m_columnNames.begin(), m_columnNames.end(), columnName) != m_columnNames.end(); } + std::string ROOTDataSource::GetTypeName(std::string_view columnName) const { // std::cout << "podio::ROOTDataSource: Looking for type name of column: " // << columnName << std::endl; auto itr = std::find(m_columnNames.begin(), m_columnNames.end(), columnName); - if (itr != m_columnNames.end()) { - auto i = std::distance(m_columnNames.begin(), itr); - // std::cout << "podio::ROOTDataSource: Found type name: " - // << m_columnTypes.at(i) << std::endl; - - return m_columnTypes.at(i) + "Collection"; + if (itr == m_columnNames.end()) { + std::string errMsg = "podio::ROOTDataSource: Type name for \""; + errMsg += columnName; + errMsg += "\" not found!"; + throw std::runtime_error(errMsg); } - return "float"; + auto typeIndex = std::distance(m_columnNames.begin(), itr); + // std::cout << "podio::ROOTDataSource: Found type name: " + // << m_columnTypes.at(typeIndex) << std::endl; + + return m_columnTypes.at(typeIndex) + "Collection"; } + ROOT::RDataFrame CreateDataFrame(const std::vector& filePathList) { ROOT::RDataFrame rdf(std::make_unique(filePathList)); From 12858beecbf3b66db00c39daa71116994a78e7bd Mon Sep 17 00:00:00 2001 From: Juraj Smiesko Date: Wed, 24 Jul 2024 09:57:56 +0200 Subject: [PATCH 07/16] Another set of small adjustments --- CMakeLists.txt | 3 + include/podio/ROOTDataSource.h | 17 +- src/CMakeLists.txt | 3 +- src/ROOTDataSource.cc | 147 +++--------------- tests/root_io/CMakeLists.txt | 3 +- tests/root_io/read_with_rdatasource_root.cpp | 5 +- tests/schema_evolution/root_io/CMakeLists.txt | 4 +- tests/unittests/CMakeLists.txt | 2 +- 8 files changed, 37 insertions(+), 147 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index caa4e3c23..8f4ce684d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -64,6 +64,9 @@ option(FORCE_RUN_ALL_TESTS "Run all the tests even those with known problems" OF option(CLANG_TIDY "Run clang-tidy after compilation." OFF) ADD_CLANG_TIDY() +# Export compile commands --- used by the tools from clang family +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + #--- Declare options ----------------------------------------------------------- option(CREATE_DOC "Whether or not to create doxygen doc target." OFF) option(ENABLE_SIO "Build SIO I/O support" OFF) diff --git a/include/podio/ROOTDataSource.h b/include/podio/ROOTDataSource.h index d9f753581..9f0fc6b95 100644 --- a/include/podio/ROOTDataSource.h +++ b/include/podio/ROOTDataSource.h @@ -4,23 +4,20 @@ // Podio #include #include -#include +#include // ROOT #include #include // STL +#include #include -#include -#include #include -#include - +#include +#include namespace podio { -using Record_t = std::vector; - class ROOTDataSource : public ROOT::RDF::RDataSource { public: /// @@ -103,7 +100,7 @@ class ROOTDataSource : public ROOT::RDF::RDataSource { /// @brief Type-erased vector of pointers to pointers to column /// values --- one per slot. /// - Record_t GetColumnReadersImpl(std::string_view name, const std::type_info& typeInfo) override; + std::vector GetColumnReadersImpl(std::string_view name, const std::type_info& typeInfo) override; std::string AsString() override { return "Podio data source"; @@ -138,7 +135,7 @@ class ROOTDataSource : public ROOT::RDF::RDataSource { std::vector m_activeCollections = {}; /// Root podio readers - std::vector> m_podioReaders = {}; + std::vector> m_podioReaders = {}; /// Podio frames std::vector> m_frames = {}; @@ -156,7 +153,7 @@ class ROOTDataSource : public ROOT::RDF::RDataSource { /// Not used. /// template -std::vector ROOTDataSource::GetColumnReaders(std::string_view columnName) { +std::vector ROOTDataSource::GetColumnReaders(std::string_view) { // std::cout << "podio::ROOTDataSource: Getting column readers for column: " << columnName << std::endl; std::vector readers; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 411408ac8..d701de049 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -33,7 +33,8 @@ FUNCTION(PODIO_ADD_LIB_AND_DICT libname headers sources selection ) target_include_directories(${dictname} PUBLIC $ $) - target_link_libraries(${dictname} PUBLIC podio::${libname} podio::podio ROOT::Core ROOT::Tree) + target_link_libraries(${dictname} PUBLIC podio::${libname} podio::podio + podio::podioIO ROOT::Core ROOT::Tree) if(ENABLE_RNTUPLE) target_link_libraries(${dictname} PUBLIC ROOT::ROOTNTuple) endif() diff --git a/src/ROOTDataSource.cc b/src/ROOTDataSource.cc index 37aad6d67..74f4a97ec 100644 --- a/src/ROOTDataSource.cc +++ b/src/ROOTDataSource.cc @@ -1,19 +1,16 @@ #include "podio/ROOTDataSource.h" +#include "podio/Reader.h" -// STL -#include -#include -#include -#include -#include -#include +// podio +#include // ROOT #include -// podio -#include -#include +// STL +#include +#include +#include namespace podio { ROOTDataSource::ROOTDataSource(const std::string& filePath, int nEvents) : m_nSlots{1} { @@ -26,35 +23,23 @@ ROOTDataSource::ROOTDataSource(const std::vector& filePathList, int SetupInput(nEvents); } -/// @TODO Check for the existence of the file, which might be coming from web -/// or EOS. void ROOTDataSource::SetupInput(int nEvents) { - // std::cout << "podio::ROOTDataSource: Constructing the source ..." << std::endl; - if (m_filePathList.empty()) { throw std::runtime_error("podio::ROOTDataSource: No input files provided!"); } - // Check if the provided file(s) exists and contains required metadata is done - // inside ROOTReader::openFile + // Check if the provided file(s) exists and contain required metadata is done + // by podio::Reader // Create probing frame podio::Frame frame; unsigned int nEventsInFiles = 0; - podio::ROOTReader podioReader; - podioReader.openFiles(m_filePathList); - nEventsInFiles = podioReader.getEntries("events"); - frame = podio::Frame(podioReader.readEntry("events", 0)); + auto podioReader = podio::makeReader(m_filePathList); + nEventsInFiles = podioReader.getEntries(podio::Category::Event); + frame = podio::Frame(podioReader.readFrame(podio::Category::Event, 0)); // Determine over how many events to run if (nEventsInFiles > 0) { - /* - std::cout << "podio::ROOTDataSource: Found " << nEventsInFiles - << " events in files: \n"; - for (const auto& filePath : m_filePathList) { - std::cout << " - " << filePath << "\n"; - } - */ } else { throw std::runtime_error("podio::ROOTDataSource: No events found!"); } @@ -70,25 +55,18 @@ void ROOTDataSource::SetupInput(int nEvents) { m_nEvents = nEventsInFiles; } - // std::cout << "podio::ROOTDataSource: Running over " << m_nEvents << " events." - // << std::endl; - // Get collections stored in the files std::vector collNames = frame.getAvailableCollections(); - // std::cout << "podio::ROOTDataSource: Found following collections:\n"; - for (auto& collName : collNames) { + for (const auto& collName : collNames) { const podio::CollectionBase* coll = frame.get(collName); if (coll->isValid()) { m_columnNames.emplace_back(collName); - m_columnTypes.emplace_back(coll->getValueTypeName()); - // std::cout << " - " << collName << "\n"; + m_columnTypes.emplace_back(coll->getTypeName()); } } } void ROOTDataSource::SetNSlots(unsigned int nSlots) { - // std::cout << "podio::ROOTDataSource: Setting num. of slots to: " << nSlots - // << std::endl; m_nSlots = nSlots; if (m_nSlots > m_nEvents) { @@ -107,11 +85,7 @@ void ROOTDataSource::SetNSlots(unsigned int nSlots) { // Initialize podio readers for (size_t i = 0; i < m_nSlots; ++i) { - m_podioReaders.emplace_back(std::make_unique()); - } - - for (size_t i = 0; i < m_nSlots; ++i) { - m_podioReaders[i]->openFiles(m_filePathList); + m_podioReaders.emplace_back(std::make_unique(podio::makeReader(m_filePathList))); } for (size_t i = 0; i < m_nSlots; ++i) { @@ -120,15 +94,12 @@ void ROOTDataSource::SetNSlots(unsigned int nSlots) { } void ROOTDataSource::Initialize() { - // std::cout << "podio::ROOTDataSource: Initializing the source ..." << std::endl; } std::vector> ROOTDataSource::GetEntryRanges() { - // std::cout << "podio::ROOTDataSource: Getting entry ranges ..." << std::endl; - std::vector> rangesToBeProcessed; for (auto& range : m_rangesAvailable) { - rangesToBeProcessed.emplace_back(std::pair{range.first, range.second}); + rangesToBeProcessed.emplace_back(range.first, range.second); if (rangesToBeProcessed.size() >= m_nSlots) { break; } @@ -140,82 +111,29 @@ std::vector> ROOTDataSource::GetEntryRanges() { m_rangesAvailable.erase(m_rangesAvailable.begin(), m_rangesAvailable.end()); } - /* - std::cout << "podio::ROOTDataSource: Ranges to be processed:\n"; - for (auto& range: rangesToBeProcessed) { - std::cout << " {" << range.first << ", " << range.second - << "}\n"; - } - - if (m_rangesAvailable.size() > 0) { - - std::cout << "podio::ROOTDataSource: Ranges remaining:\n"; - for (auto& range: m_rangesAvailable) { - std::cout << " {" << range.first << ", " << range.second - << "}\n"; - } - } else { - std::cout << "podio::ROOTDataSource: No more remaining ranges.\n"; - } - */ - return rangesToBeProcessed; } void ROOTDataSource::InitSlot(unsigned int, ULong64_t) { - // std::cout << "podio::ROOTDataSource: Initializing slot: " << slot - // << " with first entry " << firstEntry << std::endl; } bool ROOTDataSource::SetEntry(unsigned int slot, ULong64_t entry) { - // std::cout << "podio::ROOTDataSource: In slot: " << slot << ", setting entry: " - // << entry << std::endl; - - m_frames[slot] = std::make_unique(podio::Frame(m_podioReaders[slot]->readEntry("events", entry))); + m_frames[slot] = std::make_unique(m_podioReaders[slot]->readFrame(podio::Category::Event, entry)); for (auto& collectionIndex : m_activeCollections) { m_Collections[collectionIndex][slot] = m_frames[slot]->get(m_columnNames.at(collectionIndex)); - /* - std::cout << "CollName: " << m_columnNames.at(collectionIndex) << "\n"; - std::cout << "Address: " << m_Collections[collectionIndex][slot] << "\n"; - std::cout << "Coll size: " << m_Collections[collectionIndex][slot]->size() << "\n"; - if (m_Collections[collectionIndex][slot]->isValid()) { - std::cout << "Collection valid\n"; - } - */ } return true; } void ROOTDataSource::FinalizeSlot(unsigned int) { - /* - std::cout << "podio::ROOTDataSource: Finalizing slot: " << slot << std::endl; - std::cout << "Reader: " << &m_podioReaderRefs[slot].get() << std::endl; - - for (auto& collectionIndex: m_activeCollections) { - std::cout << "CollName: " << m_columnNames.at(collectionIndex) << "\n"; - std::cout << "Address: " << m_Collections[collectionIndex][slot] << "\n"; - if (m_Collections[collectionIndex][slot]->isValid()) { - std::cout << "Collection valid\n"; - } - std::cout << "Coll size: " << m_Collections[collectionIndex][slot]->size() << "\n"; - } - */ } void ROOTDataSource::Finalize() { - // std::cout << "podio::ROOTDataSource: Finalizing ..." << std::endl; } -Record_t ROOTDataSource::GetColumnReadersImpl(std::string_view columnName, - const std::type_info&) { - /* - std::cout << "podio::ROOTDataSource: Getting column reader implementation for column:\n" - << " " << columnName - << "\n with type: " << typeInfo.name() << std::endl; - */ - +std::vector ROOTDataSource::GetColumnReadersImpl(std::string_view columnName, const std::type_info&) { auto itr = std::find(m_columnNames.begin(), m_columnNames.end(), columnName); if (itr == m_columnNames.end()) { std::string errMsg = "podio::ROOTDataSource: Can't find requested column \""; @@ -225,24 +143,9 @@ Record_t ROOTDataSource::GetColumnReadersImpl(std::string_view columnName, } auto columnIndex = std::distance(m_columnNames.begin(), itr); m_activeCollections.emplace_back(columnIndex); - /* - std::cout << "podio::ROOTDataSource: Active collections so far:\n" - << " "; - for (auto& i: m_activeCollections) { - std::cout << i << ", "; - } - std::cout << std::endl; - */ Record_t columnReaders(m_nSlots); for (size_t slotIndex = 0; slotIndex < m_nSlots; ++slotIndex) { - /* - std::cout << " Column index: " << columnIndex << "\n"; - std::cout << " Slot index: " << slotIndex << "\n"; - std::cout << " Address: " - << &m_Collections[columnIndex][slotIndex] - << std::endl; - */ columnReaders[slotIndex] = (void*)&m_Collections[columnIndex][slotIndex]; } @@ -250,23 +153,14 @@ Record_t ROOTDataSource::GetColumnReadersImpl(std::string_view columnName, } const std::vector& ROOTDataSource::GetColumnNames() const { - // std::cout << "podio::ROOTDataSource: Looking for column names" << std::endl; - return m_columnNames; } bool ROOTDataSource::HasColumn(std::string_view columnName) const { - // std::cout << "podio::ROOTDataSource: Looking for column: " << columnName - // << std::endl; - return std::find(m_columnNames.begin(), m_columnNames.end(), columnName) != m_columnNames.end(); } - std::string ROOTDataSource::GetTypeName(std::string_view columnName) const { - // std::cout << "podio::ROOTDataSource: Looking for type name of column: " - // << columnName << std::endl; - auto itr = std::find(m_columnNames.begin(), m_columnNames.end(), columnName); if (itr == m_columnNames.end()) { std::string errMsg = "podio::ROOTDataSource: Type name for \""; @@ -276,13 +170,10 @@ std::string ROOTDataSource::GetTypeName(std::string_view columnName) const { } auto typeIndex = std::distance(m_columnNames.begin(), itr); - // std::cout << "podio::ROOTDataSource: Found type name: " - // << m_columnTypes.at(typeIndex) << std::endl; - return m_columnTypes.at(typeIndex) + "Collection"; + return m_columnTypes.at(typeIndex); } - ROOT::RDataFrame CreateDataFrame(const std::vector& filePathList) { ROOT::RDataFrame rdf(std::make_unique(filePathList)); diff --git a/tests/root_io/CMakeLists.txt b/tests/root_io/CMakeLists.txt index 3936a6cc5..5ea612a76 100644 --- a/tests/root_io/CMakeLists.txt +++ b/tests/root_io/CMakeLists.txt @@ -20,7 +20,7 @@ if(ENABLE_RNTUPLE) read_interface_rntuple.cpp ) endif() -set(root_libs TestDataModelDict ExtensionDataModelDict podio::podioRootIO podio::podioIO) +set(root_libs TestDataModelDict ExtensionDataModelDict podio::podioIO podio::podioRootIO) foreach( sourcefile ${root_dependent_tests} ) CREATE_PODIO_TEST(${sourcefile} "${root_libs}") endforeach() @@ -31,6 +31,7 @@ set_tests_properties( read_frame_root read_frame_root_multiple read_and_write_frame_root + read_with_rdatasource_root PROPERTIES DEPENDS write_frame_root diff --git a/tests/root_io/read_with_rdatasource_root.cpp b/tests/root_io/read_with_rdatasource_root.cpp index bb7e220d6..b82a56c67 100644 --- a/tests/root_io/read_with_rdatasource_root.cpp +++ b/tests/root_io/read_with_rdatasource_root.cpp @@ -1,8 +1,5 @@ -#include "read_frame.h" -#include "read_frame_auxiliary.h" - -#include "datamodel/ExampleClusterCollection.h" #include "podio/ROOTDataSource.h" +#include "datamodel/ExampleClusterCollection.h" #include #include diff --git a/tests/schema_evolution/root_io/CMakeLists.txt b/tests/schema_evolution/root_io/CMakeLists.txt index 53e8b451f..e46c4deb2 100644 --- a/tests/schema_evolution/root_io/CMakeLists.txt +++ b/tests/schema_evolution/root_io/CMakeLists.txt @@ -1,4 +1,4 @@ -CREATE_PODIO_TEST(write_old_data_root.cpp "TestDataModelDict;podioRootIO") -PODIO_CREATE_READ_NEW_DATA_TEST(read_new_data_root.cpp "TestDataModel_v3Dict;podio::podioRootIO") +CREATE_PODIO_TEST(write_old_data_root.cpp "TestDataModelDict;podioIO;podioRootIO") +PODIO_CREATE_READ_NEW_DATA_TEST(read_new_data_root.cpp "TestDataModel_v3Dict;podio::podioIO;podio::podioRootIO") set_property(TEST read_new_data_root PROPERTY DEPENDS write_old_data_root) diff --git a/tests/unittests/CMakeLists.txt b/tests/unittests/CMakeLists.txt index 0cb3a7214..c3ebfbaa2 100644 --- a/tests/unittests/CMakeLists.txt +++ b/tests/unittests/CMakeLists.txt @@ -41,7 +41,7 @@ endif() find_package(Threads REQUIRED) add_executable(unittest_podio unittest.cpp frame.cpp buffer_factory.cpp interface_types.cpp std_interoperability.cpp) -target_link_libraries(unittest_podio PUBLIC TestDataModel PRIVATE Catch2::Catch2WithMain Threads::Threads podio::podioRootIO) +target_link_libraries(unittest_podio PUBLIC TestDataModel PRIVATE Catch2::Catch2WithMain Threads::Threads podio::podioIO podio::podioRootIO) if (ENABLE_SIO) target_link_libraries(unittest_podio PRIVATE podio::podioSioIO) endif() From 34d12578d8fa096e76358b76217d40e3069b916f Mon Sep 17 00:00:00 2001 From: Juraj Smiesko Date: Thu, 25 Jul 2024 09:30:12 +0200 Subject: [PATCH 08/16] Separating datasource into standalone library --- CMakeLists.txt | 12 ++- .../podio/{ROOTDataSource.h => DataSource.h} | 45 +++++------ src/CMakeLists.txt | 76 ++++++++++++++----- src/{ROOTDataSource.cc => DataSource.cc} | 48 ++++++------ src/rds_selection.xml | 5 ++ src/root_selection.xml | 1 - tests/root_io/CMakeLists.txt | 17 ++++- tests/root_io/read_with_rdatasource_root.cpp | 2 +- tests/schema_evolution/root_io/CMakeLists.txt | 4 +- tests/unittests/CMakeLists.txt | 2 +- 10 files changed, 134 insertions(+), 78 deletions(-) rename include/podio/{ROOTDataSource.h => DataSource.h} (71%) rename src/{ROOTDataSource.cc => DataSource.cc} (72%) create mode 100644 src/rds_selection.xml diff --git a/CMakeLists.txt b/CMakeLists.txt index 8f4ce684d..89b08ad9c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -72,6 +72,7 @@ option(CREATE_DOC "Whether or not to create doxygen doc target." OFF) option(ENABLE_SIO "Build SIO I/O support" OFF) option(PODIO_RELAX_PYVER "Do not require exact python version match with ROOT" OFF) option(ENABLE_RNTUPLE "Build with support for the new ROOT NTtuple format" OFF) +option(ENABLE_DATASOURCE "Build podio's ROOT DataSource" OFF) option(PODIO_USE_CLANG_FORMAT "Use clang-format to format the code" OFF) option(ENABLE_JULIA "Enable Julia support. When enabled, Julia datamodels will be generated, and Julia tests will run." OFF) @@ -79,14 +80,17 @@ option(ENABLE_JULIA "Enable Julia support. When enabled, Julia datamodels w #--- Declare ROOT dependency --------------------------------------------------- list(APPEND CMAKE_PREFIX_PATH $ENV{ROOTSYS}) set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake) -if(NOT ENABLE_RNTUPLE) - find_package(ROOT REQUIRED COMPONENTS RIO Tree ROOTDataFrame) -else() - find_package(ROOT REQUIRED COMPONENTS RIO Tree ROOTNTuple) +set(root_components_needed RIO Tree) +if(ENABLE_RNTUPLE) + list(APPEND root_components_needed ROOTNTuple) if(${ROOT_VERSION} VERSION_LESS 6.28.02) message(FATAL_ERROR "You are trying to build podio with support for the new ROOT NTuple format, but your ROOT version is too old. Please update ROOT to at least version 6.28.02") endif() endif() +if(ENABLE_DATASOURCE) + list(APPEND root_components_needed ROOTDataFrame) +endif() +find_package(ROOT REQUIRED COMPONENTS ${root_components_needed}) # ROOT_CXX_STANDARD was introduced in https://github.com/root-project/root/pull/6466 # before that it's an empty variable so we check if it's any number > 0 diff --git a/include/podio/ROOTDataSource.h b/include/podio/DataSource.h similarity index 71% rename from include/podio/ROOTDataSource.h rename to include/podio/DataSource.h index 9f0fc6b95..6a3c5fde4 100644 --- a/include/podio/ROOTDataSource.h +++ b/include/podio/DataSource.h @@ -18,64 +18,61 @@ #include namespace podio { -class ROOTDataSource : public ROOT::RDF::RDataSource { +class DataSource : public ROOT::RDF::RDataSource { public: /// - /// @brief Construct the podio::ROOTDataSource from the provided file. + /// @brief Construct the podio::DataSource from the provided file. /// - explicit ROOTDataSource(const std::string& filePath, int nEvents = -1); + explicit DataSource(const std::string& filePath, int nEvents = -1); /// - /// @brief Construct the podio::ROOTDataSource from the provided file - /// list. + /// @brief Construct the podio::DataSource from the provided file list. /// - explicit ROOTDataSource(const std::vector& filePathList, int nEvents = -1); + explicit DataSource(const std::vector& filePathList, int nEvents = -1); /// - /// @brief Inform the podio::ROOTDataSource of the desired level of - /// parallelism. + /// @brief Inform the podio::DataSource of the desired level of parallelism. /// void SetNSlots(unsigned int nSlots) override; /// - /// @brief Retrieve from podio::ROOTDataSource per-thread readers for the - /// desired columns. + /// @brief Retrieve from podio::DataSource per-thread readers for the desired + /// columns. /// template std::vector GetColumnReaders(std::string_view columnName); /// - /// @brief Inform podio::ROOTDataSource that an event-loop is about to - /// start. + /// @brief Inform podio::DataSource that an event-loop is about to start. /// void Initialize() override; /// - /// @brief Retrieve from podio::ROOTDataSource a set of ranges of entries - /// that can be processed concurrently. + /// @brief Retrieve from podio::DataSource a set of ranges of entries that + /// can be processed concurrently. /// std::vector> GetEntryRanges() override; /// - /// @brief Inform podio::ROOTDataSource that a certain thread is about to - /// start working on a certain range of entries. + /// @brief Inform podio::DataSource that a certain thread is about to start + /// working on a certain range of entries. /// void InitSlot(unsigned int slot, ULong64_t firstEntry) override; /// - /// @brief Inform podio::ROOTDataSource that a certain thread is about to - /// start working on a certain entry. + /// @brief Inform podio::DataSource that a certain thread is about to start + /// working on a certain entry. /// bool SetEntry(unsigned int slot, ULong64_t entry) override; /// - /// @brief Inform podio::ROOTDataSource that a certain thread finished - /// working on a certain range of entries. + /// @brief Inform podio::DataSource that a certain thread finished working + /// on a certain range of entries. /// void FinalizeSlot(unsigned int slot) override; /// - /// @brief Inform podio::ROOTDataSource that an event-loop finished. + /// @brief Inform podio::DataSource that an event-loop finished. /// void Finalize() override; @@ -141,7 +138,7 @@ class ROOTDataSource : public ROOT::RDF::RDataSource { std::vector> m_frames = {}; /// - /// @brief Setup input for the podio::ROOTDataSource. + /// @brief Setup input for the podio::DataSource. /// /// @param[in] Number of events. /// @return void. @@ -153,8 +150,8 @@ class ROOTDataSource : public ROOT::RDF::RDataSource { /// Not used. /// template -std::vector ROOTDataSource::GetColumnReaders(std::string_view) { - // std::cout << "podio::ROOTDataSource: Getting column readers for column: " << columnName << std::endl; +std::vector DataSource::GetColumnReaders(std::string_view) { + // std::cout << "podio::DataSource: Getting column readers for column: " << columnName << std::endl; std::vector readers; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d701de049..1bedf474c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -33,8 +33,7 @@ FUNCTION(PODIO_ADD_LIB_AND_DICT libname headers sources selection ) target_include_directories(${dictname} PUBLIC $ $) - target_link_libraries(${dictname} PUBLIC podio::${libname} podio::podio - podio::podioIO ROOT::Core ROOT::Tree) + target_link_libraries(${dictname} PUBLIC podio::${libname} podio::podio ROOT::Core ROOT::Tree) if(ENABLE_RNTUPLE) target_link_libraries(${dictname} PUBLIC ROOT::ROOTNTuple) endif() @@ -83,7 +82,6 @@ SET(root_sources ROOTLegacyReader.cc ROOTFrameData.cc RootHelpers.cc - ROOTDataSource.cc ) if(ENABLE_RNTUPLE) list(APPEND root_sources @@ -98,7 +96,6 @@ SET(root_headers ${PROJECT_SOURCE_DIR}/include/podio/ROOTWriter.h ${PROJECT_SOURCE_DIR}/include/podio/ROOTFrameData.h ${PROJECT_SOURCE_DIR}/include/podio/utilities/RootHelpers.h - ${PROJECT_SOURCE_DIR}/include/podio/ROOTDataSource.h ) if(ENABLE_RNTUPLE) list(APPEND root_headers @@ -108,12 +105,7 @@ if(ENABLE_RNTUPLE) endif() PODIO_ADD_LIB_AND_DICT(podioRootIO "${root_headers}" "${root_sources}" root_selection.xml) -target_link_libraries(podioRootIO PUBLIC podio::podio - ROOT::Core - ROOT::RIO - ROOT::Tree - ROOT::ROOTVecOps - ROOT::ROOTDataFrame) +target_link_libraries(podioRootIO PUBLIC podio::podio ROOT::Core ROOT::RIO ROOT::Tree ROOT::ROOTVecOps) if(ENABLE_RNTUPLE) target_link_libraries(podioRootIO PUBLIC ROOT::ROOTNTuple) target_compile_definitions(podioRootIO PUBLIC PODIO_ENABLE_RNTUPLE=1) @@ -147,6 +139,7 @@ if(ENABLE_SIO) LIST(APPEND INSTALL_LIBRARIES podioSioIO podioSioIODict) endif() + # --- IO set(io_sources Writer.cc @@ -168,18 +161,57 @@ if(ENABLE_SIO) target_link_libraries(podioIO PUBLIC podio::podioSioIO) endif() + +# --- DataSource +if(ENABLE_DATASOURCE) + set(rds_sources + DataSource.cc + ) + + set(rds_headers + ${PROJECT_SOURCE_DIR}/include/podio/DataSource.h + ) + + podio_add_lib_and_dict(podioDataSource "${rds_headers}" "${rds_sources}" rds_selection.xml) + target_link_libraries(podioDataSource PUBLIC podio::podio + podio::podioIO + podio::podioRootIO + ROOT::Core + ROOT::RIO + ROOT::Tree + ROOT::ROOTVecOps + ROOT::ROOTDataFrame + ) + target_compile_definitions(podioDataSource PUBLIC PODIO_ENABLE_DATASOURCE=1) +endif() + + # --- Install everything -install(TARGETS podio podioDict podioRootIO podioRootIODict podioIO ${INSTALL_LIBRARIES} - EXPORT podioTargets - DESTINATION "${CMAKE_INSTALL_LIBDIR}") +if (NOT ENABLE_DATASOURCE) + install(TARGETS podio podioDict podioRootIO podioRootIODict podioIO ${INSTALL_LIBRARIES} + EXPORT podioTargets + DESTINATION "${CMAKE_INSTALL_LIBDIR}") +else() + install(TARGETS podio podioDict podioRootIO podioRootIODict podioIO podioDataSource ${INSTALL_LIBRARIES} + EXPORT podioTargets + DESTINATION "${CMAKE_INSTALL_LIBDIR}") +endif() # Only install the necessary headers -if (ENABLE_SIO) - install(DIRECTORY ${PROJECT_SOURCE_DIR}/include/podio DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}") -else() - install(DIRECTORY ${PROJECT_SOURCE_DIR}/include/podio DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" - REGEX SIO.*\\.h$ EXCLUDE ) +file(GLOB headers_necessary + RELATIVE ${PROJECT_SOURCE_DIR} + "${PROJECT_SOURCE_DIR}/install/podio/*.h") + +if (NOT ENABLE_SIO) + list(FILTER headers_necessary EXCLUDE REGEX SIO.*\\.h$) +endif() +if (NOT ENABLE_RNTUPLE) + list(FILTER headers_necessary EXCLUDE REGEX RNTuple.*\\.h$) endif() +if (NOT ENABLE_RNTUPLE) + list(FILTER headers_necessary EXCLUDE REGEX DataSource.h) +endif() +install(FILES ${headers_necessary} DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/podio") install(FILES ${CMAKE_CURRENT_BINARY_DIR}/podioDictDict.rootmap @@ -196,6 +228,14 @@ if (ENABLE_SIO) ) endif() +if (ENABLE_DATASOURCE) + install(FILES + ${CMAKE_CURRENT_BINARY_DIR}/podioDataSourceDictDict.rootmap + ${CMAKE_CURRENT_BINARY_DIR}/libpodioDataSourceDict_rdict.pcm + DESTINATION "${CMAKE_INSTALL_LIBDIR}" + ) +endif() + add_executable(podio_test_hashes test_hashes.cpp) target_link_libraries(podio_test_hashes PRIVATE podio::podio) install(TARGETS podio_test_hashes diff --git a/src/ROOTDataSource.cc b/src/DataSource.cc similarity index 72% rename from src/ROOTDataSource.cc rename to src/DataSource.cc index 74f4a97ec..9d861a445 100644 --- a/src/ROOTDataSource.cc +++ b/src/DataSource.cc @@ -1,4 +1,4 @@ -#include "podio/ROOTDataSource.h" +#include "podio/DataSource.h" #include "podio/Reader.h" // podio @@ -13,19 +13,19 @@ #include namespace podio { -ROOTDataSource::ROOTDataSource(const std::string& filePath, int nEvents) : m_nSlots{1} { +DataSource::DataSource(const std::string& filePath, int nEvents) : m_nSlots{1} { m_filePathList.emplace_back(filePath); SetupInput(nEvents); } -ROOTDataSource::ROOTDataSource(const std::vector& filePathList, int nEvents) : +DataSource::DataSource(const std::vector& filePathList, int nEvents) : m_nSlots{1}, m_filePathList{filePathList} { SetupInput(nEvents); } -void ROOTDataSource::SetupInput(int nEvents) { +void DataSource::SetupInput(int nEvents) { if (m_filePathList.empty()) { - throw std::runtime_error("podio::ROOTDataSource: No input files provided!"); + throw std::runtime_error("podio::DataSource: No input files provided!"); } // Check if the provided file(s) exists and contain required metadata is done @@ -41,13 +41,13 @@ void ROOTDataSource::SetupInput(int nEvents) { // Determine over how many events to run if (nEventsInFiles > 0) { } else { - throw std::runtime_error("podio::ROOTDataSource: No events found!"); + throw std::runtime_error("podio::DataSource: No events found!"); } if (nEvents < 0) { m_nEvents = nEventsInFiles; } else if (nEvents == 0) { - throw std::runtime_error("podio::ROOTDataSource: Requested to run over zero events!"); + throw std::runtime_error("podio::DataSource: Requested to run over zero events!"); } else { m_nEvents = nEvents; } @@ -66,11 +66,11 @@ void ROOTDataSource::SetupInput(int nEvents) { } } -void ROOTDataSource::SetNSlots(unsigned int nSlots) { +void DataSource::SetNSlots(unsigned int nSlots) { m_nSlots = nSlots; if (m_nSlots > m_nEvents) { - throw std::runtime_error("podio::ROOTDataSource: Number of events too small!"); + throw std::runtime_error("podio::DataSource: Number of events too small!"); } int eventsPerSlot = m_nEvents / m_nSlots; @@ -93,10 +93,10 @@ void ROOTDataSource::SetNSlots(unsigned int nSlots) { } } -void ROOTDataSource::Initialize() { +void DataSource::Initialize() { } -std::vector> ROOTDataSource::GetEntryRanges() { +std::vector> DataSource::GetEntryRanges() { std::vector> rangesToBeProcessed; for (auto& range : m_rangesAvailable) { rangesToBeProcessed.emplace_back(range.first, range.second); @@ -114,10 +114,10 @@ std::vector> ROOTDataSource::GetEntryRanges() { return rangesToBeProcessed; } -void ROOTDataSource::InitSlot(unsigned int, ULong64_t) { +void DataSource::InitSlot(unsigned int, ULong64_t) { } -bool ROOTDataSource::SetEntry(unsigned int slot, ULong64_t entry) { +bool DataSource::SetEntry(unsigned int slot, ULong64_t entry) { m_frames[slot] = std::make_unique(m_podioReaders[slot]->readFrame(podio::Category::Event, entry)); for (auto& collectionIndex : m_activeCollections) { @@ -127,16 +127,16 @@ bool ROOTDataSource::SetEntry(unsigned int slot, ULong64_t entry) { return true; } -void ROOTDataSource::FinalizeSlot(unsigned int) { +void DataSource::FinalizeSlot(unsigned int) { } -void ROOTDataSource::Finalize() { +void DataSource::Finalize() { } -std::vector ROOTDataSource::GetColumnReadersImpl(std::string_view columnName, const std::type_info&) { +std::vector DataSource::GetColumnReadersImpl(std::string_view columnName, const std::type_info&) { auto itr = std::find(m_columnNames.begin(), m_columnNames.end(), columnName); if (itr == m_columnNames.end()) { - std::string errMsg = "podio::ROOTDataSource: Can't find requested column \""; + std::string errMsg = "podio::DataSource: Can't find requested column \""; errMsg += columnName; errMsg += "\"!"; throw std::runtime_error(errMsg); @@ -144,7 +144,7 @@ std::vector ROOTDataSource::GetColumnReadersImpl(std::string_view columnN auto columnIndex = std::distance(m_columnNames.begin(), itr); m_activeCollections.emplace_back(columnIndex); - Record_t columnReaders(m_nSlots); + std::vector columnReaders(m_nSlots); for (size_t slotIndex = 0; slotIndex < m_nSlots; ++slotIndex) { columnReaders[slotIndex] = (void*)&m_Collections[columnIndex][slotIndex]; } @@ -152,18 +152,18 @@ std::vector ROOTDataSource::GetColumnReadersImpl(std::string_view columnN return columnReaders; } -const std::vector& ROOTDataSource::GetColumnNames() const { +const std::vector& DataSource::GetColumnNames() const { return m_columnNames; } -bool ROOTDataSource::HasColumn(std::string_view columnName) const { +bool DataSource::HasColumn(std::string_view columnName) const { return std::find(m_columnNames.begin(), m_columnNames.end(), columnName) != m_columnNames.end(); } -std::string ROOTDataSource::GetTypeName(std::string_view columnName) const { +std::string DataSource::GetTypeName(std::string_view columnName) const { auto itr = std::find(m_columnNames.begin(), m_columnNames.end(), columnName); if (itr == m_columnNames.end()) { - std::string errMsg = "podio::ROOTDataSource: Type name for \""; + std::string errMsg = "podio::DataSource: Type name for \""; errMsg += columnName; errMsg += "\" not found!"; throw std::runtime_error(errMsg); @@ -175,13 +175,13 @@ std::string ROOTDataSource::GetTypeName(std::string_view columnName) const { } ROOT::RDataFrame CreateDataFrame(const std::vector& filePathList) { - ROOT::RDataFrame rdf(std::make_unique(filePathList)); + ROOT::RDataFrame rdf(std::make_unique(filePathList)); return rdf; } ROOT::RDataFrame CreateDataFrame(const std::string& filePath) { - ROOT::RDataFrame rdf(std::make_unique(filePath)); + ROOT::RDataFrame rdf(std::make_unique(filePath)); return rdf; } diff --git a/src/rds_selection.xml b/src/rds_selection.xml new file mode 100644 index 000000000..ead05ceac --- /dev/null +++ b/src/rds_selection.xml @@ -0,0 +1,5 @@ + + + + + diff --git a/src/root_selection.xml b/src/root_selection.xml index 4accb24a3..38db949c0 100644 --- a/src/root_selection.xml +++ b/src/root_selection.xml @@ -5,6 +5,5 @@ - diff --git a/tests/root_io/CMakeLists.txt b/tests/root_io/CMakeLists.txt index 5ea612a76..9d97ab916 100644 --- a/tests/root_io/CMakeLists.txt +++ b/tests/root_io/CMakeLists.txt @@ -8,7 +8,6 @@ set(root_dependent_tests read_and_write_frame_root.cpp write_interface_root.cpp read_interface_root.cpp - read_with_rdatasource_root.cpp ) if(ENABLE_RNTUPLE) set(root_dependent_tests @@ -20,7 +19,16 @@ if(ENABLE_RNTUPLE) read_interface_rntuple.cpp ) endif() -set(root_libs TestDataModelDict ExtensionDataModelDict podio::podioIO podio::podioRootIO) +if(ENABLE_DATASOURCE) + set(root_dependent_tests + ${root_dependent_tests} + read_with_rdatasource_root.cpp + ) +endif() +set(root_libs TestDataModelDict ExtensionDataModelDict podio::podioRootIO podio::podioIO) +if(ENABLE_DATASOURCE) + list(APPEND root_libs podio::podioDataSource) +endif() foreach( sourcefile ${root_dependent_tests} ) CREATE_PODIO_TEST(${sourcefile} "${root_libs}") endforeach() @@ -31,7 +39,6 @@ set_tests_properties( read_frame_root read_frame_root_multiple read_and_write_frame_root - read_with_rdatasource_root PROPERTIES DEPENDS write_frame_root @@ -42,6 +49,10 @@ if(ENABLE_RNTUPLE) set_property(TEST read_interface_rntuple PROPERTY DEPENDS write_interface_rntuple) endif() +if(ENABLE_DATASOURCE) + set_property(TEST read_with_rdatasource_root PROPERTY DEPENDS write_frame_root) +endif() + add_executable(read_frame_legacy_root read_frame_legacy_root.cpp) target_link_libraries(read_frame_legacy_root PRIVATE "${root_libs}") diff --git a/tests/root_io/read_with_rdatasource_root.cpp b/tests/root_io/read_with_rdatasource_root.cpp index b82a56c67..cdcd691b5 100644 --- a/tests/root_io/read_with_rdatasource_root.cpp +++ b/tests/root_io/read_with_rdatasource_root.cpp @@ -1,4 +1,4 @@ -#include "podio/ROOTDataSource.h" +#include "podio/DataSource.h" #include "datamodel/ExampleClusterCollection.h" #include diff --git a/tests/schema_evolution/root_io/CMakeLists.txt b/tests/schema_evolution/root_io/CMakeLists.txt index e46c4deb2..53e8b451f 100644 --- a/tests/schema_evolution/root_io/CMakeLists.txt +++ b/tests/schema_evolution/root_io/CMakeLists.txt @@ -1,4 +1,4 @@ -CREATE_PODIO_TEST(write_old_data_root.cpp "TestDataModelDict;podioIO;podioRootIO") -PODIO_CREATE_READ_NEW_DATA_TEST(read_new_data_root.cpp "TestDataModel_v3Dict;podio::podioIO;podio::podioRootIO") +CREATE_PODIO_TEST(write_old_data_root.cpp "TestDataModelDict;podioRootIO") +PODIO_CREATE_READ_NEW_DATA_TEST(read_new_data_root.cpp "TestDataModel_v3Dict;podio::podioRootIO") set_property(TEST read_new_data_root PROPERTY DEPENDS write_old_data_root) diff --git a/tests/unittests/CMakeLists.txt b/tests/unittests/CMakeLists.txt index c3ebfbaa2..0cb3a7214 100644 --- a/tests/unittests/CMakeLists.txt +++ b/tests/unittests/CMakeLists.txt @@ -41,7 +41,7 @@ endif() find_package(Threads REQUIRED) add_executable(unittest_podio unittest.cpp frame.cpp buffer_factory.cpp interface_types.cpp std_interoperability.cpp) -target_link_libraries(unittest_podio PUBLIC TestDataModel PRIVATE Catch2::Catch2WithMain Threads::Threads podio::podioIO podio::podioRootIO) +target_link_libraries(unittest_podio PUBLIC TestDataModel PRIVATE Catch2::Catch2WithMain Threads::Threads podio::podioRootIO) if (ENABLE_SIO) target_link_libraries(unittest_podio PRIVATE podio::podioSioIO) endif() From a3e7bda831b468bb0a5775023e25f36eede8a8df Mon Sep 17 00:00:00 2001 From: Juraj Smiesko Date: Thu, 25 Jul 2024 10:02:44 +0200 Subject: [PATCH 09/16] Adding ON flag to all tests --- .github/workflows/key4hep.yml | 1 + .github/workflows/pre-commit.yml | 1 + .github/workflows/publish-docs.yml | 2 +- .github/workflows/test.yml | 1 + .github/workflows/ubuntu.yml | 1 + CMakeLists.txt | 6 +++--- 6 files changed, 8 insertions(+), 4 deletions(-) diff --git a/.github/workflows/key4hep.yml b/.github/workflows/key4hep.yml index b9f540a43..8e015323f 100644 --- a/.github/workflows/key4hep.yml +++ b/.github/workflows/key4hep.yml @@ -34,6 +34,7 @@ jobs: -DCMAKE_CXX_FLAGS=" -fdiagnostics-color=always -Werror -Wno-error=deprecated-declarations " \ -DUSE_EXTERNAL_CATCH2=AUTO \ -DENABLE_RNTUPLE=ON \ + -DENABLE_DATASOURCE=ON \ -G Ninja .. echo "::endgroup::" echo "::group::Build" diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 1f2e9275a..4e2784bff 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -35,6 +35,7 @@ jobs: cmake .. -DENABLE_SIO=ON \ -DENABLE_JULIA=ON \ -DENABLE_RNTUPLE=ON \ + -DENABLE_DATASOURCE=ON \ -DCMAKE_CXX_STANDARD=20 \ -DCMAKE_CXX_FLAGS=" -fdiagnostics-color=always -Werror "\ -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ diff --git a/.github/workflows/publish-docs.yml b/.github/workflows/publish-docs.yml index 4c38d1038..52e4e620f 100644 --- a/.github/workflows/publish-docs.yml +++ b/.github/workflows/publish-docs.yml @@ -32,7 +32,7 @@ jobs: echo -e "::endgroup::\n::group::Build podio" cmake -B build . --install-prefix=$(pwd)/install \ -GNinja -DENABLE_SIO=ON -DENABLE_RNTUPLE=ON \ - -DBUILD_TESTING=OFF \ + -DENABLE_DATASOURCE=ON -DBUILD_TESTING=OFF \ -DCMAKE_CXX_STANDARD=20 cmake --build build --target install source ./init.sh && source ./env.sh diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ec62e79fe..5b27058f9 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -31,6 +31,7 @@ jobs: cmake -DENABLE_SIO=ON \ -DENABLE_JULIA=ON \ -DENABLE_RNTUPLE=ON \ + -DENABLE_DATASOURCE=ON \ -DCMAKE_INSTALL_PREFIX=../install \ -DCMAKE_CXX_STANDARD=20 \ -DCMAKE_CXX_FLAGS=" -fdiagnostics-color=always -Werror -Wno-error=deprecated-declarations " \ diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index 3ed25cb27..2ef4d8af8 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -27,6 +27,7 @@ jobs: cd build cmake -DENABLE_SIO=ON \ -DENABLE_JULIA=ON \ + -DENABLE_DATASOURCE=ON \ -DCMAKE_INSTALL_PREFIX=../install \ -DCMAKE_CXX_STANDARD=17 \ -DCMAKE_CXX_FLAGS=" -fdiagnostics-color=always -Werror -Wno-error=deprecated-declarations " \ diff --git a/CMakeLists.txt b/CMakeLists.txt index 89b08ad9c..7f26732a6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -83,14 +83,14 @@ set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake) set(root_components_needed RIO Tree) if(ENABLE_RNTUPLE) list(APPEND root_components_needed ROOTNTuple) - if(${ROOT_VERSION} VERSION_LESS 6.28.02) - message(FATAL_ERROR "You are trying to build podio with support for the new ROOT NTuple format, but your ROOT version is too old. Please update ROOT to at least version 6.28.02") - endif() endif() if(ENABLE_DATASOURCE) list(APPEND root_components_needed ROOTDataFrame) endif() find_package(ROOT REQUIRED COMPONENTS ${root_components_needed}) +if((ENABLE_RNTUPLE) AND (${ROOT_VERSION} VERSION_LESS 6.28.02)) + message(FATAL_ERROR "You are trying to build podio with support for the new ROOT NTuple format, but your ROOT version is too old. Please update ROOT to at least version 6.28.02") +endif() # ROOT_CXX_STANDARD was introduced in https://github.com/root-project/root/pull/6466 # before that it's an empty variable so we check if it's any number > 0 From 90a940bfcfeec5f137f8f42fa0916f0e7c8dd2ae Mon Sep 17 00:00:00 2001 From: Juraj Smiesko Date: Thu, 25 Jul 2024 10:14:11 +0200 Subject: [PATCH 10/16] Formatting --- include/podio/DataSource.h | 6 +++--- tests/root_io/read_with_rdatasource_root.cpp | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/podio/DataSource.h b/include/podio/DataSource.h index 6a3c5fde4..d6a2c49e3 100644 --- a/include/podio/DataSource.h +++ b/include/podio/DataSource.h @@ -1,5 +1,5 @@ -#ifndef PODIO_DATASOURCE_H__ -#define PODIO_DATASOURCE_H__ +#ifndef PODIO_DATASOURCE_H +#define PODIO_DATASOURCE_H // Podio #include @@ -176,4 +176,4 @@ ROOT::RDataFrame CreateDataFrame(const std::vector& filePathList); ROOT::RDataFrame CreateDataFrame(const std::string& filePath); } // namespace podio -#endif /* PODIO_DATASOURCE_H__ */ +#endif /* PODIO_DATASOURCE_H */ diff --git a/tests/root_io/read_with_rdatasource_root.cpp b/tests/root_io/read_with_rdatasource_root.cpp index cdcd691b5..29bad4319 100644 --- a/tests/root_io/read_with_rdatasource_root.cpp +++ b/tests/root_io/read_with_rdatasource_root.cpp @@ -1,5 +1,5 @@ -#include "podio/DataSource.h" #include "datamodel/ExampleClusterCollection.h" +#include "podio/DataSource.h" #include #include From 4f18cf42a934ad1811d35e7b56f4d1b2274dc9ba Mon Sep 17 00:00:00 2001 From: Juraj Smiesko Date: Mon, 26 Aug 2024 17:26:40 +0200 Subject: [PATCH 11/16] The headers should install now --- src/CMakeLists.txt | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 1bedf474c..8c08cb9d4 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -199,8 +199,7 @@ endif() # Only install the necessary headers file(GLOB headers_necessary - RELATIVE ${PROJECT_SOURCE_DIR} - "${PROJECT_SOURCE_DIR}/install/podio/*.h") + "${PROJECT_SOURCE_DIR}/include/podio/*.h") if (NOT ENABLE_SIO) list(FILTER headers_necessary EXCLUDE REGEX SIO.*\\.h$) @@ -208,10 +207,13 @@ endif() if (NOT ENABLE_RNTUPLE) list(FILTER headers_necessary EXCLUDE REGEX RNTuple.*\\.h$) endif() -if (NOT ENABLE_RNTUPLE) +if (NOT ENABLE_DATASOURCE) list(FILTER headers_necessary EXCLUDE REGEX DataSource.h) endif() -install(FILES ${headers_necessary} DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/podio") + +install(FILES ${headers_necessary} + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/podio +) install(FILES ${CMAKE_CURRENT_BINARY_DIR}/podioDictDict.rootmap From 943a690548e1b676f7171095b4483a4e83cc0f46 Mon Sep 17 00:00:00 2001 From: Juraj Smiesko Date: Mon, 26 Aug 2024 17:27:39 +0200 Subject: [PATCH 12/16] Removing exporting of compile commands --- CMakeLists.txt | 3 --- 1 file changed, 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7f26732a6..1f4517076 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -64,9 +64,6 @@ option(FORCE_RUN_ALL_TESTS "Run all the tests even those with known problems" OF option(CLANG_TIDY "Run clang-tidy after compilation." OFF) ADD_CLANG_TIDY() -# Export compile commands --- used by the tools from clang family -set(CMAKE_EXPORT_COMPILE_COMMANDS ON) - #--- Declare options ----------------------------------------------------------- option(CREATE_DOC "Whether or not to create doxygen doc target." OFF) option(ENABLE_SIO "Build SIO I/O support" OFF) From e6b73ad342ee3ecc27802f52020b44ba9726fd2f Mon Sep 17 00:00:00 2001 From: Juraj Smiesko Date: Mon, 26 Aug 2024 17:51:37 +0200 Subject: [PATCH 13/16] Other suggested adjustment --- include/podio/DataSource.h | 21 +-------------------- src/DataSource.cc | 4 ++-- 2 files changed, 3 insertions(+), 22 deletions(-) diff --git a/include/podio/DataSource.h b/include/podio/DataSource.h index d6a2c49e3..17485c124 100644 --- a/include/podio/DataSource.h +++ b/include/podio/DataSource.h @@ -35,13 +35,6 @@ class DataSource : public ROOT::RDF::RDataSource { /// void SetNSlots(unsigned int nSlots) override; - /// - /// @brief Retrieve from podio::DataSource per-thread readers for the desired - /// columns. - /// - template - std::vector GetColumnReaders(std::string_view columnName); - /// /// @brief Inform podio::DataSource that an event-loop is about to start. /// @@ -111,7 +104,7 @@ class DataSource : public ROOT::RDF::RDataSource { std::vector m_filePathList = {}; /// Total number of events - unsigned int m_nEvents = 0; + ULong64_t m_nEvents = 0; /// Ranges of events available to be processed std::vector> m_rangesAvailable = {}; @@ -146,18 +139,6 @@ class DataSource : public ROOT::RDF::RDataSource { void SetupInput(int nEvents); }; -/// -/// Not used. -/// -template -std::vector DataSource::GetColumnReaders(std::string_view) { - // std::cout << "podio::DataSource: Getting column readers for column: " << columnName << std::endl; - - std::vector readers; - - return readers; -} - /// /// @brief Create RDataFrame from multiple Podio files. /// diff --git a/src/DataSource.cc b/src/DataSource.cc index 9d861a445..a01504268 100644 --- a/src/DataSource.cc +++ b/src/DataSource.cc @@ -36,7 +36,7 @@ void DataSource::SetupInput(int nEvents) { unsigned int nEventsInFiles = 0; auto podioReader = podio::makeReader(m_filePathList); nEventsInFiles = podioReader.getEntries(podio::Category::Event); - frame = podio::Frame(podioReader.readFrame(podio::Category::Event, 0)); + frame = podioReader.readFrame(podio::Category::Event, 0); // Determine over how many events to run if (nEventsInFiles > 0) { @@ -89,7 +89,7 @@ void DataSource::SetNSlots(unsigned int nSlots) { } for (size_t i = 0; i < m_nSlots; ++i) { - m_frames.emplace_back(std::make_unique(podio::Frame{})); + m_frames.emplace_back(std::make_unique()); } } From 1e1677cfa25b3eb17ce1951422ce27f15e413bfb Mon Sep 17 00:00:00 2001 From: Juraj Smiesko Date: Mon, 26 Aug 2024 18:02:32 +0200 Subject: [PATCH 14/16] Installing also utilities directory --- src/CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8c08cb9d4..b7f6aaf56 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -214,6 +214,9 @@ endif() install(FILES ${headers_necessary} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/podio ) +install(DIRECTORY ${PROJECT_SOURCE_DIR}/include/podio/utilities + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/podio +) install(FILES ${CMAKE_CURRENT_BINARY_DIR}/podioDictDict.rootmap From b059d2349c5a8599b923fd37204dbb14aca14195 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Wed, 28 Aug 2024 10:08:15 +0200 Subject: [PATCH 15/16] Cleanup setup code slightly to avoid unnecessary copies --- src/DataSource.cc | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/DataSource.cc b/src/DataSource.cc index a01504268..cdc678a98 100644 --- a/src/DataSource.cc +++ b/src/DataSource.cc @@ -39,8 +39,7 @@ void DataSource::SetupInput(int nEvents) { frame = podioReader.readFrame(podio::Category::Event, 0); // Determine over how many events to run - if (nEventsInFiles > 0) { - } else { + if (nEventsInFiles <= 0) { throw std::runtime_error("podio::DataSource: No events found!"); } @@ -57,10 +56,10 @@ void DataSource::SetupInput(int nEvents) { // Get collections stored in the files std::vector collNames = frame.getAvailableCollections(); - for (const auto& collName : collNames) { + for (auto&& collName : collNames) { const podio::CollectionBase* coll = frame.get(collName); if (coll->isValid()) { - m_columnNames.emplace_back(collName); + m_columnNames.emplace_back(std::move(collName)); m_columnTypes.emplace_back(coll->getTypeName()); } } From e54dce90296d1afbc677ee594571892e9a1d26f2 Mon Sep 17 00:00:00 2001 From: Juraj Smiesko Date: Wed, 28 Aug 2024 15:14:11 +0200 Subject: [PATCH 16/16] Adding missing podioDataSourceDict target --- src/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b7f6aaf56..6f425fe8f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -192,7 +192,7 @@ if (NOT ENABLE_DATASOURCE) EXPORT podioTargets DESTINATION "${CMAKE_INSTALL_LIBDIR}") else() - install(TARGETS podio podioDict podioRootIO podioRootIODict podioIO podioDataSource ${INSTALL_LIBRARIES} + install(TARGETS podio podioDict podioRootIO podioRootIODict podioIO podioDataSource podioDataSourceDict ${INSTALL_LIBRARIES} EXPORT podioTargets DESTINATION "${CMAKE_INSTALL_LIBDIR}") endif()