Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add an RDataSource for podio files and collections #593

Merged
merged 16 commits into from
Aug 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/key4hep.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ jobs:
-DCMAKE_CXX_FLAGS=" -fdiagnostics-color=always -Werror -Wno-error=deprecated-declarations " \
-DUSE_EXTERNAL_CATCH2=AUTO \
-DENABLE_RNTUPLE=ON \
-DENABLE_DATASOURCE=ON \
-G Ninja ..
echo "::endgroup::"
echo "::group::Build"
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/pre-commit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ jobs:
cmake .. -DENABLE_SIO=ON \
-DENABLE_JULIA=ON \
-DENABLE_RNTUPLE=ON \
-DENABLE_DATASOURCE=ON \
-DCMAKE_CXX_STANDARD=20 \
-DCMAKE_CXX_FLAGS=" -fdiagnostics-color=always -Werror "\
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/publish-docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ jobs:
echo -e "::endgroup::\n::group::Build podio"
cmake -B build . --install-prefix=$(pwd)/install \
-GNinja -DENABLE_SIO=ON -DENABLE_RNTUPLE=ON \
-DBUILD_TESTING=OFF \
-DENABLE_DATASOURCE=ON -DBUILD_TESTING=OFF \
-DCMAKE_CXX_STANDARD=20
cmake --build build --target install
source ./init.sh && source ./env.sh
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ jobs:
cmake -DENABLE_SIO=ON \
-DENABLE_JULIA=ON \
-DENABLE_RNTUPLE=ON \
-DENABLE_DATASOURCE=ON \
-DCMAKE_INSTALL_PREFIX=../install \
-DCMAKE_CXX_STANDARD=20 \
-DCMAKE_CXX_FLAGS=" -fdiagnostics-color=always -Werror -Wno-error=deprecated-declarations " \
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/ubuntu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ jobs:
cd build
cmake -DENABLE_SIO=ON \
-DENABLE_JULIA=ON \
-DENABLE_DATASOURCE=ON \
-DCMAKE_INSTALL_PREFIX=../install \
-DCMAKE_CXX_STANDARD=17 \
-DCMAKE_CXX_FLAGS=" -fdiagnostics-color=always -Werror -Wno-error=deprecated-declarations " \
Expand Down
16 changes: 10 additions & 6 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -69,20 +69,24 @@ option(CREATE_DOC "Whether or not to create doxygen doc target." OFF)
# Optional features and tooling switches. Every option below defaults to OFF and
# has to be enabled explicitly at configure time (e.g. -DENABLE_DATASOURCE=ON).
option(ENABLE_SIO "Build SIO I/O support" OFF)
option(PODIO_RELAX_PYVER "Do not require exact python version match with ROOT" OFF)
option(ENABLE_RNTUPLE "Build with support for the new ROOT NTuple format" OFF)
option(ENABLE_DATASOURCE "Build podio's ROOT DataSource" OFF)
option(PODIO_USE_CLANG_FORMAT "Use clang-format to format the code" OFF)
option(ENABLE_JULIA "Enable Julia support. When enabled, Julia datamodels will be generated, and Julia tests will run." OFF)


#--- Declare ROOT dependency ---------------------------------------------------
list(APPEND CMAKE_PREFIX_PATH $ENV{ROOTSYS})
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
if(NOT ENABLE_RNTUPLE)
find_package(ROOT REQUIRED COMPONENTS RIO Tree)
else()
find_package(ROOT REQUIRED COMPONENTS RIO Tree ROOTNTuple)
if(${ROOT_VERSION} VERSION_LESS 6.28.02)
# Assemble the list of ROOT components from the enabled features first, so that
# ROOT is located by one single find_package call.
set(root_components_needed RIO Tree)

# RNTuple based I/O
if(ENABLE_RNTUPLE)
  list(APPEND root_components_needed ROOTNTuple)
endif()

# RDataFrame is required by podio's DataSource
if(ENABLE_DATASOURCE)
  list(APPEND root_components_needed ROOTDataFrame)
endif()

find_package(ROOT
  REQUIRED
  COMPONENTS ${root_components_needed}
)
if((ENABLE_RNTUPLE) AND (${ROOT_VERSION} VERSION_LESS 6.28.02))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a minimum ROOT version for RDataSource that we could / should check for here as well?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

RDataSource has been in ROOT for a long time (it has not been experimental since ROOT 6.14); I tested this with the nightlies stack (ROOT 6.32).

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alright, then I think we can also skip the check here. Thanks for checking.

message(FATAL_ERROR "You are trying to build podio with support for the new ROOT NTuple format, but your ROOT version is too old. Please update ROOT to at least version 6.28.02")
endif()
endif()

# ROOT_CXX_STANDARD was introduced in https://github.com/root-project/root/pull/6466
Expand Down
160 changes: 160 additions & 0 deletions include/podio/DataSource.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
#ifndef PODIO_DATASOURCE_H
#define PODIO_DATASOURCE_H

// Podio
#include <podio/CollectionBase.h>
#include <podio/Frame.h>
#include <podio/Reader.h>

// ROOT
#include <ROOT/RDataFrame.hxx>
#include <ROOT/RDataSource.hxx>

// STL
#include <memory>
#include <string>
#include <string_view>
#include <typeinfo>
#include <utility>
#include <vector>

namespace podio {
///
/// @brief ROOT::RDF::RDataSource implementation for podio files.
///
/// The overridden member functions below form the interface that is declared
/// by ROOT::RDF::RDataSource. The data source keeps one entry per column in
/// m_columnNames / m_columnTypes, and stores the collections as
/// m_Collections[columnIndex][slotIndex].
///
class DataSource : public ROOT::RDF::RDataSource {
public:
  ///
  /// @brief Construct the podio::DataSource from the provided file.
  ///
  /// @param[in] filePath Path of the input file.
  /// @param[in] nEvents  Number of events, defaults to -1.
  ///                     NOTE(review): -1 presumably selects all available
  ///                     events -- confirm against the implementation.
  ///
  explicit DataSource(const std::string& filePath, int nEvents = -1);

  ///
  /// @brief Construct the podio::DataSource from the provided file list.
  ///
  /// @param[in] filePathList Paths of the input files.
  /// @param[in] nEvents      Number of events, defaults to -1.
  ///                         NOTE(review): -1 presumably selects all available
  ///                         events -- confirm against the implementation.
  ///
  explicit DataSource(const std::vector<std::string>& filePathList, int nEvents = -1);

  ///
  /// @brief Inform the podio::DataSource of the desired level of parallelism.
  ///
  /// @param[in] nSlots Number of slots.
  ///
  void SetNSlots(unsigned int nSlots) override;

  ///
  /// @brief Inform podio::DataSource that an event-loop is about to start.
  ///
  void Initialize() override;

  ///
  /// @brief Retrieve from podio::DataSource a set of ranges of entries that
  ///        can be processed concurrently.
  ///
  /// @return Vector of entry ranges, each given as a pair of entry numbers.
  ///
  std::vector<std::pair<ULong64_t, ULong64_t>> GetEntryRanges() override;

  ///
  /// @brief Inform podio::DataSource that a certain thread is about to start
  ///        working on a certain range of entries.
  ///
  /// @param[in] slot       Slot that is about to start working.
  /// @param[in] firstEntry First entry of the range.
  ///
  void InitSlot(unsigned int slot, ULong64_t firstEntry) override;

  ///
  /// @brief Inform podio::DataSource that a certain thread is about to start
  ///        working on a certain entry.
  ///
  /// @param[in] slot  Slot that is about to start working.
  /// @param[in] entry Entry that is about to be processed.
  ///
  bool SetEntry(unsigned int slot, ULong64_t entry) override;

  ///
  /// @brief Inform podio::DataSource that a certain thread finished working
  ///        on a certain range of entries.
  ///
  /// @param[in] slot Slot that finished working.
  ///
  void FinalizeSlot(unsigned int slot) override;

  ///
  /// @brief Inform podio::DataSource that an event-loop finished.
  ///
  void Finalize() override;

  ///
  /// @brief Returns a reference to the collection of the dataset's column
  ///        names
  ///
  const std::vector<std::string>& GetColumnNames() const override;

  ///
  /// @brief Checks if the dataset has a certain column.
  ///
  /// @param[in] columnName Name of the column to look for.
  ///
  bool HasColumn(std::string_view columnName) const override;

  ///
  /// @brief Type of a column as a string. Required for JITting.
  ///
  /// @param[in] columnName Name of the column whose type is requested.
  ///
  std::string GetTypeName(std::string_view columnName) const override;

protected:
  ///
  /// @brief Type-erased vector of pointers to pointers to column
  ///        values --- one per slot.
  ///
  /// @param[in] name     Name of the column.
  /// @param[in] typeInfo Type information for the column.
  ///
  std::vector<void*> GetColumnReadersImpl(std::string_view name, const std::type_info& typeInfo) override;

  ///
  /// @brief Short, human readable description of this data source.
  ///
  std::string AsString() override {
    return "Podio data source";
  }

private:
  /// Number of slots/threads
  unsigned int m_nSlots = 1;

  /// Input file paths
  std::vector<std::string> m_filePathList = {};

  /// Total number of events
  ULong64_t m_nEvents = 0;

  /// Ranges of events available to be processed
  std::vector<std::pair<ULong64_t, ULong64_t>> m_rangesAvailable = {};

  /// All ranges of events ever created
  std::vector<std::pair<ULong64_t, ULong64_t>> m_rangesAll = {};

  /// Column names
  std::vector<std::string> m_columnNames = {};

  /// Column types
  std::vector<std::string> m_columnTypes = {};

  /// Collections, m_Collections[columnIndex][slotIndex]
  std::vector<std::vector<const podio::CollectionBase*>> m_Collections = {};

  /// Active collections
  std::vector<unsigned int> m_activeCollections = {};

  /// Podio readers
  std::vector<std::unique_ptr<podio::Reader>> m_podioReaders = {};

  /// Podio frames
  std::vector<std::unique_ptr<podio::Frame>> m_frames = {};

  ///
  /// @brief Setup input for the podio::DataSource.
  ///
  /// @param[in] nEvents Number of events.
  ///
  void SetupInput(int nEvents);
};

///
/// @brief Create an RDataFrame from multiple podio files.
///
/// @param[in] filePathList List of file paths from which the RDataFrame
///                         will be created.
/// @return RDataFrame created from the input file list.
///
ROOT::RDataFrame CreateDataFrame(const std::vector<std::string>& filePathList);

///
/// @brief Create an RDataFrame from a single podio file.
///
/// @param[in] filePath File path from which the RDataFrame will be created.
/// @return RDataFrame created from the input file.
///
ROOT::RDataFrame CreateDataFrame(const std::string& filePath);
} // namespace podio

#endif /* PODIO_DATASOURCE_H */
69 changes: 61 additions & 8 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ if(ENABLE_SIO)
LIST(APPEND INSTALL_LIBRARIES podioSioIO podioSioIODict)
endif()


# --- IO
set(io_sources
Writer.cc
Expand All @@ -160,19 +161,63 @@ if(ENABLE_SIO)
target_link_libraries(podioIO PUBLIC podio::podioSioIO)
endif()


# --- DataSource
# Optional library providing podio's ROOT DataSource. It is only configured when
# ENABLE_DATASOURCE is switched on.
if(ENABLE_DATASOURCE)
  set(rds_sources DataSource.cc)
  set(rds_headers ${PROJECT_SOURCE_DIR}/include/podio/DataSource.h)

  # Library plus the dictionary described by rds_selection.xml
  podio_add_lib_and_dict(podioDataSource "${rds_headers}" "${rds_sources}" rds_selection.xml)

  target_link_libraries(podioDataSource
    PUBLIC
      podio::podio
      podio::podioIO
      podio::podioRootIO
      ROOT::Core
      ROOT::RIO
      ROOT::Tree
      ROOT::ROOTVecOps
      ROOT::ROOTDataFrame
  )

  # PUBLIC, so that targets linking against podioDataSource see the definition too
  target_compile_definitions(podioDataSource
    PUBLIC
      PODIO_ENABLE_DATASOURCE=1
  )
endif()


# --- Install everything
install(TARGETS podio podioDict podioRootIO podioRootIODict podioIO ${INSTALL_LIBRARIES}
EXPORT podioTargets
DESTINATION "${CMAKE_INSTALL_LIBDIR}")
# Targets that are always installed. The DataSource library and its dictionary
# only exist when ENABLE_DATASOURCE is ON, so they are appended conditionally
# instead of duplicating the whole install() call.
set(podio_install_targets podio podioDict podioRootIO podioRootIODict podioIO)
if(ENABLE_DATASOURCE)
  list(APPEND podio_install_targets podioDataSource podioDataSourceDict)
endif()

install(TARGETS ${podio_install_targets} ${INSTALL_LIBRARIES}
  EXPORT podioTargets
  DESTINATION "${CMAKE_INSTALL_LIBDIR}")

# Only install the necessary headers
if (ENABLE_SIO)
install(DIRECTORY ${PROJECT_SOURCE_DIR}/include/podio DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}")
else()
install(DIRECTORY ${PROJECT_SOURCE_DIR}/include/podio DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}"
REGEX SIO.*\\.h$ EXCLUDE )
# Collect the top-level public headers and drop the ones that belong to
# features which are not enabled.
file(GLOB headers_necessary
  "${PROJECT_SOURCE_DIR}/include/podio/*.h")

# file(GLOB) yields absolute paths. Each pattern therefore forbids a '/' after
# the feature marker, so it can only match inside the file name. Otherwise a
# checkout located below a directory containing e.g. "SIO" would lose every
# header.
if(NOT ENABLE_SIO)
  list(FILTER headers_necessary EXCLUDE REGEX "SIO[^/]*\\.h$")
endif()
if(NOT ENABLE_RNTUPLE)
  list(FILTER headers_necessary EXCLUDE REGEX "RNTuple[^/]*\\.h$")
endif()
if(NOT ENABLE_DATASOURCE)
  # Escaped dot and end anchor, consistent with the patterns above
  list(FILTER headers_necessary EXCLUDE REGEX "DataSource\\.h$")
endif()

install(FILES ${headers_necessary}
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/podio"
)
# Headers in the utilities subdirectory are always installed
install(DIRECTORY "${PROJECT_SOURCE_DIR}/include/podio/utilities"
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/podio"
)

install(FILES
${CMAKE_CURRENT_BINARY_DIR}/podioDictDict.rootmap
${CMAKE_CURRENT_BINARY_DIR}/libpodioDict_rdict.pcm
Expand All @@ -188,6 +233,14 @@ if (ENABLE_SIO)
)
endif()

# The rootmap and pcm files of the DataSource dictionary go into the library
# directory, but only when the DataSource is built.
if(ENABLE_DATASOURCE)
  install(
    FILES
      ${CMAKE_CURRENT_BINARY_DIR}/podioDataSourceDictDict.rootmap
      ${CMAKE_CURRENT_BINARY_DIR}/libpodioDataSourceDict_rdict.pcm
    DESTINATION "${CMAKE_INSTALL_LIBDIR}"
  )
endif()

add_executable(podio_test_hashes test_hashes.cpp)
target_link_libraries(podio_test_hashes PRIVATE podio::podio)
install(TARGETS podio_test_hashes
Expand Down
Loading