From 27058a96364b2b75cde17ac56bd1ec01aaa4fc1e Mon Sep 17 00:00:00 2001 From: Marcel Breyer Date: Tue, 10 Dec 2024 17:56:58 +0100 Subject: [PATCH] Implement general new MPI communicator API. Note: MPI isn't used, i.e., each MPI rank does the classification all for itself! --- include/plssvm/backends/CUDA/csvm.hpp | 51 ++- include/plssvm/backends/gpu_csvm.hpp | 10 +- include/plssvm/csvm.hpp | 31 +- include/plssvm/data_set.hpp | 309 +++++++++++++++--- .../plssvm/detail/cmd/data_set_variants.hpp | 38 ++- include/plssvm/detail/cmd/parser_predict.hpp | 4 +- include/plssvm/detail/cmd/parser_train.hpp | 4 +- .../plssvm/detail/io/libsvm_model_parsing.hpp | 10 +- include/plssvm/detail/logging.hpp | 25 +- .../logging_without_performance_tracking.hpp | 21 ++ include/plssvm/model.hpp | 31 +- src/main_predict.cpp | 72 ++-- src/main_train.cpp | 59 ++-- src/plssvm/backends/CUDA/csvm.cu | 18 +- src/plssvm/csvm.cpp | 6 + src/plssvm/detail/cmd/parser_predict.cpp | 46 ++- src/plssvm/detail/cmd/parser_train.cpp | 60 +++- 17 files changed, 646 insertions(+), 149 deletions(-) diff --git a/include/plssvm/backends/CUDA/csvm.hpp b/include/plssvm/backends/CUDA/csvm.hpp index 5e0eed30d..565648e7a 100644 --- a/include/plssvm/backends/CUDA/csvm.hpp +++ b/include/plssvm/backends/CUDA/csvm.hpp @@ -21,6 +21,7 @@ #include "plssvm/csvm.hpp" // plssvm::detail::csvm_backend_exists #include "plssvm/detail/memory_size.hpp" // plssvm::detail::memory_size #include "plssvm/detail/type_traits.hpp" // PLSSVM_REQUIRES +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/parameter.hpp" // plssvm::parameter #include "plssvm/target_platforms.hpp" // plssvm::target_platform @@ -59,6 +60,16 @@ class csvm : public ::plssvm::detail::gpu_csvm)> explicit csvm(Args &&...named_args) : - csvm{ plssvm::target_platform::automatic, std::forward(named_args)... } { } + csvm{ mpi::communicator{}, std::forward(named_args)... 
} { } + /** + * @brief Construct a new C-SVM using the CUDA backend and the optionally provided @p named_args. + * @param[in] comm the used MPI communicator (**note**: currently unused) + * @param[in] named_args the additional optional named arguments + * @throws plssvm::exception all exceptions thrown in the base class constructor + * @throws plssvm::cuda::backend_exception if the target platform isn't plssvm::target_platform::automatic or plssvm::target_platform::gpu_nvidia + * @throws plssvm::cuda::backend_exception if the plssvm::target_platform::gpu_nvidia target isn't available + * @throws plssvm::cuda::backend_exception if no CUDA capable devices could be found + */ + template )> + explicit csvm(mpi::communicator comm, Args &&...named_args) : + csvm{ std::move(comm), plssvm::target_platform::automatic, std::forward(named_args)... } { } /** * @brief Construct a new C-SVM using the CUDA backend on the @p target platform and the optionally provided @p named_args. @@ -93,7 +127,20 @@ class csvm : public ::plssvm::detail::gpu_csvm)> explicit csvm(const target_platform target, Args &&...named_args) : - base_type{ std::forward(named_args)... } { + csvm{ mpi::communicator{}, target, std::forward(named_args)... } { } + /** + * @brief Construct a new C-SVM using the CUDA backend on the @p target platform and the optionally provided @p named_args. 
+ * @param[in] comm the used MPI communicator (**note**: currently unused) + * @param[in] target the target platform used for this C-SVM + * @param[in] named_args the additional optional named-parameters + * @throws plssvm::exception all exceptions thrown in the base class constructor + * @throws plssvm::cuda::backend_exception if the target platform isn't plssvm::target_platform::automatic or plssvm::target_platform::gpu_nvidia + * @throws plssvm::cuda::backend_exception if the plssvm::target_platform::gpu_nvidia target isn't available + * @throws plssvm::cuda::backend_exception if no CUDA capable devices could be found + */ + template )> + explicit csvm(mpi::communicator comm, const target_platform target, Args &&...named_args) : + base_type{ std::move(comm), std::forward(named_args)... } { this->init(target); } diff --git a/include/plssvm/backends/gpu_csvm.hpp b/include/plssvm/backends/gpu_csvm.hpp index cf2641b38..6634491d1 100644 --- a/include/plssvm/backends/gpu_csvm.hpp +++ b/include/plssvm/backends/gpu_csvm.hpp @@ -21,6 +21,7 @@ #include "plssvm/detail/move_only_any.hpp" // plssvm::detail::{move_only_any, move_only_any_cast} #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type #include "plssvm/matrix.hpp" // plssvm::aos_matrix, plssvm::soa_matrix +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/parameter.hpp" // plssvm::parameter #include "plssvm/shape.hpp" // plssvm::shape #include "plssvm/solver_types.hpp" // plssvm::solver_type @@ -56,17 +57,18 @@ class gpu_csvm : public ::plssvm::csvm { /** * @copydoc plssvm::csvm::csvm() */ - explicit gpu_csvm(parameter params = {}) : - ::plssvm::csvm{ params } { } + explicit gpu_csvm(mpi::communicator comm, parameter params = {}) : + ::plssvm::csvm{ std::move(comm), params } { } /** * @brief Construct a C-SVM forwarding all parameters @p args to the plssvm::parameter constructor. 
* @tparam Args the type of the (named-)parameters + * @param[in] comm the used MPI communicator (**note**: currently unused) * @param[in] args the parameters used to construct a plssvm::parameter */ template - explicit gpu_csvm(Args &&...args) : - ::plssvm::csvm{ std::forward(args)... } { } + explicit gpu_csvm(mpi::communicator comm, Args &&...args) : + ::plssvm::csvm{ std::move(comm), std::forward(args)... } { } /** * @copydoc plssvm::csvm::csvm(const plssvm::csvm &) diff --git a/include/plssvm/csvm.hpp b/include/plssvm/csvm.hpp index 3e0ea2472..edc145f49 100644 --- a/include/plssvm/csvm.hpp +++ b/include/plssvm/csvm.hpp @@ -31,6 +31,7 @@ #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type #include "plssvm/matrix.hpp" // plssvm::aos_matrix #include "plssvm/model.hpp" // plssvm::model +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/parameter.hpp" // plssvm::parameter #include "plssvm/shape.hpp" // plssvm::shape #include "plssvm/solver_types.hpp" // plssvm::solver_type @@ -69,16 +70,18 @@ class csvm { /** * @brief Construct a C-SVM using the SVM parameter @p params. * @details Uses the default SVM parameter if none are provided. + * @param[in] comm the used MPI communicator (**note**: currently unused) * @param[in] params the SVM parameter */ - explicit csvm(parameter params = {}); + explicit csvm(mpi::communicator comm, parameter params = {}); /** * @brief Construct a C-SVM forwarding all parameters @p args to the plssvm::parameter constructor. * @tparam Args the type of the (named-)parameters + * @param[in] comm the used MPI communicator (**note**: currently unused) * @param[in] args the parameters used to construct a plssvm::parameter */ template - explicit csvm(Args &&...args); + explicit csvm(mpi::communicator comm, Args &&...args); /** * @brief Delete copy-constructor since a CSVM is a move-only type. @@ -255,6 +258,9 @@ class csvm { /// The data distribution on the available devices. 
mutable std::unique_ptr data_distribution_{}; + /// The used MPI communicator. + mpi::communicator comm_{}; + protected: // necessary for tests, would otherwise be private /** * @brief Perform some sanity checks on the passed SVM parameters. @@ -311,13 +317,15 @@ class csvm { parameter params_{}; }; -inline csvm::csvm(parameter params) : +inline csvm::csvm(mpi::communicator comm, parameter params) : + comm_{ std::move(comm) }, params_{ params } { this->sanity_check_parameter(); } template -csvm::csvm(Args &&...named_args) : +csvm::csvm(mpi::communicator comm, Args &&...named_args) : + comm_{ std::move(comm) }, params_{ std::forward(named_args)... } { this->sanity_check_parameter(); } @@ -376,6 +384,7 @@ model csvm::fit(const data_set &data, Args &&...named_ar const std::chrono::time_point start_time = std::chrono::steady_clock::now(); detail::log(verbosity_level::full, + comm_, "Using {} ({}) as multi-class classification strategy.\n", used_classification, classification_type_to_full_string(used_classification)); @@ -417,6 +426,7 @@ model csvm::fit(const data_set &data, Args &&...named_ar if (num_classes == 2) { // special optimization for binary case (no temporary copies necessary) detail::log(verbosity_level::full, + comm_, "\nClassifying 0 vs 1 ({} vs {}) (1/1):\n", data.mapping_->get_label_by_mapped_index(0), data.mapping_->get_label_by_mapped_index(1)); @@ -460,6 +470,7 @@ model csvm::fit(const data_set &data, Args &&...named_ar // solve the minimization problem -> note that only a single rhs is present detail::log(verbosity_level::full, + comm_, "\nClassifying {} vs {} ({} vs {}) ({}/{}):\n", i, j, @@ -486,6 +497,7 @@ model csvm::fit(const data_set &data, Args &&...named_ar const std::chrono::time_point end_time = std::chrono::steady_clock::now(); detail::log(verbosity_level::full | verbosity_level::timing, + comm_, "\nLearned the SVM classifier for {} multi-class classification in {}.\n\n", classification_type_to_full_string(used_classification), 
detail::tracking::tracking_entry{ "cg", "total_runtime", std::chrono::duration_cast(end_time - start_time) }); @@ -804,6 +816,7 @@ std::tuple, std::vector, std::vector, std::vector, std::vector failed_cg_implicit_constraints = check_sizes(total_memory_needed_implicit_per_device, usable_device_memory_per_device); failed_cg_implicit_constraints.empty()) { @@ -865,6 +881,7 @@ std::tuple, std::vector, std::vector, std::vector, std::vector, std::vector, std::vector, std::vector, std::vector, std::vector, std::vector>; * @brief Encapsulate all necessary data that is needed for training or predicting using an SVM. * @details May or may not contain labels! * Internally, saves all data using [`std::shared_ptr`](https://en.cppreference.com/w/cpp/memory/shared_ptr) to make a plssvm::data_set relatively cheap to copy! + * @note Currently, **each** MPI rank loads/stores the whole data set (if MPI is available). * @tparam U the label type of the data (must be an arithmetic type or `std::string`; default: `int`) */ template @@ -96,6 +98,15 @@ class data_set { * @throws plssvm::invalid_file_format_exception all exceptions thrown by plssvm::data_set::read_file */ explicit data_set(const std::string &filename); + /** + * @brief Read the data points from the file @p filename. + * Automatically determines the plssvm::file_format_type based on the file extension. + * @details If @p filename ends with `.arff` it uses the ARFF parser, otherwise the LIBSVM parser is used. + * @param[in] comm the used MPI communicator (**note**: currently unused) + * @param[in] filename the file to read the data points from + * @throws plssvm::invalid_file_format_exception all exceptions thrown by plssvm::data_set::read_file + */ + explicit data_set(mpi::communicator comm, const std::string &filename); /** * @brief Read the data points from the file @p filename assuming that the file is given in the @p plssvm::file_format_type. 
* @param[in] filename the file to read the data points from @@ -103,6 +114,14 @@ class data_set { * @throws plssvm::invalid_file_format_exception all exceptions thrown by plssvm::data_set::read_file */ data_set(const std::string &filename, file_format_type format); + /** + * @brief Read the data points from the file @p filename assuming that the file is given in the @p plssvm::file_format_type. + * @param[in] comm the used MPI communicator (**note**: currently unused) + * @param[in] filename the file to read the data points from + * @param[in] format the assumed file format used to parse the data points + * @throws plssvm::invalid_file_format_exception all exceptions thrown by plssvm::data_set::read_file + */ + data_set(mpi::communicator comm, const std::string &filename, file_format_type format); /** * @brief Read the data points from the file @p filename and scale it using the provided @p scale_parameter. * Automatically determines the plssvm::file_format_type based on the file extension. @@ -113,6 +132,17 @@ class data_set { * @throws plssvm::data_set_exception all exceptions thrown by plssvm::data_set::scale */ data_set(const std::string &filename, scaling scale_parameter); + /** + * @brief Read the data points from the file @p filename and scale it using the provided @p scale_parameter. + * Automatically determines the plssvm::file_format_type based on the file extension. + * @details If @p filename ends with `.arff` it uses the ARFF parser, otherwise the LIBSVM parser is used. 
+ * @param[in] comm the used MPI communicator (**note**: currently unused) + * @param[in] filename the file to read the data points from + * @param[in] scale_parameter the parameters used to scale the data set feature values to a given range + * @throws plssvm::invalid_file_format_exception all exceptions thrown by plssvm::data_set::read_file + * @throws plssvm::data_set_exception all exceptions thrown by plssvm::data_set::scale + */ + data_set(mpi::communicator comm, const std::string &filename, scaling scale_parameter); /** * @brief Read the data points from the file @p filename assuming that the file is given in the plssvm::file_format_type @p format and * scale it using the provided @p scale_parameter. @@ -123,6 +153,17 @@ class data_set { * @throws plssvm::data_set_exception all exceptions thrown by plssvm::data_set::scale */ data_set(const std::string &filename, file_format_type format, scaling scale_parameter); + /** + * @brief Read the data points from the file @p filename assuming that the file is given in the plssvm::file_format_type @p format and + * scale it using the provided @p scale_parameter. + * @param[in] comm the used MPI communicator (**note**: currently unused) + * @param[in] filename the file to read the data points from + * @param[in] format the assumed file format used to parse the data points + * @param[in] scale_parameter the parameters used to scale the data set feature values to a given range + * @throws plssvm::invalid_file_format_exception all exceptions thrown by plssvm::data_set::read_file + * @throws plssvm::data_set_exception all exceptions thrown by plssvm::data_set::scale + */ + data_set(mpi::communicator comm, const std::string &filename, file_format_type format, scaling scale_parameter); /** * @brief Create a new data set by converting the provided @p data_points to a plssvm::matrix. 
@@ -133,6 +174,16 @@ class data_set { * @throws plssvm::data_set_exception if any @p data_point has no features */ explicit data_set(const std::vector> &data_points); + /** + * @brief Create a new data set by converting the provided @p data_points to a plssvm::matrix. + * @details Since no labels are provided, this data set may **not** be used in a call to plssvm::csvm::fit! + * @param[in] comm the used MPI communicator (**note**: currently unused) + * @param[in] data_points the data points used in this data set + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + */ + explicit data_set(mpi::communicator comm, const std::vector> &data_points); /** * @brief Create a new data set by converting the provided @p data_points to a plssvm::matrix and copying the @p labels. * @param[in] data_points the data points used in this data set @@ -143,6 +194,17 @@ class data_set { * @throws plssvm::data_set_exception if the number of data points in @p data_points and number of @p labels mismatch */ data_set(const std::vector> &data_points, std::vector labels); + /** + * @brief Create a new data set by converting the provided @p data_points to a plssvm::matrix and copying the @p labels. 
+ * @param[in] comm the used MPI communicator (**note**: currently unused) + * @param[in] data_points the data points used in this data set + * @param[in] labels the labels used in this data set + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + * @throws plssvm::data_set_exception if the number of data points in @p data_points and number of @p labels mismatch + */ + data_set(mpi::communicator comm, const std::vector> &data_points, std::vector labels); /** * @brief Create a new data set by converting the provided @p data_points to a plssvm::matrix and scale them using the provided @p scale_parameter. * @param[in] data_points the data points used in this data set @@ -153,6 +215,17 @@ class data_set { * @throws plssvm::data_set_exception all exceptions thrown by plssvm::data_set::scale */ data_set(const std::vector> &data_points, scaling scale_parameter); + /** + * @brief Create a new data set by converting the provided @p data_points to a plssvm::matrix and scale them using the provided @p scale_parameter. 
+ * @param[in] comm the used MPI communicator (**note**: currently unused) + * @param[in] data_points the data points used in this data set + * @param[in] scale_parameter the parameters used to scale the data set feature values to a given range + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + * @throws plssvm::data_set_exception all exceptions thrown by plssvm::data_set::scale + */ + data_set(mpi::communicator comm, const std::vector> &data_points, scaling scale_parameter); /** * @brief Create a new data set by converting the provided @p data_points to a plssvm::matrix and copying the @p labels and scale the @p data_points using the provided @p scale_parameter. * @param[in] data_points the data points used in this data set @@ -165,6 +238,19 @@ class data_set { * @throws plssvm::data_set_exception all exceptions thrown by plssvm::data_set::scale */ data_set(const std::vector> &data_points, std::vector labels, scaling scale_parameter); + /** + * @brief Create a new data set by converting the provided @p data_points to a plssvm::matrix and copying the @p labels and scale the @p data_points using the provided @p scale_parameter. 
+ * @param[in] comm the used MPI communicator (**note**: currently unused) + * @param[in] data_points the data points used in this data set + * @param[in] labels the labels used in this data set + * @param[in] scale_parameter the parameters used to scale the data set feature values to a given range + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + * @throws plssvm::data_set_exception if the number of data points in @p data_points and number of @p labels mismatch + * @throws plssvm::data_set_exception all exceptions thrown by plssvm::data_set::scale + */ + data_set(mpi::communicator comm, const std::vector> &data_points, std::vector labels, scaling scale_parameter); /** * @brief Create a new data set from the provided @p data_points. @@ -178,6 +264,19 @@ class data_set { */ template explicit data_set(const matrix &data_points); + /** + * @brief Create a new data set from the provided @p data_points. + * @details Since no labels are provided, this data set may **not** be used in a call to plssvm::csvm::fit! + * @note If the provided matrix isn't padded, adds the necessary padding entries automatically. + * @tparam layout the layout type of the input matrix + * @param[in] comm the used MPI communicator (**note**: currently unused) + * @param[in] data_points the data points used in this data set + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + */ + template + explicit data_set(mpi::communicator comm, const matrix &data_points); /** * @brief Create a new data set from the provided @p data_points and @p labels. 
* @note If the provided matrix isn't padded, adds the necessary padding entries automatically. @@ -191,6 +290,20 @@ class data_set { */ template data_set(const matrix &data_points, std::vector labels); + /** + * @brief Create a new data set from the provided @p data_points and @p labels. + * @note If the provided matrix isn't padded, adds the necessary padding entries automatically. + * @tparam layout the layout type of the input matrix + * @param[in] comm the used MPI communicator (**note**: currently unused) + * @param[in] data_points the data points used in this data set + * @param[in] labels the labels used in this data set + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + * @throws plssvm::data_set_exception if the number of data points in @p data_points and number of @p labels mismatch + */ + template + data_set(mpi::communicator comm, const matrix &data_points, std::vector labels); /** * @brief Create a new data set from the the provided @p data_points and scale them using the provided @p scale_parameter. * @note If the provided matrix isn't padded, adds the necessary padding entries automatically. @@ -204,6 +317,20 @@ class data_set { */ template data_set(const matrix &data_points, scaling scale_parameter); + /** + * @brief Create a new data set from the provided @p data_points and scale them using the provided @p scale_parameter. + * @note If the provided matrix isn't padded, adds the necessary padding entries automatically. 
+ * @tparam layout the layout type of the input matrix + * @param[in] comm the used MPI communicator (**note**: currently unused) + * @param[in] data_points the data points used in this data set + * @param[in] scale_parameter the parameters used to scale the data set feature values to a given range + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + * @throws plssvm::data_set_exception all exceptions thrown by plssvm::data_set::scale + */ + template + data_set(mpi::communicator comm, const matrix &data_points, scaling scale_parameter); /** * @brief Create a new data set from the the provided @p data_points and @p labels and scale the @p data_points using the provided @p scale_parameter. * @note If the provided matrix isn't padded, adds the necessary padding entries automatically. @@ -219,9 +346,26 @@ class data_set { */ template data_set(const matrix &data_points, std::vector labels, scaling scale_parameter); + /** + * @brief Create a new data set from the provided @p data_points and @p labels and scale the @p data_points using the provided @p scale_parameter. + * @note If the provided matrix isn't padded, adds the necessary padding entries automatically. 
+ * @tparam layout the layout type of the input matrix + * @param[in] comm the used MPI communicator (**note**: currently unused) + * @param[in] data_points the data points used in this data set + * @param[in] labels the labels used in this data set + * @param[in] scale_parameter the parameters used to scale the data set feature values to a given range + * @throws plssvm::data_set_exception if the @p data_points vector is empty + * @throws plssvm::data_set_exception if the data points in @p data_points have mismatching number of features + * @throws plssvm::data_set_exception if any @p data_point has no features + * @throws plssvm::data_set_exception if the number of data points in @p data_points and number of @p labels mismatch + * @throws plssvm::data_set_exception all exceptions thrown by plssvm::data_set::scale + */ + template + data_set(mpi::communicator comm, const matrix &data_points, std::vector labels, scaling scale_parameter); /** * @brief Save the data points and potential labels of this data set to the file @p filename using the file @p format type. + * @note Only the main MPI rank (traditionally rank 0) saves the whole data set (if MPI is available). * @param[in] filename the file to save the data points and labels to * @param[in] format the file format */ @@ -230,6 +374,7 @@ class data_set { * @brief Save the data points and potential labels of this data set to the file @p filename. * @details Automatically determines the plssvm::file_format_type based on the file extension. * If the file extension isn't `.arff`, saves the data as `.libsvm` file. + * @note Only the main MPI rank (traditionally rank 0) saves the whole data set (if MPI is available). * @param[in] filename the file to save the data points and labels to */ void save(const std::string &filename) const; @@ -295,6 +440,14 @@ class data_set { */ [[nodiscard]] optional_ref scaling_factors() const noexcept; + /** + * @brief Get the associated MPI communicator. 
+ * @return the MPI communicator (`[[nodiscard]]`) + */ + [[nodiscard]] mpi::communicator communicator() const noexcept { + return comm_; + } + private: /** * @brief Default construct an empty data set. @@ -332,6 +485,9 @@ class data_set { /// The number of features in this data set. size_type num_features_{ 0 }; + /// The used MPI communicator. + mpi::communicator comm_{}; + /// A pointer to the two-dimensional data points. std::shared_ptr> data_ptr_{ nullptr }; /// A pointer to the original labels of this data set; may be `nullptr` if no labels have been provided. @@ -390,12 +546,14 @@ class data_set::scaling { * @throws plssvm::data_set_exception if lower is greater or equal than upper */ scaling(real_type lower, real_type upper); + scaling(mpi::communicator comm, real_type lower, real_type upper); /** * @brief Read the scaling interval and factors from the provided file @p filename. * @param[in] filename the filename to read the scaling information from * @throws plssvm::invalid_file_format_exception all exceptions thrown by the plssvm::detail::io::parse_scaling_factors function */ - scaling(const std::string &filename); // can't be explicit due to the data_set_variant + scaling(const std::string &filename); // can't be explicit due to the data_set_variant + scaling(mpi::communicator comm, const std::string &filename); // can't be explicit due to the data_set_variant /** * @brief Save the scaling factors to the file @p filename. @@ -408,18 +566,31 @@ class data_set::scaling { std::pair scaling_interval{}; /// The scaling factors for all features. std::vector scaling_factors{}; + + /// The used MPI communicator. 
+ mpi::communicator comm_{}; }; template data_set::scaling::scaling(const real_type lower, const real_type upper) : - scaling_interval{ std::make_pair(lower, upper) } { + scaling{ mpi::communicator{}, lower, upper } { } + +template +data_set::scaling::scaling(mpi::communicator comm, const real_type lower, const real_type upper) : + scaling_interval{ std::make_pair(lower, upper) }, + comm_{ std::move(comm) } { if (lower >= upper) { throw data_set_exception{ fmt::format("Inconsistent scaling interval specification: lower ({}) must be less than upper ({})!", lower, upper) }; } } template -data_set::scaling::scaling(const std::string &filename) { +data_set::scaling::scaling(const std::string &filename) : + scaling{ mpi::communicator{}, filename } { } + +template +data_set::scaling::scaling(mpi::communicator comm, const std::string &filename) : + comm_{ std::move(comm) } { // open the file detail::io::file_reader reader{ filename }; reader.read_lines('#'); @@ -437,6 +608,7 @@ void data_set::scaling::save(const std::string &filename) const { const std::chrono::time_point end_time = std::chrono::steady_clock::now(); detail::log(verbosity_level::full | verbosity_level::timing, + comm_, "Write {} scaling factors in {} to the file '{}'.\n", detail::tracking::tracking_entry{ "scaling_factors_write", "num_scaling_factors", scaling_factors.size() }, detail::tracking::tracking_entry{ "scaling_factors_write", "time", std::chrono::duration_cast(end_time - start_time) }, @@ -545,21 +717,35 @@ auto data_set::label_mapper::labels() const -> std::vector { //*************************************************************************************************************************************// template -data_set::data_set(const std::string &filename) { +data_set::data_set(const std::string &filename) : + data_set{ mpi::communicator{}, filename } { } + +template +data_set::data_set(mpi::communicator comm, const std::string &filename) : + comm_{ std::move(comm) } { // read data set from 
file // if the file doesn't end with .arff, assume a LIBSVM file this->read_file(filename, detail::ends_with(filename, ".arff") ? file_format_type::arff : file_format_type::libsvm); } template -data_set::data_set(const std::string &filename, const file_format_type format) { +data_set::data_set(const std::string &filename, const file_format_type format) : + data_set{ mpi::communicator{}, filename, format } { } + +template +data_set::data_set(mpi::communicator comm, const std::string &filename, const file_format_type format) : + comm_{ std::move(comm) } { // read data set from file this->read_file(filename, format); } template data_set::data_set(const std::string &filename, scaling scale_parameter) : - data_set{ filename } { + data_set{ mpi::communicator{}, filename, std::move(scale_parameter) } { } + +template +data_set::data_set(mpi::communicator comm, const std::string &filename, scaling scale_parameter) : + data_set{ std::move(comm), filename } { // initialize scaling scale_parameters_ = std::make_shared(std::move(scale_parameter)); // scale data set @@ -568,7 +754,11 @@ data_set::data_set(const std::string &filename, scaling scale_parameter) : template data_set::data_set(const std::string &filename, file_format_type format, scaling scale_parameter) : - data_set{ filename, format } { + data_set{ mpi::communicator{}, filename, format, std::move(scale_parameter) } { } + +template +data_set::data_set(mpi::communicator comm, const std::string &filename, file_format_type format, scaling scale_parameter) : + data_set{ std::move(comm), filename, format } { // initialize scaling scale_parameters_ = std::make_shared(std::move(scale_parameter)); // scale data set @@ -577,29 +767,45 @@ data_set::data_set(const std::string &filename, file_format_type format, scal // clang-format off template -data_set::data_set(const std::vector> &data_points) try : - data_set{ soa_matrix{ data_points, shape{ PADDING_SIZE, PADDING_SIZE } } } {} +data_set::data_set(const std::vector> 
&data_points) : + data_set{ mpi::communicator{}, data_points } { } + +template +data_set::data_set(mpi::communicator comm, const std::vector> &data_points) try : + data_set{ std::move(comm), soa_matrix{ data_points, shape{ PADDING_SIZE, PADDING_SIZE } } } {} catch (const matrix_exception &e) { throw data_set_exception{ e.what() }; } template -data_set::data_set(const std::vector> &data_points, std::vector labels) try : - data_set{ soa_matrix{ data_points, shape{ PADDING_SIZE, PADDING_SIZE } }, std::move(labels) } {} +data_set::data_set(const std::vector> &data_points, std::vector labels) : + data_set{ mpi::communicator{}, data_points, std::move(labels) } { } + +template +data_set::data_set(mpi::communicator comm, const std::vector> &data_points, std::vector labels) try : + data_set{ std::move(comm), soa_matrix{ data_points, shape{ PADDING_SIZE, PADDING_SIZE } }, std::move(labels) } {} catch (const matrix_exception &e) { throw data_set_exception{ e.what() }; } template -data_set::data_set(const std::vector> &data_points, scaling scale_parameter) try : - data_set{ soa_matrix{ data_points, shape{ PADDING_SIZE, PADDING_SIZE } }, std::move(scale_parameter) } {} +data_set::data_set(const std::vector> &data_points, scaling scale_parameter) : + data_set{ mpi::communicator{}, data_points, std::move(scale_parameter) } { } + +template +data_set::data_set(mpi::communicator comm, const std::vector> &data_points, scaling scale_parameter) try : + data_set{ std::move(comm), soa_matrix{ data_points, shape{ PADDING_SIZE, PADDING_SIZE } }, std::move(scale_parameter) } {} catch (const matrix_exception &e) { throw data_set_exception{ e.what() }; } template -data_set::data_set(const std::vector> &data_points, std::vector labels, scaling scale_parameter) try : - data_set{ soa_matrix{ data_points, shape{ PADDING_SIZE, PADDING_SIZE } }, std::move(labels), std::move(scale_parameter) } {} +data_set::data_set(const std::vector> &data_points, std::vector labels, scaling scale_parameter) : + data_set{ 
mpi::communicator{}, data_points, std::move(labels), std::move(scale_parameter) } { } + +template +data_set::data_set(mpi::communicator comm, const std::vector> &data_points, std::vector labels, scaling scale_parameter) try : + data_set{ std::move(comm), soa_matrix{ data_points, shape{ PADDING_SIZE, PADDING_SIZE } }, std::move(labels), std::move(scale_parameter) } {} catch (const matrix_exception &e) { throw data_set_exception{ e.what() }; } @@ -609,8 +815,14 @@ data_set::data_set(const std::vector> &data_points, st template template data_set::data_set(const matrix &data_points) : + data_set{ mpi::communicator{}, data_points } { } + +template +template +data_set::data_set(mpi::communicator comm, const matrix &data_points) : num_data_points_{ data_points.num_rows() }, num_features_{ data_points.num_cols() }, + comm_{ std::move(comm) }, data_ptr_{ std::make_shared>(data_points, shape{ PADDING_SIZE, PADDING_SIZE }) } { // the provided data points vector may not be empty if (data_ptr_->num_rows() == 0) { @@ -621,6 +833,7 @@ data_set::data_set(const matrix &data_points) : } detail::log(verbosity_level::full | verbosity_level::timing, + comm_, "Created a data set with {} data points and {} features.\n", detail::tracking::tracking_entry{ "data_set_create", "num_data_points", num_data_points_ }, detail::tracking::tracking_entry{ "data_set_create", "num_features", num_features_ }); @@ -629,8 +842,14 @@ data_set::data_set(const matrix &data_points) : template template data_set::data_set(const matrix &data_points, std::vector labels) : + data_set{ mpi::communicator{}, data_points, std::move(labels) } { } + +template +template +data_set::data_set(mpi::communicator comm, const matrix &data_points, std::vector labels) : num_data_points_{ data_points.num_rows() }, num_features_{ data_points.num_cols() }, + comm_{ std::move(comm) }, data_ptr_{ std::make_shared>(data_points, shape{ PADDING_SIZE, PADDING_SIZE }) }, labels_ptr_{ std::make_shared>(std::move(labels)) } { // the number 
of labels must be equal to the number of data points! @@ -643,6 +862,7 @@ data_set::data_set(const matrix &data_points, std::vector< this->create_mapping(std::vector(unique_labels.cbegin(), unique_labels.cend())); detail::log(verbosity_level::full | verbosity_level::timing, + comm_, "Created a data set with {} data points, {} features, and {} classes.\n", detail::tracking::tracking_entry{ "data_set_create", "num_data_points", num_data_points_ }, detail::tracking::tracking_entry{ "data_set_create", "num_features", num_features_ }, @@ -652,7 +872,12 @@ data_set::data_set(const matrix &data_points, std::vector< template template data_set::data_set(const matrix &data_points, scaling scale_parameter) : - data_set{ std::move(data_points) } { + data_set{ mpi::communicator{}, data_points, std::move(scale_parameter) } { } + +template +template +data_set::data_set(mpi::communicator comm, const matrix &data_points, scaling scale_parameter) : + data_set{ std::move(comm), data_points } { // initialize scaling scale_parameters_ = std::make_shared(std::move(scale_parameter)); // scale data set @@ -662,7 +887,12 @@ data_set::data_set(const matrix &data_points, scaling scal template template data_set::data_set(const matrix &data_points, std::vector labels, scaling scale_parameter) : - data_set{ std::move(data_points), std::move(labels) } { + data_set{ mpi::communicator{}, data_points, std::move(labels), std::move(scale_parameter) } { } + +template +template +data_set::data_set(mpi::communicator comm, const matrix &data_points, std::vector labels, scaling scale_parameter) : + data_set{ std::move(comm), data_points, std::move(labels) } { // initialize scaling scale_parameters_ = std::make_shared(std::move(scale_parameter)); // scale data set @@ -673,31 +903,34 @@ template void data_set::save(const std::string &filename, const file_format_type format) const { const std::chrono::time_point start_time = std::chrono::steady_clock::now(); - // save the data set - if 
(this->has_labels()) { - // save data with labels - switch (format) { - case file_format_type::libsvm: - detail::io::write_libsvm_data(filename, *data_ptr_, *labels_ptr_); - break; - case file_format_type::arff: - detail::io::write_arff_data(filename, *data_ptr_, *labels_ptr_); - break; - } - } else { - // save data without labels - switch (format) { - case file_format_type::libsvm: - detail::io::write_libsvm_data(filename, *data_ptr_); - break; - case file_format_type::arff: - detail::io::write_arff_data(filename, *data_ptr_); - break; + if (comm_.is_main_rank()) { + // save the data set + if (this->has_labels()) { + // save data with labels + switch (format) { + case file_format_type::libsvm: + detail::io::write_libsvm_data(filename, *data_ptr_, *labels_ptr_); + break; + case file_format_type::arff: + detail::io::write_arff_data(filename, *data_ptr_, *labels_ptr_); + break; + } + } else { + // save data without labels + switch (format) { + case file_format_type::libsvm: + detail::io::write_libsvm_data(filename, *data_ptr_); + break; + case file_format_type::arff: + detail::io::write_arff_data(filename, *data_ptr_); + break; + } } } const std::chrono::time_point end_time = std::chrono::steady_clock::now(); detail::log(verbosity_level::full | verbosity_level::timing, + comm_, "Write {} data points with {} features and {} classes in {} to the {} file '{}'.\n", detail::tracking::tracking_entry{ "data_set_write", "num_data_points", num_data_points_ }, detail::tracking::tracking_entry{ "data_set_write", "num_features", num_features_ }, @@ -829,6 +1062,7 @@ void data_set::scale() { const std::chrono::time_point end_time = std::chrono::steady_clock::now(); detail::log(verbosity_level::full | verbosity_level::timing, + comm_, "Scaled the data set to the range [{}, {}] in {}.\n", detail::tracking::tracking_entry{ "data_set_scale", "lower", lower }, detail::tracking::tracking_entry{ "data_set_scale", "upper", upper }, @@ -884,6 +1118,7 @@ void data_set::read_file(const 
std::string &filename, file_format_type format const std::chrono::time_point end_time = std::chrono::steady_clock::now(); detail::log(verbosity_level::full | verbosity_level::timing, + comm_, "Read {} data points with {} features and {} classes in {} using the {} parser from file '{}'.\n", detail::tracking::tracking_entry{ "data_set_read", "num_data_points", num_data_points_ }, detail::tracking::tracking_entry{ "data_set_read", "num_features", num_features_ }, diff --git a/include/plssvm/detail/cmd/data_set_variants.hpp b/include/plssvm/detail/cmd/data_set_variants.hpp index 239d9a007..43d30cee3 100644 --- a/include/plssvm/detail/cmd/data_set_variants.hpp +++ b/include/plssvm/detail/cmd/data_set_variants.hpp @@ -17,6 +17,7 @@ #include "plssvm/detail/cmd/parser_predict.hpp" // plssvm::detail::cmd::parser_predict #include "plssvm/detail/cmd/parser_scale.hpp" // plssvm::detail::cmd::parser_scale #include "plssvm/detail/cmd/parser_train.hpp" // plssvm::detail::cmd::parser_train +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include // std::string #include // std::variant @@ -31,37 +32,40 @@ using data_set_variants = std::variant, plssvm::data_set::label_type> -[[nodiscard]] inline data_set_variants data_set_factory_impl(const cmd::parser_train &cmd_parser) { - return data_set_variants{ plssvm::data_set{ cmd_parser.input_filename } }; +[[nodiscard]] inline data_set_variants data_set_factory_impl(mpi::communicator comm, const cmd::parser_train &cmd_parser) { + return data_set_variants{ plssvm::data_set{ std::move(comm), cmd_parser.input_filename } }; } /** * @brief Return the correct data set based on the plssvm::detail::cmd::parser_predict command line options. 
* @tparam label_type the type of the labels + * @param[in] comm the MPI communicator wrapper * @param[in] cmd_parser the provided command line parser * @return the data set based on the provided command line parser (`[[nodiscard]]`) */ template ::label_type> -[[nodiscard]] inline data_set_variants data_set_factory_impl(const cmd::parser_predict &cmd_parser) { - return data_set_variants{ plssvm::data_set{ cmd_parser.input_filename } }; +[[nodiscard]] inline data_set_variants data_set_factory_impl(mpi::communicator comm, const cmd::parser_predict &cmd_parser) { + return data_set_variants{ plssvm::data_set{ std::move(comm), cmd_parser.input_filename } }; } /** * @brief Return the correct data set based on the plssvm::detail::cmd::parser_scale command line options. * @tparam label_type the type of the labels + * @param[in] comm the MPI communicator wrapper * @param[in] cmd_parser the provided command line parser * @return the data set based on the provided command line parser (`[[nodiscard]]`) */ template ::label_type> -[[nodiscard]] inline data_set_variants data_set_factory_impl(const cmd::parser_scale &cmd_parser) { +[[nodiscard]] inline data_set_variants data_set_factory_impl(mpi::communicator comm, const cmd::parser_scale &cmd_parser) { if (!cmd_parser.restore_filename.empty()) { - return data_set_variants{ plssvm::data_set{ cmd_parser.input_filename, { cmd_parser.restore_filename } } }; + return data_set_variants{ plssvm::data_set{ comm, cmd_parser.input_filename, { comm, cmd_parser.restore_filename } } }; } else { - return data_set_variants{ plssvm::data_set{ cmd_parser.input_filename, { cmd_parser.lower, cmd_parser.upper } } }; + return data_set_variants{ plssvm::data_set{ comm, cmd_parser.input_filename, { comm, cmd_parser.lower, cmd_parser.upper } } }; } } @@ -74,9 +78,25 @@ template ::label_type> template [[nodiscard]] inline data_set_variants data_set_factory(const cmd_parser_type &cmd_parser) { if (cmd_parser.strings_as_labels) { - return 
data_set_factory_impl(cmd_parser); + return data_set_factory_impl(mpi::communicator{}, cmd_parser); } else { - return data_set_factory_impl(cmd_parser); + return data_set_factory_impl(mpi::communicator{}, cmd_parser); + } +} + +/** + * @brief Based on the provided command line @p cmd_parser, return the correct plssvm::data_set. + * @tparam cmd_parser_type the type of the command line parser (train, predict, or scale) + * @param[in] comm the MPI communicator wrapper + * @param[in] cmd_parser the provided command line parser + * @return the data set based on the provided command line parser (`[[nodiscard]]`) + */ +template +[[nodiscard]] inline data_set_variants data_set_factory(mpi::communicator comm, const cmd_parser_type &cmd_parser) { + if (cmd_parser.strings_as_labels) { + return data_set_factory_impl(std::move(comm), cmd_parser); + } else { + return data_set_factory_impl(std::move(comm), cmd_parser); } } diff --git a/include/plssvm/detail/cmd/parser_predict.hpp b/include/plssvm/detail/cmd/parser_predict.hpp index 4ba2e1a65..9c1cb880c 100644 --- a/include/plssvm/detail/cmd/parser_predict.hpp +++ b/include/plssvm/detail/cmd/parser_predict.hpp @@ -16,6 +16,7 @@ #include "plssvm/backend_types.hpp" // plssvm::backend_type #include "plssvm/backends/Kokkos/execution_space.hpp" // plssvm::kokkos::execution_space #include "plssvm/backends/SYCL/implementation_types.hpp" // plssvm::sycl::implementation_type +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/target_platforms.hpp" // plssvm::target_platform #include "fmt/base.h" // fmt::formatter @@ -33,10 +34,11 @@ struct parser_predict { /** * @brief Parse the command line arguments @p argv using [`cxxopts`](https://github.com/jarro2783/cxxopts) and set the predict parameters accordingly. * @details If no output filename is given, uses the input filename and appends a ".predict". The output file is than saved in the current working directory. 
+ * @param[in] comm the MPI communicator wrapper * @param[in] argc the number of passed command line arguments * @param[in] argv the command line arguments */ - parser_predict(int argc, char **argv); + parser_predict(const mpi::communicator &comm, int argc, char **argv); /// The used backend: automatic (depending on the specified target_platforms), OpenMP, HPX, stdpar, CUDA, HIP, OpenCL, SYCL, or Kokkos. backend_type backend{ backend_type::automatic }; diff --git a/include/plssvm/detail/cmd/parser_train.hpp b/include/plssvm/detail/cmd/parser_train.hpp index 73897249a..dc762b7aa 100644 --- a/include/plssvm/detail/cmd/parser_train.hpp +++ b/include/plssvm/detail/cmd/parser_train.hpp @@ -19,6 +19,7 @@ #include "plssvm/backends/SYCL/kernel_invocation_types.hpp" // plssvm::sycl::kernel_invocation_type #include "plssvm/classification_types.hpp" // plssvm::classification_type #include "plssvm/constants.hpp" // plssvm::real_type +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/parameter.hpp" // plssvm::parameter #include "plssvm/solver_types.hpp" // plssvm::solving_type #include "plssvm/target_platforms.hpp" // plssvm::target_platform @@ -39,10 +40,11 @@ struct parser_train { /** * @brief Parse the command line arguments @p argv using [`cxxopts`](https://github.com/jarro2783/cxxopts) and set the training parameters accordingly. * @details If no model filename is given, uses the input filename and appends a ".model". The model file is than saved in the current working directory. 
+ * @param[in] comm the MPI communicator wrapper * @param[in] argc the number of passed command line arguments * @param[in] argv the command line arguments */ - parser_train(int argc, char **argv); + parser_train(const mpi::communicator &comm, int argc, char **argv); /// Other base C-SVM parameters plssvm::parameter csvm_params{}; diff --git a/include/plssvm/detail/io/libsvm_model_parsing.hpp b/include/plssvm/detail/io/libsvm_model_parsing.hpp index c42c82e8e..9fef6c8b0 100644 --- a/include/plssvm/detail/io/libsvm_model_parsing.hpp +++ b/include/plssvm/detail/io/libsvm_model_parsing.hpp @@ -26,6 +26,7 @@ #include "plssvm/gamma.hpp" // plssvm::get_gamma_string #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type #include "plssvm/matrix.hpp" // plssvm::soa_matrix +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/parameter.hpp" // plssvm::parameter #include "plssvm/shape.hpp" // plssvm::shape #include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level @@ -580,6 +581,7 @@ template * @endcode * @tparam label_type the type of the labels (any arithmetic type, except bool, or std::string) * @param[in,out] out the output-stream to write the header information to + * @param[in] comm the used MPI communicator * @param[in] params the SVM parameters * @param[in] rho the rho values for the different classes resulting from the hyperplane learning * @param[in] data the data used to create the model @@ -587,7 +589,7 @@ template * @return the order of the different classes as it should appear in the following data section (`[[nodiscard]]`) */ template -[[nodiscard]] inline std::vector write_libsvm_model_header(fmt::ostream &out, const plssvm::parameter ¶ms, const std::vector &rho, const data_set &data) { +[[nodiscard]] inline std::vector write_libsvm_model_header(fmt::ostream &out, const mpi::communicator &comm, const plssvm::parameter ¶ms, const std::vector &rho, const data_set &data) { 
PLSSVM_ASSERT(data.has_labels(), "Cannot write a model file that does not include labels!"); PLSSVM_ASSERT(!rho.empty(), "At least one rho value must be provided!"); @@ -634,6 +636,7 @@ template // print model header detail::log(verbosity_level::full | verbosity_level::libsvm, + comm, "\n{}\n", out_string); // write model header to file @@ -665,6 +668,7 @@ template * @endcode * @tparam label_type the type of the labels (any arithmetic type, except bool, or std::string) * @param[in] filename the file to write the LIBSVM model to + * @param[in] comm the used MPI communicator * @param[in] params the SVM parameters * @param[in] classification the used multi-class classification strategy * @param[in] rho the rho value resulting from the hyperplane learning @@ -674,7 +678,7 @@ template * @attention The PLSSVM model file is only compatible with LIBSVM for the one vs. one classification type. */ template -inline void write_libsvm_model_data(const std::string &filename, const plssvm::parameter ¶ms, const classification_type classification, const std::vector &rho, const std::vector> &alpha, const std::vector> &index_sets, const data_set &data) { +inline void write_libsvm_model_data(const std::string &filename, const mpi::communicator &comm, const plssvm::parameter ¶ms, const classification_type classification, const std::vector &rho, const std::vector> &alpha, const std::vector> &index_sets, const data_set &data) { PLSSVM_ASSERT(!filename.empty(), "The provided model filename must not be empty!"); PLSSVM_ASSERT(data.has_labels(), "Cannot write a model file that does not include labels!"); PLSSVM_ASSERT(rho.size() == calculate_number_of_classifiers(classification, data.num_classes()), @@ -725,7 +729,7 @@ inline void write_libsvm_model_data(const std::string &filename, const plssvm::p fmt::ostream out = fmt::output_file(filename); // write header information - const std::vector label_order = write_libsvm_model_header(out, params, rho, data); + const std::vector label_order = 
write_libsvm_model_header(out, comm, params, rho, data); // the maximum size of one formatted LIBSVM entry, e.g., 1234:1.365363e+10 // biggest number representable as std::size_t: 18446744073709551615 -> 20 chars diff --git a/include/plssvm/detail/logging.hpp b/include/plssvm/detail/logging.hpp index 8cccb39b9..ee6350d9e 100644 --- a/include/plssvm/detail/logging.hpp +++ b/include/plssvm/detail/logging.hpp @@ -15,6 +15,7 @@ #include "plssvm/detail/tracking/performance_tracker.hpp" // plssvm::detail::tracking::is_tracking_entry_v, // PLSSVM_PERFORMANCE_TRACKER_ENABLED, PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level, plssvm::verbosity, bitwise-operators on plssvm::verbosity_level #include "fmt/chrono.h" // format std::chrono types @@ -38,7 +39,7 @@ namespace plssvm::detail { * @param[in] args the values to fill the {fmt}-like placeholders in @p msg */ template -void log(const verbosity_level verb, const std::string_view msg, Args &&...args) { +void log(const verbosity_level verb, const std::string_view msg, Args &&...args) { // TODO: remove // if the verbosity level is quiet, nothing is logged // otherwise verb must contain the bit-flag currently set by plssvm::verbosity if (verbosity != verbosity_level::quiet && (verb & verbosity) != verbosity_level::quiet) { @@ -62,6 +63,28 @@ void log(const verbosity_level verb, const std::string_view msg, Args &&...args) #endif } +/** + * @brief Output the message @p msg filling the {fmt} like placeholders with @p args to the standard output stream if @p comm represents the current main MPI rank. + * @details If a value in @p args is of type plssvm::detail::tracking_entry and performance tracking is enabled, + * this is also added to the `plssvm::detail::performance_tracker`. + * Only logs the message if the verbosity level matches the `plssvm::verbosity` level. 
+ * @tparam Args the types of the placeholder values + * @param[in] verb the verbosity level of the message to log; must match the `plssvm::verbosity` level to log the message + * @param[in] comm the used MPI communicator + * @param[in] msg the message to print on the standard output stream if requested (i.e., `plssvm::verbosity` isn't `plssvm::verbosity_level::quiet`) + * @param[in] args the values to fill the {fmt}-like placeholders in @p msg + */ +template +void log(const verbosity_level verb, const mpi::communicator &comm, const std::string_view msg, Args &&...args) { + if (comm.is_main_rank()) { + // only print on the main MPI rank + log(verb, msg, std::forward(args)...); + } else { + // set output to quiet otherwise + log(verbosity_level::quiet, msg, std::forward(args)...); + } +} + } // namespace plssvm::detail #endif // PLSSVM_DETAIL_LOGGING_HPP_ diff --git a/include/plssvm/detail/logging_without_performance_tracking.hpp b/include/plssvm/detail/logging_without_performance_tracking.hpp index a92729a66..650646081 100644 --- a/include/plssvm/detail/logging_without_performance_tracking.hpp +++ b/include/plssvm/detail/logging_without_performance_tracking.hpp @@ -13,6 +13,7 @@ #define PLSSVM_DETAIL_LOGGING_WITHOUT_PERFORMANCE_TRACKING_HPP_ #pragma once +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level, plssvm::verbosity, bitwise-operators on plssvm::verbosity_level #include "fmt/chrono.h" // format std::chrono types @@ -46,6 +47,26 @@ void log_untracked(const verbosity_level verb, const std::string_view msg, Args } } +/** + * @brief Output the message @p msg filling the {fmt} like placeholders with @p args to the standard output stream if @p comm represents the current main MPI rank. + * @details Only logs the message if the verbosity level matches the `plssvm::verbosity` level. 
+ * @tparam Args the types of the placeholder values + * @param[in] verb the verbosity level of the message to log; must match the `plssvm::verbosity` level to log the message + * @param[in] comm the used MPI communicator + * @param[in] msg the message to print on the standard output stream if requested (i.e., `plssvm::verbosity` isn't `plssvm::verbosity_level::quiet`) + * @param[in] args the values to fill the {fmt}-like placeholders in @p msg + */ +template +void log_untracked(const verbosity_level verb, const mpi::communicator &comm, const std::string_view msg, Args &&...args) { + if (comm.is_main_rank()) { + // only print on the main MPI rank + log_untracked(verb, msg, std::forward(args)...); + } else { + // set output to quiet otherwise + log_untracked(verbosity_level::quiet, msg, std::forward(args)...); + } +} + } // namespace plssvm::detail #endif // PLSSVM_DETAIL_LOGGING_WITHOUT_PERFORMANCE_TRACKING_HPP_ diff --git a/include/plssvm/model.hpp b/include/plssvm/model.hpp index 6522526b3..0bec38869 100644 --- a/include/plssvm/model.hpp +++ b/include/plssvm/model.hpp @@ -23,6 +23,7 @@ #include "plssvm/detail/tracking/performance_tracker.hpp" // PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY, plssvm::detail::tracking::tracking_entry #include "plssvm/detail/type_list.hpp" // plssvm::detail::{supported_label_types, tuple_contains_v} #include "plssvm/matrix.hpp" // plssvm::soa_matrix, plssvm::aos_matrix +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/parameter.hpp" // plssvm::parameter #include "plssvm/verbosity_levels.hpp" // plssvm::verbosity_level @@ -45,6 +46,7 @@ namespace plssvm { /** * @brief Implements a class encapsulating the result of a call to the SVM fit function. A model is used to predict the labels of a new data set. + * @note Currently, **each** MPI rank loads/stores the whole data set (if MPI is available). 
* @tparam U the type of the used labels (must be an arithmetic type or `std:string`; default: `int`) */ template @@ -67,9 +69,17 @@ class model { * @throws plssvm::invalid_file_format_exception all exceptions thrown by plssvm::detail::io::parse_libsvm_model_header and plssvm::detail::io::parse_libsvm_data */ explicit model(const std::string &filename); + /** + * @brief Read a previously learned model from the LIBSVM model file @p filename. + * @param[in] comm the used MPI communicator (**note**: currently unused) + * @param[in] filename the model file to read + * @throws plssvm::invalid_file_format_exception all exceptions thrown by plssvm::detail::io::parse_libsvm_model_header and plssvm::detail::io::parse_libsvm_data + */ + model(mpi::communicator comm, const std::string &filename); /** * @brief Save the model to a LIBSVM model file for later usage. + * @note Only the main MPI rank (traditionally rank 0) saves the whole data set (if MPI is available). * @param[in] filename the file to save the model to */ void save(const std::string &filename) const; @@ -177,6 +187,9 @@ class model { /// The number of iterations needed to fit this model. std::optional> num_iters_{}; + /// The used MPI communicator. + mpi::communicator comm_{}; + /** * @brief The learned weights for each support vector. * @details For one vs. all the vector contains a single matrix representing all weights. 
@@ -213,10 +226,16 @@ model::model(parameter params, data_set data, const classificatio classification_strategy_{ classification_strategy }, data_{ std::move(data) }, num_support_vectors_{ data_.num_data_points() }, - num_features_{ data_.num_features() } { } + num_features_{ data_.num_features() }, + comm_{ data_.communicator() } { } + +template +model::model(const std::string &filename) : + model{ mpi::communicator{}, filename } { } template -model::model(const std::string &filename) { +model::model(mpi::communicator comm, const std::string &filename) : + comm_{ std::move(comm) } { const std::chrono::time_point start_time = std::chrono::steady_clock::now(); // open the file @@ -271,6 +290,7 @@ model::model(const std::string &filename) { const std::chrono::time_point end_time = std::chrono::steady_clock::now(); detail::log(verbosity_level::full | verbosity_level::timing, + comm_, "Read {} support vectors with {} features and {} classes using {} classification in {} using the libsvm model parser from file '{}'.\n\n", detail::tracking::tracking_entry{ "model_read", "num_support_vectors", num_support_vectors_ }, detail::tracking::tracking_entry{ "model_read", "num_features", num_features_ }, @@ -290,11 +310,14 @@ void model::save(const std::string &filename) const { const std::chrono::time_point start_time = std::chrono::steady_clock::now(); - // save model file header and support vectors - detail::io::write_libsvm_model_data(filename, params_, classification_strategy_, *rho_ptr_, *alpha_ptr_, *index_sets_ptr_, data_); + if (comm_.is_main_rank()) { + // save model file header and support vectors + detail::io::write_libsvm_model_data(filename, comm_, params_, classification_strategy_, *rho_ptr_, *alpha_ptr_, *index_sets_ptr_, data_); + } const std::chrono::time_point end_time = std::chrono::steady_clock::now(); detail::log(verbosity_level::full | verbosity_level::timing, + comm_, "Write {} support vectors with {} features and {} classes using {} classification in {} 
to the libsvm model file '{}'.\n", detail::tracking::tracking_entry{ "model_write", "num_support_vectors", num_support_vectors_ }, detail::tracking::tracking_entry{ "model_write", "num_features", num_features_ }, diff --git a/src/main_predict.cpp b/src/main_predict.cpp index 3d47ad53f..f87e5c3f3 100644 --- a/src/main_predict.cpp +++ b/src/main_predict.cpp @@ -22,7 +22,7 @@ #include "hws/system_hardware_sampler.hpp" // hws::system_hardware_sampler #endif -#include "fmt/format.h" // fmt::print +#include "fmt/format.h" // fmt::print, fmt::format #include "fmt/os.h" // fmt::ostream, fmt::output_file #include "fmt/ranges.h" // fmt::join @@ -34,6 +34,7 @@ #include // std::mem_fn #include // std::cerr, std::endl #include // std::unique_ptr, std::make_unique +#include // std::string #include // std::pair #include // std::visit #include // std::vector @@ -41,9 +42,11 @@ using namespace std::chrono_literals; int main(int argc, char *argv[]) { - // create std::unique_ptr containing a plssvm::scope_guard - // -> used to automatically handle necessary environment teardown operations - std::unique_ptr environment_guard{}; + // create environment scoped guard + const plssvm::environment::scope_guard environment_guard{}; + // create a PLSSVM communicator -> use MPI_COMM_WORLD for our executables + // if MPI is not supported, does nothing + const plssvm::mpi::communicator comm{}; try { const std::chrono::steady_clock::time_point start_time = std::chrono::steady_clock::now(); @@ -56,17 +59,22 @@ int main(int argc, char *argv[]) { #endif // parse SVM parameter from command line - const plssvm::detail::cmd::parser_predict cmd_parser{ argc, argv }; + const plssvm::detail::cmd::parser_predict cmd_parser{ comm, argc, argv }; + + // add MPI related tracking entries + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "mpi", "", comm })); // send warning if the build type is release and assertions are enabled if constexpr 
(std::string_view{ PLSSVM_BUILD_TYPE } == "Release" && PLSSVM_IS_DEFINED(PLSSVM_ENABLE_ASSERTS)) { plssvm::detail::log(plssvm::verbosity_level::full | plssvm::verbosity_level::warning, + comm, "WARNING: The build type is set to Release, but assertions are enabled. " "This may result in a noticeable performance degradation in parts of PLSSVM!\n"); } // output used parameter plssvm::detail::log(plssvm::verbosity_level::full, + comm, "\ntask: prediction\n{}\n", plssvm::detail::tracking::tracking_entry{ "parameter", "", cmd_parser }); @@ -76,39 +84,28 @@ int main(int argc, char *argv[]) { // check whether SYCL is used as backend (it is either requested directly or as automatic backend) const bool use_sycl_as_backend{ cmd_parser.backend == plssvm::backend_type::sycl || (cmd_parser.backend == plssvm::backend_type::automatic && plssvm::determine_default_backend() == plssvm::backend_type::sycl) }; - // check whether HPX is used as backend (it is either requested directly or as automatic backend) - const bool use_hpx_as_backend{ cmd_parser.backend == plssvm::backend_type::hpx || (cmd_parser.backend == plssvm::backend_type::automatic && plssvm::determine_default_backend() == plssvm::backend_type::hpx) }; // check whether Kokkos is used as backend (it is either requested directly or as automatic backend) const bool use_kokkos_as_backend{ cmd_parser.backend == plssvm::backend_type::kokkos || (cmd_parser.backend == plssvm::backend_type::automatic && plssvm::determine_default_backend() == plssvm::backend_type::kokkos) }; - // initialize environments if necessary - std::vector backends_to_initialize{}; - if (use_hpx_as_backend) { - backends_to_initialize.push_back(plssvm::backend_type::hpx); - } - if (use_kokkos_as_backend) { - backends_to_initialize.push_back(plssvm::backend_type::kokkos); - } - environment_guard = std::make_unique(backends_to_initialize); - // create default csvm const std::unique_ptr svm = [&]() { if (use_sycl_as_backend) { - return 
plssvm::make_csvm(cmd_parser.backend, cmd_parser.target, plssvm::sycl_implementation_type = cmd_parser.sycl_implementation_type); + return plssvm::make_csvm(cmd_parser.backend, comm, cmd_parser.target, plssvm::sycl_implementation_type = cmd_parser.sycl_implementation_type); } else if (use_kokkos_as_backend) { - return plssvm::make_csvm(cmd_parser.backend, cmd_parser.target, plssvm::kokkos_execution_space = cmd_parser.kokkos_execution_space); + return plssvm::make_csvm(cmd_parser.backend, comm, cmd_parser.target, plssvm::kokkos_execution_space = cmd_parser.kokkos_execution_space); } else { - return plssvm::make_csvm(cmd_parser.backend, cmd_parser.target); + return plssvm::make_csvm(cmd_parser.backend, comm, cmd_parser.target); } }(); // create model - const plssvm::model model{ cmd_parser.model_filename }; + const plssvm::model model{ comm, cmd_parser.model_filename }; // output parameter used to learn the model { const plssvm::parameter params = model.get_params(); plssvm::detail::log(plssvm::verbosity_level::full, + comm, "Parameter used to train the model:\n" " kernel_type: {} -> {}\n", params.kernel_type, @@ -118,6 +115,7 @@ int main(int argc, char *argv[]) { break; case plssvm::kernel_function_type::polynomial: plssvm::detail::log(plssvm::verbosity_level::full, + comm, " degree: {}\n" " gamma: {}\n" " coef0: {}\n", @@ -128,10 +126,11 @@ int main(int argc, char *argv[]) { case plssvm::kernel_function_type::rbf: case plssvm::kernel_function_type::laplacian: case plssvm::kernel_function_type::chi_squared: - plssvm::detail::log(plssvm::verbosity_level::full, " gamma: {}\n", plssvm::get_gamma_string(params.gamma)); + plssvm::detail::log(plssvm::verbosity_level::full, comm, " gamma: {}\n", plssvm::get_gamma_string(params.gamma)); break; case plssvm::kernel_function_type::sigmoid: plssvm::detail::log(plssvm::verbosity_level::full, + comm, " gamma: {}\n" " coef0: {}\n", plssvm::get_gamma_string(params.gamma), @@ -147,11 +146,15 @@ int main(int argc, char *argv[]) { { 
const std::chrono::time_point write_start_time = std::chrono::steady_clock::now(); - fmt::ostream out = fmt::output_file(cmd_parser.predict_filename); - out.print("{}", fmt::join(predicted_labels, "\n")); + // only write predict file on the main MPI rank + if (comm.is_main_rank()) { + fmt::ostream out = fmt::output_file(cmd_parser.predict_filename); + out.print("{}", fmt::join(predicted_labels, "\n")); + } const std::chrono::time_point write_end_time = std::chrono::steady_clock::now(); plssvm::detail::log(plssvm::verbosity_level::full | plssvm::verbosity_level::timing, + comm, "Write {} predictions in {} to the file '{}'.\n", plssvm::detail::tracking::tracking_entry{ "predictions_write", "num_predictions", predicted_labels.size() }, plssvm::detail::tracking::tracking_entry{ "predictions_write", "time", std::chrono::duration_cast(write_end_time - write_start_time) }, @@ -165,15 +168,15 @@ int main(int argc, char *argv[]) { const plssvm::classification_report report{ correct_labels, predicted_labels }; // print complete report - plssvm::detail::log(plssvm::verbosity_level::full, "\n{}\n", report); + plssvm::detail::log(plssvm::verbosity_level::full, comm, "\n{}\n", report); // print only accuracy for LIBSVM conformity - plssvm::detail::log(plssvm::verbosity_level::libsvm, "{} (classification)\n", report.accuracy()); + plssvm::detail::log(plssvm::verbosity_level::libsvm, comm, "{} (classification)\n", report.accuracy()); PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "accuracy", "achieved_accuracy", report.accuracy().achieved_accuracy })); PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "accuracy", "num_correct", report.accuracy().num_correct })); PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "accuracy", "num_total", report.accuracy().num_total })); } }; - std::visit(data_set_visitor, 
plssvm::detail::cmd::data_set_factory(cmd_parser)); + std::visit(data_set_visitor, plssvm::detail::cmd::data_set_factory(comm, cmd_parser)); // stop CPU hardware sampler and dump results if available #if defined(PLSSVM_HARDWARE_SAMPLING_ENABLED) @@ -183,16 +186,25 @@ int main(int argc, char *argv[]) { const std::chrono::steady_clock::time_point end_time = std::chrono::steady_clock::now(); plssvm::detail::log(plssvm::verbosity_level::full | plssvm::verbosity_level::timing, + comm, "\nTotal runtime: {}\n", plssvm::detail::tracking::tracking_entry{ "", "total_time", std::chrono::duration_cast(end_time - start_time) }); - PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_SAVE(cmd_parser.performance_tracking_filename); + // TODO: really change file name? what to output on the command line? + std::string performance_tracking_filename{ cmd_parser.performance_tracking_filename }; +#if defined(PLSSVM_HAS_MPI_ENABLED) + if (!performance_tracking_filename.empty()) { + // only append rank name to the file name if a file name has been provided + performance_tracking_filename += fmt::format(".{}", comm.rank()); + } +#endif + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_SAVE(performance_tracking_filename); } catch (const plssvm::exception &e) { - std::cerr << e.what_with_loc() << std::endl; + std::cerr << fmt::format("An exception occurred on MPI rank {}!: {}", comm.rank(), e.what_with_loc()) << std::endl; return EXIT_FAILURE; } catch (const std::exception &e) { - std::cerr << e.what() << std::endl; + std::cerr << fmt::format("An exception occurred on MPI rank {}!: {}", comm.rank(), e.what()) << std::endl; return EXIT_FAILURE; } diff --git a/src/main_train.cpp b/src/main_train.cpp index 2e2a39905..c28a8d328 100644 --- a/src/main_train.cpp +++ b/src/main_train.cpp @@ -21,6 +21,10 @@ #include "hws/system_hardware_sampler.hpp" // hws::system_hardware_sampler #endif +#include "plssvm/mpi/detail/version.hpp" + +#include "fmt/format.h" // fmt::format + #include // std::for_each #include // 
std::chrono::{steady_clock, duration, milliseconds}, std::chrono_literals namespace #include // std::size_t @@ -29,6 +33,7 @@ #include // std::mem_fn #include // std::cerr, std::endl #include // std::unique_ptr, std::make_unique +#include // std::string #include // std::remove_reference_t #include // std::pair #include // std::visit @@ -37,9 +42,11 @@ using namespace std::chrono_literals; int main(int argc, char *argv[]) { - // create std::unique_ptr containing a plssvm::scope_guard - // -> used to automatically handle necessary environment teardown operations - std::unique_ptr environment_guard{}; + // create environment scoped guard + const plssvm::environment::scope_guard environment_guard{}; + // create a PLSSVM communicator -> use MPI_COMM_WORLD for our executables + // if MPI is not supported, does nothing + const plssvm::mpi::communicator comm{}; try { const std::chrono::steady_clock::time_point start_time = std::chrono::steady_clock::now(); @@ -52,17 +59,22 @@ int main(int argc, char *argv[]) { #endif // parse SVM parameter from command line - plssvm::detail::cmd::parser_train cmd_parser{ argc, argv }; + const plssvm::detail::cmd::parser_train cmd_parser{ comm, argc, argv }; + + // add MPI related tracking entries + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "mpi", "", comm })); // send warning if the build type is release and assertions are enabled if constexpr (std::string_view{ PLSSVM_BUILD_TYPE } == "Release" && PLSSVM_IS_DEFINED(PLSSVM_ENABLE_ASSERTS)) { plssvm::detail::log(plssvm::verbosity_level::full | plssvm::verbosity_level::warning, + comm, "WARNING: The build type is set to Release, but assertions are enabled. 
" "This may result in a noticeable performance degradation in parts of PLSSVM!\n"); } // output used parameter plssvm::detail::log(plssvm::verbosity_level::full, + comm, "\ntask: training\n{}\n\n\n", plssvm::detail::tracking::tracking_entry{ "parameter", "", cmd_parser }); @@ -72,29 +84,17 @@ int main(int argc, char *argv[]) { // check whether SYCL is used as backend (it is either requested directly or as automatic backend) const bool use_sycl_as_backend{ cmd_parser.backend == plssvm::backend_type::sycl || (cmd_parser.backend == plssvm::backend_type::automatic && plssvm::determine_default_backend() == plssvm::backend_type::sycl) }; - // check whether HPX is used as backend (it is either requested directly or as automatic backend) - const bool use_hpx_as_backend{ cmd_parser.backend == plssvm::backend_type::hpx || (cmd_parser.backend == plssvm::backend_type::automatic && plssvm::determine_default_backend() == plssvm::backend_type::hpx) }; // check whether Kokkos is used as backend (it is either requested directly or as automatic backend) const bool use_kokkos_as_backend{ cmd_parser.backend == plssvm::backend_type::kokkos || (cmd_parser.backend == plssvm::backend_type::automatic && plssvm::determine_default_backend() == plssvm::backend_type::kokkos) }; - // initialize environments if necessary - std::vector backends_to_initialize{}; - if (use_hpx_as_backend) { - backends_to_initialize.push_back(plssvm::backend_type::hpx); - } - if (use_kokkos_as_backend) { - backends_to_initialize.push_back(plssvm::backend_type::kokkos); - } - environment_guard = std::make_unique(backends_to_initialize); - // create SVM const std::unique_ptr svm = [&]() { if (use_sycl_as_backend) { - return plssvm::make_csvm(cmd_parser.backend, cmd_parser.target, cmd_parser.csvm_params, plssvm::sycl_implementation_type = cmd_parser.sycl_implementation_type, plssvm::sycl_kernel_invocation_type = cmd_parser.sycl_kernel_invocation_type); + return plssvm::make_csvm(cmd_parser.backend, comm, 
cmd_parser.target, cmd_parser.csvm_params, plssvm::sycl_implementation_type = cmd_parser.sycl_implementation_type, plssvm::sycl_kernel_invocation_type = cmd_parser.sycl_kernel_invocation_type); } else if (use_kokkos_as_backend) { - return plssvm::make_csvm(cmd_parser.backend, cmd_parser.target, cmd_parser.csvm_params, plssvm::kokkos_execution_space = cmd_parser.kokkos_execution_space); + return plssvm::make_csvm(cmd_parser.backend, comm, cmd_parser.target, cmd_parser.csvm_params, plssvm::kokkos_execution_space = cmd_parser.kokkos_execution_space); } else { - return plssvm::make_csvm(cmd_parser.backend, cmd_parser.target, cmd_parser.csvm_params); + return plssvm::make_csvm(cmd_parser.backend, comm, cmd_parser.target, cmd_parser.csvm_params); } }(); @@ -110,10 +110,11 @@ int main(int argc, char *argv[]) { plssvm::max_iter = cmd_parser.max_iter, plssvm::classification = cmd_parser.classification, plssvm::solver = cmd_parser.solver); + // save model to file model.save(cmd_parser.model_filename); }; - std::visit(data_set_visitor, plssvm::detail::cmd::data_set_factory(cmd_parser)); + std::visit(data_set_visitor, plssvm::detail::cmd::data_set_factory(comm, cmd_parser)); // stop CPU hardware sampler and dump results if available #if defined(PLSSVM_HARDWARE_SAMPLING_ENABLED) @@ -121,18 +122,30 @@ int main(int argc, char *argv[]) { PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_HWS_ENTRY(sampler); #endif + // wait until all MPI processes reach this point + comm.barrier(); + const std::chrono::steady_clock::time_point end_time = std::chrono::steady_clock::now(); plssvm::detail::log(plssvm::verbosity_level::full, + comm, "\nTotal runtime: {}\n", plssvm::detail::tracking::tracking_entry{ "", "total_time", std::chrono::duration_cast(end_time - start_time) }); - PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_SAVE(cmd_parser.performance_tracking_filename); + // TODO: really change file name? what to output on the command line? 
+ std::string performance_tracking_filename{ cmd_parser.performance_tracking_filename }; +#if defined(PLSSVM_HAS_MPI_ENABLED) + if (!performance_tracking_filename.empty()) { + // only append rank name to the file name if a file name has been provided + performance_tracking_filename += fmt::format(".{}", comm.rank()); + } +#endif + PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_SAVE(performance_tracking_filename); } catch (const plssvm::exception &e) { - std::cerr << e.what_with_loc() << std::endl; + std::cerr << fmt::format("An exception occurred on MPI rank {}!: {}", comm.rank(), e.what_with_loc()) << std::endl; return EXIT_FAILURE; } catch (const std::exception &e) { - std::cerr << e.what() << std::endl; + std::cerr << fmt::format("An exception occurred on MPI rank {}!: {}", comm.rank(), e.what()) << std::endl; return EXIT_FAILURE; } diff --git a/src/plssvm/backends/CUDA/csvm.cu b/src/plssvm/backends/CUDA/csvm.cu index 9eebc97e3..ba29de3d7 100644 --- a/src/plssvm/backends/CUDA/csvm.cu +++ b/src/plssvm/backends/CUDA/csvm.cu @@ -26,6 +26,7 @@ #include "plssvm/exceptions/exceptions.hpp" // plssvm::exception #include "plssvm/gamma.hpp" // plssvm::gamma_type #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_function_type +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/parameter.hpp" // plssvm::parameter #include "plssvm/shape.hpp" // plssvm::shape #include "plssvm/target_platforms.hpp" // plssvm::target_platform @@ -43,16 +44,23 @@ #include // std::cout, std::endl #include // std::iota #include // std::string +#include // std::move #include // std::get #include // std:vector namespace plssvm::cuda { csvm::csvm(parameter params) : - csvm{ plssvm::target_platform::automatic, params } { } + csvm{ mpi::communicator{}, plssvm::target_platform::automatic, params } { } + +csvm::csvm(mpi::communicator comm, parameter params) : + csvm{ std::move(comm), plssvm::target_platform::automatic, params } { } csvm::csvm(target_platform 
target, parameter params) : - base_type{ params } { + csvm{ mpi::communicator{}, target, params } { } + +csvm::csvm(mpi::communicator comm, target_platform target, parameter params) : + base_type{ std::move(comm), params } { this->init(target); } @@ -78,7 +86,10 @@ void csvm::init(const target_platform target) { #endif } + // TODO: how to handle device output on multiple MPI ranks?! + plssvm::detail::log(verbosity_level::full, + comm_, "\nUsing CUDA ({}) as backend.\n", plssvm::detail::tracking::tracking_entry{ "dependencies", "cuda_runtime_version", detail::get_runtime_version() }); PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "backend", plssvm::backend_type::cuda })); @@ -98,6 +109,7 @@ void csvm::init(const target_platform target) { // print found CUDA devices plssvm::detail::log(verbosity_level::full, + comm_, "Found {} CUDA device(s):\n", plssvm::detail::tracking::tracking_entry{ "backend", "num_devices", devices_.size() }); std::vector device_names; @@ -106,6 +118,7 @@ void csvm::init(const target_platform target) { cudaDeviceProp prop{}; PLSSVM_CUDA_ERROR_CHECK(cudaGetDeviceProperties(&prop, device)) plssvm::detail::log(verbosity_level::full, + comm_, " [{}, {}, {}.{}]\n", device, prop.name, @@ -115,6 +128,7 @@ void csvm::init(const target_platform target) { } PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((plssvm::detail::tracking::tracking_entry{ "backend", "device", device_names })); plssvm::detail::log(verbosity_level::full | verbosity_level::timing, + comm_, "\n"); } diff --git a/src/plssvm/csvm.cpp b/src/plssvm/csvm.cpp index b2e1edfda..5018cf115 100644 --- a/src/plssvm/csvm.cpp +++ b/src/plssvm/csvm.cpp @@ -144,6 +144,7 @@ std::pair, std::vector> csvm::conjugat const std::size_t max_residual_difference_idx = rhs_idx_max_residual_difference(); detail::log(verbosity_level::full | verbosity_level::timing, + comm_, "Start Iteration {} (max: {}) with {}/{} converged rhs 
(max residual {} with target residual {} for rhs {}). ", iter + 1, max_cg_iter, @@ -189,6 +190,7 @@ std::pair, std::vector> csvm::conjugat const std::chrono::steady_clock::time_point iteration_end_time = std::chrono::steady_clock::now(); const std::chrono::duration iteration_duration = std::chrono::duration_cast(iteration_end_time - iteration_start_time); detail::log(verbosity_level::full | verbosity_level::timing, + comm_, "Done in {}.\n", iteration_duration); total_iteration_time += iteration_duration; @@ -199,6 +201,7 @@ std::pair, std::vector> csvm::conjugat } const std::size_t max_residual_difference_idx = rhs_idx_max_residual_difference(); detail::log(verbosity_level::full | verbosity_level::timing, + comm_, "Finished after {}/{} iterations with {}/{} converged rhs (max residual {} with target residual {} for rhs {}) and an average iteration time of {}.\n", detail::tracking::tracking_entry{ "cg", "iterations", iter }, detail::tracking::tracking_entry{ "cg", "max_iterations", max_cg_iter }, @@ -213,6 +216,7 @@ std::pair, std::vector> csvm::conjugat PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((detail::tracking::tracking_entry{ "cg", "target_residuals", eps * eps * delta0 })); PLSSVM_DETAIL_TRACKING_PERFORMANCE_TRACKER_ADD_TRACKING_ENTRY((detail::tracking::tracking_entry{ "cg", "epsilon", eps })); detail::log(verbosity_level::libsvm, + comm_, "optimization finished, #iter = {}\n", iter); @@ -271,6 +275,7 @@ std::pair, real_type> csvm::perform_dimensional_reduction const real_type QA_cost = kernel_function(A, num_rows_reduced, A, num_rows_reduced, params) + real_type{ 1.0 } / params.cost; const std::chrono::steady_clock::time_point dimension_reduction_end_time = std::chrono::steady_clock::now(); detail::log(verbosity_level::full | verbosity_level::timing, + comm_, "Performed dimensional reduction in {}.\n", detail::tracking::tracking_entry{ "cg", "dimensional_reduction", std::chrono::duration_cast(dimension_reduction_end_time - 
dimension_reduction_start_time) }); @@ -296,6 +301,7 @@ aos_matrix csvm::run_predict_values(const parameter ¶ms, const so const std::chrono::steady_clock::time_point end_time = std::chrono::steady_clock::now(); detail::log(verbosity_level::full | verbosity_level::timing, + comm_, "Predicted the values of {} predict points using {} support vectors with {} features each in {}.\n", predict_points.num_rows(), support_vectors.num_rows(), diff --git a/src/plssvm/detail/cmd/parser_predict.cpp b/src/plssvm/detail/cmd/parser_predict.cpp index 656d9a76d..31a764626 100644 --- a/src/plssvm/detail/cmd/parser_predict.cpp +++ b/src/plssvm/detail/cmd/parser_predict.cpp @@ -14,6 +14,7 @@ #include "plssvm/constants.hpp" // plssvm::real_type #include "plssvm/detail/assert.hpp" // PLSSVM_ASSERT #include "plssvm/detail/logging_without_performance_tracking.hpp" // plssvm::detail::log_untracked +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator #include "plssvm/target_platforms.hpp" // plssvm::list_available_target_platforms #include "plssvm/verbosity_levels.hpp" // plssvm::verbosity, plssvm::verbosity_level #include "plssvm/version/version.hpp" // plssvm::version::detail::get_version_info @@ -24,6 +25,7 @@ #include "fmt/ranges.h" // fmt::join #include // std::exit, EXIT_SUCCESS, EXIT_FAILURE +#include // std::atexit #include // std::exception #include // std::filesystem::path #include // std::cout, std::cerr, std::endl @@ -32,11 +34,18 @@ namespace plssvm::detail::cmd { -parser_predict::parser_predict(int argc, char **argv) { +parser_predict::parser_predict(const mpi::communicator &comm, int argc, char **argv) { // check for basic argc and argv correctness PLSSVM_ASSERT(argc >= 1, fmt::format("At least one argument is always given (the executable name), but argc is {}!", argc)); PLSSVM_ASSERT(argv != nullptr, "At least one argument is always given (the executable name), but argv is a nullptr!"); + // register a std::atexit handler since our parser may directly call 
std::exit + std::atexit([]() { + if (mpi::is_active()) { + mpi::finalize(); + } + }); + // setup command line parser with all available options cxxopts::Options options("plssvm-predict", "LS-SVM with multiple (GPU-)backends"); options @@ -74,27 +83,35 @@ parser_predict::parser_predict(int argc, char **argv) { options.parse_positional({ "test", "model", "output" }); result = options.parse(argc, argv); } catch (const std::exception &e) { - std::cerr << fmt::format(fmt::fg(fmt::color::red), "ERROR: {}\n", e.what()) << std::endl; - std::cout << options.help() << std::endl; + if (comm.is_main_rank()) { + std::cerr << fmt::format(fmt::fg(fmt::color::red), "ERROR: {}\n", e.what()) << std::endl; + std::cout << options.help() << std::endl; + } std::exit(EXIT_FAILURE); } // print help message and exit if (result.count("help")) { - std::cout << options.help() << std::endl; + if (comm.is_main_rank()) { + std::cout << options.help() << std::endl; + } std::exit(EXIT_SUCCESS); } // print version info if (result.count("version")) { - std::cout << version::detail::get_version_info("plssvm-predict") << std::endl; + if (comm.is_main_rank()) { + std::cout << version::detail::get_version_info("plssvm-predict") << std::endl; + } std::exit(EXIT_SUCCESS); } // check if the number of positional arguments is not too large if (!result.unmatched().empty()) { - std::cerr << fmt::format(fmt::fg(fmt::color::red), "ERROR: only up to three positional options may be given, but {} (\"{}\") additional option(s) where provided!", result.unmatched().size(), fmt::join(result.unmatched(), " ")) << std::endl; - std::cout << options.help() << std::endl; + if (comm.is_main_rank()) { + std::cerr << fmt::format(fmt::fg(fmt::color::red), "ERROR: only up to three positional options may be given, but {} (\"{}\") additional option(s) where provided!", result.unmatched().size(), fmt::join(result.unmatched(), " ")) << std::endl; + std::cout << options.help() << std::endl; + } std::exit(EXIT_FAILURE); } @@ -116,6 
+133,7 @@ parser_predict::parser_predict(int argc, char **argv) { // warn if a SYCL implementation type is explicitly set but SYCL isn't the current (automatic) backend if (!sycl_backend_is_used && sycl_implementation_type != sycl::implementation_type::automatic) { detail::log_untracked(verbosity_level::full | verbosity_level::warning, + comm, "WARNING: explicitly set a SYCL implementation type but the current backend isn't SYCL; ignoring --sycl_implementation_type={}\n", sycl_implementation_type); } @@ -134,6 +152,7 @@ parser_predict::parser_predict(int argc, char **argv) { // warn if the kokkos execution space is explicitly set but Kokkos isn't the current (automatic) backend if (!kokkos_backend_is_used && kokkos_execution_space != kokkos::execution_space::automatic) { detail::log_untracked(verbosity_level::full | verbosity_level::warning, + comm, "WARNING: explicitly set a Kokkos execution space but the current backend isn't Kokkos; ignoring --kokkos_execution_space={}\n", kokkos_execution_space); } @@ -151,6 +170,7 @@ parser_predict::parser_predict(int argc, char **argv) { const verbosity_level verb = result["verbosity"].as(); if (quiet && verb != verbosity_level::quiet) { detail::log_untracked(verbosity_level::full | verbosity_level::warning, + comm, "WARNING: explicitly set the -q/--quiet flag, but the provided verbosity level isn't \"quiet\"; setting --verbosity={} to --verbosity=quiet\n", verb); verbosity = verbosity_level::quiet; @@ -163,16 +183,20 @@ parser_predict::parser_predict(int argc, char **argv) { // parse test data filename if (!result.count("test")) { - std::cerr << fmt::format(fmt::fg(fmt::color::red), "ERROR: missing test file!\n") << std::endl; - std::cout << options.help() << std::endl; + if (comm.is_main_rank()) { + std::cerr << fmt::format(fmt::fg(fmt::color::red), "ERROR: missing test file!\n") << std::endl; + std::cout << options.help() << std::endl; + } std::exit(EXIT_FAILURE); } input_filename = result["test"].as(); // parse model 
filename if (!result.count("model")) { - std::cerr << fmt::format(fmt::fg(fmt::color::red), "ERROR: missing model file!\n") << std::endl; - std::cout << options.help() << std::endl; + if (comm.is_main_rank()) { + std::cerr << fmt::format(fmt::fg(fmt::color::red), "ERROR: missing model file!\n") << std::endl; + std::cout << options.help() << std::endl; + } std::exit(EXIT_FAILURE); } model_filename = result["model"].as(); diff --git a/src/plssvm/detail/cmd/parser_train.cpp b/src/plssvm/detail/cmd/parser_train.cpp index 31d5b8719..ef9dc1f21 100644 --- a/src/plssvm/detail/cmd/parser_train.cpp +++ b/src/plssvm/detail/cmd/parser_train.cpp @@ -19,9 +19,11 @@ #include "plssvm/detail/utility.hpp" // plssvm::detail::to_underlying #include "plssvm/gamma.hpp" // plssvm::get_gamma_string #include "plssvm/kernel_function_types.hpp" // plssvm::kernel_type_to_math_string -#include "plssvm/target_platforms.hpp" // plssvm::list_available_target_platforms -#include "plssvm/verbosity_levels.hpp" // plssvm::verbosity, plssvm::verbosity_level -#include "plssvm/version/version.hpp" // plssvm::version::detail::get_version_info +#include "plssvm/mpi/communicator.hpp" // plssvm::mpi::communicator +#include "plssvm/mpi/environment.hpp" +#include "plssvm/target_platforms.hpp" // plssvm::list_available_target_platforms +#include "plssvm/verbosity_levels.hpp" // plssvm::verbosity, plssvm::verbosity_level +#include "plssvm/version/version.hpp" // plssvm::version::detail::get_version_info #include "cxxopts.hpp" // cxxopts::Options, cxxopts::value,cxxopts::ParseResult #include "fmt/color.h" // fmt::fg, fmt::color::red @@ -29,6 +31,7 @@ #include "fmt/ranges.h" // fmt::join #include // std::exit, EXIT_SUCCESS, EXIT_FAILURE +#include // std::atexit #include // std::exception #include // std::filesystem::path #include // std::cout, std::cerr, std::endl @@ -39,11 +42,18 @@ namespace plssvm::detail::cmd { -parser_train::parser_train(int argc, char **argv) { +parser_train::parser_train(const 
mpi::communicator &comm, int argc, char **argv) { // check for basic argc and argv correctness PLSSVM_ASSERT(argc >= 1, fmt::format("At least one argument is always given (the executable name), but argc is {}!", argc)); PLSSVM_ASSERT(argv != nullptr, "At least one argument is always given (the executable name), but argv is a nullptr!"); + // register a std::atexit handler since our parser may directly call std::exit + std::atexit([]() { + if (mpi::is_active()) { + mpi::finalize(); + } + }); + // create the help message for the kernel function type const auto kernel_type_to_help_entry = [](const kernel_function_type kernel) { return fmt::format("\t {} -- {}: {}\n", detail::to_underlying(kernel), kernel, kernel_function_type_to_math_string(kernel)); @@ -99,27 +109,35 @@ parser_train::parser_train(int argc, char **argv) { options.parse_positional({ "input", "model" }); result = options.parse(argc, argv); } catch (const std::exception &e) { - std::cerr << fmt::format(fmt::fg(fmt::color::red), "ERROR: {}\n", e.what()) << std::endl; - std::cout << options.help() << std::endl; + if (comm.is_main_rank()) { + std::cerr << fmt::format(fmt::fg(fmt::color::red), "ERROR: {}\n", e.what()) << std::endl; + std::cout << options.help() << std::endl; + } std::exit(EXIT_FAILURE); } // print help message and exit if (result.count("help")) { - std::cout << options.help() << std::endl; + if (comm.is_main_rank()) { + std::cout << options.help() << std::endl; + } std::exit(EXIT_SUCCESS); } // print version info if (result.count("version")) { - std::cout << version::detail::get_version_info("plssvm-train") << std::endl; + if (comm.is_main_rank()) { + std::cout << version::detail::get_version_info("plssvm-train") << std::endl; + } std::exit(EXIT_SUCCESS); } // check if the number of positional arguments is not too large if (!result.unmatched().empty()) { - std::cerr << fmt::format(fmt::fg(fmt::color::red), "ERROR: only up to two positional options may be given, but {} (\"{}\") additional 
option(s) where provided!\n", result.unmatched().size(), fmt::join(result.unmatched(), " ")) << std::endl; - std::cout << options.help() << std::endl; + if (comm.is_main_rank()) { + std::cerr << fmt::format(fmt::fg(fmt::color::red), "ERROR: only up to two positional options may be given, but {} (\"{}\") additional option(s) where provided!\n", result.unmatched().size(), fmt::join(result.unmatched(), " ")) << std::endl; + std::cout << options.help() << std::endl; + } std::exit(EXIT_FAILURE); } @@ -138,8 +156,10 @@ parser_train::parser_train(int argc, char **argv) { const decltype(csvm_params.gamma) gamma_input = result["gamma"].as(); // check if the provided gamma is legal iff a real_type has been provided if (std::holds_alternative(gamma_input) && std::get(gamma_input) <= real_type{ 0.0 }) { - std::cerr << fmt::format(fmt::fg(fmt::color::red), "ERROR: gamma must be greater than 0.0, but is {}!\n", std::get(gamma_input)) << std::endl; - std::cout << options.help() << std::endl; + if (comm.is_main_rank()) { + std::cerr << fmt::format(fmt::fg(fmt::color::red), "ERROR: gamma must be greater than 0.0, but is {}!\n", std::get(gamma_input)) << std::endl; + std::cout << options.help() << std::endl; + } std::exit(EXIT_FAILURE); } // provided gamma was legal -> override default value @@ -166,8 +186,10 @@ parser_train::parser_train(int argc, char **argv) { const auto max_iter_input = result["max_iter"].as(); // check if the provided max_iter is legal if (max_iter_input <= decltype(max_iter_input){ 0 }) { - std::cerr << fmt::format(fmt::fg(fmt::color::red), "ERROR: max_iter must be greater than 0, but is {}!\n", max_iter_input) << std::endl; - std::cout << options.help() << std::endl; + if (comm.is_main_rank()) { + std::cerr << fmt::format(fmt::fg(fmt::color::red), "ERROR: max_iter must be greater than 0, but is {}!\n", max_iter_input) << std::endl; + std::cout << options.help() << std::endl; + } std::exit(EXIT_FAILURE); } // provided max_iter was legal -> override default 
value @@ -200,6 +222,7 @@ parser_train::parser_train(int argc, char **argv) { // warn if kernel invocation type is explicitly set but SYCL isn't the current (automatic) backend if (!sycl_backend_is_used && sycl_kernel_invocation_type != sycl::kernel_invocation_type::automatic) { detail::log_untracked(verbosity_level::full | verbosity_level::warning, + comm, "WARNING: explicitly set a SYCL kernel invocation type but the current backend isn't SYCL; ignoring --sycl_kernel_invocation_type={}\n", sycl_kernel_invocation_type); } @@ -210,6 +233,7 @@ parser_train::parser_train(int argc, char **argv) { // warn if a SYCL implementation type is explicitly set but SYCL isn't the current (automatic) backend if (!sycl_backend_is_used && sycl_implementation_type != sycl::implementation_type::automatic) { detail::log_untracked(verbosity_level::full | verbosity_level::warning, + comm, "WARNING: explicitly set a SYCL implementation type but the current backend isn't SYCL; ignoring --sycl_implementation_type={}\n", sycl_implementation_type); } @@ -228,6 +252,7 @@ parser_train::parser_train(int argc, char **argv) { // warn if the kokkos execution space is explicitly set but Kokkos isn't the current (automatic) backend if (!kokkos_backend_is_used && kokkos_execution_space != kokkos::execution_space::automatic) { detail::log_untracked(verbosity_level::full | verbosity_level::warning, + comm, "WARNING: explicitly set a Kokkos execution space but the current backend isn't Kokkos; ignoring --kokkos_execution_space={}\n", kokkos_execution_space); } @@ -245,6 +270,7 @@ parser_train::parser_train(int argc, char **argv) { const verbosity_level verb = result["verbosity"].as(); if (quiet && verb != verbosity_level::quiet) { detail::log_untracked(verbosity_level::full | verbosity_level::warning, + comm, "WARNING: explicitly set the -q/--quiet flag, but the provided verbosity level isn't \"quiet\"; setting --verbosity={} to --verbosity=quiet\n", verb); verbosity = verbosity_level::quiet; @@ -257,8 
+283,10 @@ parser_train::parser_train(int argc, char **argv) { // parse input data filename if (!result.count("input")) { - std::cerr << fmt::format(fmt::fg(fmt::color::red), "ERROR: missing input file!\n") << std::endl; - std::cout << options.help() << std::endl; + if (comm.is_main_rank()) { + std::cerr << fmt::format(fmt::fg(fmt::color::red), "ERROR: missing input file!\n") << std::endl; + std::cout << options.help() << std::endl; + } std::exit(EXIT_FAILURE); } input_filename = result["input"].as();