From 57667db4e4a133209c169f311898466452901d63 Mon Sep 17 00:00:00 2001
From: Arkadiusz Szczepkowicz
Date: Wed, 4 Sep 2024 19:45:33 +0200
Subject: [PATCH] #2299: Move logic for checking max load to BaseLB

---
 src/vt/vrt/collection/balance/baselb/baselb.h | 19 ++++++++++++++++++-
 .../collection/balance/greedylb/greedylb.cc   |  6 +-----
 .../balance/hierarchicallb/hierlb.cc          |  6 +-----
 .../balance/temperedlb/temperedlb.cc          |  6 +-----
 4 files changed, 21 insertions(+), 16 deletions(-)

diff --git a/src/vt/vrt/collection/balance/baselb/baselb.h b/src/vt/vrt/collection/balance/baselb/baselb.h
index a29b3b5708..ab54b60368 100644
--- a/src/vt/vrt/collection/balance/baselb/baselb.h
+++ b/src/vt/vrt/collection/balance/baselb/baselb.h
@@ -190,7 +190,7 @@ struct BaseLB {
   );
 
   /**
-   * \brief Get the estimated time needed for load balancing
+   * \brief Get the estimated time needed for load-balancing
    *
    * \return the estimated time
    */
@@ -198,6 +198,23 @@ struct BaseLB {
     return std::chrono::nanoseconds(100);
   }
 
+  /**
+   * \brief Check if load-balancing should be done
+   *
+   * \return true when the maximum load exceeds the cost of load balancing; false otherwise
+   */
+  bool maxLoadExceedsLBCost() const {
+    auto const max = base_stats_->at(lb::Statistic::Rank_load_modeled).at(
+      lb::StatisticQuantity::max
+    );
+    auto max_in_ns = std::chrono::duration_cast<std::chrono::nanoseconds>(
+      std::chrono::duration<double>(max)
+    );
+
+    // Compare the maximum rank load to the estimated load-balancing cost
+    return max_in_ns > getCollectiveEpochCost();
+  }
+
 private:
   TransferVecType transfers_ = {};
   TransferType off_node_migrate_ = {};
diff --git a/src/vt/vrt/collection/balance/greedylb/greedylb.cc b/src/vt/vrt/collection/balance/greedylb/greedylb.cc
index 22b92bad38..263cba9813 100644
--- a/src/vt/vrt/collection/balance/greedylb/greedylb.cc
+++ b/src/vt/vrt/collection/balance/greedylb/greedylb.cc
@@ -160,11 +160,7 @@ void GreedyLB::loadStats() {
   bool should_lb = false;
   this_load_begin = this_load;
 
-  // Use an estimated load-balancing cost on average rank load to load-balance
-  auto avg_ns = std::chrono::duration_cast<std::chrono::nanoseconds>(
-    std::chrono::duration<double>(avg_load)
-  );
-  if (avg_ns > getCollectiveEpochCost()) {
+  if (maxLoadExceedsLBCost()) {
     should_lb = I > greedy_tolerance;
   }
 
diff --git a/src/vt/vrt/collection/balance/hierarchicallb/hierlb.cc b/src/vt/vrt/collection/balance/hierarchicallb/hierlb.cc
index 8c37ec57a8..dab4a5638d 100644
--- a/src/vt/vrt/collection/balance/hierarchicallb/hierlb.cc
+++ b/src/vt/vrt/collection/balance/hierarchicallb/hierlb.cc
@@ -263,11 +263,7 @@ void HierarchicalLB::loadStats() {
   bool should_lb = false;
   this_load_begin = this_load;
 
-  // Use an estimated load-balancing cost on average rank load to load-balance
-  auto avg_ns = std::chrono::duration_cast<std::chrono::nanoseconds>(
-    std::chrono::duration<double>(avg_load)
-  );
-  if (avg_ns > getCollectiveEpochCost()) {
+  if (maxLoadExceedsLBCost()) {
     should_lb = I > hierlb_tolerance;
   }
 
diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc
index 2007684c67..f73a54d01f 100644
--- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc
+++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc
@@ -449,11 +449,7 @@ void TemperedLB::runLB(LoadType total_load) {
     target_max_load_ = avg;
   }
 
-  // Use an estimated load-balancing cost on average rank load to load-balance
-  auto avg_ns = std::chrono::duration_cast<std::chrono::nanoseconds>(
-    std::chrono::duration<double>(avg)
-  );
-  if (avg_ns > getCollectiveEpochCost()) {
+  if (maxLoadExceedsLBCost()) {
     should_lb = max > (run_temperedlb_tolerance + 1.0) * target_max_load_;
   }
 