Skip to content

Commit

Permalink
#2299: Add method with hardcoded load balancing cost
Browse files Browse the repository at this point in the history
  • Loading branch information
thearusable committed Aug 9, 2024
1 parent e2a152b commit f6ffd20
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 8 deletions.
10 changes: 10 additions & 0 deletions src/vt/vrt/collection/balance/baselb/baselb.h
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,16 @@ struct BaseLB {
bool isCommAware() const { return comm_aware_; }
void recvSharedEdges(CommMsg* msg);

/**
 * \brief Report the estimated cost of running load balancing
 *
 * The estimate is a hardcoded constant, not a measured value.
 *
 * \return the estimated cost, fixed at 1e-7 (100 ns when read as seconds)
 */
double getCollectiveEpochCost() const {
  // Scientific notation avoids counting zeros: 1.0e-7 == 0.0000001
  constexpr double hundred_nanoseconds = 1.0e-7;
  return hundred_nanoseconds;
}

protected:
void getArgs(PhaseType phase);

Expand Down
2 changes: 1 addition & 1 deletion src/vt/vrt/collection/balance/greedylb/greedylb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ void GreedyLB::loadStats() {
bool should_lb = false;
this_load_begin = this_load;

if (avg_load > 0.0000000001) {
if (avg_load > getCollectiveEpochCost()) {
should_lb = I > greedy_tolerance;
}

Expand Down
2 changes: 1 addition & 1 deletion src/vt/vrt/collection/balance/hierarchicallb/hierlb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ void HierarchicalLB::loadStats() {
bool should_lb = false;
this_load_begin = this_load;

if (avg_load > 0.0000000001) {
if (avg_load > getCollectiveEpochCost()) {
should_lb = I > hierlb_tolerance;
}

Expand Down
8 changes: 2 additions & 6 deletions src/vt/vrt/collection/balance/temperedlb/temperedlb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@
#include <vector>
#include <unordered_set>
#include <set>
#include <limits>

namespace vt { namespace vrt { namespace collection { namespace lb {

Expand Down Expand Up @@ -439,9 +438,6 @@ void TemperedLB::runLB(LoadType total_load) {
auto const imb = stats.at(lb::Statistic::Rank_load_modeled).at(
lb::StatisticQuantity::imb
);
auto const min = stats.at(lb::Statistic::Object_load_modeled).at(
lb::StatisticQuantity::min
);
auto const load = this_load;

if (target_pole_) {
Expand All @@ -453,8 +449,8 @@ void TemperedLB::runLB(LoadType total_load) {
target_max_load_ = avg;
}

// Use an minimal object load on average rank load to load-balance
if (avg > min / theContext()->getNumNodes()) {
// Only consider load balancing when the average rank load exceeds the estimated load-balancing cost
if (avg > getCollectiveEpochCost()) {
should_lb = max > (run_temperedlb_tolerance + 1.0) * target_max_load_;
}

Expand Down

0 comments on commit f6ffd20

Please sign in to comment.