From 3cd93098d0fae5c95cf59da033ff27d7860f41a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20P=2E=20P=C3=A9ba=C3=BF?= Date: Wed, 29 Nov 2023 11:12:46 -0500 Subject: [PATCH] #2201: fixed print errors; added pseudocode; and epoch boilerplate --- .../balance/temperedlb/temperedlb.cc | 53 ++++++++++++++++--- 1 file changed, 47 insertions(+), 6 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index cc7ca33b38..59eb5a8874 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -1434,7 +1434,7 @@ void TemperedLB::originalTransfer() { // Initialize transfer and rejection counters int n_transfers = 0, n_rejected = 0; - // Try to migrate objects only from overloaded objects + // Try to migrate objects only from overloaded ranks if (is_overloaded_) { std::vector under = makeUnderloaded(); std::unordered_map migrate_objs; @@ -1473,7 +1473,7 @@ void TemperedLB::originalTransfer() { vt_debug_print( verbose, temperedlb, - "TemperedLB::decide: selected_node={}, load_info_.size()={}\n", + "TemperedLB::originalTransfer: selected_node={}, load_info_.size()={}\n", selected_node, load_info_.size() ); @@ -1488,7 +1488,7 @@ void TemperedLB::originalTransfer() { ); vt_debug_print( verbose, temperedlb, - "TemperedLB::decide: trial={}, iter={}, under.size()={}, " + "TemperedLB::originalTransfer: trial={}, iter={}, under.size()={}, " "selected_node={}, selected_load={:e}, obj_id={:x}, home={}, " "obj_load={}, target_max_load={}, this_new_load_={}, " "criterion={}\n", @@ -1544,7 +1544,7 @@ void TemperedLB::originalTransfer() { if (theConfig()->vt_debug_temperedlb) { // compute rejection rate because it will be printed - runInEpochCollective("TemperedLB::decide -> compute rejection", [=] { + runInEpochCollective("TemperedLB::originalTransfer -> compute rejection", [=] { proxy_.allreduce<&TemperedLB::rejectionStatsHandler, 
collective::PlusOp>( n_rejected, n_transfers ); @@ -1553,11 +1553,52 @@ void TemperedLB::originalTransfer() { } void TemperedLB::swapClusters() { - //auto lazy_epoch = theTerm()->makeEpochCollective("TemperedLB: swapClusters"); + auto lazy_epoch = theTerm()->makeEpochCollective("TemperedLB: swapClusters"); // Initialize transfer and rejection counters - //int n_transfers = 0, n_rejected = 0; + int n_transfers = 0, n_rejected = 0; + + // Try to migrate objects only from overloaded ranks + if (is_overloaded_) { + // Compute collection of potential targets + std::vector under = makeUnderloaded(); + std::unordered_map migrate_objs; + if (under.size() > 0) { + std::vector ordered_obj_ids = orderObjects( + obj_ordering_, cur_objs_, this_new_load_, target_max_load_ + ); + + // Pseudocode (not yet implemented): cluster migratable objects on source rank + + // Iterate over potential targets to try to swap clusters + + //// Iterate over target clusters + + ////// Decide whether swap is beneficial + + //////// If swap is beneficial, compute source cluster size + //////// Test whether criterion is greater than swap RTOL times source size + + ////////// Only in this case perform swap + ////////// Else reject swap + + } // if (under.size() > 0) + } // if (is_overloaded_) + + // Finalize epoch + theTerm()->finishedEpoch(lazy_epoch); + vt::runSchedulerThrough(lazy_epoch); + + // Report on rejection rate in debug mode + if (theConfig()->vt_debug_temperedlb) { + runInEpochCollective("TemperedLB::swapClusters -> compute rejection", [=] { + proxy_.allreduce<&TemperedLB::rejectionStatsHandler, collective::PlusOp>( + n_rejected, n_transfers + ); + }); + } + } // void TemperedLB::originalTransfer() void TemperedLB::thunkMigrations() {