From 195528de7e05574888f92bfae5e6c260f861de30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20P=2E=20P=C3=A9ba=C3=BF?= Date: Wed, 18 Oct 2023 03:25:42 -0400 Subject: [PATCH 001/126] #2201: added enums to specify transfer strategy --- .../balance/temperedlb/tempered_enums.h | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/vt/vrt/collection/balance/temperedlb/tempered_enums.h b/src/vt/vrt/collection/balance/temperedlb/tempered_enums.h index b56414a333..abb55cb285 100644 --- a/src/vt/vrt/collection/balance/temperedlb/tempered_enums.h +++ b/src/vt/vrt/collection/balance/temperedlb/tempered_enums.h @@ -165,6 +165,36 @@ enum struct KnowledgeEnum : uint8_t { Log = 2 }; +/// Enum for the strategy to be used in transfer stage +enum struct TransferStrategyEnum : uint8_t { + /** + * \brief Original strategy + * + * Transfer one object per transfer as in original Grapevine approach. + */ + Original = 0, + /** + * \brief Original strategy improved by recursion + * + * When single object transfer is rejected, attempt to recurse in order to + * pull more objects into the transfer and hereby minimize work added by + * said transfer. + * This is especially useful when communication is taken into account, as + * object transfers typically disrupt local vs. global communication edges. + */ + Recursive = 1, + /** + * \brief Form object clusters and attempt to perform swaps. + * + * Object can be clustered according to arbitrary definition, and swaps + * of entire clusters, according the nullset, between ranks are attempted. + * This is especially useful when shared memory constraints are present, + * as breaking shared memory clusters results in higher overall memory + * footprint, in constrast with whole cluster swaps. + */ + ClusterSwap = 2, +}; + }}}} /* end namespace vt::vrt::collection::lb */ #endif /*INCLUDED_VT_VRT_COLLECTION_BALANCE_TEMPEREDLB_TEMPERED_ENUMS_H*/ From 2cbd2556ac7aa19bef37c229d3d7485c49e05668 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20P=2E=20P=C3=A9ba=C3=BF?= Date: Wed, 18 Oct 2023 06:38:00 -0400 Subject: [PATCH 002/126] #2201: created framework to integrate transfer strategy ivar --- .../balance/temperedlb/tempered_enums.h | 60 +++++++++---------- .../balance/temperedlb/temperedlb.cc | 10 ++++ .../balance/temperedlb/temperedlb.h | 7 +++ 3 files changed, 47 insertions(+), 30 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/tempered_enums.h b/src/vt/vrt/collection/balance/temperedlb/tempered_enums.h index abb55cb285..fd3878bcff 100644 --- a/src/vt/vrt/collection/balance/temperedlb/tempered_enums.h +++ b/src/vt/vrt/collection/balance/temperedlb/tempered_enums.h @@ -71,6 +71,36 @@ enum struct InformTypeEnum : uint8_t { AsyncInform = 1 }; +/// Enum for the strategy to be used in transfer stage +enum struct TransferTypeEnum : uint8_t { + /** + * \brief Original strategy + * + * Transfer one object per transfer as in original Grapevine approach. + */ + Original = 0, + /** + * \brief Original strategy improved by recursion + * + * When single object transfer is rejected, attempt to recurse in order to + * pull more objects into the transfer and hereby minimize work added by + * said transfer. + * This is especially useful when communication is taken into account, as + * object transfers typically disrupt local vs. global communication edges. + */ + Recursive = 1, + /** + * \brief Form object clusters and attempt to perform swaps. 
+ * + * Object can be clustered according to arbitrary definition, and swaps + * of entire clusters, according the nullset, between ranks are attempted. + * This is especially useful when shared memory constraints are present, + * as breaking shared memory clusters results in higher overall memory + * footprint, in constrast with whole cluster swaps. + */ + SwapClusters = 2, +}; + /// Enum for the order in which local objects are considered for transfer enum struct ObjectOrderEnum : uint8_t { Arbitrary = 0, //< Arbitrary order: iterate as defined by the unordered_map @@ -165,36 +195,6 @@ enum struct KnowledgeEnum : uint8_t { Log = 2 }; -/// Enum for the strategy to be used in transfer stage -enum struct TransferStrategyEnum : uint8_t { - /** - * \brief Original strategy - * - * Transfer one object per transfer as in original Grapevine approach. - */ - Original = 0, - /** - * \brief Original strategy improved by recursion - * - * When single object transfer is rejected, attempt to recurse in order to - * pull more objects into the transfer and hereby minimize work added by - * said transfer. - * This is especially useful when communication is taken into account, as - * object transfers typically disrupt local vs. global communication edges. - */ - Recursive = 1, - /** - * \brief Form object clusters and attempt to perform swaps. - * - * Object can be clustered according to arbitrary definition, and swaps - * of entire clusters, according the nullset, between ranks are attempted. - * This is especially useful when shared memory constraints are present, - * as breaking shared memory clusters results in higher overall memory - * footprint, in constrast with whole cluster swaps. - */ - ClusterSwap = 2, -}; - }}}} /* end namespace vt::vrt::collection::lb */ #endif /*INCLUDED_VT_VRT_COLLECTION_BALANCE_TEMPEREDLB_TEMPERED_ENUMS_H*/ diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index f73a54d01f..fb2b98c094 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -373,6 +373,15 @@ void TemperedLB::inputParams(balance::ConfigEntry* config) { ); inform_type_ = inform_type_converter_.getFromConfig(config, inform_type_); + balance::LBArgsEnumConverter transfer_type_converter_( + "cmf", "TransferTypeEnum", { + {TransferTypeEnum::Original, "Original"}, + {TransferTypeEnum::Recursive, "Recursive"}, + {TransferTypeEnum::SwapClusters, "SwapClusters"} + } + ); + transfer_type_ = transfer_type_converter_.getFromConfig(config, transfer_type_); + balance::LBArgsEnumConverter obj_ordering_converter_( "ordering", "ObjectOrderEnum", { {ObjectOrderEnum::Arbitrary, "Arbitrary"}, @@ -414,6 +423,7 @@ void TemperedLB::inputParams(balance::ConfigEntry* config) { knowledge_converter_.getString(knowledge_), f_, k_max_, num_iters_, criterion_converter_.getString(criterion_), num_trials_, deterministic_, inform_type_converter_.getString(inform_type_), + transfer_type_converter_.getString(transfer_type_), obj_ordering_converter_.getString(obj_ordering_), cmf_type_converter_.getString(cmf_type_), rollback_, target_pole_ ); diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h index e6f8854569..6c3fb651f2 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h @@ -171,6 +171,13 @@ struct TemperedLB : BaseLB { LoadType target_max_load_ = 
0.0; CriterionEnum criterion_ = CriterionEnum::ModifiedGrapevine; InformTypeEnum inform_type_ = InformTypeEnum::AsyncInform; + /** + * \brief Type of strategy to be used in transfer stage + * + * Available strategies include: Original, Recursive, and SwapClusters + * and are adapted to different kinds of problems. + */ + TransferTypeEnum transfer_type_ = TransferTypeEnum::Original; ObjectOrderEnum obj_ordering_ = ObjectOrderEnum::FewestMigrations; CMFTypeEnum cmf_type_ = CMFTypeEnum::NormByMax; KnowledgeEnum knowledge_ = KnowledgeEnum::Log; From e24b454ebfce830afcf90a112060ce88f5e6e537 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20P=2E=20P=C3=A9ba=C3=BF?= Date: Thu, 19 Oct 2023 15:27:20 -0400 Subject: [PATCH 003/126] #2201: added transfer type key to getInputKeysWithHelp() --- .../balance/temperedlb/temperedlb.cc | 27 ++++++++++++++++--- .../balance/temperedlb/temperedlb.h | 7 +---- 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index fb2b98c094..d235384f75 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -166,6 +166,27 @@ Default: AsyncInform loads when the first message for a round is received, avoiding the synchronization cost but delaying the propagation of some information until the following round. +)" + }, + { + "transfer", + R"( +Values: {Original, Recursive, SwapClusters} +Default: Original +Description: + Transfer strategy to be used in transfer stage. Options are: + Original: transfer one object per transfer as in original Grapevine approach. Recursive: original strategy improved by recursion. + When single object transfer is rejected, attempt to recurse in order to + pull more objects into the transfer and hereby minimize work added by + said transfer. + This is especially useful when communication is taken into account, as + object transfers typically disrupt local vs. global communication edges. + SwapClusters: form object clusters and attempt to perform swaps. + Object can be clustered according to arbitrary definition, and swaps + of entire clusters, according the nullset, between ranks are attempted. + This is especially useful when shared memory constraints are present, + as breaking shared memory clusters results in higher overall memory + footprint, in constrast with whole cluster swaps. 
)" }, { @@ -375,9 +396,9 @@ void TemperedLB::inputParams(balance::ConfigEntry* config) { balance::LBArgsEnumConverter transfer_type_converter_( "cmf", "TransferTypeEnum", { - {TransferTypeEnum::Original, "Original"}, - {TransferTypeEnum::Recursive, "Recursive"}, - {TransferTypeEnum::SwapClusters, "SwapClusters"} + {TransferTypeEnum::Original, "Original"}, + {TransferTypeEnum::Recursive, "Recursive"}, + {TransferTypeEnum::SwapClusters, "SwapClusters"} } ); transfer_type_ = transfer_type_converter_.getFromConfig(config, transfer_type_); diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h index 6c3fb651f2..4c5bd8a20d 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h @@ -171,12 +171,7 @@ struct TemperedLB : BaseLB { LoadType target_max_load_ = 0.0; CriterionEnum criterion_ = CriterionEnum::ModifiedGrapevine; InformTypeEnum inform_type_ = InformTypeEnum::AsyncInform; - /** - * \brief Type of strategy to be used in transfer stage - * - * Available strategies include: Original, Recursive, and SwapClusters - * and are adapted to different kinds of problems. - */ + /// Type of strategy to be used in transfer stage TransferTypeEnum transfer_type_ = TransferTypeEnum::Original; ObjectOrderEnum obj_ordering_ = ObjectOrderEnum::FewestMigrations; CMFTypeEnum cmf_type_ = CMFTypeEnum::NormByMax; From 1bfe7b47849118603a63728f39449a8149487d9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20P=2E=20P=C3=A9ba=C3=BF?= Date: Sat, 18 Nov 2023 03:54:53 -0500 Subject: [PATCH 004/126] #2201: addressed PR2203 NS reviww comments --- src/vt/vrt/collection/balance/temperedlb/tempered_enums.h | 4 ++-- src/vt/vrt/collection/balance/temperedlb/temperedlb.cc | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/tempered_enums.h b/src/vt/vrt/collection/balance/temperedlb/tempered_enums.h index fd3878bcff..260db5fec0 100644 --- a/src/vt/vrt/collection/balance/temperedlb/tempered_enums.h +++ b/src/vt/vrt/collection/balance/temperedlb/tempered_enums.h @@ -92,8 +92,8 @@ enum struct TransferTypeEnum : uint8_t { /** * \brief Form object clusters and attempt to perform swaps. * - * Object can be clustered according to arbitrary definition, and swaps - * of entire clusters, according the nullset, between ranks are attempted. + * Object can be clustered including to arbitrary definition, and swaps + * of entire clusters, including the nullset, between ranks are attempted. * This is especially useful when shared memory constraints are present, * as breaking shared memory clusters results in higher overall memory * footprint, in constrast with whole cluster swaps. diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index d235384f75..dd1615c1bc 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -174,8 +174,9 @@ Default: AsyncInform Values: {Original, Recursive, SwapClusters} Default: Original Description: - Transfer strategy to be used in transfer stage. Options are: - Original: transfer one object per transfer as in original Grapevine approach. Recursive: original strategy improved by recursion. + Transfer strategy to be used in transfer stage. Options are: + Original: transfer one object per transfer as in original Grapevine approach. 
+ Recursive: original strategy improved by recursion. When single object transfer is rejected, attempt to recurse in order to pull more objects into the transfer and hereby minimize work added by said transfer. From ee5c3338309c75ee40b1d4ef30eadc3fdc103b1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20P=2E=20P=C3=A9ba=C3=BF?= Date: Wed, 22 Nov 2023 16:55:45 -0500 Subject: [PATCH 005/126] #2201: Update src/vt/vrt/collection/balance/temperedlb/temperedlb.cc Co-authored-by: Jonathan Lifflander --- src/vt/vrt/collection/balance/temperedlb/temperedlb.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index dd1615c1bc..eb3116eee5 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -396,7 +396,7 @@ void TemperedLB::inputParams(balance::ConfigEntry* config) { inform_type_ = inform_type_converter_.getFromConfig(config, inform_type_); balance::LBArgsEnumConverter transfer_type_converter_( - "cmf", "TransferTypeEnum", { + "transfer", "TransferTypeEnum", { {TransferTypeEnum::Original, "Original"}, {TransferTypeEnum::Recursive, "Recursive"}, {TransferTypeEnum::SwapClusters, "SwapClusters"} From 8ad40568888fa531ed523d39545d7ee48cd91ccb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20P=2E=20P=C3=A9ba=C3=BF?= Date: Mon, 27 Nov 2023 16:21:28 -0500 Subject: [PATCH 006/126] #2201: checkpoint of non-breaking changes (documentation and style) --- .../balance/temperedlb/temperedlb.cc | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index eb3116eee5..7e886ecb1d 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -455,9 +455,9 @@ void TemperedLB::inputParams(balance::ConfigEntry* config) { void TemperedLB::runLB(LoadType total_load) { bool should_lb = false; + // Compute load statistics this_load = total_load; stats = *getStats(); - auto const avg = stats.at(lb::Statistic::Rank_load_modeled).at( lb::StatisticQuantity::avg ); @@ -485,6 +485,7 @@ void TemperedLB::runLB(LoadType total_load) { should_lb = max > (run_temperedlb_tolerance + 1.0) * target_max_load_; } + // Report statistics from head rank if (theContext()->getNode() == 0) { vt_debug_print( terse, temperedlb, @@ -501,6 +502,7 @@ void TemperedLB::runLB(LoadType total_load) { } } + // Perform load rebalancing when deemed necessary if (should_lb) { doLBStages(imb); } @@ -814,8 +816,8 @@ void TemperedLB::propagateRound(uint8_t k_cur, bool sync, EpochType epoch) { selected.insert(this_node); } + // Determine fanout factor capped by number of nodes auto const fanout = std::min(f_, static_cast(num_nodes - 1)); - vt_debug_print( verbose, temperedlb, "TemperedLB::propagateRound: trial={}, iter={}, k_max={}, k_cur={}, " @@ -823,6 +825,7 @@ void TemperedLB::propagateRound(uint8_t k_cur, bool sync, EpochType epoch) { trial_, iter_, k_max_, k_cur, selected.size(), fanout ); + // Iterate over fanout factor for (int i = 0; i < fanout; i++) { // This implies full knowledge of all processors if (selected.size() >= static_cast(num_nodes)) { @@ -849,6 +852,7 @@ void TemperedLB::propagateRound(uint8_t k_cur, bool sync, EpochType epoch) { // Send message with load if (sync) { + // Message in synchronous mode auto msg = 
makeMessage(this_node, load_info_); if (epoch != no_epoch) { envelopeSetEpoch(msg->env, epoch); @@ -858,6 +862,7 @@ void TemperedLB::propagateRound(uint8_t k_cur, bool sync, EpochType epoch) { LoadMsgSync, &TemperedLB::propagateIncomingSync >(msg.get()); } else { + // Message in asynchronous mode auto msg = makeMessage(this_node, load_info_, k_cur); if (epoch != no_epoch) { envelopeSetEpoch(msg->env, epoch); @@ -1217,8 +1222,10 @@ std::vector TemperedLB::orderObjects( void TemperedLB::decide() { auto lazy_epoch = theTerm()->makeEpochCollective("TemperedLB: decide"); + // Initialize transfer and rejection counters int n_transfers = 0, n_rejected = 0; + // Try to migrate objects only from overloaded objects if (is_overloaded_) { std::vector under = makeUnderloaded(); std::unordered_map migrate_objs; @@ -1251,6 +1258,7 @@ void TemperedLB::decide() { } // Rebuild the CMF with the new loads taken into account auto cmf = createCMF(under); + // Select a node using the CMF auto const selected_node = sampleFromCMF(under, cmf); @@ -1260,16 +1268,15 @@ void TemperedLB::decide() { selected_node, load_info_.size() ); + // Find load of selected node auto load_iter = load_info_.find(selected_node); vtAssert(load_iter != load_info_.end(), "Selected node not found"); - - // The load of the node selected auto& selected_load = load_iter->second; + // Evaluate criterion for proposed transfer bool eval = Criterion(criterion_)( this_new_load_, selected_load, obj_load, target_max_load_ ); - vt_debug_print( verbose, temperedlb, "TemperedLB::decide: trial={}, iter={}, under.size()={}, " @@ -1289,9 +1296,10 @@ void TemperedLB::decide() { eval ); + // Decide about proposed migration based on criterion evaluation if (eval) { ++n_transfers; - // transfer the object load in seconds + // Transfer the object load in seconds // to match the object load units on the receiving end migrate_objs[selected_node][obj_id] = obj_load; @@ -1316,7 +1324,6 @@ void TemperedLB::decide() { auto node = migration.first; lazyMigrateObjsTo(lazy_epoch, node, migration.second); } - } else { // do nothing (underloaded-based algorithm), waits to get work from // overloaded nodes From 844db21ebc8b31b7a934be58508ef66ff89dbd63 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20P=2E=20P=C3=A9ba=C3=BF?= Date: Tue, 28 Nov 2023 13:16:05 -0500 Subject: [PATCH 007/126] #2201: fixed the incorrect transfer type causing build error --- .../balance/temperedlb/temperedlb.cc | 18 ++++++++++++++++-- .../collection/balance/temperedlb/temperedlb.h | 2 +- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 7e886ecb1d..03ba04e5e3 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -559,6 +559,7 @@ void TemperedLB::doLBStages(LoadType start_imb) { is_underloaded_ = true; } + // Perform requested type of information stage switch (inform_type_) { case InformTypeEnum::SyncInform: informSync(); @@ -570,7 +571,20 @@ void TemperedLB::doLBStages(LoadType start_imb) { vtAbort("TemperedLB:: Unsupported inform type"); } - decide(); + // Execute transfer stage + switch (transfer_type_) { + case TransferTypeEnum::Original: + originalTransfer(); + break; + case TransferTypeEnum::Recursive: + vtAbort("TemperedLB:: Unimplemented transfer type: Recursive"); + break; + case TransferTypeEnum::SwapClusters: + vtAbort("TemperedLB:: Unimplemented transfer type: SwapClusters"); + 
break; + default: + vtAbort("TemperedLB:: Unsupported transfer type"); + } vt_debug_print( verbose, temperedlb, @@ -1219,7 +1233,7 @@ std::vector TemperedLB::orderObjects( return ordered_obj_ids; } -void TemperedLB::decide() { +void TemperedLB::originalTransfer() { auto lazy_epoch = theTerm()->makeEpochCollective("TemperedLB: decide"); // Initialize transfer and rejection counters diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h index 4c5bd8a20d..15f4cc69ba 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h @@ -90,7 +90,7 @@ struct TemperedLB : BaseLB { void doLBStages(LoadType start_imb); void informAsync(); void informSync(); - void decide(); + void originalTransfer(); void migrate(); void propagateRound(uint8_t k_cur_async, bool sync, EpochType epoch = no_epoch); From ef8ef7629d17410e195a62c533ca50ef106824ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20P=2E=20P=C3=A9ba=C3=BF?= Date: Wed, 18 Oct 2023 03:25:42 -0400 Subject: [PATCH 008/126] #2201: added enums to specify transfer strategy --- .../balance/temperedlb/tempered_enums.h | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/vt/vrt/collection/balance/temperedlb/tempered_enums.h b/src/vt/vrt/collection/balance/temperedlb/tempered_enums.h index 260db5fec0..cca2ebb819 100644 --- a/src/vt/vrt/collection/balance/temperedlb/tempered_enums.h +++ b/src/vt/vrt/collection/balance/temperedlb/tempered_enums.h @@ -195,6 +195,36 @@ enum struct KnowledgeEnum : uint8_t { Log = 2 }; +/// Enum for the strategy to be used in transfer stage +enum struct TransferStrategyEnum : uint8_t { + /** + * \brief Original strategy + * + * Transfer one object per transfer as in original Grapevine approach. + */ + Original = 0, + /** + * \brief Original strategy improved by recursion + * + * When single object transfer is rejected, attempt to recurse in order to + * pull more objects into the transfer and hereby minimize work added by + * said transfer. + * This is especially useful when communication is taken into account, as + * object transfers typically disrupt local vs. global communication edges. + */ + Recursive = 1, + /** + * \brief Form object clusters and attempt to perform swaps. + * + * Object can be clustered according to arbitrary definition, and swaps + * of entire clusters, according the nullset, between ranks are attempted. + * This is especially useful when shared memory constraints are present, + * as breaking shared memory clusters results in higher overall memory + * footprint, in constrast with whole cluster swaps. 
+ */ + ClusterSwap = 2, +}; + }}}} /* end namespace vt::vrt::collection::lb */ #endif /*INCLUDED_VT_VRT_COLLECTION_BALANCE_TEMPEREDLB_TEMPERED_ENUMS_H*/ From 2f67dc4d111136b55303fb8962fe53077cd735cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20P=2E=20P=C3=A9ba=C3=BF?= Date: Wed, 18 Oct 2023 06:38:00 -0400 Subject: [PATCH 009/126] #2201: created framework to integrate transfer strategy ivar --- .../balance/temperedlb/tempered_enums.h | 34 ++----------------- .../balance/temperedlb/temperedlb.h | 7 +++- 2 files changed, 8 insertions(+), 33 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/tempered_enums.h b/src/vt/vrt/collection/balance/temperedlb/tempered_enums.h index cca2ebb819..fd3878bcff 100644 --- a/src/vt/vrt/collection/balance/temperedlb/tempered_enums.h +++ b/src/vt/vrt/collection/balance/temperedlb/tempered_enums.h @@ -92,8 +92,8 @@ enum struct TransferTypeEnum : uint8_t { /** * \brief Form object clusters and attempt to perform swaps. * - * Object can be clustered including to arbitrary definition, and swaps - * of entire clusters, including the nullset, between ranks are attempted. + * Object can be clustered according to arbitrary definition, and swaps + * of entire clusters, according the nullset, between ranks are attempted. * This is especially useful when shared memory constraints are present, * as breaking shared memory clusters results in higher overall memory * footprint, in constrast with whole cluster swaps. @@ -195,36 +195,6 @@ enum struct KnowledgeEnum : uint8_t { Log = 2 }; -/// Enum for the strategy to be used in transfer stage -enum struct TransferStrategyEnum : uint8_t { - /** - * \brief Original strategy - * - * Transfer one object per transfer as in original Grapevine approach. - */ - Original = 0, - /** - * \brief Original strategy improved by recursion - * - * When single object transfer is rejected, attempt to recurse in order to - * pull more objects into the transfer and hereby minimize work added by - * said transfer. - * This is especially useful when communication is taken into account, as - * object transfers typically disrupt local vs. global communication edges. - */ - Recursive = 1, - /** - * \brief Form object clusters and attempt to perform swaps. - * - * Object can be clustered according to arbitrary definition, and swaps - * of entire clusters, according the nullset, between ranks are attempted. - * This is especially useful when shared memory constraints are present, - * as breaking shared memory clusters results in higher overall memory - * footprint, in constrast with whole cluster swaps. - */ - ClusterSwap = 2, -}; - }}}} /* end namespace vt::vrt::collection::lb */ #endif /*INCLUDED_VT_VRT_COLLECTION_BALANCE_TEMPEREDLB_TEMPERED_ENUMS_H*/ diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h index 15f4cc69ba..663a385ec9 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h @@ -171,7 +171,12 @@ struct TemperedLB : BaseLB { LoadType target_max_load_ = 0.0; CriterionEnum criterion_ = CriterionEnum::ModifiedGrapevine; InformTypeEnum inform_type_ = InformTypeEnum::AsyncInform; - /// Type of strategy to be used in transfer stage + /** + * \brief Type of strategy to be used in transfer stage + * + * Available strategies include: Original, Recursive, and SwapClusters + * and are adapted to different kinds of problems. 
+ */ TransferTypeEnum transfer_type_ = TransferTypeEnum::Original; ObjectOrderEnum obj_ordering_ = ObjectOrderEnum::FewestMigrations; CMFTypeEnum cmf_type_ = CMFTypeEnum::NormByMax; From d7fdc3b64c3d7515824a9e1f19872648ca2cf20c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20P=2E=20P=C3=A9ba=C3=BF?= Date: Thu, 19 Oct 2023 15:27:20 -0400 Subject: [PATCH 010/126] #2201: added transfer type key to getInputKeysWithHelp() --- src/vt/vrt/collection/balance/temperedlb/temperedlb.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h index 663a385ec9..15f4cc69ba 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h @@ -171,12 +171,7 @@ struct TemperedLB : BaseLB { LoadType target_max_load_ = 0.0; CriterionEnum criterion_ = CriterionEnum::ModifiedGrapevine; InformTypeEnum inform_type_ = InformTypeEnum::AsyncInform; - /** - * \brief Type of strategy to be used in transfer stage - * - * Available strategies include: Original, Recursive, and SwapClusters - * and are adapted to different kinds of problems. - */ + /// Type of strategy to be used in transfer stage TransferTypeEnum transfer_type_ = TransferTypeEnum::Original; ObjectOrderEnum obj_ordering_ = ObjectOrderEnum::FewestMigrations; CMFTypeEnum cmf_type_ = CMFTypeEnum::NormByMax; From 03290cbe4942bf69d876ac22a487c6e2f9938bd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20P=2E=20P=C3=A9ba=C3=BF?= Date: Sat, 18 Nov 2023 03:54:53 -0500 Subject: [PATCH 011/126] #2201: addressed PR2203 NS reviww comments --- src/vt/vrt/collection/balance/temperedlb/tempered_enums.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/tempered_enums.h b/src/vt/vrt/collection/balance/temperedlb/tempered_enums.h index fd3878bcff..260db5fec0 100644 --- a/src/vt/vrt/collection/balance/temperedlb/tempered_enums.h +++ b/src/vt/vrt/collection/balance/temperedlb/tempered_enums.h @@ -92,8 +92,8 @@ enum struct TransferTypeEnum : uint8_t { /** * \brief Form object clusters and attempt to perform swaps. * - * Object can be clustered according to arbitrary definition, and swaps - * of entire clusters, according the nullset, between ranks are attempted. + * Object can be clustered including to arbitrary definition, and swaps + * of entire clusters, including the nullset, between ranks are attempted. * This is especially useful when shared memory constraints are present, * as breaking shared memory clusters results in higher overall memory * footprint, in constrast with whole cluster swaps. 
From b3d428c29b44798bbf478e19782b711daf5bfd2e Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Tue, 28 Nov 2023 16:19:58 -0800 Subject: [PATCH 012/126] #2201: args: add arg to force LB to run on the first phase (mainly for single-shot replaying) --- src/vt/configs/arguments/app_config.h | 1 + src/vt/configs/arguments/args.cc | 3 +++ src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc | 6 +++++- 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/vt/configs/arguments/app_config.h b/src/vt/configs/arguments/app_config.h index aa83d05588..2ddb024a31 100644 --- a/src/vt/configs/arguments/app_config.h +++ b/src/vt/configs/arguments/app_config.h @@ -160,6 +160,7 @@ struct AppConfig { bool vt_lb_self_migration = false; bool vt_lb_spec = false; std::string vt_lb_spec_file = ""; + bool vt_lb_run_lb_first_phase = false; bool vt_no_detect_hang = false; diff --git a/src/vt/configs/arguments/args.cc b/src/vt/configs/arguments/args.cc index ee3e6f8c25..0aae6c0572 100644 --- a/src/vt/configs/arguments/args.cc +++ b/src/vt/configs/arguments/args.cc @@ -913,6 +913,7 @@ void addLbArgs(CLI::App& app, AppConfig& appConfig) { auto lb_self_migration = "Allow load balancer to migrate objects to the same node"; auto lb_spec = "Enable LB spec file (defines which phases output LB data)"; auto lb_spec_file = "File containing LB spec; --vt_lb_spec to enable"; + auto lb_first_phase_info = "Force LB to run on the first phase (phase 0)"; auto s = app.add_flag("--vt_lb", appConfig.vt_lb, lb); auto t1 = app.add_flag("--vt_lb_quiet", appConfig.vt_lb_quiet, lb_quiet); auto u = app.add_option("--vt_lb_file_name", appConfig.vt_lb_file_name, lb_file_name)->capture_default_str()->check(CLI::ExistingFile); @@ -935,6 +936,7 @@ void addLbArgs(CLI::App& app, AppConfig& appConfig) { auto lbasm = app.add_flag("--vt_lb_self_migration", appConfig.vt_lb_self_migration, lb_self_migration); auto lbspec = app.add_flag("--vt_lb_spec", appConfig.vt_lb_spec, lb_spec); auto lbspecfile = app.add_option("--vt_lb_spec_file", appConfig.vt_lb_spec_file, lb_spec_file)->capture_default_str()->check(CLI::ExistingFile); + auto lb_first_phase = app.add_flag("--vt_lb_run_lb_first_phase", appConfig.vt_lb_run_lb_first_phase, lb_first_phase_info); // --vt_lb_name excludes --vt_lb_file_name, and vice versa v->excludes(u); @@ -963,6 +965,7 @@ void addLbArgs(CLI::App& app, AppConfig& appConfig) { lbasm->group(debugLB); lbspec->group(debugLB); lbspecfile->group(debugLB); + lb_first_phase->group(debugLB); // help options deliberately omitted from the debugLB group above so that // they appear grouped with --vt_help when --vt_help is used diff --git a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc index 50898735c3..751c9dd235 100644 --- a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc +++ b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc @@ -130,7 +130,11 @@ LBType LBManager::decideLBToRun(PhaseType phase, bool try_file) { } else { auto interval = theConfig()->vt_lb_interval; vtAssert(interval != 0, "LB Interval must not be 0"); - if (phase % interval == 1 || (interval == 1 && phase != 0)) { + if ( + phase % interval == 1 || + (interval == 1 && phase != 0) || + (phase == 0 && theConfig()->vt_lb_run_lb_first_phase) + ) { bool name_match = false; for (auto&& elm : get_lb_names()) { if (elm.second == theConfig()->vt_lb_name) { From 55dd1c051628b80e988bdd878ae2e2f75fc554eb Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Tue, 28 Nov 2023 17:18:00 -0800 Subject: 
[PATCH 013/126] #2201: temperedlb: implement basic memory information consumption, threshold variable for user --- .../balance/temperedlb/temperedlb.cc | 112 ++++++++++++++++++ .../balance/temperedlb/temperedlb.h | 41 +++++++ 2 files changed, 153 insertions(+) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 03ba04e5e3..b4d8d240ce 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -274,6 +274,15 @@ Default: false instead of the processor-average load. )" }, + { + "memory_threshold", + R"( +Values: +Defaut: 0 +Description: The memory threshold TemperedLB should strictly stay under which is +respected if memory information is present in the user-defined data. +)" + } }; return keys_help; } @@ -378,6 +387,7 @@ void TemperedLB::inputParams(balance::ConfigEntry* config) { deterministic_ = config->getOrDefault("deterministic", deterministic_); rollback_ = config->getOrDefault("rollback", rollback_); target_pole_ = config->getOrDefault("targetpole", target_pole_); + mem_thresh_ = config->getOrDefault("memory_threshold", mem_thresh_); balance::LBArgsEnumConverter criterion_converter_( "criterion", "CriterionEnum", { @@ -508,6 +518,98 @@ void TemperedLB::runLB(LoadType total_load) { } } +void TemperedLB::readClustersMemoryData() { + if (user_data_) { + for (auto const& [obj, data_map] : *user_data_) { + SharedIDType shared_id = -1; + BytesType shared_bytes = 0; + BytesType working_bytes = 0; + for (auto const& [key, variant] : data_map) { + if (key == "shared_id") { + // Because of how JSON is stored this is always a double, even though + // it should be an integer + if (double const* val = std::get_if(&variant)) { + shared_id = static_cast(*val); + } else { + vtAbort("\"shared_id\" in variant does not match integer"); + } + } + if (key == "shared_bytes") { + if (BytesType const* val = std::get_if(&variant)) { + shared_bytes = *val; + } else { + vtAbort("\"shared_bytes\" in variant does not match double"); + } + } + if (key == "task_working_bytes") { + if (BytesType const* val = std::get_if(&variant)) { + working_bytes = *val; + } else { + vtAbort("\"working_bytes\" in variant does not match double"); + } + } + if (key == "rank_working_bytes") { + if (BytesType const* val = std::get_if(&variant)) { + rank_bytes_ = *val; + } else { + vtAbort("\"rank_bytes\" in variant does not match double"); + } + } + // @todo: for now, skip "task_serialized_bytes" and + // "task_footprint_bytes" + } + + // @todo: switch to debug print at some point + vt_print( + temperedlb, "obj={} shared_block={} bytes={}\n", + obj, shared_id, shared_bytes + ); + + obj_shared_block_[obj] = shared_id; + obj_working_bytes_[obj] = working_bytes; + shared_block_size_[shared_id] = shared_bytes; + has_memory_data_ = true; + } + } +} + +TemperedLB::BytesType TemperedLB::computeMemoryUsage() const { + // Compute bytes used by shared blocks mapped here based on object mapping + auto const blocks_here = getSharedBlocksHere(); + + double total_shared_bytes = 0; + for (auto const& block_id : blocks_here) { + total_shared_bytes += shared_block_size_.find(block_id)->second; + } + + // Compute max object size + // @todo: Slight issue here that this will only count migratable objects + // (those contained in cur_objs), for our current use case this is not a + // problem, but it should include the max of non-migratable + double max_object_working_bytes = 0; + for (auto const& [obj_id, 
_] : cur_objs_) { + if (obj_working_bytes_.find(obj_id) != obj_working_bytes_.end()) { + max_object_working_bytes = + std::max(max_object_working_bytes, obj_working_bytes_.find(obj_id)->second); + } else { + vt_print( + temperedlb, "Warning: working bytes not found for object: {}\n", obj_id + ); + } + } + return rank_bytes_ + total_shared_bytes + max_object_working_bytes; +} + +std::set TemperedLB::getSharedBlocksHere() const { + std::set blocks_here; + for (auto const& [obj, _] : cur_objs_) { + if (obj_shared_block_.find(obj) != obj_shared_block_.end()) { + blocks_here.insert(obj_shared_block_.find(obj)->second); + } + } + return blocks_here; +} + void TemperedLB::doLBStages(LoadType start_imb) { decltype(this->cur_objs_) best_objs; LoadType best_load = 0; @@ -516,6 +618,9 @@ void TemperedLB::doLBStages(LoadType start_imb) { auto this_node = theContext()->getNode(); + // Read in memory information if it's available before be do any trials + readClustersMemoryData(); + for (trial_ = 0; trial_ < num_trials_; ++trial_) { // Clear out data structures selected_.clear(); @@ -553,6 +658,13 @@ void TemperedLB::doLBStages(LoadType start_imb) { LoadType(this_new_load_) ); + vt_print( + temperedlb, + "Current memory info: total memory usage={}, shared blocks here={}, " + "memory_threshold={}\n", computeMemoryUsage(), getSharedBlocksHere().size(), + mem_thresh_ + ); + if (isOverloaded(this_new_load_)) { is_overloaded_ = true; } else if (isUnderloaded(this_new_load_)) { diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h index 15f4cc69ba..9d05872abb 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h @@ -67,6 +67,8 @@ struct TemperedLB : BaseLB { using ReduceMsgType = vt::collective::ReduceNoneMsg; using QuantityType = std::map; using StatisticMapType = std::unordered_map; + using SharedIDType = int; + using BytesType = double; TemperedLB() = default; TemperedLB(TemperedLB const&) = delete; @@ -119,6 +121,27 @@ struct TemperedLB : BaseLB { void setupDone(); + /** + * \brief Read the memory data from the user-defined json blocks into data + * structures + */ + void readClustersMemoryData(); + + /** + * \brief Compute the memory usage for current assignment + * + * \return the total memory usage + */ + BytesType computeMemoryUsage() const; + + /** + * \brief Get the shared blocks that are located on this node with the current + * object assignment + * + * \return the number of shared blocks here + */ + std::set getSharedBlocksHere() const; + private: uint16_t f_ = 0; uint8_t k_max_ = 0; @@ -183,6 +206,24 @@ struct TemperedLB : BaseLB { std::mt19937 gen_sample_; StatisticMapType stats; LoadType this_load = 0.0f; + + + ////////////////////////////////////////////////////////////////////////////// + // All the memory info (may or may not be present) + ////////////////////////////////////////////////////////////////////////////// + + /// Whether we have memory information + bool has_memory_data_ = false; + /// Working bytes for this rank + BytesType rank_bytes_ = 0; + /// Shared ID for each object + std::unordered_map obj_shared_block_; + /// Shared block size in bytes + std::unordered_map shared_block_size_; + /// Working bytes for each object + std::unordered_map obj_working_bytes_; + /// User-defined memory threshold + BytesType mem_thresh_ = 0; }; }}}} /* end namespace vt::vrt::collection::lb */ From 855ef8856b757e0084e77f9470e7a1a23571a85a Mon Sep 17 00:00:00 
2001 From: Jonathan Lifflander Date: Tue, 28 Nov 2023 17:31:34 -0800 Subject: [PATCH 014/126] #2201: temperedlb: add computation for cluster/memory summary --- .../balance/temperedlb/temperedlb.cc | 41 +++++++++++++++---- .../balance/temperedlb/temperedlb.h | 7 ++++ 2 files changed, 41 insertions(+), 7 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index b4d8d240ce..27ae92358a 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -565,14 +565,28 @@ void TemperedLB::readClustersMemoryData() { obj, shared_id, shared_bytes ); + has_memory_data_ = true; obj_shared_block_[obj] = shared_id; obj_working_bytes_[obj] = working_bytes; shared_block_size_[shared_id] = shared_bytes; - has_memory_data_ = true; } } } +void TemperedLB::computeClusterSummary() { + for (auto const& [shared_id, shared_bytes] : shared_block_size_) { + LoadType cluster_load = 0; + for (auto const& [obj_id, obj_load] : cur_objs_) { + if (auto iter = obj_shared_block_.find(obj_id); iter != obj_shared_block_.end()) { + if (iter->second == shared_id) { + cluster_load += obj_load; + } + } + } + cur_blocks_[shared_id] = std::make_tuple(shared_bytes, cluster_load); + } +} + TemperedLB::BytesType TemperedLB::computeMemoryUsage() const { // Compute bytes used by shared blocks mapped here based on object mapping auto const blocks_here = getSharedBlocksHere(); @@ -658,12 +672,25 @@ void TemperedLB::doLBStages(LoadType start_imb) { LoadType(this_new_load_) ); - vt_print( - temperedlb, - "Current memory info: total memory usage={}, shared blocks here={}, " - "memory_threshold={}\n", computeMemoryUsage(), getSharedBlocksHere().size(), - mem_thresh_ - ); + if (has_memory_data_) { + vt_print( + temperedlb, + "Current memory info: total memory usage={}, shared blocks here={}, " + "memory_threshold={}\n", computeMemoryUsage(), + getSharedBlocksHere().size(), mem_thresh_ + ); + + computeClusterSummary(); + + for (auto const& [shared_id, value] : cur_blocks_) { + auto const& [shared_bytes, cluster_load] = value; + vt_print( + temperedlb, + "Cluster: id={}, bytes={}, load={}\n", + shared_id, shared_bytes, cluster_load + ); + } + } if (isOverloaded(this_new_load_)) { is_overloaded_ = true; diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h index 9d05872abb..104a5494e3 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h @@ -142,6 +142,11 @@ struct TemperedLB : BaseLB { */ std::set getSharedBlocksHere() const; + /** + * \brief Compute the current cluster assignment summary for this rank + */ + void computeClusterSummary(); + private: uint16_t f_ = 0; uint8_t k_max_ = 0; @@ -222,6 +227,8 @@ struct TemperedLB : BaseLB { std::unordered_map shared_block_size_; /// Working bytes for each object std::unordered_map obj_working_bytes_; + /// Current assignment memory/load summary + std::unordered_map> cur_blocks_; /// User-defined memory threshold BytesType mem_thresh_ = 0; }; From 8ccb65f9a504cf299973815bfa064d1b20c63d51 Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Tue, 28 Nov 2023 17:33:41 -0800 Subject: [PATCH 015/126] #2201: temperedlb: clear cur_block_ before recomputing the summary --- src/vt/vrt/collection/balance/temperedlb/temperedlb.cc | 1 + 1 file changed, 1 insertion(+) diff --git 
a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 27ae92358a..35f0a4be9a 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -574,6 +574,7 @@ void TemperedLB::readClustersMemoryData() { } void TemperedLB::computeClusterSummary() { + cur_blocks_.clear(); for (auto const& [shared_id, shared_bytes] : shared_block_size_) { LoadType cluster_load = 0; for (auto const& [obj_id, obj_load] : cur_objs_) { From 81a48f63f314293fa8dd9c168259c92266ef4aa6 Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Tue, 28 Nov 2023 17:35:51 -0800 Subject: [PATCH 016/126] #2201: tools: NOT to merge: for now, add the user-defined problem to repo to make it easy to run --- tools/user-defined-memory-toy-problem/README | 101 +++++++ .../toy_mem.0.json | 285 ++++++++++++++++++ .../toy_mem.1.json | 1 + .../toy_mem.2.json | 1 + .../toy_mem.3.json | 1 + 5 files changed, 389 insertions(+) create mode 100644 tools/user-defined-memory-toy-problem/README create mode 100644 tools/user-defined-memory-toy-problem/toy_mem.0.json create mode 100644 tools/user-defined-memory-toy-problem/toy_mem.1.json create mode 100644 tools/user-defined-memory-toy-problem/toy_mem.2.json create mode 100644 tools/user-defined-memory-toy-problem/toy_mem.3.json diff --git a/tools/user-defined-memory-toy-problem/README b/tools/user-defined-memory-toy-problem/README new file mode 100644 index 0000000000..2222379f91 --- /dev/null +++ b/tools/user-defined-memory-toy-problem/README @@ -0,0 +1,101 @@ +These files describe a toy problem for testing whether a memory-aware load +balancer is achieving a sensible solution. + +The 3D vt index in these files is: + (rank_index, decomp_index_on_rank, task_index_on_decomp). + +Each task appears in the JSON files on its home rank (rank_index) where +communication costs will be zero, so no communication edges were included. +However, see the final paragraph for details about communication patterns that +will emerge when the tasks are migrated off the home rank. + +The "user-defined" section of the JSON data contains the following fields: +- "task_serialized_bytes": This is the serialized size of the task, which can be + used for modeling the migration cost of the task. It should not be included + when computing the memory usage on a rank. +- "shared_id": This uniquely identifies a block of data on which multiple tasks + will operate. While not important, the shared_id was computing using: + shared_id = decomp_index_on_rank * num_ranks + rank_index +- "shared_bytes": This is the size of the block of data being operated on by the + relevant set of tasks. This memory cost will be incurred exactly once on each + MPI rank on which a task with this shared_id exists. +- "task_footprint_bytes": This is the footprinted size of the task in its + non-running state. We will incur this memory cost once for each individual + task, even if there are other tasks on this rank with the same shared_id. This + can be greater than task_serialized_bytes when the task has data members that + have greater capacity than is being used at serialization time. +- "task_working_bytes": This is the high water mark of the additional working + memory required by the individual task, such as temporary memory needed for + intermediate computation. This value does not include memory shared with other + tasks (i.e., shared_bytes), nor does it include the task_footprint bytes or + task_serialized_bytes. 
This cost is incurred for each individual task, but only one at a time because tasks will not run concurrently.
+- "rank_working_bytes": This is the amount of memory that the particular rank
+  needs while processing tasks. This may include global data, constants, and
+  completely unrelated data pre-allocated by the application. It is assumed to
+  be constant over time but may vary from rank to rank. This value does not
+  include shared_bytes, task_working_bytes, task_footprint_bytes, or
+  task_serialized_bytes.
+
+The maximum memory usage for determining if task placement is feasible will be:
+  max_memory_usage = rank_working_bytes + shared_level_memory + max_task_level_memory
+
+Computing shared_level_memory: Let S be the set of unique shared_id values on
+the rank being considered. Then shared_level_memory is simply the sum of
+shared_bytes values for each shared_id in S.
+
+Computing max_task_level_memory: Let T be the set of all tasks on a rank,
+regardless of the shared_id on which they operate. Then max_task_level_memory
+is the sum of task_footprint_bytes values for each task in T plus the maximum
+over the task_working_bytes values for each task in T.
+
+Any communication-aware load balancer should also consider the communication
+implied by this memory data. The task_serialized_bytes is the serialized size
+of the task, so migrating it will require a communication of at least that size
+from the home rank to the target rank. For applications where the shared memory
+corresponding to a shared_id is writeable, at least shared_bytes per unique
+shared_id on a target rank will need to be communicated from the target rank
+back to the home rank after the relevant tasks complete.
+
+***Spoilers***
+
+Each of four ranks has three shared blocks. The memory constraints dictate that
+at most four unique shared_id values can coexist on each rank. Under these
+memory constraints, it is possible to perfectly balance the load (time). There
+is more than one way to do so. The communication cost to migrate a task off-rank
+is extremely low, but the cost to communicate back the result should be
+significant enough to discourage migrating shared_ids to other ranks unless it
+results in a better balanced load.
+
+One of the ranks has exactly the rank-averaged load, so it is best if the tasks
+on that rank are left in place. Another rank has more than twice the
+rank-averaged load. The sum of the loads for the tasks corresponding to one of
+its shared_id values is more than the rank-averaged load, so the tasks for that
+shared_id will need to be split across two ranks to achieve good balance. The
+tasks for the other shared_ids across all ranks do not need to be split across
+multiple ranks to perfectly balance the load (time).
+
+Below is one solution with a perfectly balanced load and decent communication.
+I have not evaluated whether it is optimal.
+ +Rank 0: +[0,1,1],[0,1,3],[0,1,4] (part of block home) +[1,0,0],[1,0,1] (whole block not home) +[2,2,0],[2,2,1] (whole block not home) + +Rank 1: +[1,1,0],[1,1,1] (home) +[1,2,0] (home) +[0,0,0],[0,0,1],[0,0,2] (whole block not home) +[0,1,0],[0,1,2] (part of block not home) + +Rank 2: +[2,0,0],[2,0,1],[2,0,2] (home) +[2,1,0],[2,1,1] (home) +[0,2,0],[0,2,1],[0,2,2] (whole block not home) + +Rank 3: +[3,0,0],[3,0,1],[3,0,2] (home) +[3,1,0],[3,1,1],[3,1,2] (home) +[3,2,0],[3,2,1],[3,2,2] (home) + diff --git a/tools/user-defined-memory-toy-problem/toy_mem.0.json b/tools/user-defined-memory-toy-problem/toy_mem.0.json new file mode 100644 index 0000000000..36de223728 --- /dev/null +++ b/tools/user-defined-memory-toy-problem/toy_mem.0.json @@ -0,0 +1,285 @@ +{ + "type": "LBDatafile", + "phases": [ + { + "id": 0, + "tasks": [ + { + "entity": { + "collection_id": 7, + "home": 0, + "id": 2883587, + "index": [ + 0, + 1, + 4 + ], + "migratable": true, + "type": "object" + }, + "node": 0, + "resource": "cpu", + "time": 10.0, + "user_defined": { + "rank_working_bytes": 980000000.0, + "shared_bytes": 1600000000.0, + "shared_id": 4, + "task_footprint_bytes": 1024.0, + "task_serialized_bytes": 1024.0, + "task_working_bytes": 110000000.0 + } + }, + { + "entity": { + "collection_id": 7, + "home": 0, + "id": 2621443, + "index": [ + 0, + 1, + 3 + ], + "migratable": true, + "type": "object" + }, + "node": 0, + "resource": "cpu", + "time": 35.0, + "user_defined": { + "rank_working_bytes": 980000000.0, + "shared_bytes": 1600000000.0, + "shared_id": 4, + "task_footprint_bytes": 1024.0, + "task_serialized_bytes": 1024.0, + "task_working_bytes": 110000000.0 + } + }, + { + "entity": { + "collection_id": 7, + "home": 0, + "id": 2359299, + "index": [ + 0, + 2, + 2 + ], + "migratable": true, + "type": "object" + }, + "node": 0, + "resource": "cpu", + "time": 10.0, + "user_defined": { + "rank_working_bytes": 980000000.0, + "shared_bytes": 1600000000.0, + "shared_id": 8, + "task_footprint_bytes": 1024.0, + "task_serialized_bytes": 1024.0, + "task_working_bytes": 110000000.0 + } + }, + { + "entity": { + "collection_id": 7, + "home": 0, + "id": 2097155, + "index": [ + 0, + 1, + 2 + ], + "migratable": true, + "type": "object" + }, + "node": 0, + "resource": "cpu", + "time": 25.0, + "user_defined": { + "rank_working_bytes": 980000000.0, + "shared_bytes": 1600000000.0, + "shared_id": 4, + "task_footprint_bytes": 1024.0, + "task_serialized_bytes": 1024.0, + "task_working_bytes": 110000000.0 + } + }, + { + "entity": { + "collection_id": 7, + "home": 0, + "id": 1835011, + "index": [ + 0, + 0, + 2 + ], + "migratable": true, + "type": "object" + }, + "node": 0, + "resource": "cpu", + "time": 10.0, + "user_defined": { + "rank_working_bytes": 980000000.0, + "shared_bytes": 1600000000.0, + "shared_id": 0, + "task_footprint_bytes": 1024.0, + "task_serialized_bytes": 1024.0, + "task_working_bytes": 110000000.0 + } + }, + { + "entity": { + "collection_id": 7, + "home": 0, + "id": 524291, + "index": [ + 0, + 1, + 0 + ], + "migratable": true, + "type": "object" + }, + "node": 0, + "resource": "cpu", + "time": 20.0, + "user_defined": { + "rank_working_bytes": 980000000.0, + "shared_bytes": 1600000000.0, + "shared_id": 4, + "task_footprint_bytes": 1024.0, + "task_serialized_bytes": 1024.0, + "task_working_bytes": 110000000.0 + } + }, + { + "entity": { + "collection_id": 7, + "home": 0, + "id": 262147, + "index": [ + 0, + 0, + 0 + ], + "migratable": true, + "type": "object" + }, + "node": 0, + "resource": "cpu", + "time": 10.0, + 
"user_defined": { + "rank_working_bytes": 980000000.0, + "shared_bytes": 1600000000.0, + "shared_id": 0, + "task_footprint_bytes": 1024.0, + "task_serialized_bytes": 1024.0, + "task_working_bytes": 110000000.0 + } + }, + { + "entity": { + "collection_id": 7, + "home": 0, + "id": 786435, + "index": [ + 0, + 2, + 0 + ], + "migratable": true, + "type": "object" + }, + "node": 0, + "resource": "cpu", + "time": 20.0, + "user_defined": { + "rank_working_bytes": 980000000.0, + "shared_bytes": 1600000000.0, + "shared_id": 8, + "task_footprint_bytes": 1024.0, + "task_serialized_bytes": 1024.0, + "task_working_bytes": 110000000.0 + } + }, + { + "entity": { + "collection_id": 7, + "home": 0, + "id": 1048579, + "index": [ + 0, + 0, + 1 + ], + "migratable": true, + "type": "object" + }, + "node": 0, + "resource": "cpu", + "time": 15.0, + "user_defined": { + "rank_working_bytes": 980000000.0, + "shared_bytes": 1600000000.0, + "shared_id": 0, + "task_footprint_bytes": 1024.0, + "task_serialized_bytes": 1024.0, + "task_working_bytes": 110000000.0 + } + }, + { + "entity": { + "collection_id": 7, + "home": 0, + "id": 1310723, + "index": [ + 0, + 1, + 1 + ], + "migratable": true, + "type": "object" + }, + "node": 0, + "resource": "cpu", + "time": 30.0, + "user_defined": { + "rank_working_bytes": 980000000.0, + "shared_bytes": 1600000000.0, + "shared_id": 4, + "task_footprint_bytes": 1024.0, + "task_serialized_bytes": 1024.0, + "task_working_bytes": 110000000.0 + } + }, + { + "entity": { + "collection_id": 7, + "home": 0, + "id": 1572867, + "index": [ + 0, + 2, + 1 + ], + "migratable": true, + "type": "object" + }, + "node": 0, + "resource": "cpu", + "time": 5.0, + "user_defined": { + "rank_working_bytes": 980000000.0, + "shared_bytes": 1600000000.0, + "shared_id": 8, + "task_footprint_bytes": 1024.0, + "task_serialized_bytes": 1024.0, + "task_working_bytes": 110000000.0 + } + } + ] + } + ] +} diff --git a/tools/user-defined-memory-toy-problem/toy_mem.1.json b/tools/user-defined-memory-toy-problem/toy_mem.1.json new file mode 100644 index 0000000000..875b316c0b --- /dev/null +++ b/tools/user-defined-memory-toy-problem/toy_mem.1.json @@ -0,0 +1 @@ 
+{"type":"LBDatafile","phases":[{"id":0,"tasks":[{"entity":{"collection_id":7,"home":1,"id":1310727,"index":[1,1,1],"migratable":true,"type":"object"},"node":1,"resource":"cpu","time":2.5,"user_defined":{"rank_working_bytes":980000000.0,"shared_bytes":1600000000.0,"shared_id":5,"task_footprint_bytes":1024.0,"task_serialized_bytes":1024.0,"task_working_bytes":110000000.0}},{"entity":{"collection_id":7,"home":1,"id":1048583,"index":[1,0,1],"migratable":true,"type":"object"},"node":1,"resource":"cpu","time":5.0,"user_defined":{"rank_working_bytes":980000000.0,"shared_bytes":1600000000.0,"shared_id":1,"task_footprint_bytes":1024.0,"task_serialized_bytes":1024.0,"task_working_bytes":110000000.0}},{"entity":{"collection_id":7,"home":1,"id":786439,"index":[1,2,0],"migratable":true,"type":"object"},"node":1,"resource":"cpu","time":5.0,"user_defined":{"rank_working_bytes":980000000.0,"shared_bytes":1600000000.0,"shared_id":9,"task_footprint_bytes":1024.0,"task_serialized_bytes":1024.0,"task_working_bytes":110000000.0}},{"entity":{"collection_id":7,"home":1,"id":262151,"index":[1,0,0],"migratable":true,"type":"object"},"node":1,"resource":"cpu","time":5.0,"user_defined":{"rank_working_bytes":980000000.0,"shared_bytes":1600000000.0,"shared_id":1,"task_footprint_bytes":1024.0,"task_serialized_bytes":1024.0,"task_working_bytes":110000000.0}},{"entity":{"collection_id":7,"home":1,"id":524295,"index":[1,1,0],"migratable":true,"type":"object"},"node":1,"resource":"cpu","time":2.5,"user_defined":{"rank_working_bytes":980000000.0,"shared_bytes":1600000000.0,"shared_id":5,"task_footprint_bytes":1024.0,"task_serialized_bytes":1024.0,"task_working_bytes":110000000.0}}]}]} \ No newline at end of file diff --git a/tools/user-defined-memory-toy-problem/toy_mem.2.json b/tools/user-defined-memory-toy-problem/toy_mem.2.json new file mode 100644 index 0000000000..e77e11c03a --- /dev/null +++ b/tools/user-defined-memory-toy-problem/toy_mem.2.json @@ -0,0 +1 @@ 
+{"type":"LBDatafile","phases":[{"id":0,"tasks":[{"entity":{"collection_id":7,"home":2,"id":1835019,"index":[2,0,2],"migratable":true,"type":"object"},"node":2,"resource":"cpu","time":15.0,"user_defined":{"rank_working_bytes":980000000.0,"shared_bytes":1600000000.0,"shared_id":2,"task_footprint_bytes":1024.0,"task_serialized_bytes":1024.0,"task_working_bytes":110000000.0}},{"entity":{"collection_id":7,"home":2,"id":524299,"index":[2,1,0],"migratable":true,"type":"object"},"node":2,"resource":"cpu","time":10.0,"user_defined":{"rank_working_bytes":980000000.0,"shared_bytes":1600000000.0,"shared_id":6,"task_footprint_bytes":1024.0,"task_serialized_bytes":1024.0,"task_working_bytes":110000000.0}},{"entity":{"collection_id":7,"home":2,"id":262155,"index":[2,0,0],"migratable":true,"type":"object"},"node":2,"resource":"cpu","time":5.0,"user_defined":{"rank_working_bytes":980000000.0,"shared_bytes":1600000000.0,"shared_id":2,"task_footprint_bytes":1024.0,"task_serialized_bytes":1024.0,"task_working_bytes":110000000.0}},{"entity":{"collection_id":7,"home":2,"id":786443,"index":[2,2,0],"migratable":true,"type":"object"},"node":2,"resource":"cpu","time":2.5,"user_defined":{"rank_working_bytes":980000000.0,"shared_bytes":1600000000.0,"shared_id":10,"task_footprint_bytes":1024.0,"task_serialized_bytes":1024.0,"task_working_bytes":110000000.0}},{"entity":{"collection_id":7,"home":2,"id":1048587,"index":[2,0,1],"migratable":true,"type":"object"},"node":2,"resource":"cpu","time":10.0,"user_defined":{"rank_working_bytes":980000000.0,"shared_bytes":1600000000.0,"shared_id":2,"task_footprint_bytes":1024.0,"task_serialized_bytes":1024.0,"task_working_bytes":110000000.0}},{"entity":{"collection_id":7,"home":2,"id":1310731,"index":[2,1,1],"migratable":true,"type":"object"},"node":2,"resource":"cpu","time":5.0,"user_defined":{"rank_working_bytes":980000000.0,"shared_bytes":1600000000.0,"shared_id":6,"task_footprint_bytes":1024.0,"task_serialized_bytes":1024.0,"task_working_bytes":110000000.0}},{"entity":{"collection_id":7,"home":2,"id":1572875,"index":[2,2,1],"migratable":true,"type":"object"},"node":2,"resource":"cpu","time":2.5,"user_defined":{"rank_working_bytes":980000000.0,"shared_bytes":1600000000.0,"shared_id":10,"task_footprint_bytes":1024.0,"task_serialized_bytes":1024.0,"task_working_bytes":110000000.0}}]}]} \ No newline at end of file diff --git a/tools/user-defined-memory-toy-problem/toy_mem.3.json b/tools/user-defined-memory-toy-problem/toy_mem.3.json new file mode 100644 index 0000000000..b0ef40536a --- /dev/null +++ b/tools/user-defined-memory-toy-problem/toy_mem.3.json @@ -0,0 +1 @@ 
+{"type":"LBDatafile","phases":[{"id":0,"tasks":[{"entity":{"collection_id":7,"home":3,"id":2359311,"index":[3,2,2],"migratable":true,"type":"object"},"node":3,"resource":"cpu","time":10.0,"user_defined":{"rank_working_bytes":980000000.0,"shared_bytes":1600000000.0,"shared_id":11,"task_footprint_bytes":1024.0,"task_serialized_bytes":1024.0,"task_working_bytes":110000000.0}},{"entity":{"collection_id":7,"home":3,"id":2097167,"index":[3,1,2],"migratable":true,"type":"object"},"node":3,"resource":"cpu","time":15.0,"user_defined":{"rank_working_bytes":980000000.0,"shared_bytes":1600000000.0,"shared_id":7,"task_footprint_bytes":1024.0,"task_serialized_bytes":1024.0,"task_working_bytes":110000000.0}},{"entity":{"collection_id":7,"home":3,"id":1835023,"index":[3,0,2],"migratable":true,"type":"object"},"node":3,"resource":"cpu","time":5.0,"user_defined":{"rank_working_bytes":980000000.0,"shared_bytes":1600000000.0,"shared_id":3,"task_footprint_bytes":1024.0,"task_serialized_bytes":1024.0,"task_working_bytes":110000000.0}},{"entity":{"collection_id":7,"home":3,"id":524303,"index":[3,1,0],"migratable":true,"type":"object"},"node":3,"resource":"cpu","time":10.0,"user_defined":{"rank_working_bytes":980000000.0,"shared_bytes":1600000000.0,"shared_id":7,"task_footprint_bytes":1024.0,"task_serialized_bytes":1024.0,"task_working_bytes":110000000.0}},{"entity":{"collection_id":7,"home":3,"id":262159,"index":[3,0,0],"migratable":true,"type":"object"},"node":3,"resource":"cpu","time":10.0,"user_defined":{"rank_working_bytes":980000000.0,"shared_bytes":1600000000.0,"shared_id":3,"task_footprint_bytes":1024.0,"task_serialized_bytes":1024.0,"task_working_bytes":110000000.0}},{"entity":{"collection_id":7,"home":3,"id":786447,"index":[3,2,0],"migratable":true,"type":"object"},"node":3,"resource":"cpu","time":5.0,"user_defined":{"rank_working_bytes":980000000.0,"shared_bytes":1600000000.0,"shared_id":11,"task_footprint_bytes":1024.0,"task_serialized_bytes":1024.0,"task_working_bytes":110000000.0}},{"entity":{"collection_id":7,"home":3,"id":1048591,"index":[3,0,1],"migratable":true,"type":"object"},"node":3,"resource":"cpu","time":5.0,"user_defined":{"rank_working_bytes":980000000.0,"shared_bytes":1600000000.0,"shared_id":3,"task_footprint_bytes":1024.0,"task_serialized_bytes":1024.0,"task_working_bytes":110000000.0}},{"entity":{"collection_id":7,"home":3,"id":1310735,"index":[3,1,1],"migratable":true,"type":"object"},"node":3,"resource":"cpu","time":20.0,"user_defined":{"rank_working_bytes":980000000.0,"shared_bytes":1600000000.0,"shared_id":7,"task_footprint_bytes":1024.0,"task_serialized_bytes":1024.0,"task_working_bytes":110000000.0}},{"entity":{"collection_id":7,"home":3,"id":1572879,"index":[3,2,1],"migratable":true,"type":"object"},"node":3,"resource":"cpu","time":10.0,"user_defined":{"rank_working_bytes":980000000.0,"shared_bytes":1600000000.0,"shared_id":11,"task_footprint_bytes":1024.0,"task_serialized_bytes":1024.0,"task_working_bytes":110000000.0}}]}]} \ No newline at end of file From d3ff67c0a91533e8399396a038e259b409ae2312 Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Tue, 28 Nov 2023 17:43:15 -0800 Subject: [PATCH 017/126] #2201: temperedlb: add missing check for zero cluster size --- src/vt/vrt/collection/balance/temperedlb/temperedlb.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 35f0a4be9a..afed82148d 100644 --- 
a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -584,7 +584,9 @@ void TemperedLB::computeClusterSummary() { } } } - cur_blocks_[shared_id] = std::make_tuple(shared_bytes, cluster_load); + if (cluster_load != 0) { + cur_blocks_[shared_id] = std::make_tuple(shared_bytes, cluster_load); + } } } From 0b0c81c5ca559fcf1779d526c56f7be15c4fa6c2 Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Tue, 28 Nov 2023 22:07:18 -0800 Subject: [PATCH 018/126] #2201: temperedlb: add cluster summary to messages --- .../balance/temperedlb/tempered_msgs.h | 21 +++++++++++++++++++ .../balance/temperedlb/temperedlb.cc | 10 +++++++-- .../balance/temperedlb/temperedlb.h | 4 +--- 3 files changed, 30 insertions(+), 5 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/tempered_msgs.h b/src/vt/vrt/collection/balance/temperedlb/tempered_msgs.h index 5eb188a827..526bc2bd9b 100644 --- a/src/vt/vrt/collection/balance/temperedlb/tempered_msgs.h +++ b/src/vt/vrt/collection/balance/temperedlb/tempered_msgs.h @@ -49,6 +49,15 @@ #include #include +namespace vt::vrt::collection::lb { + +using SharedIDType = int; +using BytesType = double; +using ClusterSummaryType = + std::unordered_map>; + +} /* end namespace vt::vrt::collection::lb */ + namespace vt { namespace vrt { namespace collection { namespace balance { struct LoadMsg : vt::Message { @@ -56,6 +65,8 @@ struct LoadMsg : vt::Message { vt_msg_serialize_required(); // node_load_ using NodeLoadType = std::unordered_map; + using NodeClusterSummaryType = + std::unordered_map; LoadMsg() = default; LoadMsg(NodeType in_from_node, NodeLoadType const& in_node_load) @@ -66,10 +77,18 @@ struct LoadMsg : vt::Message { return node_load_; } + NodeClusterSummaryType const& getNodeClusterSummary() const { + return node_cluster_summary_; + } + void addNodeLoad(NodeType node, LoadType load) { node_load_[node] = load; } + void addNodeClusters(NodeType node, lb::ClusterSummaryType summary) { + node_cluster_summary_[node] = summary; + } + NodeType getFromNode() const { return from_node_; } template @@ -77,11 +96,13 @@ struct LoadMsg : vt::Message { MessageParentType::serialize(s); s | from_node_; s | node_load_; + s | node_cluster_summary_; } private: NodeType from_node_ = uninitialized_destination; NodeLoadType node_load_ = {}; + NodeClusterSummaryType node_cluster_summary_ = {}; }; struct LoadMsgAsync : LoadMsg { diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index afed82148d..9c1ef26800 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -590,7 +590,7 @@ void TemperedLB::computeClusterSummary() { } } -TemperedLB::BytesType TemperedLB::computeMemoryUsage() const { +BytesType TemperedLB::computeMemoryUsage() const { // Compute bytes used by shared blocks mapped here based on object mapping auto const blocks_here = getSharedBlocksHere(); @@ -617,7 +617,7 @@ TemperedLB::BytesType TemperedLB::computeMemoryUsage() const { return rank_bytes_ + total_shared_bytes + max_object_working_bytes; } -std::set TemperedLB::getSharedBlocksHere() const { +std::set TemperedLB::getSharedBlocksHere() const { std::set blocks_here; for (auto const& [obj, _] : cur_objs_) { if (obj_shared_block_.find(obj) != obj_shared_block_.end()) { @@ -1014,6 +1014,9 @@ void TemperedLB::propagateRound(uint8_t k_cur, bool sync, EpochType epoch) { envelopeSetEpoch(msg->env, epoch); } 
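For readers tracking the cluster-summary plumbing introduced in this patch: each summary entry maps a shared-block id to the pair (block size in bytes, total load of the local objects referencing that block), and ids whose aggregated load is zero are skipped, which is the check patch 017 adds above; the resulting map is what gets piggybacked on the load messages via addNodeClusters below. The following is a minimal standalone sketch of that aggregation; the type alias and all sample values are simplified stand-ins, not the library's actual declarations.

#include <cstdio>
#include <tuple>
#include <unordered_map>

using SharedIDType = int;
using BytesType = double;
using LoadType = double;
// Same shape as the summary piggybacked on the load messages: id -> (bytes, load)
using ClusterSummaryType =
  std::unordered_map<SharedIDType, std::tuple<BytesType, LoadType>>;

int main() {
  // Invented sample data standing in for shared_block_size_, cur_objs_,
  // and obj_shared_block_; block 8 has no local objects, so it will not
  // appear in the summary
  std::unordered_map<SharedIDType, BytesType> shared_block_size =
    {{0, 1.6e9}, {4, 1.6e9}, {8, 1.6e9}};
  std::unordered_map<int, LoadType> cur_objs = {{1, 10.0}, {2, 20.0}, {3, 15.0}};
  std::unordered_map<int, SharedIDType> obj_shared_block = {{1, 0}, {2, 0}, {3, 4}};

  ClusterSummaryType summary;
  for (auto const& [shared_id, shared_bytes] : shared_block_size) {
    LoadType cluster_load = 0;
    for (auto const& [obj_id, obj_load] : cur_objs) {
      auto it = obj_shared_block.find(obj_id);
      if (it != obj_shared_block.end() and it->second == shared_id) {
        cluster_load += obj_load;
      }
    }
    if (cluster_load != 0) { // skip clusters with no local load (the patch 017 check)
      summary[shared_id] = std::make_tuple(shared_bytes, cluster_load);
    }
  }

  for (auto const& [id, value] : summary) {
    auto const& [bytes, load] = value;
    std::printf("cluster id=%d bytes=%.0f load=%.1f\n", id, bytes, load);
  }
  return 0;
}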
msg->addNodeLoad(this_node, this_new_load_); + if (has_memory_data_) { + msg->addNodeClusters(this_node, cur_blocks_); + } proxy_[random_node].sendMsg< LoadMsgSync, &TemperedLB::propagateIncomingSync >(msg.get()); @@ -1024,6 +1027,9 @@ void TemperedLB::propagateRound(uint8_t k_cur, bool sync, EpochType epoch) { envelopeSetEpoch(msg->env, epoch); } msg->addNodeLoad(this_node, this_new_load_); + if (has_memory_data_) { + msg->addNodeClusters(this_node, cur_blocks_); + } proxy_[random_node].sendMsg< LoadMsgAsync, &TemperedLB::propagateIncomingAsync >(msg.get()); diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h index 104a5494e3..7a3318f78c 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h @@ -67,8 +67,6 @@ struct TemperedLB : BaseLB { using ReduceMsgType = vt::collective::ReduceNoneMsg; using QuantityType = std::map; using StatisticMapType = std::unordered_map; - using SharedIDType = int; - using BytesType = double; TemperedLB() = default; TemperedLB(TemperedLB const&) = delete; @@ -228,7 +226,7 @@ struct TemperedLB : BaseLB { /// Working bytes for each object std::unordered_map obj_working_bytes_; /// Current assignment memory/load summary - std::unordered_map> cur_blocks_; + ClusterSummaryType cur_blocks_; /// User-defined memory threshold BytesType mem_thresh_ = 0; }; From 66c778d21d1b8d783aed27add3ce5eaa1b6a426e Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Tue, 28 Nov 2023 22:17:03 -0800 Subject: [PATCH 019/126] #2201: temperedlb: add data structures to track other rank's clusters --- .../balance/temperedlb/temperedlb.cc | 49 ++++++++++++++++++- .../balance/temperedlb/temperedlb.h | 2 + 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 9c1ef26800..55b6a1cccd 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -665,6 +665,10 @@ void TemperedLB::doLBStages(LoadType start_imb) { underloaded_.clear(); load_info_.clear(); is_overloaded_ = is_underloaded_ = false; + other_rank_clusters_.clear(); + + // Not clearing shared_block_size_ because this never changes and + // the knowledge might be useful } vt_debug_print( @@ -685,11 +689,12 @@ void TemperedLB::doLBStages(LoadType start_imb) { computeClusterSummary(); + // Verbose printing about local clusters for (auto const& [shared_id, value] : cur_blocks_) { auto const& [shared_bytes, cluster_load] = value; vt_print( temperedlb, - "Cluster: id={}, bytes={}, load={}\n", + "Local cluster: id={}, bytes={}, load={}\n", shared_id, shared_bytes, cluster_load ); } @@ -713,6 +718,28 @@ void TemperedLB::doLBStages(LoadType start_imb) { vtAbort("TemperedLB:: Unsupported inform type"); } + // Some very verbose printing about all remote clusters we know about that + // we can shut off later + for (auto const& [node, clusters] : other_rank_clusters_) { + for (auto const& [shared_id, value] : clusters) { + auto const& [shared_bytes, cluster_load] = value; + vt_print( + temperedlb, + "Remote cluster: node={}, id={}, bytes={}, load={}\n", + node, shared_id, shared_bytes, cluster_load + ); + } + } + + // Move remove cluster information to shared_block_size_ so we have all + // the sizes in the same place + for (auto const& [node, clusters] : other_rank_clusters_) { + for (auto const& [shared_id, 
value] : clusters) { + auto const& [shared_bytes, _] = value; + shared_block_size_[shared_id] = shared_bytes; + } + } + // Execute transfer stage switch (transfer_type_) { case TransferTypeEnum::Original: @@ -1048,6 +1075,16 @@ void TemperedLB::propagateIncomingAsync(LoadMsgAsync* msg) { trial_, iter_, k_max_, k_cur_async, from_node, msg->getNodeLoad().size() ); + auto const this_node = theContext()->getNode(); + for (auto const& [node, clusters] : msg->getNodeClusterSummary()) { + if ( + node != this_node and + other_rank_clusters_.find(node) == other_rank_clusters_.end() + ) { + other_rank_clusters_[node] = clusters; + } + } + for (auto&& elm : msg->getNodeLoad()) { if (load_info_.find(elm.first) == load_info_.end()) { load_info_[elm.first] = elm.second; @@ -1082,6 +1119,16 @@ void TemperedLB::propagateIncomingSync(LoadMsgSync* msg) { trial_, iter_, k_max_, k_cur_, from_node, msg->getNodeLoad().size() ); + auto const this_node = theContext()->getNode(); + for (auto const& [node, clusters] : msg->getNodeClusterSummary()) { + if ( + node != this_node and + other_rank_clusters_.find(node) == other_rank_clusters_.end() + ) { + other_rank_clusters_[node] = clusters; + } + } + for (auto&& elm : msg->getNodeLoad()) { if (new_load_info_.find(elm.first) == new_load_info_.end()) { new_load_info_[elm.first] = elm.second; diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h index 7a3318f78c..eeba67f3d3 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h @@ -227,6 +227,8 @@ struct TemperedLB : BaseLB { std::unordered_map obj_working_bytes_; /// Current assignment memory/load summary ClusterSummaryType cur_blocks_; + /// Clusters that we know of on other ranks (might be out of date) + std::unordered_map other_rank_clusters_; /// User-defined memory threshold BytesType mem_thresh_ = 0; }; From 25aa87e4d751d02346eb0f23fd962780d08a9c7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20P=2E=20P=C3=A9ba=C3=BF?= Date: Wed, 29 Nov 2023 06:26:46 -0500 Subject: [PATCH 020/126] #2201: fixed EOL CI error --- tools/user-defined-memory-toy-problem/README | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/user-defined-memory-toy-problem/README b/tools/user-defined-memory-toy-problem/README index 2222379f91..920c57f7e9 100644 --- a/tools/user-defined-memory-toy-problem/README +++ b/tools/user-defined-memory-toy-problem/README @@ -97,5 +97,4 @@ Rank 2: Rank 3: [3,0,0],[3,0,1],[3,0,2] (home) [3,1,0],[3,1,1],[3,1,2] (home) -[3,2,0],[3,2,1],[3,2,2] (home) - +[3,2,0],[3,2,1],[3,2,2] (home) \ No newline at end of file From 2077781f0a7aa51b77222ae1785ecdf4b374c8dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20P=2E=20P=C3=A9ba=C3=BF?= Date: Wed, 29 Nov 2023 11:12:46 -0500 Subject: [PATCH 021/126] #2201: fixed print errors; added pseudocode; and epoch boilerplate --- .../balance/temperedlb/temperedlb.cc | 57 +++++++++++++++++-- 1 file changed, 53 insertions(+), 4 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 55b6a1cccd..297893d413 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -1434,7 +1434,7 @@ void TemperedLB::originalTransfer() { // Initialize transfer and rejection counters int n_transfers = 0, n_rejected = 0; - // Try to migrate objects only from overloaded objects + 
// Try to migrate objects only from overloaded ranks if (is_overloaded_) { std::vector under = makeUnderloaded(); std::unordered_map migrate_objs; @@ -1473,7 +1473,7 @@ void TemperedLB::originalTransfer() { vt_debug_print( verbose, temperedlb, - "TemperedLB::decide: selected_node={}, load_info_.size()={}\n", + "TemperedLB::originalTransfer: selected_node={}, load_info_.size()={}\n", selected_node, load_info_.size() ); @@ -1488,7 +1488,7 @@ void TemperedLB::originalTransfer() { ); vt_debug_print( verbose, temperedlb, - "TemperedLB::decide: trial={}, iter={}, under.size()={}, " + "TemperedLB::originalTransfer: trial={}, iter={}, under.size()={}, " "selected_node={}, selected_load={:e}, obj_id={:x}, home={}, " "obj_load={}, target_max_load={}, this_new_load_={}, " "criterion={}\n", @@ -1544,7 +1544,7 @@ void TemperedLB::originalTransfer() { if (theConfig()->vt_debug_temperedlb) { // compute rejection rate because it will be printed - runInEpochCollective("TemperedLB::decide -> compute rejection", [=] { + runInEpochCollective("TemperedLB::originalTransfer -> compute rejection", [=] { proxy_.allreduce<&TemperedLB::rejectionStatsHandler, collective::PlusOp>( n_rejected, n_transfers ); @@ -1552,6 +1552,55 @@ void TemperedLB::originalTransfer() { } } +void TemperedLB::swapClusters() { + auto lazy_epoch = theTerm()->makeEpochCollective("TemperedLB: swapClusters"); + + // Initialize transfer and rejection counters + int n_transfers = 0, n_rejected = 0; + + // Try to migrate objects only from overloaded ranks + if (is_overloaded_) { + // Compute collection of potential targets + std::vector under = makeUnderloaded(); + std::unordered_map migrate_objs; + if (under.size() > 0) { + std::vector ordered_obj_ids = orderObjects( + obj_ordering_, cur_objs_, this_new_load_, target_max_load_ + ); + + // Cluster migratable objects on source rank + + // Iterage over potential targets to try to swap clusters + + //// Iteratr over target clusters + + ////// Decide whether swap is beneficial + + + //////// If swap is beneficial compute source cluster size + //////// Test whether criterion is creater than swap RTOL times source size + + ////////// Only in this case perform swap + ////////// Else reject swap + + } // if (under.size() > 0) + } // if (is_overloaded_) + + // Finalize epoch + theTerm()->finishedEpoch(lazy_epoch); + vt::runSchedulerThrough(lazy_epoch); + + // Report on rejection rate in debug mode + if (theConfig()->vt_debug_temperedlb) { + runInEpochCollective("TemperedLB::swapClusters -> compute rejection", [=] { + proxy_.allreduce<&TemperedLB::rejectionStatsHandler, collective::PlusOp>( + n_rejected, n_transfers + ); + }); + } + +} // void TemperedLB::originalTransfer() + void TemperedLB::thunkMigrations() { vt_debug_print( normal, temperedlb, From e96a09fd3a08819fecbbfc933fa77c30ee5c930b Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Wed, 29 Nov 2023 09:10:50 -0800 Subject: [PATCH 022/126] #2201: temperedlb: add swap clusters call, fix git history --- src/vt/vrt/collection/balance/temperedlb/temperedlb.cc | 4 ++-- src/vt/vrt/collection/balance/temperedlb/temperedlb.h | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 297893d413..5309851602 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -749,7 +749,7 @@ void TemperedLB::doLBStages(LoadType start_imb) { vtAbort("TemperedLB:: 
Unimplemented transfer type: Recursive"); break; case TransferTypeEnum::SwapClusters: - vtAbort("TemperedLB:: Unimplemented transfer type: SwapClusters"); + swapClusters(); break; default: vtAbort("TemperedLB:: Unsupported transfer type"); @@ -1429,7 +1429,7 @@ std::vector TemperedLB::orderObjects( } void TemperedLB::originalTransfer() { - auto lazy_epoch = theTerm()->makeEpochCollective("TemperedLB: decide"); + auto lazy_epoch = theTerm()->makeEpochCollective("TemperedLB: originalTransfer"); // Initialize transfer and rejection counters int n_transfers = 0, n_rejected = 0; diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h index eeba67f3d3..512732437e 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h @@ -91,6 +91,7 @@ struct TemperedLB : BaseLB { void informAsync(); void informSync(); void originalTransfer(); + void swapClusters(); void migrate(); void propagateRound(uint8_t k_cur_async, bool sync, EpochType epoch = no_epoch); From 94d04afae99f40b875809a7624f4c22f73decdd4 Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Wed, 29 Nov 2023 09:35:31 -0800 Subject: [PATCH 023/126] #2201: temperedlb: fix whitespace --- src/vt/vrt/collection/balance/temperedlb/temperedlb.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 5309851602..e224790c49 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -1598,7 +1598,6 @@ void TemperedLB::swapClusters() { ); }); } - } // void TemperedLB::originalTransfer() void TemperedLB::thunkMigrations() { From d4b2e0f455e825c9d4f63892350c99f007c19054 Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Wed, 29 Nov 2023 12:08:57 -0800 Subject: [PATCH 024/126] #2201: temperedlb: rename cur_blocks_ to cur_clusters_ --- src/vt/vrt/collection/balance/temperedlb/temperedlb.cc | 10 +++++----- src/vt/vrt/collection/balance/temperedlb/temperedlb.h | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index e224790c49..3539c16ef9 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -574,7 +574,7 @@ void TemperedLB::readClustersMemoryData() { } void TemperedLB::computeClusterSummary() { - cur_blocks_.clear(); + cur_clusters_.clear(); for (auto const& [shared_id, shared_bytes] : shared_block_size_) { LoadType cluster_load = 0; for (auto const& [obj_id, obj_load] : cur_objs_) { @@ -585,7 +585,7 @@ void TemperedLB::computeClusterSummary() { } } if (cluster_load != 0) { - cur_blocks_[shared_id] = std::make_tuple(shared_bytes, cluster_load); + cur_clusters_[shared_id] = std::make_tuple(shared_bytes, cluster_load); } } } @@ -690,7 +690,7 @@ void TemperedLB::doLBStages(LoadType start_imb) { computeClusterSummary(); // Verbose printing about local clusters - for (auto const& [shared_id, value] : cur_blocks_) { + for (auto const& [shared_id, value] : cur_clusters_) { auto const& [shared_bytes, cluster_load] = value; vt_print( temperedlb, @@ -1042,7 +1042,7 @@ void TemperedLB::propagateRound(uint8_t k_cur, bool sync, EpochType epoch) { } msg->addNodeLoad(this_node, this_new_load_); if (has_memory_data_) { - 
msg->addNodeClusters(this_node, cur_blocks_); + msg->addNodeClusters(this_node, cur_clusters_); } proxy_[random_node].sendMsg< LoadMsgSync, &TemperedLB::propagateIncomingSync @@ -1055,7 +1055,7 @@ void TemperedLB::propagateRound(uint8_t k_cur, bool sync, EpochType epoch) { } msg->addNodeLoad(this_node, this_new_load_); if (has_memory_data_) { - msg->addNodeClusters(this_node, cur_blocks_); + msg->addNodeClusters(this_node, cur_clusters_); } proxy_[random_node].sendMsg< LoadMsgAsync, &TemperedLB::propagateIncomingAsync diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h index 512732437e..478003c4f8 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h @@ -226,8 +226,8 @@ struct TemperedLB : BaseLB { std::unordered_map shared_block_size_; /// Working bytes for each object std::unordered_map obj_working_bytes_; - /// Current assignment memory/load summary - ClusterSummaryType cur_blocks_; + /// Cluster summary based on current local assignment + ClusterSummaryType cur_clusters_; /// Clusters that we know of on other ranks (might be out of date) std::unordered_map other_rank_clusters_; /// User-defined memory threshold From 30ba257f480cc6f6fe8019e01319dd99db20f985 Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Thu, 30 Nov 2023 15:42:07 -0800 Subject: [PATCH 025/126] #2201: temperedlb: sketch of some code written in the meeting --- .../balance/temperedlb/temperedlb.cc | 41 ++++++++++--------- .../balance/temperedlb/temperedlb.h | 2 + 2 files changed, 24 insertions(+), 19 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 3539c16ef9..2d23e29436 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -1552,6 +1552,20 @@ void TemperedLB::originalTransfer() { } } +// void TemperedLB::tryLock( +// NodeType requesting_node, double criterion_value, SharedIDType cluster_id +// ) { +// // some logic + +// // if yes +// is_locked = true; +// proxy_[requesting_node].template send(/*full info on cluster*/); +// } + +// void TemperedLB::lockObtained() { + +// } + void TemperedLB::swapClusters() { auto lazy_epoch = theTerm()->makeEpochCollective("TemperedLB: swapClusters"); @@ -1559,32 +1573,21 @@ void TemperedLB::swapClusters() { int n_transfers = 0, n_rejected = 0; // Try to migrate objects only from overloaded ranks - if (is_overloaded_) { - // Compute collection of potential targets - std::vector under = makeUnderloaded(); - std::unordered_map migrate_objs; - if (under.size() > 0) { - std::vector ordered_obj_ids = orderObjects( - obj_ordering_, cur_objs_, this_new_load_, target_max_load_ - ); + // Compute collection of potential targets + // std::vector targets = other_rank_clusters_.keys(); + // sample cmf + // Iterage over potential targets to try to swap clusters - // Cluster migratable objects on source rank + //// Iteratr over target clusters - // Iterage over potential targets to try to swap clusters - - //// Iteratr over target clusters - - ////// Decide whether swap is beneficial + ////// Decide whether swap is beneficial //////// If swap is beneficial compute source cluster size //////// Test whether criterion is creater than swap RTOL times source size - ////////// Only in this case perform swap - ////////// Else reject swap - - } // if (under.size() > 0) - } // if 
(is_overloaded_) + // send try-lock message, with numerical criterion value + // spin in the scheduler // Finalize epoch theTerm()->finishedEpoch(lazy_epoch); diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h index 478003c4f8..8a65eb715f 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h @@ -232,6 +232,8 @@ struct TemperedLB : BaseLB { std::unordered_map other_rank_clusters_; /// User-defined memory threshold BytesType mem_thresh_ = 0; + /// + bool is_locked = false; }; }}}} /* end namespace vt::vrt::collection::lb */ From bf3ff66135cd85bb5b981268bc66c08435a540fb Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Mon, 4 Dec 2023 12:05:41 -0800 Subject: [PATCH 026/126] #2201: temperedlb: use new method of getting user data, fix error message for shared_id --- .../balance/temperedlb/temperedlb.cc | 94 ++++++++++--------- 1 file changed, 50 insertions(+), 44 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 2d23e29436..7d1b7c2608 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -519,58 +519,64 @@ void TemperedLB::runLB(LoadType total_load) { } void TemperedLB::readClustersMemoryData() { - if (user_data_) { - for (auto const& [obj, data_map] : *user_data_) { - SharedIDType shared_id = -1; - BytesType shared_bytes = 0; - BytesType working_bytes = 0; - for (auto const& [key, variant] : data_map) { - if (key == "shared_id") { - // Because of how JSON is stored this is always a double, even though - // it should be an integer - if (double const* val = std::get_if(&variant)) { - shared_id = static_cast(*val); - } else { - vtAbort("\"shared_id\" in variant does not match integer"); + if (load_model_->hasUserData()) { + for (auto obj : *load_model_) { + if (obj.isMigratable()) { + auto data_map = load_model_->getUserData( + obj, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE} + ); + + SharedIDType shared_id = -1; + BytesType shared_bytes = 0; + BytesType working_bytes = 0; + for (auto const& [key, variant] : data_map) { + if (key == "shared_id") { + // Because of how JSON is stored this is always a double, even + // though it should be an integer + if (double const* val = std::get_if(&variant)) { + shared_id = static_cast(*val); + } else { + vtAbort("\"shared_id\" in variant does not match double"); + } } - } - if (key == "shared_bytes") { - if (BytesType const* val = std::get_if(&variant)) { - shared_bytes = *val; - } else { - vtAbort("\"shared_bytes\" in variant does not match double"); + if (key == "shared_bytes") { + if (BytesType const* val = std::get_if(&variant)) { + shared_bytes = *val; + } else { + vtAbort("\"shared_bytes\" in variant does not match double"); + } } - } - if (key == "task_working_bytes") { - if (BytesType const* val = std::get_if(&variant)) { - working_bytes = *val; - } else { - vtAbort("\"working_bytes\" in variant does not match double"); + if (key == "task_working_bytes") { + if (BytesType const* val = std::get_if(&variant)) { + working_bytes = *val; + } else { + vtAbort("\"working_bytes\" in variant does not match double"); + } } - } - if (key == "rank_working_bytes") { - if (BytesType const* val = std::get_if(&variant)) { - rank_bytes_ = *val; - } else { - vtAbort("\"rank_bytes\" in variant does not match double"); + if 
(key == "rank_working_bytes") { + if (BytesType const* val = std::get_if(&variant)) { + rank_bytes_ = *val; + } else { + vtAbort("\"rank_bytes\" in variant does not match double"); + } } + // @todo: for now, skip "task_serialized_bytes" and + // "task_footprint_bytes" } - // @todo: for now, skip "task_serialized_bytes" and - // "task_footprint_bytes" - } - // @todo: switch to debug print at some point - vt_print( - temperedlb, "obj={} shared_block={} bytes={}\n", - obj, shared_id, shared_bytes - ); + // @todo: switch to debug print at some point + vt_print( + temperedlb, "obj={} shared_block={} bytes={}\n", + obj, shared_id, shared_bytes + ); - has_memory_data_ = true; - obj_shared_block_[obj] = shared_id; - obj_working_bytes_[obj] = working_bytes; - shared_block_size_[shared_id] = shared_bytes; + has_memory_data_ = true; + obj_shared_block_[obj] = shared_id; + obj_working_bytes_[obj] = working_bytes; + shared_block_size_[shared_id] = shared_bytes; + } } - } + } } void TemperedLB::computeClusterSummary() { From 58514dc900fb12ada36e2ab40229f8ccf161204c Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Mon, 4 Dec 2023 12:10:30 -0800 Subject: [PATCH 027/126] #2201: temperedlb: fix indentation --- src/vt/vrt/collection/balance/temperedlb/temperedlb.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 7d1b7c2608..3312272921 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -576,7 +576,7 @@ void TemperedLB::readClustersMemoryData() { shared_block_size_[shared_id] = shared_bytes; } } - } + } } void TemperedLB::computeClusterSummary() { From 3d9fdabff64498400189e54952ecebacb0ef755b Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Mon, 4 Dec 2023 16:07:30 -0800 Subject: [PATCH 028/126] #2201: temperedlb: implement locking and swapping protocol--may deadlock --- .../balance/temperedlb/temperedlb.cc | 330 ++++++++++++++++-- .../balance/temperedlb/temperedlb.h | 81 ++++- 2 files changed, 380 insertions(+), 31 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 3312272921..1b1a9c65a7 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -596,7 +596,7 @@ void TemperedLB::computeClusterSummary() { } } -BytesType TemperedLB::computeMemoryUsage() const { +BytesType TemperedLB::computeMemoryUsage() { // Compute bytes used by shared blocks mapped here based on object mapping auto const blocks_here = getSharedBlocksHere(); @@ -612,7 +612,7 @@ BytesType TemperedLB::computeMemoryUsage() const { double max_object_working_bytes = 0; for (auto const& [obj_id, _] : cur_objs_) { if (obj_working_bytes_.find(obj_id) != obj_working_bytes_.end()) { - max_object_working_bytes = + max_object_working_bytes_ = std::max(max_object_working_bytes, obj_working_bytes_.find(obj_id)->second); } else { vt_print( @@ -620,7 +620,8 @@ BytesType TemperedLB::computeMemoryUsage() const { ); } } - return rank_bytes_ + total_shared_bytes + max_object_working_bytes; + return current_memory_usage_ = + rank_bytes_ + total_shared_bytes + max_object_working_bytes_; } std::set TemperedLB::getSharedBlocksHere() const { @@ -1558,52 +1559,325 @@ void TemperedLB::originalTransfer() { } } -// void TemperedLB::tryLock( -// NodeType requesting_node, double 
criterion_value, SharedIDType cluster_id -// ) { -// // some logic +void TemperedLB::tryLock(NodeType requesting_node, double criterion_value) { + try_locks_.emplace(requesting_node, criterion_value); +} + +auto TemperedLB::removeClusterToSend(SharedIDType shared_id) { + std::unordered_map give_objs; + std::unordered_map give_obj_shared_block; + std::unordered_map give_shared_blocks_size; + + if (shared_id != -1) { + give_shared_blocks_size[shared_id] = shared_block_size_[shared_id]; + } + + for (auto const& [obj_id, obj_load] : cur_objs_) { + if (auto iter = obj_shared_block_.find(obj_id); iter != obj_shared_block_.end()) { + if (iter->second == shared_id) { + give_objs.emplace(obj_id, obj_load); + give_obj_shared_block[obj_id] = shared_id; + } + } + } + + for (auto const& [give_obj_id, give_obj_load] : give_objs) { + auto iter = cur_objs_.find(give_obj_id); + vtAssert(iter != cur_objs_.end(), "Object must exist"); + // remove the object! + cur_objs_.erase(iter); + this_new_load_ -= give_obj_load; + } + + return std::make_tuple( + give_objs, give_obj_shared_block, give_shared_blocks_size + ); +} + +void TemperedLB::considerSwapsAfterLock(MsgSharedPtr msg) { + double total_shared_bytes = 0; + for (auto const& block_id : getSharedBlocksHere()) { + total_shared_bytes += shared_block_size_.find(block_id)->second; + } + + auto criterion = [&,this](auto src_cluster, auto try_cluster) -> double { + auto const& [src_id, src_bytes, src_load] = src_cluster; + auto const& [try_rank, try_total_load, try_total_bytes, + try_id, try_bytes, try_load] = try_cluster; + + auto const before_work_src = this_new_load_; + auto const before_work_try = try_total_load; + auto const w_max_0 = std::max(before_work_src, before_work_try); + + auto const after_work_src = this_new_load_ - src_load + try_load; + auto const after_work_try = before_work_try + src_load - try_load; + auto const w_max_new = std::max(after_work_src, after_work_try); + + auto const src_after_mem = current_memory_usage_ - src_bytes + try_bytes; + auto const try_after_mem = try_total_bytes + src_bytes - try_bytes; + + if (src_after_mem > mem_thresh_ or try_after_mem > mem_thresh_) { + return -1000.0; + } + + return w_max_0 - w_max_new; + }; + + auto const& try_clusters = msg->locked_clusters; + auto const& try_rank = msg->locked_node; + auto const& try_load = msg->locked_load; + auto const& try_total_bytes = msg->locked_bytes; + + double best_c_try = -1.0; + std::tuple best_swap = {-1,-1}; + for (auto const& [src_shared_id, src_cluster] : cur_clusters_) { + auto const& [src_cluster_bytes, src_cluster_load] = src_cluster; + + for (auto const& [try_shared_id, try_cluster] : try_clusters) { + auto const& [try_cluster_bytes, try_cluster_load] = try_cluster; + double c_try = criterion( + std::make_tuple(src_shared_id, src_cluster_bytes, src_cluster_load), + std::make_tuple( + try_rank, + try_load, + try_total_bytes, + try_shared_id, + try_cluster_bytes, + try_cluster_load + ) + ); + vt_print( + temperedlb, + "testing a possible swap: {} {} c_try={}\n", + src_shared_id, try_shared_id, c_try + ); + if (c_try > 0.0) { + if (c_try > best_c_try) { + best_c_try = c_try; + best_swap = std::make_tuple(src_shared_id, try_shared_id); + } + } + } + } + if (best_c_try > 0) { + vt_print( + temperedlb, + "best_c_try={}\n", best_c_try + ); + + auto const& [src_shared_id, try_shared_id] = best_swap; + + auto const& [give_objs, give_obj_shared_block, give_shared_blocks_size] = + removeClusterToSend(src_shared_id); + + auto const this_node = theContext()->getNode(); + 
+ runInEpochRooted("giveCluster", [&]{ + proxy_[try_rank].template send<&TemperedLB::giveCluster>( + this_node, + give_shared_blocks_size, + give_objs, + give_obj_shared_block, + try_shared_id + ); + }); + } + + proxy_[try_rank].template send<&TemperedLB::releaseLock>(); +} + +void TemperedLB::giveCluster( + NodeType from_rank, + std::unordered_map const& give_shared_blocks_size, + std::unordered_map const& give_objs, + std::unordered_map const& give_obj_shared_block, + SharedIDType take_cluster +) { + n_transfers_swap_++; + + for (auto const& elm : give_objs) { + this_new_load_ += elm.second; + cur_objs_.emplace(elm); + } + for (auto const& elm : give_shared_blocks_size) { + shared_block_size_.emplace(elm); + } + for (auto const& elm : give_obj_shared_block) { + obj_shared_block_.emplace(elm); + } + + if (take_cluster != -1) { + auto const this_node = theContext()->getNode(); + + auto const& [take_objs, take_obj_shared_block, take_shared_blocks_size] = + removeClusterToSend(take_cluster); + + proxy_[from_rank].template send<&TemperedLB::giveCluster>( + this_node, + take_shared_blocks_size, + take_objs, + take_obj_shared_block, + -1 + ); + } + + vt_print( + temperedlb, + "After giveCluster: total memory usage={}, shared blocks here={}, " + "memory_threshold={}\n", computeMemoryUsage(), + getSharedBlocksHere().size(), mem_thresh_ + ); +} + +void TemperedLB::releaseLock() { + vt_print( + temperedlb, + "releaseLock: pending size={}\n", + pending_actions_.size() + ); -// // if yes -// is_locked = true; -// proxy_[requesting_node].template send(/*full info on cluster*/); -// } + is_locked_ = false; + + if (pending_actions_.size() > 0) { + auto action = pending_actions_.back(); + pending_actions_.pop_back(); + action(); + } else { + // satisfy another lock + satisfyLockRequest(); + } +} + +void TemperedLB::lockObtained(LockedInfoMsg* in_msg) { + auto msg = promoteMsg(in_msg); + + vt_print( + temperedlb, + "lockObtained: is_locked_={}\n", + is_locked_ + ); -// void TemperedLB::lockObtained() { + auto cur_epoch = theMsg()->getEpoch(); + theTerm()->produce(cur_epoch); -// } + auto action = [this, msg, cur_epoch]{ + theMsg()->pushEpoch(cur_epoch); + considerSwapsAfterLock(msg); + theMsg()->popEpoch(cur_epoch); + theTerm()->consume(cur_epoch); + }; + + if (is_locked_) { + pending_actions_.push_back(action); + } else { + action(); + } +} + +void TemperedLB::satisfyLockRequest() { + vtAssert(not is_locked_, "Must not already be locked to satisfy a request"); + if (try_locks_.size() > 0) { + // find the best lock to give + for (auto&& tl : try_locks_) { + vt_print( + temperedlb, + "satisfyLockRequest: node={}, c_try={}\n", tl.requesting_node, tl.c_try + ); + } + + auto iter = try_locks_.begin(); + auto lock = *iter; + try_locks_.erase(iter); + + auto const this_node = theContext()->getNode(); + + vt_print( + temperedlb, + "satisfyLockRequest: locked obtained for node={}\n", + lock.requesting_node + ); + + proxy_[lock.requesting_node].template send<&TemperedLB::lockObtained>( + this_node, this_new_load_, cur_clusters_, rank_bytes_, + max_object_working_bytes_ + ); + + is_locked_ = true; + } +} void TemperedLB::swapClusters() { auto lazy_epoch = theTerm()->makeEpochCollective("TemperedLB: swapClusters"); + theTerm()->pushEpoch(lazy_epoch); - // Initialize transfer and rejection counters - int n_transfers = 0, n_rejected = 0; + auto criterion = [this](auto src_cluster, auto try_cluster) -> double { + // this does not handle empty cluster swaps + auto const& [src_id, src_bytes, src_load] = src_cluster; + 
auto const& [try_rank, try_id, try_bytes, try_load] = try_cluster; - // Try to migrate objects only from overloaded ranks - // Compute collection of potential targets - // std::vector targets = other_rank_clusters_.keys(); - // sample cmf - // Iterage over potential targets to try to swap clusters + auto const before_work_src = this_new_load_; + auto const before_work_try = load_info_.find(try_rank)->second; + auto const w_max_0 = std::max(before_work_src, before_work_try); + + auto const after_work_src = this_new_load_ - src_load + try_load; + auto const after_work_try = before_work_try + src_load - try_load; + auto const w_max_new = std::max(after_work_src, after_work_try); + + return w_max_0 - w_max_new; + }; + + auto const this_node = theContext()->getNode(); - //// Iteratr over target clusters + for (auto const& [try_rank, try_clusters] : other_rank_clusters_) { + bool found_potential_good_swap = false; - ////// Decide whether swap is beneficial + for (auto const& [src_shared_id, src_cluster] : cur_clusters_) { + auto const& [src_cluster_bytes, src_cluster_load] = src_cluster; + for (auto const& [try_shared_id, try_cluster] : try_clusters) { + auto const& [try_cluster_bytes, try_cluster_load] = try_cluster; + double c_try = criterion( + std::make_tuple(src_shared_id, src_cluster_bytes, src_cluster_load), + std::make_tuple(try_rank, try_shared_id, try_cluster_bytes, try_cluster_load) + ); + if (c_try > 0.0) { + found_potential_good_swap = true; + // request lock + proxy_[try_rank].template send<&TemperedLB::tryLock>(this_node, c_try); + break; + } + } + if (found_potential_good_swap) { + break; + } + } + } + + // We have to be very careful here since we will allow some reentrancy here. + constexpr int turn_scheduler_times = 10; + for (int i = 0; i < turn_scheduler_times; i++) { + theSched()->runSchedulerOnceImpl(); + } - //////// If swap is beneficial compute source cluster size - //////// Test whether criterion is creater than swap RTOL times source size + while (not theSched()->workQueueEmpty()) { + theSched()->runSchedulerOnceImpl(); + } - // send try-lock message, with numerical criterion value - // spin in the scheduler + satisfyLockRequest(); - // Finalize epoch + // Finalize epoch, we have sent our initial round of messages + // from here everything is message driven theTerm()->finishedEpoch(lazy_epoch); + theTerm()->popEpoch(lazy_epoch); vt::runSchedulerThrough(lazy_epoch); + int n_rejected = 0; + // Report on rejection rate in debug mode if (theConfig()->vt_debug_temperedlb) { runInEpochCollective("TemperedLB::swapClusters -> compute rejection", [=] { proxy_.allreduce<&TemperedLB::rejectionStatsHandler, collective::PlusOp>( - n_rejected, n_transfers + n_rejected, n_transfers_swap_ ); }); } diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h index 8a65eb715f..fe992944a5 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h @@ -131,7 +131,7 @@ struct TemperedLB : BaseLB { * * \return the total memory usage */ - BytesType computeMemoryUsage() const; + BytesType computeMemoryUsage(); /** * \brief Get the shared blocks that are located on this node with the current @@ -146,6 +146,56 @@ struct TemperedLB : BaseLB { */ void computeClusterSummary(); + void tryLock(NodeType requesting_node, double criterion_value); + + struct LockedInfoMsg : vt::Message { + using MessageParentType = vt::Message; + vt_msg_serialize_required(); // 
locked_clusters_ + + LockedInfoMsg() = default; + LockedInfoMsg( + NodeType in_locked_node, LoadType in_locked_load, + ClusterSummaryType in_locked_clusters, BytesType in_locked_bytes, + BytesType in_locked_max_object_working_bytes + ) : locked_node(in_locked_node), + locked_load(in_locked_load), + locked_clusters(in_locked_clusters), + locked_bytes(in_locked_bytes), + locked_max_object_working_bytes(in_locked_max_object_working_bytes) + { } + + template + void serialize(SerializerT& s) { + MessageParentType::serialize(s); + s | locked_node; + s | locked_load; + s | locked_clusters; + s | locked_bytes; + s | locked_max_object_working_bytes; + } + + NodeType locked_node = uninitialized_destination; + LoadType locked_load = 0; + ClusterSummaryType locked_clusters = {}; + BytesType locked_bytes = 0; + BytesType locked_max_object_working_bytes = 0; + }; + + void satisfyLockRequest(); + void lockObtained(LockedInfoMsg* msg); + void considerSwapsAfterLock(MsgSharedPtr msg); + void releaseLock(); + + void giveCluster( + NodeType from_rank, + std::unordered_map const& give_shared_blocks_size, + std::unordered_map const& give_objs, + std::unordered_map const& give_obj_shared_block, + SharedIDType take_cluster + ); + + auto removeClusterToSend(SharedIDType shared_id); + private: uint16_t f_ = 0; uint8_t k_max_ = 0; @@ -216,6 +266,21 @@ struct TemperedLB : BaseLB { // All the memory info (may or may not be present) ////////////////////////////////////////////////////////////////////////////// + struct TryLock { + TryLock(NodeType in_requesting, double in_c_try) + : requesting_node(in_requesting), + c_try(in_c_try) + { } + + NodeType requesting_node = uninitialized_destination; + double c_try = 0; + + double operator<(TryLock const& other) const { + // sort in reverse order so the best is first! 
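A note on the comparator declared here: ordering TryLock records by descending c_try means try_locks_.begin() is always the most beneficial pending request, which is exactly what satisfyLockRequest() earlier in this patch grants first. With a plain std::set, two requests with identical c_try compare equivalent, so only the first of them is retained. A compilable stand-in (simplified struct, invented values) showing the intended ordering:

#include <cstdio>
#include <set>

// Illustrative stand-in for the TryLock record defined above; sorting by
// descending c_try makes set::begin() the most beneficial request.
struct TryLock {
  int requesting_node;
  double c_try;
  bool operator<(TryLock const& other) const {
    return c_try > other.c_try; // reverse order: best candidate first
  }
};

int main() {
  std::set<TryLock> try_locks;
  try_locks.insert(TryLock{1, 0.5});
  try_locks.insert(TryLock{2, 2.0});
  try_locks.insert(TryLock{3, 1.25});

  // Mirrors satisfyLockRequest(): grant the lock to the best request first
  auto best = *try_locks.begin();
  std::printf("grant lock to node=%d (c_try=%.2f)\n", best.requesting_node, best.c_try);
  return 0;
}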
+ return c_try > other.c_try; + } + }; + /// Whether we have memory information bool has_memory_data_ = false; /// Working bytes for this rank @@ -232,8 +297,18 @@ struct TemperedLB : BaseLB { std::unordered_map other_rank_clusters_; /// User-defined memory threshold BytesType mem_thresh_ = 0; - /// - bool is_locked = false; + /// The max working bytes for an object currently residing here + BytesType max_object_working_bytes_ = 0; + /// Current memory usage based on distribution + BytesType current_memory_usage_ = 0; + /// Whether this rank is locked or now + bool is_locked_ = false; + /// Try locks that have arrived from other ranks + std::set try_locks_; + /// Pending operations that are waiting for an unlock + std::list pending_actions_; + /// Number of swaps so far + int n_transfers_swap_ = 0; }; }}}} /* end namespace vt::vrt::collection::lb */ From 78e9249c8b2e448ffc3c5cfa5fa4a13b67a103e1 Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Mon, 4 Dec 2023 17:40:23 -0800 Subject: [PATCH 029/126] #2201: temperedlb: fix a couple of bugs --- src/vt/vrt/collection/balance/temperedlb/temperedlb.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 1b1a9c65a7..8ab5cdae7e 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -650,6 +650,7 @@ void TemperedLB::doLBStages(LoadType start_imb) { selected_.clear(); underloaded_.clear(); load_info_.clear(); + other_rank_clusters_.clear(); is_overloaded_ = is_underloaded_ = false; LoadType best_imb_this_trial = start_imb + 10; @@ -1807,6 +1808,8 @@ void TemperedLB::satisfyLockRequest() { } void TemperedLB::swapClusters() { + n_transfers_swap_ = 0; + auto lazy_epoch = theTerm()->makeEpochCollective("TemperedLB: swapClusters"); theTerm()->pushEpoch(lazy_epoch); From b0360cb0107e332b9493029cd77413cbe2c21455 Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Mon, 4 Dec 2023 17:40:40 -0800 Subject: [PATCH 030/126] #2201: temperedlb: a hack for now to work around the deadlock problem --- src/vt/vrt/collection/balance/temperedlb/temperedlb.cc | 6 +++++- src/vt/vrt/collection/balance/temperedlb/temperedlb.h | 2 ++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 8ab5cdae7e..10f4344986 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -1738,6 +1738,7 @@ void TemperedLB::releaseLock() { ); is_locked_ = false; + locking_rank_ = uninitialized_destination; if (pending_actions_.size() > 0) { auto action = pending_actions_.back(); @@ -1769,7 +1770,9 @@ void TemperedLB::lockObtained(LockedInfoMsg* in_msg) { }; if (is_locked_) { - pending_actions_.push_back(action); + proxy_[msg->locked_node].template send<&TemperedLB::releaseLock>(); + theTerm()->consume(cur_epoch); + //pending_actions_.push_back(action); } else { action(); } @@ -1804,6 +1807,7 @@ void TemperedLB::satisfyLockRequest() { ); is_locked_ = true; + locking_rank_ = lock.requesting_node; } } diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h index fe992944a5..fa4c1980ea 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h @@ -303,6 +303,8 @@ struct 
TemperedLB : BaseLB { BytesType current_memory_usage_ = 0; /// Whether this rank is locked or now bool is_locked_ = false; + // Which rank locked this rank + NodeType locking_rank_ = uninitialized_destination; /// Try locks that have arrived from other ranks std::set try_locks_; /// Pending operations that are waiting for an unlock From a67faf1c7d2a6c4ddec6d3a444efed193e5487ee Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Mon, 4 Dec 2023 21:42:35 -0800 Subject: [PATCH 031/126] #2201: temperedlb: fix bug in the code due to reentrancy causing problems --- .../balance/temperedlb/temperedlb.cc | 85 ++++++++++++++----- .../balance/temperedlb/temperedlb.h | 2 + 2 files changed, 67 insertions(+), 20 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 10f4344986..35f01e5789 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -1569,6 +1569,12 @@ auto TemperedLB::removeClusterToSend(SharedIDType shared_id) { std::unordered_map give_obj_shared_block; std::unordered_map give_shared_blocks_size; + vt_print( + temperedlb, + "removeClusterToSend: shared_id={}\n", + shared_id + ); + if (shared_id != -1) { give_shared_blocks_size[shared_id] = shared_block_size_[shared_id]; } @@ -1576,12 +1582,14 @@ auto TemperedLB::removeClusterToSend(SharedIDType shared_id) { for (auto const& [obj_id, obj_load] : cur_objs_) { if (auto iter = obj_shared_block_.find(obj_id); iter != obj_shared_block_.end()) { if (iter->second == shared_id) { - give_objs.emplace(obj_id, obj_load); + give_objs[obj_id] = obj_load; give_obj_shared_block[obj_id] = shared_id; } } } + auto const blocks_here_before = getSharedBlocksHere(); + for (auto const& [give_obj_id, give_obj_load] : give_objs) { auto iter = cur_objs_.find(give_obj_id); vtAssert(iter != cur_objs_.end(), "Object must exist"); @@ -1590,16 +1598,21 @@ auto TemperedLB::removeClusterToSend(SharedIDType shared_id) { this_new_load_ -= give_obj_load; } + auto const blocks_here_after = getSharedBlocksHere(); + + vt_print( + temperedlb, + "removeClusterToSend: before count={}, after count={}\n", + blocks_here_before.size(), blocks_here_after.size() + ); + return std::make_tuple( give_objs, give_obj_shared_block, give_shared_blocks_size ); } void TemperedLB::considerSwapsAfterLock(MsgSharedPtr msg) { - double total_shared_bytes = 0; - for (auto const& block_id : getSharedBlocksHere()) { - total_shared_bytes += shared_block_size_.find(block_id)->second; - } + is_swapping_ = true; auto criterion = [&,this](auto src_cluster, auto try_cluster) -> double { auto const& [src_id, src_bytes, src_load] = src_cluster; @@ -1649,8 +1662,8 @@ void TemperedLB::considerSwapsAfterLock(MsgSharedPtr msg) { ); vt_print( temperedlb, - "testing a possible swap: {} {} c_try={}\n", - src_shared_id, try_shared_id, c_try + "testing a possible swap (rank {}): {} {} c_try={}\n", + try_rank, src_shared_id, try_shared_id, c_try ); if (c_try > 0.0) { if (c_try > best_c_try) { @@ -1660,14 +1673,16 @@ void TemperedLB::considerSwapsAfterLock(MsgSharedPtr msg) { } } } + if (best_c_try > 0) { + auto const& [src_shared_id, try_shared_id] = best_swap; + vt_print( temperedlb, - "best_c_try={}\n", best_c_try + "best_c_try={}, swapping {} for {} on rank ={}\n", + best_c_try, src_shared_id, try_shared_id, try_rank ); - auto const& [src_shared_id, try_shared_id] = best_swap; - auto const& [give_objs, give_obj_shared_block, give_shared_blocks_size] = 
removeClusterToSend(src_shared_id); @@ -1682,9 +1697,25 @@ void TemperedLB::considerSwapsAfterLock(MsgSharedPtr msg) { try_shared_id ); }); + + computeClusterSummary(); + + vt_print( + temperedlb, + "best_c_try={}, swap completed with rank={}\n", + best_c_try, try_rank + ); } proxy_[try_rank].template send<&TemperedLB::releaseLock>(); + + is_swapping_ = false; + + if (pending_actions_.size() > 0) { + auto action = pending_actions_.back(); + pending_actions_.pop_back(); + action(); + } } void TemperedLB::giveCluster( @@ -1696,15 +1727,17 @@ void TemperedLB::giveCluster( ) { n_transfers_swap_++; - for (auto const& elm : give_objs) { - this_new_load_ += elm.second; - cur_objs_.emplace(elm); + vtAssert(give_shared_blocks_size.size() == 1, "Must be one block right now"); + + for (auto const& [obj_id, obj_load] : give_objs) { + this_new_load_ += obj_load; + cur_objs_[obj_id] = obj_load; } - for (auto const& elm : give_shared_blocks_size) { - shared_block_size_.emplace(elm); + for (auto const& [id, bytes] : give_shared_blocks_size) { + shared_block_size_[id] = bytes; } - for (auto const& elm : give_obj_shared_block) { - obj_shared_block_.emplace(elm); + for (auto const& [obj_id, id] : give_obj_shared_block) { + obj_shared_block_[obj_id] = id; } if (take_cluster != -1) { @@ -1722,11 +1755,14 @@ void TemperedLB::giveCluster( ); } + computeClusterSummary(); + vt_print( temperedlb, - "After giveCluster: total memory usage={}, shared blocks here={}, " - "memory_threshold={}\n", computeMemoryUsage(), - getSharedBlocksHere().size(), mem_thresh_ + "giveCluster: total memory usage={}, shared blocks here={}, " + "memory_threshold={}, give_cluster={}, take_cluster={}\n", computeMemoryUsage(), + getSharedBlocksHere().size(), mem_thresh_, + give_shared_blocks_size.begin()->first, take_cluster ); } @@ -1773,6 +1809,8 @@ void TemperedLB::lockObtained(LockedInfoMsg* in_msg) { proxy_[msg->locked_node].template send<&TemperedLB::releaseLock>(); theTerm()->consume(cur_epoch); //pending_actions_.push_back(action); + } else if (is_swapping_) { + pending_actions_.push_back(action); } else { action(); } @@ -1878,6 +1916,13 @@ void TemperedLB::swapClusters() { theTerm()->popEpoch(lazy_epoch); vt::runSchedulerThrough(lazy_epoch); + vt_print( + temperedlb, + "After iteration: total memory usage={}, shared blocks here={}, " + "memory_threshold={}\n", computeMemoryUsage(), + getSharedBlocksHere().size(), mem_thresh_ + ); + int n_rejected = 0; // Report on rejection rate in debug mode diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h index fa4c1980ea..2364aefe14 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h @@ -311,6 +311,8 @@ struct TemperedLB : BaseLB { std::list pending_actions_; /// Number of swaps so far int n_transfers_swap_ = 0; + /// Whether it's mid-swap or not + bool is_swapping_ = false; }; }}}} /* end namespace vt::vrt::collection::lb */ From 66bca792700982bd18b38e7c85acdf2531a7ea46 Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Mon, 4 Dec 2023 21:52:03 -0800 Subject: [PATCH 032/126] #2201: temperedlb: fix bug in code sending memory usage --- src/vt/vrt/collection/balance/temperedlb/temperedlb.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 35f01e5789..b8e7f2b887 100644 --- 
a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -1840,7 +1840,7 @@ void TemperedLB::satisfyLockRequest() { ); proxy_[lock.requesting_node].template send<&TemperedLB::lockObtained>( - this_node, this_new_load_, cur_clusters_, rank_bytes_, + this_node, this_new_load_, cur_clusters_, current_memory_usage_, max_object_working_bytes_ ); From 91c01039da120abd6fd537fa7cfe7865f633bad0 Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Mon, 4 Dec 2023 22:04:04 -0800 Subject: [PATCH 033/126] #2201: temperedlb: fix some other minor bugs, add empty cluster "swap" --- .../balance/temperedlb/temperedlb.cc | 33 +++++++++++++++++-- .../balance/temperedlb/temperedlb.h | 8 +++-- 2 files changed, 36 insertions(+), 5 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index b8e7f2b887..1866e1cce8 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -1647,6 +1647,27 @@ void TemperedLB::considerSwapsAfterLock(MsgSharedPtr msg) { for (auto const& [src_shared_id, src_cluster] : cur_clusters_) { auto const& [src_cluster_bytes, src_cluster_load] = src_cluster; + // try swapping with empty cluster first + { + double c_try = criterion( + std::make_tuple(src_shared_id, src_cluster_bytes, src_cluster_load), + std::make_tuple( + try_rank, + try_load, + try_total_bytes, + -1, + 0, + 0 + ) + ); + if (c_try > 0.0) { + if (c_try > best_c_try) { + best_c_try = c_try; + best_swap = std::make_tuple(src_shared_id, -1); + } + } + } + for (auto const& [try_shared_id, try_cluster] : try_clusters) { auto const& [try_cluster_bytes, try_cluster_load] = try_cluster; double c_try = criterion( @@ -1808,6 +1829,7 @@ void TemperedLB::lockObtained(LockedInfoMsg* in_msg) { if (is_locked_) { proxy_[msg->locked_node].template send<&TemperedLB::releaseLock>(); theTerm()->consume(cur_epoch); + try_locks_.emplace(msg->locked_node, msg->locked_c_try); //pending_actions_.push_back(action); } else if (is_swapping_) { pending_actions_.push_back(action); @@ -1841,7 +1863,7 @@ void TemperedLB::satisfyLockRequest() { proxy_[lock.requesting_node].template send<&TemperedLB::lockObtained>( this_node, this_new_load_, cur_clusters_, current_memory_usage_, - max_object_working_bytes_ + max_object_working_bytes_, lock.c_try ); is_locked_ = true; @@ -1876,6 +1898,11 @@ void TemperedLB::swapClusters() { for (auto const& [try_rank, try_clusters] : other_rank_clusters_) { bool found_potential_good_swap = false; + // if (try_clusters.size() < cur_clusters_.size()) { + // proxy_[try_rank].template send<&TemperedLB::tryLock>(this_node, 100); + // continue; + // } + for (auto const& [src_shared_id, src_cluster] : cur_clusters_) { auto const& [src_cluster_bytes, src_cluster_load] = src_cluster; @@ -1919,8 +1946,8 @@ void TemperedLB::swapClusters() { vt_print( temperedlb, "After iteration: total memory usage={}, shared blocks here={}, " - "memory_threshold={}\n", computeMemoryUsage(), - getSharedBlocksHere().size(), mem_thresh_ + "memory_threshold={}, load={}\n", computeMemoryUsage(), + getSharedBlocksHere().size(), mem_thresh_, this_new_load_ ); int n_rejected = 0; diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h index 2364aefe14..e4a0247fa2 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h +++ 
b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h @@ -156,12 +156,14 @@ struct TemperedLB : BaseLB { LockedInfoMsg( NodeType in_locked_node, LoadType in_locked_load, ClusterSummaryType in_locked_clusters, BytesType in_locked_bytes, - BytesType in_locked_max_object_working_bytes + BytesType in_locked_max_object_working_bytes, + double in_locked_c_try ) : locked_node(in_locked_node), locked_load(in_locked_load), locked_clusters(in_locked_clusters), locked_bytes(in_locked_bytes), - locked_max_object_working_bytes(in_locked_max_object_working_bytes) + locked_max_object_working_bytes(in_locked_max_object_working_bytes), + locked_c_try(in_locked_c_try) { } template @@ -172,6 +174,7 @@ struct TemperedLB : BaseLB { s | locked_clusters; s | locked_bytes; s | locked_max_object_working_bytes; + s | locked_c_try; } NodeType locked_node = uninitialized_destination; @@ -179,6 +182,7 @@ struct TemperedLB : BaseLB { ClusterSummaryType locked_clusters = {}; BytesType locked_bytes = 0; BytesType locked_max_object_working_bytes = 0; + double locked_c_try = 0; }; void satisfyLockRequest(); From 735e8341a92244f12cb0d32fe57e31dd93559d6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20P=2E=20P=C3=A9ba=C3=BF?= Date: Tue, 5 Dec 2023 11:39:28 -0500 Subject: [PATCH 034/126] #2201: annotated code for side-by-side comparison with LBAF --- .../collection/balance/temperedlb/temperedlb.cc | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 1866e1cce8..df5350b2a1 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -1895,6 +1895,8 @@ void TemperedLB::swapClusters() { auto const this_node = theContext()->getNode(); + // Iddentify and perform beneficial cluster swaps + int n_rank_swaps = 0; for (auto const& [try_rank, try_clusters] : other_rank_clusters_) { bool found_potential_good_swap = false; @@ -1902,28 +1904,32 @@ void TemperedLB::swapClusters() { // proxy_[try_rank].template send<&TemperedLB::tryLock>(this_node, 100); // continue; // } - + + // Iterate over source clusters for (auto const& [src_shared_id, src_cluster] : cur_clusters_) { auto const& [src_cluster_bytes, src_cluster_load] = src_cluster; + // Iterate over target clusters for (auto const& [try_shared_id, try_cluster] : try_clusters) { auto const& [try_cluster_bytes, try_cluster_load] = try_cluster; + // Decide whether swap is beneficial double c_try = criterion( std::make_tuple(src_shared_id, src_cluster_bytes, src_cluster_load), std::make_tuple(try_rank, try_shared_id, try_cluster_bytes, try_cluster_load) ); if (c_try > 0.0) { + // Try to perform swap found_potential_good_swap = true; - // request lock proxy_[try_rank].template send<&TemperedLB::tryLock>(this_node, c_try); + n_rank_swaps; break; } } if (found_potential_good_swap) { break; - } - } - } + } // try_clusters + } // cur_clusters_ + } // other_rank_clusters // We have to be very careful here since we will allow some reentrancy here. 
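The swap test annotated above reduces to comparing the maximum of the two ranks' loads before and after exchanging one cluster from each side. A condensed sketch of just the load part (plain doubles instead of LoadType; the mem_thresh_ veto present in the actual lambdas is omitted here):

#include <algorithm>

// Positive return value => the swap lowers the pairwise maximum load, so a
// tryLock request is worth sending.
double swapCriterion(
  double src_rank_load, double try_rank_load,       // current loads of the two ranks
  double src_cluster_load, double try_cluster_load  // loads of the clusters to exchange
) {
  // Maximum work before the swap
  double const w_max_0 = std::max(src_rank_load, try_rank_load);
  // Maximum work after exchanging the two clusters
  double const w_max_new = std::max(
    src_rank_load - src_cluster_load + try_cluster_load,
    try_rank_load + src_cluster_load - try_cluster_load
  );
  return w_max_0 - w_max_new;
}

With hypothetical values, rank loads of 12 and 8 and cluster loads of 3 and 1 give a maximum of 12 before and 10 after the exchange, so the criterion evaluates to 2 and a lock request would be sent.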
constexpr int turn_scheduler_times = 10; From c583530469262f31a929d548838a49666105e4b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20P=2E=20P=C3=A9ba=C3=BF?= Date: Tue, 5 Dec 2023 12:33:15 -0500 Subject: [PATCH 035/126] #2201: annotated following discussion --- .../vrt/collection/balance/temperedlb/temperedlb.cc | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index df5350b2a1..11518825e9 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -1895,8 +1895,7 @@ void TemperedLB::swapClusters() { auto const this_node = theContext()->getNode(); - // Iddentify and perform beneficial cluster swaps - int n_rank_swaps = 0; + // Identify and message beneficial cluster swaps for (auto const& [try_rank, try_clusters] : other_rank_clusters_) { bool found_potential_good_swap = false; @@ -1904,7 +1903,7 @@ void TemperedLB::swapClusters() { // proxy_[try_rank].template send<&TemperedLB::tryLock>(this_node, 100); // continue; // } - + // Iterate over source clusters for (auto const& [src_shared_id, src_cluster] : cur_clusters_) { auto const& [src_cluster_bytes, src_cluster_load] = src_cluster; @@ -1918,16 +1917,15 @@ void TemperedLB::swapClusters() { std::make_tuple(try_rank, try_shared_id, try_cluster_bytes, try_cluster_load) ); if (c_try > 0.0) { - // Try to perform swap + // Try to obtain lock for feasible swap found_potential_good_swap = true; proxy_[try_rank].template send<&TemperedLB::tryLock>(this_node, c_try); - n_rank_swaps; break; } - } + } // try_clusters if (found_potential_good_swap) { break; - } // try_clusters + } } // cur_clusters_ } // other_rank_clusters From a1a1bf9b7e68c1b189784f89f20e8ec770041f88 Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Tue, 5 Dec 2023 11:21:20 -0800 Subject: [PATCH 036/126] #2201: temperedlb: switch a ton of prints to debug prints --- .../balance/temperedlb/temperedlb.cc | 67 ++++++++++--------- 1 file changed, 34 insertions(+), 33 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 11518825e9..aa145a80c1 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -564,9 +564,9 @@ void TemperedLB::readClustersMemoryData() { // "task_footprint_bytes" } - // @todo: switch to debug print at some point - vt_print( - temperedlb, "obj={} shared_block={} bytes={}\n", + vt_debug_print( + verbose, temperedlb, + "obj={} shared_block={} bytes={}\n", obj, shared_id, shared_bytes ); @@ -615,8 +615,9 @@ BytesType TemperedLB::computeMemoryUsage() { max_object_working_bytes_ = std::max(max_object_working_bytes, obj_working_bytes_.find(obj_id)->second); } else { - vt_print( - temperedlb, "Warning: working bytes not found for object: {}\n", obj_id + vt_debug_print( + verbose, temperedlb, + "Warning: working bytes not found for object: {}\n", obj_id ); } } @@ -688,8 +689,8 @@ void TemperedLB::doLBStages(LoadType start_imb) { ); if (has_memory_data_) { - vt_print( - temperedlb, + vt_debug_print( + terse, temperedlb, "Current memory info: total memory usage={}, shared blocks here={}, " "memory_threshold={}\n", computeMemoryUsage(), getSharedBlocksHere().size(), mem_thresh_ @@ -700,8 +701,8 @@ void TemperedLB::doLBStages(LoadType start_imb) { // Verbose printing about local clusters for 
(auto const& [shared_id, value] : cur_clusters_) { auto const& [shared_bytes, cluster_load] = value; - vt_print( - temperedlb, + vt_debug_print( + verbose, temperedlb, "Local cluster: id={}, bytes={}, load={}\n", shared_id, shared_bytes, cluster_load ); @@ -731,8 +732,8 @@ void TemperedLB::doLBStages(LoadType start_imb) { for (auto const& [node, clusters] : other_rank_clusters_) { for (auto const& [shared_id, value] : clusters) { auto const& [shared_bytes, cluster_load] = value; - vt_print( - temperedlb, + vt_debug_print( + verbose, temperedlb, "Remote cluster: node={}, id={}, bytes={}, load={}\n", node, shared_id, shared_bytes, cluster_load ); @@ -1569,8 +1570,8 @@ auto TemperedLB::removeClusterToSend(SharedIDType shared_id) { std::unordered_map give_obj_shared_block; std::unordered_map give_shared_blocks_size; - vt_print( - temperedlb, + vt_debug_print( + verbose, temperedlb, "removeClusterToSend: shared_id={}\n", shared_id ); @@ -1600,8 +1601,8 @@ auto TemperedLB::removeClusterToSend(SharedIDType shared_id) { auto const blocks_here_after = getSharedBlocksHere(); - vt_print( - temperedlb, + vt_debug_print( + verbose, temperedlb, "removeClusterToSend: before count={}, after count={}\n", blocks_here_before.size(), blocks_here_after.size() ); @@ -1681,8 +1682,8 @@ void TemperedLB::considerSwapsAfterLock(MsgSharedPtr msg) { try_cluster_load ) ); - vt_print( - temperedlb, + vt_debug_print( + verbose, temperedlb, "testing a possible swap (rank {}): {} {} c_try={}\n", try_rank, src_shared_id, try_shared_id, c_try ); @@ -1698,8 +1699,8 @@ void TemperedLB::considerSwapsAfterLock(MsgSharedPtr msg) { if (best_c_try > 0) { auto const& [src_shared_id, try_shared_id] = best_swap; - vt_print( - temperedlb, + vt_debug_print( + normal, temperedlb, "best_c_try={}, swapping {} for {} on rank ={}\n", best_c_try, src_shared_id, try_shared_id, try_rank ); @@ -1721,8 +1722,8 @@ void TemperedLB::considerSwapsAfterLock(MsgSharedPtr msg) { computeClusterSummary(); - vt_print( - temperedlb, + vt_debug_print( + normal, temperedlb, "best_c_try={}, swap completed with rank={}\n", best_c_try, try_rank ); @@ -1778,8 +1779,8 @@ void TemperedLB::giveCluster( computeClusterSummary(); - vt_print( - temperedlb, + vt_debug_print( + normal, temperedlb, "giveCluster: total memory usage={}, shared blocks here={}, " "memory_threshold={}, give_cluster={}, take_cluster={}\n", computeMemoryUsage(), getSharedBlocksHere().size(), mem_thresh_, @@ -1788,8 +1789,8 @@ void TemperedLB::giveCluster( } void TemperedLB::releaseLock() { - vt_print( - temperedlb, + vt_debug_print( + verbose, temperedlb, "releaseLock: pending size={}\n", pending_actions_.size() ); @@ -1810,8 +1811,8 @@ void TemperedLB::releaseLock() { void TemperedLB::lockObtained(LockedInfoMsg* in_msg) { auto msg = promoteMsg(in_msg); - vt_print( - temperedlb, + vt_debug_print( + verbose, temperedlb, "lockObtained: is_locked_={}\n", is_locked_ ); @@ -1843,8 +1844,8 @@ void TemperedLB::satisfyLockRequest() { if (try_locks_.size() > 0) { // find the best lock to give for (auto&& tl : try_locks_) { - vt_print( - temperedlb, + vt_debug_print( + verbose, temperedlb, "satisfyLockRequest: node={}, c_try={}\n", tl.requesting_node, tl.c_try ); } @@ -1855,8 +1856,8 @@ void TemperedLB::satisfyLockRequest() { auto const this_node = theContext()->getNode(); - vt_print( - temperedlb, + vt_debug_print( + normal, temperedlb, "satisfyLockRequest: locked obtained for node={}\n", lock.requesting_node ); @@ -1947,8 +1948,8 @@ void TemperedLB::swapClusters() { theTerm()->popEpoch(lazy_epoch); 
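These conversions leave the message text untouched and only change the emitting macro: vt_print output is unconditional, while vt_debug_print is gated on the temperedlb debug category (cf. the theConfig()->vt_debug_temperedlb checks elsewhere in these patches) and on a verbosity level, with terse, normal, and verbose used here in increasing order of detail. For example, the first conversion in this patch:

// before: always printed
vt_print(temperedlb, "obj={} shared_block={} bytes={}\n", obj, shared_id, shared_bytes);

// after: printed only when temperedlb debug output is enabled at verbose level
vt_debug_print(verbose, temperedlb, "obj={} shared_block={} bytes={}\n", obj, shared_id, shared_bytes);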
vt::runSchedulerThrough(lazy_epoch); - vt_print( - temperedlb, + vt_debug_print( + normal, temperedlb, "After iteration: total memory usage={}, shared blocks here={}, " "memory_threshold={}, load={}\n", computeMemoryUsage(), getSharedBlocksHere().size(), mem_thresh_, this_new_load_ From 2cc9bbb7ab047f351f645a1475d6599f6e4835fb Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Tue, 5 Dec 2023 11:28:22 -0800 Subject: [PATCH 037/126] #2201: temperedlb: add proper approximation for memory usage for empty cluster tryLock --- .../balance/temperedlb/temperedlb.cc | 35 +++++++++++++++---- 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index aa145a80c1..eccd8ed9ba 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -1881,7 +1881,7 @@ void TemperedLB::swapClusters() { auto criterion = [this](auto src_cluster, auto try_cluster) -> double { // this does not handle empty cluster swaps auto const& [src_id, src_bytes, src_load] = src_cluster; - auto const& [try_rank, try_id, try_bytes, try_load] = try_cluster; + auto const& [try_rank, try_id, try_bytes, try_load, try_mem] = try_cluster; auto const before_work_src = this_new_load_; auto const before_work_try = load_info_.find(try_rank)->second; @@ -1891,6 +1891,10 @@ void TemperedLB::swapClusters() { auto const after_work_try = before_work_try + src_load - try_load; auto const w_max_new = std::max(after_work_src, after_work_try); + if (try_mem - try_bytes + src_bytes > mem_thresh_) { + return -1000; + } + return w_max_0 - w_max_new; }; @@ -1900,22 +1904,41 @@ void TemperedLB::swapClusters() { for (auto const& [try_rank, try_clusters] : other_rank_clusters_) { bool found_potential_good_swap = false; - // if (try_clusters.size() < cur_clusters_.size()) { - // proxy_[try_rank].template send<&TemperedLB::tryLock>(this_node, 100); - // continue; - // } + // Approximate roughly the memory usage on the target + BytesType try_approx_mem_usage = rank_bytes_; + for (auto const& [try_shared_id, try_cluster] : try_clusters) { + auto const& [try_cluster_bytes, _] = try_cluster; + try_approx_mem_usage += try_cluster_bytes; + } // Iterate over source clusters for (auto const& [src_shared_id, src_cluster] : cur_clusters_) { auto const& [src_cluster_bytes, src_cluster_load] = src_cluster; + // empty cluster swap approximate criterion + { + double c_try = criterion( + std::make_tuple(src_shared_id, src_cluster_bytes, src_cluster_load), + std::make_tuple(try_rank, 0, 0, 0, try_approx_mem_usage) + ); + if (c_try > 0.0) { + // Try to obtain lock for feasible swap + found_potential_good_swap = true; + proxy_[try_rank].template send<&TemperedLB::tryLock>(this_node, c_try); + break; + } + } + // Iterate over target clusters for (auto const& [try_shared_id, try_cluster] : try_clusters) { auto const& [try_cluster_bytes, try_cluster_load] = try_cluster; // Decide whether swap is beneficial double c_try = criterion( std::make_tuple(src_shared_id, src_cluster_bytes, src_cluster_load), - std::make_tuple(try_rank, try_shared_id, try_cluster_bytes, try_cluster_load) + std::make_tuple( + try_rank, try_shared_id, try_cluster_bytes, try_cluster_load, + try_approx_mem_usage + ) ); if (c_try > 0.0) { // Try to obtain lock for feasible swap From 4c429938edbe367e866ee01d706812da90721a4f Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Tue, 5 Dec 2023 12:16:57 -0800 Subject: 
[PATCH 038/126] #2201: temperedlb: fix bug where some ranks don't participate if they don't have user-defined blocks --- src/vt/vrt/collection/balance/temperedlb/temperedlb.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index eccd8ed9ba..00186a859c 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -570,7 +570,6 @@ void TemperedLB::readClustersMemoryData() { obj, shared_id, shared_bytes ); - has_memory_data_ = true; obj_shared_block_[obj] = shared_id; obj_working_bytes_[obj] = working_bytes; shared_block_size_[shared_id] = shared_bytes; @@ -646,6 +645,10 @@ void TemperedLB::doLBStages(LoadType start_imb) { // Read in memory information if it's available before be do any trials readClustersMemoryData(); + if (transfer_type_ == TransferTypeEnum::SwapClusters) { + has_memory_data_ = true; + } + for (trial_ = 0; trial_ < num_trials_; ++trial_) { // Clear out data structures selected_.clear(); From 294bb58776c7353b1b8b39750b5ec0c63304581f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20P=2E=20P=C3=A9ba=C3=BF?= Date: Tue, 5 Dec 2023 16:15:56 -0500 Subject: [PATCH 039/126] #2201: clarify annotation --- src/vt/vrt/collection/balance/temperedlb/temperedlb.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 00186a859c..7317025ff3 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -1918,7 +1918,7 @@ void TemperedLB::swapClusters() { for (auto const& [src_shared_id, src_cluster] : cur_clusters_) { auto const& [src_cluster_bytes, src_cluster_load] = src_cluster; - // empty cluster swap approximate criterion + // Compute approximation swap criterion { double c_try = criterion( std::make_tuple(src_shared_id, src_cluster_bytes, src_cluster_load), From df619957d499dba4b2333d4bd355064d52e17fac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20P=2E=20P=C3=A9ba=C3=BF?= Date: Tue, 5 Dec 2023 16:32:33 -0500 Subject: [PATCH 040/126] #2201: include infinite value for memory overflow --- .../collection/balance/temperedlb/temperedlb.cc | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 7317025ff3..832d0e68e0 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -57,7 +57,7 @@ #include #include #include -#include +#include namespace vt { namespace vrt { namespace collection { namespace lb { @@ -1886,18 +1886,22 @@ void TemperedLB::swapClusters() { auto const& [src_id, src_bytes, src_load] = src_cluster; auto const& [try_rank, try_id, try_bytes, try_load, try_mem] = try_cluster; + // Check whether strict bounds on memory are satisfied + if (try_mem - try_bytes + src_bytes > mem_thresh_) { + return - std::numeric_limits::infinity(); + } + + // Compute maximum work of original arrangement auto const before_work_src = this_new_load_; auto const before_work_try = load_info_.find(try_rank)->second; auto const w_max_0 = std::max(before_work_src, before_work_try); + // Compute maximum work of proposed new arrangement auto const after_work_src = this_new_load_ - src_load 
+ try_load; auto const after_work_try = before_work_try + src_load - try_load; auto const w_max_new = std::max(after_work_src, after_work_try); - if (try_mem - try_bytes + src_bytes > mem_thresh_) { - return -1000; - } - + // Return criterion value return w_max_0 - w_max_new; }; From be1a30c982c372a6dff52a82c3a7fa52b13ed73c Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Tue, 5 Dec 2023 13:46:51 -0800 Subject: [PATCH 041/126] #2201: temperedlb: fix whitespace --- src/vt/vrt/collection/balance/temperedlb/temperedlb.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 832d0e68e0..14dc0b1029 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -1891,12 +1891,12 @@ void TemperedLB::swapClusters() { return - std::numeric_limits::infinity(); } - // Compute maximum work of original arrangement + // Compute maximum work of original arrangement auto const before_work_src = this_new_load_; auto const before_work_try = load_info_.find(try_rank)->second; auto const w_max_0 = std::max(before_work_src, before_work_try); - // Compute maximum work of proposed new arrangement + // Compute maximum work of proposed new arrangement auto const after_work_src = this_new_load_ - src_load + try_load; auto const after_work_try = before_work_try + src_load - try_load; auto const w_max_new = std::max(after_work_src, after_work_try); From cf34d5b13d32347b8ae21b6e5d565ecef74536d3 Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Tue, 5 Dec 2023 13:56:55 -0800 Subject: [PATCH 042/126] #2201: temperedlb: switch other criterion to use negative inf --- .../collection/balance/temperedlb/temperedlb.cc | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 14dc0b1029..34d063c8f9 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -1623,6 +1623,13 @@ void TemperedLB::considerSwapsAfterLock(MsgSharedPtr msg) { auto const& [try_rank, try_total_load, try_total_bytes, try_id, try_bytes, try_load] = try_cluster; + auto const src_after_mem = current_memory_usage_ - src_bytes + try_bytes; + auto const try_after_mem = try_total_bytes + src_bytes - try_bytes; + + if (src_after_mem > mem_thresh_ or try_after_mem > mem_thresh_) { + return - std::numeric_limits::infinity(); + } + auto const before_work_src = this_new_load_; auto const before_work_try = try_total_load; auto const w_max_0 = std::max(before_work_src, before_work_try); @@ -1631,13 +1638,6 @@ void TemperedLB::considerSwapsAfterLock(MsgSharedPtr msg) { auto const after_work_try = before_work_try + src_load - try_load; auto const w_max_new = std::max(after_work_src, after_work_try); - auto const src_after_mem = current_memory_usage_ - src_bytes + try_bytes; - auto const try_after_mem = try_total_bytes + src_bytes - try_bytes; - - if (src_after_mem > mem_thresh_ or try_after_mem > mem_thresh_) { - return -1000.0; - } - return w_max_0 - w_max_new; }; From c10f80af98d4c21d5e25d5fe1522393b9428839d Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Tue, 5 Dec 2023 15:33:02 -0800 Subject: [PATCH 043/126] #2201: temperedlb: adding working bytes transfer for correctness --- .../balance/temperedlb/temperedlb.cc | 36 
+++++++++++++++---- .../balance/temperedlb/temperedlb.h | 1 + 2 files changed, 31 insertions(+), 6 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 34d063c8f9..689e4a533c 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -1572,6 +1572,7 @@ auto TemperedLB::removeClusterToSend(SharedIDType shared_id) { std::unordered_map give_objs; std::unordered_map give_obj_shared_block; std::unordered_map give_shared_blocks_size; + std::unordered_map give_obj_working_bytes; vt_debug_print( verbose, temperedlb, @@ -1588,6 +1589,12 @@ auto TemperedLB::removeClusterToSend(SharedIDType shared_id) { if (iter->second == shared_id) { give_objs[obj_id] = obj_load; give_obj_shared_block[obj_id] = shared_id; + if ( + auto iter2 = give_obj_working_bytes.find(obj_id); + iter2 != give_obj_working_bytes.end() + ) { + give_obj_working_bytes[obj_id] = iter2->second; + } } } } @@ -1611,7 +1618,10 @@ auto TemperedLB::removeClusterToSend(SharedIDType shared_id) { ); return std::make_tuple( - give_objs, give_obj_shared_block, give_shared_blocks_size + give_objs, + give_obj_shared_block, + give_shared_blocks_size, + give_obj_working_bytes ); } @@ -1708,8 +1718,12 @@ void TemperedLB::considerSwapsAfterLock(MsgSharedPtr msg) { best_c_try, src_shared_id, try_shared_id, try_rank ); - auto const& [give_objs, give_obj_shared_block, give_shared_blocks_size] = - removeClusterToSend(src_shared_id); + auto const& [ + give_objs, + give_obj_shared_block, + give_shared_blocks_size, + give_obj_working_bytes + ] = removeClusterToSend(src_shared_id); auto const this_node = theContext()->getNode(); @@ -1719,6 +1733,7 @@ void TemperedLB::considerSwapsAfterLock(MsgSharedPtr msg) { give_shared_blocks_size, give_objs, give_obj_shared_block, + give_obj_working_bytes, try_shared_id ); }); @@ -1748,6 +1763,7 @@ void TemperedLB::giveCluster( std::unordered_map const& give_shared_blocks_size, std::unordered_map const& give_objs, std::unordered_map const& give_obj_shared_block, + std::unordered_map const& give_obj_working_bytes, SharedIDType take_cluster ) { n_transfers_swap_++; @@ -1764,18 +1780,26 @@ void TemperedLB::giveCluster( for (auto const& [obj_id, id] : give_obj_shared_block) { obj_shared_block_[obj_id] = id; } + for (auto const& elm : give_obj_working_bytes) { + obj_working_bytes_.emplace(elm); + } if (take_cluster != -1) { auto const this_node = theContext()->getNode(); - auto const& [take_objs, take_obj_shared_block, take_shared_blocks_size] = - removeClusterToSend(take_cluster); + auto const& [ + take_objs, + take_obj_shared_block, + take_shared_blocks_size, + take_obj_working_bytes + ] = removeClusterToSend(take_cluster); proxy_[from_rank].template send<&TemperedLB::giveCluster>( this_node, take_shared_blocks_size, take_objs, take_obj_shared_block, + take_obj_working_bytes, -1 ); } @@ -1922,7 +1946,7 @@ void TemperedLB::swapClusters() { for (auto const& [src_shared_id, src_cluster] : cur_clusters_) { auto const& [src_cluster_bytes, src_cluster_load] = src_cluster; - // Compute approximation swap criterion + // Compute approximation swap criterion for empty cluster "swap" case { double c_try = criterion( std::make_tuple(src_shared_id, src_cluster_bytes, src_cluster_load), diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h index e4a0247fa2..7a70cade83 100644 --- 
a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h @@ -195,6 +195,7 @@ struct TemperedLB : BaseLB { std::unordered_map const& give_shared_blocks_size, std::unordered_map const& give_objs, std::unordered_map const& give_obj_shared_block, + std::unordered_map const& give_obj_working_bytes, SharedIDType take_cluster ); From c1a9b7dd3fc91eaad6944b17eecb30231060d0e0 Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Tue, 5 Dec 2023 15:38:16 -0800 Subject: [PATCH 044/126] #2201: temperedlb: add header file comments for the new methods added --- .../balance/temperedlb/temperedlb.h | 57 +++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h index 7a70cade83..8add415f37 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h @@ -146,8 +146,21 @@ struct TemperedLB : BaseLB { */ void computeClusterSummary(); + /** + * \brief Try to lock a rank + * + * \param[in] requesting_node the requesting rank asking to lock + * \param[in] criterion_value the criterion evaluation value to compare + */ void tryLock(NodeType requesting_node, double criterion_value); + /** + * \struct LockedInfoMsg + * + * \brief The update message that comes from a rank when it is locked. This is + * a message instead of a normal handler so it can be buffered without copying + * it. + */ struct LockedInfoMsg : vt::Message { using MessageParentType = vt::Message; vt_msg_serialize_required(); // locked_clusters_ @@ -177,19 +190,55 @@ struct TemperedLB : BaseLB { s | locked_c_try; } + /// The node that is locked NodeType locked_node = uninitialized_destination; + /// The total load of the locked node LoadType locked_load = 0; + /// The up-to-date summary of the clusters ClusterSummaryType locked_clusters = {}; + /// The total bytes for the locked node BytesType locked_bytes = 0; + /// The largest working bytes for the locked node BytesType locked_max_object_working_bytes = 0; + /// The approximate criterion value at the time it was locked with possible + /// out-of-date info double locked_c_try = 0; }; + /** + * \brief Satisfy a lock request (if there is one) + */ void satisfyLockRequest(); + + /** + * \brief Inform a rank that a lock was obtained + * + * \param[in] msg update message with all the info + */ void lockObtained(LockedInfoMsg* msg); + + /** + * \brief Consider possible swaps with all the up-to-date info from a rank + * + * \param[in] msg update message with all the info + */ void considerSwapsAfterLock(MsgSharedPtr msg); + + /** + * \brief Release a lock on a rank + */ void releaseLock(); + /** + * \brief Give a cluster to a rank + * + * \param[in] from_rank the rank it's coming from + * \param[in] give_shared_blocks_size the shared block info for the swap + * \param[in] give_objs the objects given + * \param[in] give_obj_shared_block the shared block the objs are part of + * \param[in] give_obj_working_bytes the working bytes for the objs + * \param[in] take_cluster (optional) a cluster requested in return + */ void giveCluster( NodeType from_rank, std::unordered_map const& give_shared_blocks_size, @@ -199,6 +248,14 @@ struct TemperedLB : BaseLB { SharedIDType take_cluster ); + /** + * \internal \brief Remove a cluster to send. 
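Read together, the methods documented above form a pairwise, lock-based exchange. A schematic of one attempted swap between a source rank src and a candidate rank try, reconstructed from the handlers shown in these patches (payloads abbreviated, not a verbatim trace):

  1. src, in swapClusters(): evaluates the criterion for (src cluster, try cluster)
     pairs and, on the first positive value, sends
       proxy_[try].send<&TemperedLB::tryLock>(src, c_try)
  2. try, in tryLock()/satisfyLockRequest(): queues the request in try_locks_ and,
     when not already locked, picks the best pending request, sets is_locked_, and
     replies with its up-to-date load, cluster summary, and memory info via
       proxy_[src].send<&TemperedLB::lockObtained>(...)
  3. src, in lockObtained(): if it is itself locked it releases the lock right away
     (re-queueing a try-lock toward that rank); if it is mid-swap it defers the work
     into pending_actions_; otherwise it runs considerSwapsAfterLock(), which
     re-evaluates the criterion against the fresh data, sends
       proxy_[try].send<&TemperedLB::giveCluster>(..., take_cluster)
     when a swap still pays off, and in all cases finishes with
       proxy_[try].send<&TemperedLB::releaseLock>()
  4. try, in giveCluster(): absorbs the objects, shared-block sizes, and working
     bytes, and answers with its own giveCluster(..., -1) when a take_cluster was
     requested; releaseLock() then unlocks try so further requests can be served.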
Does all the bookkeeping + * associated with removing the cluster + * + * \param[in] shared_id the shared ID of the cluster to remove + * + * \return a tuple with all the information to send to \c giveCluster + */ auto removeClusterToSend(SharedIDType shared_id); private: From 90bffc71c97e8f318e00b5812963e1378fa3d872 Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Tue, 5 Dec 2023 16:30:54 -0800 Subject: [PATCH 045/126] #2201: temperedlb: start implementing sub-clustering --- .../balance/temperedlb/temperedlb.cc | 159 +++++++++++++++++- .../balance/temperedlb/temperedlb.h | 17 ++ 2 files changed, 174 insertions(+), 2 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 689e4a533c..b905a299eb 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -655,7 +655,9 @@ void TemperedLB::doLBStages(LoadType start_imb) { underloaded_.clear(); load_info_.clear(); other_rank_clusters_.clear(); + max_load_over_iters_.clear(); is_overloaded_ = is_underloaded_ = false; + is_subclustering_ = false; LoadType best_imb_this_trial = start_imb + 10; @@ -677,6 +679,7 @@ void TemperedLB::doLBStages(LoadType start_imb) { underloaded_.clear(); load_info_.clear(); is_overloaded_ = is_underloaded_ = false; + is_subclustering_ = false; other_rank_clusters_.clear(); // Not clearing shared_block_size_ because this never changes and @@ -775,7 +778,12 @@ void TemperedLB::doLBStages(LoadType start_imb) { LoadType(this_new_load_) ); - if (rollback_ || theConfig()->vt_debug_temperedlb || (iter_ == num_iters_ - 1)) { + if ( + rollback_ || + theConfig()->vt_debug_temperedlb || + (iter_ == num_iters_ - 1) || + transfer_type_ == TransferTypeEnum::SwapClusters + ) { runInEpochCollective("TemperedLB::doLBStages -> Rank_load_modeled", [=] { // Perform the reduction for Rank_load_modeled -> processor load only proxy_.allreduce<&TemperedLB::loadStatsHandler, collective::PlusOp>( @@ -842,6 +850,8 @@ void TemperedLB::loadStatsHandler(std::vector const& vec) { auto const& in = vec[0]; new_imbalance_ = in.I(); + max_load_over_iters_.push_back(in.max()); + auto this_node = theContext()->getNode(); if (this_node == 0) { vt_debug_print( @@ -1625,6 +1635,62 @@ auto TemperedLB::removeClusterToSend(SharedIDType shared_id) { ); } +void TemperedLB::considerSubClustersAfterLock(MsgSharedPtr msg) { + is_swapping_ = true; + + // auto const& try_clusters = msg->locked_clusters; + // auto const& try_rank = msg->locked_node; + auto const& try_load = msg->locked_load; + auto const& try_total_bytes = msg->locked_bytes; + + // get the shared blocks current residing on this rank + auto shared_blocks_here = getSharedBlocksHere(); + + // Shared IDs when added to this rank don't put it over the limit + std::set possible_transfers; + + for (auto const& shared_id : shared_blocks_here) { + if (try_total_bytes + shared_block_size_[shared_id] < mem_thresh_) { + possible_transfers.insert(shared_id); + } + } + + // Now, we will greedily try to find a combo of objects that will reduce our + // max + + // We can prune some clusters out of this mix based on the requirements that + // this is beneficial + auto const amount_over_average = this_new_load_ - target_max_load_; + auto const amount_under_average = target_max_load_ - try_load; + + // Any sub-cluster that is smaller than amount_over_avergae or great than + // amount_under_average we can just skip. 
We start by skipping all entire + // clusters that don't fit this criteria since sub-clusters will also be + // eliminated from those + + std::set clusters_to_split; + + for (auto const& [src_shared_id, src_cluster] : cur_clusters_) { + auto const& [src_cluster_bytes, src_cluster_load] = src_cluster; + if ( + src_cluster_load < amount_over_average or + src_cluster_load > amount_under_average + ) { + // skip it + } else { + clusters_to_split.insert(src_shared_id); + } + } + + is_swapping_ = false; + + if (pending_actions_.size() > 0) { + auto action = pending_actions_.back(); + pending_actions_.pop_back(); + action(); + } +} + void TemperedLB::considerSwapsAfterLock(MsgSharedPtr msg) { is_swapping_ = true; @@ -1849,7 +1915,11 @@ void TemperedLB::lockObtained(LockedInfoMsg* in_msg) { auto action = [this, msg, cur_epoch]{ theMsg()->pushEpoch(cur_epoch); - considerSwapsAfterLock(msg); + if (is_subclustering_) { + considerSubClustersAfterLock(msg); + } else { + considerSwapsAfterLock(msg); + } theMsg()->popEpoch(cur_epoch); theTerm()->consume(cur_epoch); }; @@ -1899,7 +1969,92 @@ void TemperedLB::satisfyLockRequest() { } } +void TemperedLB::trySubClustering() { + is_subclustering_ = true; + n_transfers_swap_ = 0; + + auto lazy_epoch = theTerm()->makeEpochCollective("TemperedLB: subCluster"); + theTerm()->pushEpoch(lazy_epoch); + + auto const this_node = theContext()->getNode(); + + // Only ranks that are close to max should do this...otherwise its a waste + // Very aggressive to start. + if ( + auto n_iters = max_load_over_iters_.size(); + this_new_load_ / max_load_over_iters_[n_iters - 1] > 0.80 + ) { + for (auto const& [try_rank, try_clusters] : other_rank_clusters_) { + auto const try_num_clusters = try_clusters.size(); + + // Only target ranks where the rank has fewer clusters and are + // underloaded. Random constants for now + if (try_num_clusters < cur_clusters_.size()) { + if ( + auto target_rank_load = load_info_.find(try_rank)->second; + target_rank_load < target_max_load_ + ) { + // c-value is now the ratio of load compared to this rank. prefer + // ranks that have less load and have fewer clusters. + proxy_[try_rank].template send<&TemperedLB::tryLock>( + this_node, this_new_load_ / target_rank_load + ); + } + } + + } + + } else { + // do nothing--not loaded enough, may be a target to put load + } + + // Finalize epoch, we have sent our initial round of messages + // from here everything is message driven + theTerm()->finishedEpoch(lazy_epoch); + theTerm()->popEpoch(lazy_epoch); + vt::runSchedulerThrough(lazy_epoch); + + vt_debug_print( + normal, temperedlb, + "After subclustering iteration: total memory usage={}, shared blocks here={}, " + "memory_threshold={}, load={}\n", computeMemoryUsage(), + getSharedBlocksHere().size(), mem_thresh_, this_new_load_ + ); + + int n_rejected = 0; + + // Report on rejection rate in debug mode + if (theConfig()->vt_debug_temperedlb) { + runInEpochCollective("TemperedLB::swapClusters -> compute rejection", [=] { + proxy_.allreduce<&TemperedLB::rejectionStatsHandler, collective::PlusOp>( + n_rejected, n_transfers_swap_ + ); + }); + } +} + void TemperedLB::swapClusters() { + // Do the test to see if we should start sub-clustering. This is probably far + // too aggressive. We could check as an conservative check that requires more + // computation to see if a cluster is blocking progress. 
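To make the gating above concrete with hypothetical numbers: with mem_thresh_ at 8 GB and the locked rank reporting 7.5 GB, only shared blocks smaller than 0.5 GB pass the possible_transfers filter; and with the most recent reduced max load at 10.0, only ranks whose own load exceeds 8.0 clear the 0.80 ratio above (a deliberately aggressive starting value, per the comment) and try to shed a sub-cluster, while less loaded ranks stay passive as potential targets.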
+ if (auto const len = max_load_over_iters_.size(); len > 2) { + double const i1 = max_load_over_iters_[len-1]; + double const i2 = max_load_over_iters_[len-1]; + + vt_debug_print( + terse, temperedlb, + "swapClusters: check for subclustering: i1={}, i2={}," + " criteria=abs={} tol={}\n", + i1, i2, std::abs(i1 - i2), 0.01*i1 + ); + + // the max is mostly stable + if (std::abs(i1 - i2) < 0.01*i1) { + trySubClustering(); + return; + } + } + n_transfers_swap_ = 0; auto lazy_epoch = theTerm()->makeEpochCollective("TemperedLB: swapClusters"); diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h index 8add415f37..2396e36e31 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h @@ -224,11 +224,24 @@ struct TemperedLB : BaseLB { */ void considerSwapsAfterLock(MsgSharedPtr msg); + /** + * \brief Consider possible subcluster transfers with all the up-to-date info + * from a rank + * + * \param[in] msg update message with all the info + */ + void considerSubClustersAfterLock(MsgSharedPtr msg); + /** * \brief Release a lock on a rank */ void releaseLock(); + /** + * \brief Try sub-clustering---i.e., breaking up clusters to improve LB + */ + void trySubClustering(); + /** * \brief Give a cluster to a rank * @@ -375,6 +388,10 @@ struct TemperedLB : BaseLB { int n_transfers_swap_ = 0; /// Whether it's mid-swap or not bool is_swapping_ = false; + /// Max-load over ranks vector + std::vector max_load_over_iters_; + /// Whether we are sub-clustering + bool is_subclustering_ = false; }; }}}} /* end namespace vt::vrt::collection::lb */ From 68b4d7585cdb949a3ca2a5115175b9d8aa8b1e12 Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Wed, 6 Dec 2023 18:46:18 -0800 Subject: [PATCH 046/126] #2201: temperedlb: sub-clustering implemented, disabled by default for now --- .../balance/temperedlb/temperedlb.cc | 244 ++++++++++++++++-- .../balance/temperedlb/temperedlb.h | 20 +- 2 files changed, 240 insertions(+), 24 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index b905a299eb..8f50af7435 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -658,6 +658,7 @@ void TemperedLB::doLBStages(LoadType start_imb) { max_load_over_iters_.clear(); is_overloaded_ = is_underloaded_ = false; is_subclustering_ = false; + ready_to_satisfy_locks_ = false; LoadType best_imb_this_trial = start_imb + 10; @@ -680,6 +681,7 @@ void TemperedLB::doLBStages(LoadType start_imb) { load_info_.clear(); is_overloaded_ = is_underloaded_ = false; is_subclustering_ = false; + ready_to_satisfy_locks_ = false; other_rank_clusters_.clear(); // Not clearing shared_block_size_ because this never changes and @@ -1576,9 +1578,15 @@ void TemperedLB::originalTransfer() { void TemperedLB::tryLock(NodeType requesting_node, double criterion_value) { try_locks_.emplace(requesting_node, criterion_value); + + if (ready_to_satisfy_locks_ and not is_locked_) { + satisfyLockRequest(); + } } -auto TemperedLB::removeClusterToSend(SharedIDType shared_id) { +auto TemperedLB::removeClusterToSend( + SharedIDType shared_id, std::set objs +) { std::unordered_map give_objs; std::unordered_map give_obj_shared_block; std::unordered_map give_shared_blocks_size; @@ -1594,19 +1602,32 @@ auto TemperedLB::removeClusterToSend(SharedIDType shared_id) { 
give_shared_blocks_size[shared_id] = shared_block_size_[shared_id]; } - for (auto const& [obj_id, obj_load] : cur_objs_) { - if (auto iter = obj_shared_block_.find(obj_id); iter != obj_shared_block_.end()) { - if (iter->second == shared_id) { - give_objs[obj_id] = obj_load; - give_obj_shared_block[obj_id] = shared_id; - if ( - auto iter2 = give_obj_working_bytes.find(obj_id); - iter2 != give_obj_working_bytes.end() - ) { - give_obj_working_bytes[obj_id] = iter2->second; + if (objs.size() == 0) { + for (auto const& [obj_id, obj_load] : cur_objs_) { + if (auto iter = obj_shared_block_.find(obj_id); iter != obj_shared_block_.end()) { + if (iter->second == shared_id) { + give_objs[obj_id] = obj_load; + give_obj_shared_block[obj_id] = shared_id; + if ( + auto iter2 = give_obj_working_bytes.find(obj_id); + iter2 != give_obj_working_bytes.end() + ) { + give_obj_working_bytes[obj_id] = iter2->second; + } } } } + } else { + for (auto const& obj_id : objs) { + give_objs[obj_id] = cur_objs_.find(obj_id)->second; + give_obj_shared_block[obj_id] = shared_id; + if ( + auto iter2 = give_obj_working_bytes.find(obj_id); + iter2 != give_obj_working_bytes.end() + ) { + give_obj_working_bytes[obj_id] = iter2->second; + } + } } auto const blocks_here_before = getSharedBlocksHere(); @@ -1638,11 +1659,38 @@ auto TemperedLB::removeClusterToSend(SharedIDType shared_id) { void TemperedLB::considerSubClustersAfterLock(MsgSharedPtr msg) { is_swapping_ = true; - // auto const& try_clusters = msg->locked_clusters; - // auto const& try_rank = msg->locked_node; + auto criterion = [&,this](auto src_cluster, auto try_cluster) -> double { + auto const& [src_id, src_bytes, src_load] = src_cluster; + auto const& [try_rank, try_total_load, try_total_bytes] = try_cluster; + + auto const src_after_mem = current_memory_usage_; + auto const try_after_mem = try_total_bytes + src_bytes; + + if (src_after_mem > mem_thresh_ or try_after_mem > mem_thresh_) { + return - std::numeric_limits::infinity(); + } + + auto const before_work_src = this_new_load_; + auto const before_work_try = try_total_load; + auto const w_max_0 = std::max(before_work_src, before_work_try); + + auto const after_work_src = this_new_load_ - src_load; + auto const after_work_try = before_work_try + src_load; + auto const w_max_new = std::max(after_work_src, after_work_try); + + return w_max_0 - w_max_new; + }; + + auto const& try_clusters = msg->locked_clusters; + auto const& try_rank = msg->locked_node; auto const& try_load = msg->locked_load; auto const& try_total_bytes = msg->locked_bytes; + vt_print( + temperedlb, + "considerSubClustersAfterLock: try_rank={} try_load={}\n", try_rank, try_load + ); + // get the shared blocks current residing on this rank auto shared_blocks_here = getSharedBlocksHere(); @@ -1650,11 +1698,23 @@ void TemperedLB::considerSubClustersAfterLock(MsgSharedPtr msg) { std::set possible_transfers; for (auto const& shared_id : shared_blocks_here) { - if (try_total_bytes + shared_block_size_[shared_id] < mem_thresh_) { + // Allow shared blocks that don't put it over memory or already exist on + // try_rank + if (try_clusters.find(shared_id) == try_clusters.end()) { + if (try_total_bytes + shared_block_size_[shared_id] < mem_thresh_) { + possible_transfers.insert(shared_id); + } + } else { possible_transfers.insert(shared_id); } } + vt_print( + temperedlb, + "considerSubClustersAfterLock: possible_transfers={}\n", + possible_transfers.size() + ); + // Now, we will greedily try to find a combo of objects that will reduce our // max @@ -1663,18 
+1723,24 @@ void TemperedLB::considerSubClustersAfterLock(MsgSharedPtr msg) { auto const amount_over_average = this_new_load_ - target_max_load_; auto const amount_under_average = target_max_load_ - try_load; - // Any sub-cluster that is smaller than amount_over_avergae or great than + // Any sub-cluster that is smaller than amount_over_average or smaller than // amount_under_average we can just skip. We start by skipping all entire // clusters that don't fit this criteria since sub-clusters will also be // eliminated from those + vt_print( + temperedlb, + "considerSubClustersAfterLock: over={}, under={}\n", amount_over_average, + amount_under_average + ); + std::set clusters_to_split; for (auto const& [src_shared_id, src_cluster] : cur_clusters_) { auto const& [src_cluster_bytes, src_cluster_load] = src_cluster; if ( src_cluster_load < amount_over_average or - src_cluster_load > amount_under_average + src_cluster_load < amount_under_average ) { // skip it } else { @@ -1682,6 +1748,90 @@ void TemperedLB::considerSubClustersAfterLock(MsgSharedPtr msg) { } } + double best_c_try = -1.0; + std::set best_selected; + SharedIDType best_id = -1; + for (auto const& shared_id : clusters_to_split) { + auto const& [src_cluster_bytes, src_cluster_load] = cur_clusters_[shared_id]; + + std::set objs; + for (auto const& [obj_id, shared_id_obj] : obj_shared_block_) { + if (shared_id_obj == shared_id) { + objs.emplace(obj_id, cur_objs_[obj_id]); + } + } + + std::set selected; + LoadType load_sum = 0; + for (auto const& [obj_id, load] : objs) { + load_sum += load; + selected.insert(obj_id); + + // We will not consider empty cluster "swaps" here. + if (selected.size() != objs.size()) { + auto src_cluster_bytes_add = + try_clusters.find(shared_id) == try_clusters.end() ? 
src_cluster_bytes : 0; + + double c_try = criterion( + std::make_tuple(shared_id, src_cluster_bytes_add, load_sum), + std::make_tuple(try_rank, try_load, try_total_bytes) + ); + + vt_debug_print( + terse, temperedlb, + "testing a possible sub-cluster (rank {}): id={} load={} c_try={}, " + "amount over average={}, amount under average={}\n", + try_rank, shared_id, load_sum, c_try, amount_over_average, + amount_under_average + ); + + if (c_try > 0.0) { + best_c_try = c_try; + best_selected = selected; + best_id = shared_id; + } + } + } + } + + if (best_c_try > 0.0) { + vt_debug_print( + normal, temperedlb, + "best_c_try={}, picked subcluster with id={} for rank ={}\n", + best_c_try, best_id, try_rank + ); + + auto const& [ + give_objs, + give_obj_shared_block, + give_shared_blocks_size, + give_obj_working_bytes + ] = removeClusterToSend(best_id, best_selected); + + auto const this_node = theContext()->getNode(); + + runInEpochRooted("giveSubCluster", [&]{ + proxy_[try_rank].template send<&TemperedLB::giveCluster>( + this_node, + give_shared_blocks_size, + give_objs, + give_obj_shared_block, + give_obj_working_bytes, + -1 + ); + }); + + computeClusterSummary(); + + vt_debug_print( + normal, temperedlb, + "best_c_try={}, sub-cluster sent to rank={}\n", + best_c_try, try_rank + ); + } + + proxy_[try_rank].template send<&TemperedLB::releaseLock>(); + is_swapping_ = false; if (pending_actions_.size() > 0) { @@ -1905,9 +2055,9 @@ void TemperedLB::lockObtained(LockedInfoMsg* in_msg) { auto msg = promoteMsg(in_msg); vt_debug_print( - verbose, temperedlb, - "lockObtained: is_locked_={}\n", - is_locked_ + normal, temperedlb, + "lockObtained: is_locked_={}, is_subclustering_={}\n", + is_locked_, is_subclustering_ ); auto cur_epoch = theMsg()->getEpoch(); @@ -1978,22 +2128,54 @@ void TemperedLB::trySubClustering() { auto const this_node = theContext()->getNode(); + vt_print( + temperedlb, + "SUBcluster: load={} max_load={}\n", + this_new_load_, max_load_over_iters_.back() + ); + // Only ranks that are close to max should do this...otherwise its a waste // Very aggressive to start. if ( auto n_iters = max_load_over_iters_.size(); this_new_load_ / max_load_over_iters_[n_iters - 1] > 0.80 ) { + BytesType avg_cluster_bytes = 0; + for (auto const& [src_shared_id, src_cluster] : cur_clusters_) { + auto const& [src_cluster_bytes, src_cluster_load] = src_cluster; + avg_cluster_bytes += src_cluster_bytes; + } + avg_cluster_bytes /= cur_clusters_.size(); + for (auto const& [try_rank, try_clusters] : other_rank_clusters_) { - auto const try_num_clusters = try_clusters.size(); - // Only target ranks where the rank has fewer clusters and are - // underloaded. 
Random constants for now - if (try_num_clusters < cur_clusters_.size()) { + BytesType total_clusters_bytes = 0; + for (auto const& [try_shared_id, try_cluster] : try_clusters) { + auto const& [try_cluster_bytes, try_cluster_load] = try_cluster; + total_clusters_bytes += try_cluster_bytes; + } + + vt_print( + temperedlb, + "SUBcluster: load={} max_load={}, try_rank={}\n", + this_new_load_, max_load_over_iters_.back(), try_rank + ); + + + // Only target ranks where the target rank has room for the average + // cluster size that this rank has + if (total_clusters_bytes + avg_cluster_bytes < mem_thresh_) { if ( auto target_rank_load = load_info_.find(try_rank)->second; target_rank_load < target_max_load_ ) { + + vt_print( + temperedlb, + "SUBcluster: load={} max_load={}, try_rank={} sending lock\n", + this_new_load_, max_load_over_iters_.back(), try_rank + ); + // c-value is now the ratio of load compared to this rank. prefer // ranks that have less load and have fewer clusters. proxy_[try_rank].template send<&TemperedLB::tryLock>( @@ -2008,6 +2190,19 @@ void TemperedLB::trySubClustering() { // do nothing--not loaded enough, may be a target to put load } + // We have to be very careful here since we will allow some reentrancy here. + constexpr int turn_scheduler_times = 10; + for (int i = 0; i < turn_scheduler_times; i++) { + theSched()->runSchedulerOnceImpl(); + } + + while (not theSched()->workQueueEmpty()) { + theSched()->runSchedulerOnceImpl(); + } + + ready_to_satisfy_locks_ = true; + satisfyLockRequest(); + // Finalize epoch, we have sent our initial round of messages // from here everything is message driven theTerm()->finishedEpoch(lazy_epoch); @@ -2034,6 +2229,7 @@ void TemperedLB::trySubClustering() { } void TemperedLB::swapClusters() { +#if 0 // Do the test to see if we should start sub-clustering. This is probably far // too aggressive. We could check as an conservative check that requires more // computation to see if a cluster is blocking progress. 
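Both swapClusters() and trySubClustering() bound their message-driven phase with the same epoch pattern, assembled here from the calls visible in the hunks above (initial tryLock sends and handler bodies elided):

auto lazy_epoch = theTerm()->makeEpochCollective("TemperedLB: swapClusters"); // or "TemperedLB: subCluster"
theTerm()->pushEpoch(lazy_epoch);

// ... send the initial round of tryLock messages here ...

// Allow limited reentrancy so incoming lock requests can make progress while
// this rank's own requests are still in flight.
constexpr int turn_scheduler_times = 10;
for (int i = 0; i < turn_scheduler_times; i++) {
  theSched()->runSchedulerOnceImpl();
}
while (not theSched()->workQueueEmpty()) {
  theSched()->runSchedulerOnceImpl();
}

// Only now start granting locks to other ranks.
ready_to_satisfy_locks_ = true;
satisfyLockRequest();

// From here everything is message driven; drain the epoch to finish the phase.
theTerm()->finishedEpoch(lazy_epoch);
theTerm()->popEpoch(lazy_epoch);
vt::runSchedulerThrough(lazy_epoch);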
@@ -2054,6 +2250,7 @@ void TemperedLB::swapClusters() { return; } } +#endif n_transfers_swap_ = 0; @@ -2149,6 +2346,7 @@ void TemperedLB::swapClusters() { theSched()->runSchedulerOnceImpl(); } + ready_to_satisfy_locks_ = true; satisfyLockRequest(); // Finalize epoch, we have sent our initial round of messages diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h index 2396e36e31..8b96a76f6e 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h @@ -266,10 +266,12 @@ struct TemperedLB : BaseLB { * associated with removing the cluster * * \param[in] shared_id the shared ID of the cluster to remove + * \param[in] objs the set of objects to send with that shared ID (optional, + * if not specified then send all of them) * * \return a tuple with all the information to send to \c giveCluster */ - auto removeClusterToSend(SharedIDType shared_id); + auto removeClusterToSend(SharedIDType shared_id, std::set objs = {}); private: uint16_t f_ = 0; @@ -356,6 +358,20 @@ struct TemperedLB : BaseLB { } }; + struct ObjLoad { + ObjLoad(ObjIDType in_obj_id, LoadType in_load) + : obj_id(in_obj_id), + load(in_load) + { } + + ObjIDType obj_id = {}; + LoadType load = 0; + + double operator<(ObjLoad const& other) const { + return load < other.load; + } + }; + /// Whether we have memory information bool has_memory_data_ = false; /// Working bytes for this rank @@ -392,6 +408,8 @@ struct TemperedLB : BaseLB { std::vector max_load_over_iters_; /// Whether we are sub-clustering bool is_subclustering_ = false; + /// Ready to satify looks + bool ready_to_satisfy_locks_ = false; }; }}}} /* end namespace vt::vrt::collection::lb */ From d8f75d8d0c5ba41ae65546fbe7768d6abdd16c9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20P=2E=20P=C3=A9ba=C3=BF?= Date: Sat, 9 Dec 2023 08:53:15 -0500 Subject: [PATCH 047/126] #2201: reviewed algorithm and annotated for better legibility --- src/vt/vrt/collection/balance/temperedlb/temperedlb.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 8f50af7435..9e3d2fae28 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -1852,18 +1852,22 @@ void TemperedLB::considerSwapsAfterLock(MsgSharedPtr msg) { auto const src_after_mem = current_memory_usage_ - src_bytes + try_bytes; auto const try_after_mem = try_total_bytes + src_bytes - try_bytes; + // Check whether strict bounds on memory are satisfied if (src_after_mem > mem_thresh_ or try_after_mem > mem_thresh_) { return - std::numeric_limits::infinity(); } + // Compute maximum work of original arrangement auto const before_work_src = this_new_load_; auto const before_work_try = try_total_load; auto const w_max_0 = std::max(before_work_src, before_work_try); + // Compute maximum work of proposed new arrangement auto const after_work_src = this_new_load_ - src_load + try_load; auto const after_work_try = before_work_try + src_load - try_load; auto const w_max_new = std::max(after_work_src, after_work_try); + // Return criterion value return w_max_0 - w_max_new; }; @@ -2258,7 +2262,7 @@ void TemperedLB::swapClusters() { theTerm()->pushEpoch(lazy_epoch); auto criterion = [this](auto src_cluster, auto try_cluster) -> double { - // this does not handle empty cluster swaps + // FIXME: this 
does not swaps with an empty cluster auto const& [src_id, src_bytes, src_load] = src_cluster; auto const& [try_rank, try_id, try_bytes, try_load, try_mem] = try_cluster; @@ -2362,9 +2366,9 @@ void TemperedLB::swapClusters() { getSharedBlocksHere().size(), mem_thresh_, this_new_load_ ); - int n_rejected = 0; // Report on rejection rate in debug mode + int n_rejected = 0; if (theConfig()->vt_debug_temperedlb) { runInEpochCollective("TemperedLB::swapClusters -> compute rejection", [=] { proxy_.allreduce<&TemperedLB::rejectionStatsHandler, collective::PlusOp>( From c3d3369d5ddbb868d379f0f4ac673ae9412da1a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20P=2E=20P=C3=A9ba=C3=BF?= Date: Sat, 9 Dec 2023 09:40:08 -0500 Subject: [PATCH 048/126] #2201: added tempered criterion when ONLY load transfer is considered --- .../balance/temperedlb/temperedlb.cc | 20 +++++++++++++++++++ .../balance/temperedlb/temperedlb.h | 7 +++++++ 2 files changed, 27 insertions(+) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 9e3d2fae28..3295b0c8df 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -1656,6 +1656,22 @@ auto TemperedLB::removeClusterToSend( ); } +double loadTransferCriterion(std::tuple in_values){ + // Compute maximum work of original arrangement + auto const before_w_src = std::get<0>(in_values); + auto const before_w_dst = std::get<1>(in_values); + auto const w_max_0 = std::max(before_w_src, before_w_dst); + + // Compute maximum work of arrangement after load transfer + auto const src_l = std::get<2>(in_values); + auto const after_w_src = before_w_src - src_l; + auto const after_w_dst = before_w_dst + src_l; + auto const w_max_new = std::max(after_w_src, after_w_dst); + + // Return criterion value + return w_max_0 - w_max_new; +} // double loadTransferCriterion + void TemperedLB::considerSubClustersAfterLock(MsgSharedPtr msg) { is_swapping_ = true; @@ -1666,18 +1682,22 @@ void TemperedLB::considerSubClustersAfterLock(MsgSharedPtr msg) { auto const src_after_mem = current_memory_usage_; auto const try_after_mem = try_total_bytes + src_bytes; + // Check whether strict bounds on memory are satisfied if (src_after_mem > mem_thresh_ or try_after_mem > mem_thresh_) { return - std::numeric_limits::infinity(); } + // Compute maximum work of original arrangement auto const before_work_src = this_new_load_; auto const before_work_try = try_total_load; auto const w_max_0 = std::max(before_work_src, before_work_try); + // Compute maximum work of proposed new arrangement auto const after_work_src = this_new_load_ - src_load; auto const after_work_try = before_work_try + src_load; auto const w_max_new = std::max(after_work_src, after_work_try); + // Return criterion value return w_max_0 - w_max_new; }; diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h index 8b96a76f6e..458a820388 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h @@ -217,6 +217,13 @@ struct TemperedLB : BaseLB { */ void lockObtained(LockedInfoMsg* msg); + /** + * \brief Compute tempered criterion when only load transfer is considered + * + * \param[in] in_value contains: source work, destination work, transferred load + */ + double loadTransferCriterion(std::tuple in_values); + /** * \brief Consider possible swaps with all the up-to-date 
info from a rank * From 794b9c5365208fb3736649f1aaca571161bc9b66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20P=2E=20P=C3=A9ba=C3=BF?= Date: Sat, 9 Dec 2023 15:32:10 -0500 Subject: [PATCH 049/126] #2201: factored out computations of load-based tempered criterion --- .../balance/temperedlb/temperedlb.cc | 44 +++---------------- 1 file changed, 7 insertions(+), 37 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 3295b0c8df..4285590dfe 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -1656,7 +1656,7 @@ auto TemperedLB::removeClusterToSend( ); } -double loadTransferCriterion(std::tuple in_values){ +double TemperedLB::loadTransferCriterion(std::tuple in_values){ // Compute maximum work of original arrangement auto const before_w_src = std::get<0>(in_values); auto const before_w_dst = std::get<1>(in_values); @@ -1687,18 +1687,8 @@ void TemperedLB::considerSubClustersAfterLock(MsgSharedPtr msg) { return - std::numeric_limits::infinity(); } - // Compute maximum work of original arrangement - auto const before_work_src = this_new_load_; - auto const before_work_try = try_total_load; - auto const w_max_0 = std::max(before_work_src, before_work_try); - - // Compute maximum work of proposed new arrangement - auto const after_work_src = this_new_load_ - src_load; - auto const after_work_try = before_work_try + src_load; - auto const w_max_new = std::max(after_work_src, after_work_try); - - // Return criterion value - return w_max_0 - w_max_new; + // Return load transfer criterion + return loadTransferCriterion(std::make_tuple(this_new_load_, try_total_load, src_load)); }; auto const& try_clusters = msg->locked_clusters; @@ -1877,18 +1867,8 @@ void TemperedLB::considerSwapsAfterLock(MsgSharedPtr msg) { return - std::numeric_limits::infinity(); } - // Compute maximum work of original arrangement - auto const before_work_src = this_new_load_; - auto const before_work_try = try_total_load; - auto const w_max_0 = std::max(before_work_src, before_work_try); - - // Compute maximum work of proposed new arrangement - auto const after_work_src = this_new_load_ - src_load + try_load; - auto const after_work_try = before_work_try + src_load - try_load; - auto const w_max_new = std::max(after_work_src, after_work_try); - - // Return criterion value - return w_max_0 - w_max_new; + // Return load transfer criterion + return loadTransferCriterion(std::make_tuple(this_new_load_, try_total_load, src_load)); }; auto const& try_clusters = msg->locked_clusters; @@ -2291,18 +2271,8 @@ void TemperedLB::swapClusters() { return - std::numeric_limits::infinity(); } - // Compute maximum work of original arrangement - auto const before_work_src = this_new_load_; - auto const before_work_try = load_info_.find(try_rank)->second; - auto const w_max_0 = std::max(before_work_src, before_work_try); - - // Compute maximum work of proposed new arrangement - auto const after_work_src = this_new_load_ - src_load + try_load; - auto const after_work_try = before_work_try + src_load - try_load; - auto const w_max_new = std::max(after_work_src, after_work_try); - - // Return criterion value - return w_max_0 - w_max_new; + // Return load transfer criterion + return loadTransferCriterion(std::make_tuple(this_new_load_, load_info_.find(try_rank)->second, src_load)); }; auto const this_node = theContext()->getNode(); From e5e7b4739ea843036f6b549ecc40ff31b2e3295f Mon 
Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20P=2E=20P=C3=A9ba=C3=BF?= Date: Sat, 9 Dec 2023 15:42:48 -0500 Subject: [PATCH 050/126] #2201: trailing whitespace cleanup --- src/vt/vrt/collection/balance/temperedlb/temperedlb.cc | 2 +- src/vt/vrt/collection/balance/temperedlb/temperedlb.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 4285590dfe..ed41a79efb 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -1667,7 +1667,7 @@ double TemperedLB::loadTransferCriterion(std::tuple in_v auto const after_w_src = before_w_src - src_l; auto const after_w_dst = before_w_dst + src_l; auto const w_max_new = std::max(after_w_src, after_w_dst); - + // Return criterion value return w_max_0 - w_max_new; } // double loadTransferCriterion diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h index 458a820388..a09ba25515 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h @@ -223,7 +223,7 @@ struct TemperedLB : BaseLB { * \param[in] in_value contains: source work, destination work, transferred load */ double loadTransferCriterion(std::tuple in_values); - + /** * \brief Consider possible swaps with all the up-to-date info from a rank * From 4bd78e01e5a6298c5dd9e61f4934a6aa3eb5e7fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20P=2E=20P=C3=A9ba=C3=BF?= Date: Mon, 11 Dec 2023 08:01:02 -0500 Subject: [PATCH 051/126] #2201: temperedlb: dedicated method for memory component of criterion --- .../collection/balance/temperedlb/temperedlb.cc | 14 ++++++++++---- .../vrt/collection/balance/temperedlb/temperedlb.h | 10 +++++++++- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index ed41a79efb..ea211f48b3 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -1656,7 +1656,15 @@ auto TemperedLB::removeClusterToSend( ); } -double TemperedLB::loadTransferCriterion(std::tuple in_values){ +bool TemperedLB::memoryTransferCriterion(double try_total_bytes, double src_bytes) { + // FIXME: incomplete implementation that ignores memory regrouping + auto const src_after_mem = this->current_memory_usage_; + auto const try_after_mem = try_total_bytes + src_bytes; + + return not (src_after_mem > this->mem_thresh_ or try_after_mem > this->mem_thresh_); +} // bool memoryTransferCriterion + +double TemperedLB::loadTransferCriterion(std::tuple in_values) { // Compute maximum work of original arrangement auto const before_w_src = std::get<0>(in_values); auto const before_w_dst = std::get<1>(in_values); @@ -1679,11 +1687,9 @@ void TemperedLB::considerSubClustersAfterLock(MsgSharedPtr msg) { auto const& [src_id, src_bytes, src_load] = src_cluster; auto const& [try_rank, try_total_load, try_total_bytes] = try_cluster; - auto const src_after_mem = current_memory_usage_; - auto const try_after_mem = try_total_bytes + src_bytes; // Check whether strict bounds on memory are satisfied - if (src_after_mem > mem_thresh_ or try_after_mem > mem_thresh_) { + if (not memoryTransferCriterion(try_total_bytes, src_bytes)) { return - std::numeric_limits::infinity(); } diff --git 
a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h index a09ba25515..2bf7d2f263 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h @@ -218,7 +218,15 @@ struct TemperedLB : BaseLB { void lockObtained(LockedInfoMsg* msg); /** - * \brief Compute tempered criterion when only load transfer is considered + * \brief Compute memory component of tempered transfer criterion + * + * \param[in] try_total_bytes: total memory bytes on target rank + * \param[in] src_bytes: memory bytes to be transferred from source rank + */ + bool memoryTransferCriterion(double try_total_bytes, double src_bytes); + + /** + * \brief Compute load component of tempered transfer criterion * * \param[in] in_value contains: source work, destination work, transferred load */ From 25a5c0c2e80bd4354da3e2e9db3c470690fdeefb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20P=2E=20P=C3=A9ba=C3=BF?= Date: Tue, 12 Dec 2023 16:52:22 -0500 Subject: [PATCH 052/126] #2201: temperedlb: fixed load criterion implementation and usage --- .../balance/temperedlb/temperedlb.cc | 19 ++++++++----------- .../balance/temperedlb/temperedlb.h | 7 +++++-- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index ea211f48b3..5ea2157f9f 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -1664,16 +1664,13 @@ bool TemperedLB::memoryTransferCriterion(double try_total_bytes, double src_byte return not (src_after_mem > this->mem_thresh_ or try_after_mem > this->mem_thresh_); } // bool memoryTransferCriterion -double TemperedLB::loadTransferCriterion(std::tuple in_values) { +double TemperedLB::loadTransferCriterion(double before_w_src, double before_w_dst, double src_l, double dst_l) { // Compute maximum work of original arrangement - auto const before_w_src = std::get<0>(in_values); - auto const before_w_dst = std::get<1>(in_values); auto const w_max_0 = std::max(before_w_src, before_w_dst); - // Compute maximum work of arrangement after load transfer - auto const src_l = std::get<2>(in_values); - auto const after_w_src = before_w_src - src_l; - auto const after_w_dst = before_w_dst + src_l; + // Compute maximum work of arrangement after proposed load transfer + auto const after_w_src = before_w_src - src_l + dst_l; + auto const after_w_dst = before_w_dst + src_l - dst_l; auto const w_max_new = std::max(after_w_src, after_w_dst); // Return criterion value @@ -1694,7 +1691,7 @@ void TemperedLB::considerSubClustersAfterLock(MsgSharedPtr msg) { } // Return load transfer criterion - return loadTransferCriterion(std::make_tuple(this_new_load_, try_total_load, src_load)); + return loadTransferCriterion(this_new_load_, try_total_load, src_load, 0.); }; auto const& try_clusters = msg->locked_clusters; @@ -1874,7 +1871,7 @@ void TemperedLB::considerSwapsAfterLock(MsgSharedPtr msg) { } // Return load transfer criterion - return loadTransferCriterion(std::make_tuple(this_new_load_, try_total_load, src_load)); + return loadTransferCriterion(this_new_load_, try_total_load, src_load, try_load); }; auto const& try_clusters = msg->locked_clusters; @@ -2272,13 +2269,13 @@ void TemperedLB::swapClusters() { auto const& [src_id, src_bytes, src_load] = src_cluster; auto const& [try_rank, try_id, try_bytes, try_load, try_mem] = try_cluster; - 
// Check whether strict bounds on memory are satisfied + // Necessary but not sufficient check regarding memory bounds if (try_mem - try_bytes + src_bytes > mem_thresh_) { return - std::numeric_limits::infinity(); } // Return load transfer criterion - return loadTransferCriterion(std::make_tuple(this_new_load_, load_info_.find(try_rank)->second, src_load)); + return loadTransferCriterion(this_new_load_, load_info_.find(try_rank)->second, src_load, try_load); }; auto const this_node = theContext()->getNode(); diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h index 2bf7d2f263..84a808b98f 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h @@ -228,9 +228,12 @@ struct TemperedLB : BaseLB { /** * \brief Compute load component of tempered transfer criterion * - * \param[in] in_value contains: source work, destination work, transferred load + * \param[in] before_w_src: original work on source rank + * \param[in] before_w_dst: original work on destination rank + * \param[in] src_l: sum of object loads to be transferred from source + * \param[in] dst_l: sum of object loads to be transferred from destination */ - double loadTransferCriterion(std::tuple in_values); + double loadTransferCriterion(double before_w_src, double before_w_dst, double src_l, double dst_l); /** * \brief Consider possible swaps with all the up-to-date info from a rank From 40f0d360fb725c67285900f3baf8d6aea56005c7 Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Tue, 12 Dec 2023 09:51:27 -0800 Subject: [PATCH 053/126] #2201: temperedlb: add ordered locking protocol --- src/vt/vrt/collection/balance/temperedlb/temperedlb.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 5ea2157f9f..86b33ee7c3 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -2040,7 +2040,7 @@ void TemperedLB::giveCluster( void TemperedLB::releaseLock() { vt_debug_print( - verbose, temperedlb, + normal, temperedlb, "releaseLock: pending size={}\n", pending_actions_.size() ); @@ -2081,11 +2081,13 @@ void TemperedLB::lockObtained(LockedInfoMsg* in_msg) { theTerm()->consume(cur_epoch); }; - if (is_locked_) { + if (is_locked_ && locking_rank_ <= msg->locked_node) { proxy_[msg->locked_node].template send<&TemperedLB::releaseLock>(); theTerm()->consume(cur_epoch); try_locks_.emplace(msg->locked_node, msg->locked_c_try); //pending_actions_.push_back(action); + } else if (is_locked_) { + pending_actions_.push_back(action); } else if (is_swapping_) { pending_actions_.push_back(action); } else { From 6e8e8cfa78bf96b5db75bc9da8c8fd3d741ab999 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20P=2E=20P=C3=A9ba=C3=BF?= Date: Tue, 19 Dec 2023 13:36:36 -0500 Subject: [PATCH 054/126] #2201: temperedlb: fixed index of phase -2 --- src/vt/vrt/collection/balance/temperedlb/temperedlb.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 86b33ee7c3..d7d7574931 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -2244,7 +2244,7 @@ void TemperedLB::swapClusters() { // computation to see if a cluster 
is blocking progress. if (auto const len = max_load_over_iters_.size(); len > 2) { double const i1 = max_load_over_iters_[len-1]; - double const i2 = max_load_over_iters_[len-1]; + double const i2 = max_load_over_iters_[len-2]; vt_debug_print( terse, temperedlb, From 8c343f9f67a367c1798b729cbdbbb03a8be5e888 Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Mon, 15 Jan 2024 16:31:13 -0800 Subject: [PATCH 055/126] #2201: temperedlb: fix tracing by disabling it during temperedlb --- .../vrt/collection/balance/temperedlb/temperedlb.cc | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index d7d7574931..0a446231e1 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -514,7 +514,18 @@ void TemperedLB::runLB(LoadType total_load) { // Perform load rebalancing when deemed necessary if (should_lb) { - doLBStages(imb); +#if vt_check_enabled(trace_enabled) + theTrace()->disableTracing(); +#endif + + runInEpochCollective("doLBStaged", [&,this]{ + auto this_node = theContext()->getNode(); + proxy_[this_node].template send<&TemperedLB::doLBStages>(imb); + }); + +#if vt_check_enabled(trace_enabled) + theTrace()->enableTracing(); +#endif } } From 33719c76b659aca4b01f4c9f8bb719fe810d51cc Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Mon, 15 Jan 2024 16:31:50 -0800 Subject: [PATCH 056/126] #2201: temperedlb: read in comm edges and make graph symmetric --- .../balance/model/composed_model.cc | 4 + .../collection/balance/model/composed_model.h | 1 + .../vrt/collection/balance/model/load_model.h | 11 +++ .../vrt/collection/balance/model/raw_data.cc | 9 +++ .../vrt/collection/balance/model/raw_data.h | 1 + .../balance/temperedlb/temperedlb.cc | 74 +++++++++++++++++++ .../balance/temperedlb/temperedlb.h | 9 +++ 7 files changed, 109 insertions(+) diff --git a/src/vt/vrt/collection/balance/model/composed_model.cc b/src/vt/vrt/collection/balance/model/composed_model.cc index 1090662414..f7b9249692 100644 --- a/src/vt/vrt/collection/balance/model/composed_model.cc +++ b/src/vt/vrt/collection/balance/model/composed_model.cc @@ -102,4 +102,8 @@ int ComposedModel::getNumSubphases() const { return base_->getNumSubphases(); } +CommMapType ComposedModel::getComm(PhaseOffset when) const { + return base_->getComm(when); +} + }}}} diff --git a/src/vt/vrt/collection/balance/model/composed_model.h b/src/vt/vrt/collection/balance/model/composed_model.h index 9330afda65..d8b4f707c7 100644 --- a/src/vt/vrt/collection/balance/model/composed_model.h +++ b/src/vt/vrt/collection/balance/model/composed_model.h @@ -77,6 +77,7 @@ class ComposedModel : public LoadModel bool hasUserData() const override; ElmUserDataType getUserData(ElementIDStruct object, PhaseOffset when) const override; unsigned int getNumPastPhasesNeeded(unsigned int look_back) const override; + CommMapType getComm(PhaseOffset offset) const override; ObjectIterator begin() const override; diff --git a/src/vt/vrt/collection/balance/model/load_model.h b/src/vt/vrt/collection/balance/model/load_model.h index e58d6e5468..99d2172c62 100644 --- a/src/vt/vrt/collection/balance/model/load_model.h +++ b/src/vt/vrt/collection/balance/model/load_model.h @@ -215,6 +215,17 @@ struct LoadModel */ virtual void updateLoads(PhaseType last_completed_phase) = 0; + /** + * \brief Provide all the comm info for a given phase + * + * \param[in] when the 
interval in which comm is desired + * + * \return the comm info + */ + virtual CommMapType getComm(PhaseOffset when) const { + return CommMapType{}; + } + /** * \brief Provide an estimate of the given object's load during a specified interval * diff --git a/src/vt/vrt/collection/balance/model/raw_data.cc b/src/vt/vrt/collection/balance/model/raw_data.cc index c0209bdd39..b5199ef4b0 100644 --- a/src/vt/vrt/collection/balance/model/raw_data.cc +++ b/src/vt/vrt/collection/balance/model/raw_data.cc @@ -130,6 +130,15 @@ ElmUserDataType RawData::getUserData(ElementIDStruct object, PhaseOffset offset) } } +CommMapType RawData::getComm(PhaseOffset offset) const { + auto phase = getNumCompletedPhases() + offset.phases; + if (auto it = proc_comm_->find(phase); it != proc_comm_->end()) { + return it->second; + } else { + return CommMapType{}; + } +} + unsigned int RawData::getNumPastPhasesNeeded(unsigned int look_back) const { return look_back; diff --git a/src/vt/vrt/collection/balance/model/raw_data.h b/src/vt/vrt/collection/balance/model/raw_data.h index 6d7ec73c21..c7d6bef224 100644 --- a/src/vt/vrt/collection/balance/model/raw_data.h +++ b/src/vt/vrt/collection/balance/model/raw_data.h @@ -64,6 +64,7 @@ struct RawData : public LoadModel { LoadType getRawLoad(ElementIDStruct object, PhaseOffset when) const override; bool hasUserData() const override { return user_data_ != nullptr; } ElmUserDataType getUserData(ElementIDStruct object, PhaseOffset when) const override; + CommMapType getComm(PhaseOffset when) const override; void setLoads(std::unordered_map const* proc_load, std::unordered_map const* proc_comm, diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 0a446231e1..cf6bb56b50 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -684,6 +684,65 @@ void TemperedLB::doLBStages(LoadType start_imb) { cur_objs_[obj] = getModeledValue(obj); } } + + send_edges_.clear(); + recv_edges_.clear(); + bool has_comm = false; + auto const& comm = load_model_->getComm( + {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE} + ); + // vt_print(temperedlb, "comm size={} {}\n", comm.size(), typeid(load_model_).name()); + + for (auto const& [key, volume] : comm) { + // vt_print(temperedlb, "Found comm: volume={}\n", volume.bytes); + // Skip self edges + if (key.selfEdge()) { + continue; + } + + if (key.commCategory() == elm::CommCategory::SendRecv) { + auto const from_obj = key.fromObj(); + auto const to_obj = key.toObj(); + auto const bytes = volume.bytes; + + send_edges_[from_obj].emplace_back(to_obj, bytes); + recv_edges_[to_obj].emplace_back(from_obj, bytes); + has_comm = true; + } + } + + runInEpochCollective("checkIfEdgesExist", [&]{ + proxy_.allreduce<&TemperedLB::hasCommAny, collective::OrOp>(has_comm); + }); + + if (has_comm_any_) { + runInEpochCollective("symmEdges", [&]{ + std::unordered_map edges; + + for (auto const& [from_obj, to_edges] : send_edges_) { + for (auto const& [to_obj, volume] : to_edges) { + vt_print( + temperedlb, + "SymmEdges: from={}, to={}, volume={}\n", + from_obj, to_obj, volume + ); + auto curr_from_node = from_obj.getCurrNode(); + if (curr_from_node != this_node) { + edges[curr_from_node][from_obj].emplace_back(to_obj, volume); + } + auto curr_to_node = to_obj.getCurrNode(); + if (curr_to_node != this_node) { + edges[curr_to_node][from_obj].emplace_back(to_obj, volume); + } + } + } + + for (auto const& 
[dest_node, edge_map] : edges) { + proxy_[dest_node].template send<&TemperedLB::giveEdges>(edge_map); + } + }); + } + this_new_load_ = this_load; } else { // Clear out data structures from previous iteration @@ -831,6 +890,8 @@ void TemperedLB::doLBStages(LoadType start_imb) { // Clear out for next try or for not migrating by default cur_objs_.clear(); + send_edges_.clear(); + recv_edges_.clear(); this_new_load_ = this_load; } @@ -859,6 +920,19 @@ void TemperedLB::doLBStages(LoadType start_imb) { thunkMigrations(); } +void TemperedLB::giveEdges(EdgeMapType const& edge_map) { + for (auto const& [from_obj, to_edges] : edge_map) { + for (auto const& [to_obj, volume] : to_edges) { + send_edges_[from_obj].emplace_back(to_obj, volume); + recv_edges_[to_obj].emplace_back(from_obj, volume); + } + } +} + +void TemperedLB::hasCommAny(bool has_comm_any) { + has_comm_any_ = has_comm_any; +} + void TemperedLB::loadStatsHandler(std::vector const& vec) { auto const& in = vec[0]; new_imbalance_ = in.I(); diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h index 84a808b98f..43960f51ed 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h @@ -67,6 +67,9 @@ struct TemperedLB : BaseLB { using ReduceMsgType = vt::collective::ReduceNoneMsg; using QuantityType = std::map; using StatisticMapType = std::unordered_map; + using EdgeMapType = std::unordered_map< + elm::ElementIDStruct, std::vector> + >; TemperedLB() = default; TemperedLB(TemperedLB const&) = delete; @@ -338,6 +341,8 @@ struct TemperedLB : BaseLB { std::unordered_set underloaded_ = {}; std::unordered_set new_underloaded_ = {}; std::unordered_map cur_objs_ = {}; + EdgeMapType send_edges_; + EdgeMapType recv_edges_; LoadType this_new_load_ = 0.0; LoadType new_imbalance_ = 0.0; LoadType target_max_load_ = 0.0; @@ -355,7 +360,11 @@ struct TemperedLB : BaseLB { std::mt19937 gen_sample_; StatisticMapType stats; LoadType this_load = 0.0f; + /// Whether any node has communication data + bool has_comm_any_ = false; + void hasCommAny(bool has_comm_any); + void giveEdges(EdgeMapType const& edge_map); ////////////////////////////////////////////////////////////////////////////// // All the memory info (may or may not be present) From d72582b9dc30d18dca2513861b862cd2ef2b92f2 Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Mon, 15 Jan 2024 17:10:41 -0800 Subject: [PATCH 057/126] #2201: temperedlb: add work model and computation for it --- .../balance/temperedlb/temperedlb.cc | 32 +++++++++++++++++++ .../balance/temperedlb/temperedlb.h | 15 +++++++++ 2 files changed, 47 insertions(+) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index cf6bb56b50..99ae04027a 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -281,6 +281,30 @@ Values: Defaut: 0 Description: The memory threshold TemperedLB should strictly stay under which is respected if memory information is present in the user-defined data. 
+)" + }, + { + "alpha", + R"( +Values: +Defaut: 1.0 +Description: α in the work model +)" + }, + { + "beta", + R"( +Values: +Defaut: 1.0 +Description: β in the work model +)" + }, + { + "gamma", + R"( +Values: +Defaut: 1.0 +Description: γ in the work model )" } }; @@ -381,6 +405,10 @@ void TemperedLB::inputParams(balance::ConfigEntry* config) { vtAbort(s); } + α = config->getOrDefault("alpha", α); + β = config->getOrDefault("beta", β); + γ = config->getOrDefault("gamma", γ); + num_iters_ = config->getOrDefault("iters", num_iters_); num_trials_ = config->getOrDefault("trials", num_trials_); @@ -1741,6 +1769,10 @@ auto TemperedLB::removeClusterToSend( ); } +double TemperedLB::computeWork(double load, double comm_bytes) const { + return α * load + β * comm_bytes + γ; +} + bool TemperedLB::memoryTransferCriterion(double try_total_bytes, double src_bytes) { // FIXME: incomplete implementation that ignores memory regrouping auto const src_after_mem = this->current_memory_usage_; diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h index 43960f51ed..f8899a5451 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h @@ -238,6 +238,18 @@ struct TemperedLB : BaseLB { */ double loadTransferCriterion(double before_w_src, double before_w_dst, double src_l, double dst_l); + /** + * \brief Compute the amount of work based on the work model + * + * \note Model: α * load + β * comm_bytes + γ + * + * \param[in] load the load for a rank + * \param[in] comm_bytes the external communication + * + * \return the amount of work + */ + double computeWork(double load, double comm_bytes) const; + /** * \brief Consider possible swaps with all the up-to-date info from a rank * @@ -355,6 +367,9 @@ struct TemperedLB : BaseLB { KnowledgeEnum knowledge_ = KnowledgeEnum::Log; bool setup_done_ = false; bool propagate_next_round_ = false; + double α = 1.0; + double β = 0.0; + double γ = 0.0; std::vector propagated_k_; std::mt19937 gen_propagate_; std::mt19937 gen_sample_; From e4ee44c63f132a9b20e8914479beac724e2dc7a6 Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Tue, 16 Jan 2024 16:22:03 -0800 Subject: [PATCH 058/126] #2201: temperedlb: add shared IDs to element communication --- src/vt/elm/elm_comm.h | 27 ++++++++++-- src/vt/elm/elm_lb_data.cc | 18 ++++++++ src/vt/elm/elm_lb_data.h | 3 ++ .../vrt/collection/balance/lb_data_holder.cc | 44 +++++++++++++++++++ src/vt/vrt/collection/balance/node_lb_data.cc | 3 ++ 5 files changed, 91 insertions(+), 4 deletions(-) diff --git a/src/vt/elm/elm_comm.h b/src/vt/elm/elm_comm.h index 692d06700a..cdc2b2fc97 100644 --- a/src/vt/elm/elm_comm.h +++ b/src/vt/elm/elm_comm.h @@ -58,7 +58,9 @@ enum struct CommCategory : int8_t { CollectionToNodeBcast = 5, NodeToCollectionBcast = 6, CollectiveToCollectionBcast = 7, - LocalInvoke = 8 + LocalInvoke = 8, + WriteShared = 9, + ReadOnlyShared = 10 }; inline NodeType objGetNode(ElementIDStruct const id) { @@ -71,6 +73,8 @@ struct CommKey { struct CollectionTag { }; struct CollectionToNodeTag { }; struct NodeToCollectionTag { }; + struct WriteSharedTag { }; + struct ReadOnlySharedTag { }; CommKey() = default; CommKey(CommKey const&) = default; @@ -107,12 +111,25 @@ struct CommKey { cat_(bcast ? 
CommCategory::NodeToCollectionBcast : CommCategory::NodeToCollection) { } + CommKey( + WriteSharedTag, + NodeType in_home, int in_shared_id + ) : nto_(in_home), shared_id_(in_shared_id), cat_(CommCategory::WriteShared) + { } + + CommKey( + ReadOnlySharedTag, + NodeType in_home, int in_shared_id + ) : nto_(in_home), shared_id_(in_shared_id), cat_(CommCategory::ReadOnlyShared) + { } + ElementIDStruct from_ = {}; ElementIDStruct to_ = {}; ElementIDStruct edge_id_ = {}; NodeType nfrom_ = uninitialized_destination; NodeType nto_ = uninitialized_destination; + int shared_id_ = -1; CommCategory cat_ = CommCategory::SendRecv; ElementIDStruct fromObj() const { return from_; } @@ -121,6 +138,7 @@ struct CommKey { ElementIDType toNode() const { return nto_; } ElementIDStruct edgeID() const { return edge_id_; } CommCategory commCategory() const { return cat_; } + int sharedID() const { return shared_id_; } bool selfEdge() const { return cat_ == CommCategory::SendRecv and from_ == to_; } bool offNode() const { @@ -140,12 +158,12 @@ struct CommKey { return k.from_ == from_ and k.to_ == to_ and k.nfrom_ == nfrom_ and k.nto_ == nto_ and - k.cat_ == cat_; + k.cat_ == cat_ and k.shared_id_ == shared_id_; } template void serialize(SerializerT& s) { - s | from_ | to_ | nfrom_ | nto_ | cat_ | edge_id_; + s | from_ | to_ | nfrom_ | nto_ | cat_ | edge_id_ | shared_id_; } }; @@ -189,7 +207,8 @@ struct hash { size_t operator()(vt::elm::CommKey const& in) const { return std::hash()( std::hash()(in.from_) ^ - std::hash()(in.to_) ^ in.nfrom_ ^ in.nto_ + std::hash()(in.to_) ^ in.nfrom_ ^ in.nto_ ^ + in.shared_id_ ); } }; diff --git a/src/vt/elm/elm_lb_data.cc b/src/vt/elm/elm_lb_data.cc index 71796b3b60..4387d8c3fd 100644 --- a/src/vt/elm/elm_lb_data.cc +++ b/src/vt/elm/elm_lb_data.cc @@ -86,6 +86,24 @@ void ElementLBData::sendToEntity( sendComm(key, bytes); } +void ElementLBData::addWritableSharedID( + NodeType home, int shared_id, double bytes +) { + elm::CommKey key(elm::CommKey::WriteSharedTag{}, home, shared_id); + phase_comm_[cur_phase_][key].sendMsg(bytes); + subphase_comm_[cur_phase_].resize(cur_subphase_ + 1); + subphase_comm_[cur_phase_].at(cur_subphase_)[key].sendMsg(bytes); +} + +void ElementLBData::addReadOnlySharedID( + NodeType home, int shared_id, double bytes +) { + elm::CommKey key(elm::CommKey::ReadOnlySharedTag{}, home, shared_id); + phase_comm_[cur_phase_][key].sendMsg(bytes); + subphase_comm_[cur_phase_].resize(cur_subphase_ + 1); + subphase_comm_[cur_phase_].at(cur_subphase_)[key].sendMsg(bytes); +} + void ElementLBData::sendComm(elm::CommKey key, double bytes) { phase_comm_[cur_phase_][key].sendMsg(bytes); subphase_comm_[cur_phase_].resize(cur_subphase_ + 1); diff --git a/src/vt/elm/elm_lb_data.h b/src/vt/elm/elm_lb_data.h index 01b6c4c8b6..e7b0c3e3f3 100644 --- a/src/vt/elm/elm_lb_data.h +++ b/src/vt/elm/elm_lb_data.h @@ -72,6 +72,9 @@ struct ElementLBData { void sendToEntity(ElementIDStruct to, ElementIDStruct from, double bytes); void sendComm(elm::CommKey key, double bytes); + void addWritableSharedID(NodeType home, int shared_id, double bytes); + void addReadOnlySharedID(NodeType home, int shared_id, double bytes); + void recvComm(elm::CommKey key, double bytes); void recvObjData( ElementIDStruct to_perm, diff --git a/src/vt/vrt/collection/balance/lb_data_holder.cc b/src/vt/vrt/collection/balance/lb_data_holder.cc index 701d3d27bb..9a7797eff6 100644 --- a/src/vt/vrt/collection/balance/lb_data_holder.cc +++ b/src/vt/vrt/collection/balance/lb_data_holder.cc @@ -296,6 +296,22 @@ 
std::unique_ptr LBDataHolder::toJson(PhaseType phase) const { outputEntity(j["communications"][i]["from"], key.fromObj()); break; } + case elm::CommCategory::ReadOnlyShared: { + j["communications"][i]["type"] = "ReadOnlyShared"; + j["communications"][i]["to"]["type"] = "node"; + j["communications"][i]["to"]["id"] = key.toNode(); + j["communications"][i]["from"]["type"] = "shared_id"; + j["communications"][i]["from"]["id"] = key.sharedID(); + break; + } + case elm::CommCategory::WriteShared: { + j["communications"][i]["type"] = "ReadOnlyShared"; + j["communications"][i]["to"]["type"] = "node"; + j["communications"][i]["to"]["id"] = key.toNode(); + j["communications"][i]["from"]["type"] = "shared_id"; + j["communications"][i]["from"]["id"] = key.sharedID(); + break; + } case elm::CommCategory::LocalInvoke: case elm::CommCategory::CollectiveToCollectionBcast: // not currently supported @@ -476,6 +492,34 @@ LBDataHolder::LBDataHolder(nlohmann::json const& j) ); CommVolume vol{bytes, messages}; this->node_comm_[id][key] = vol; + } else if ( + type == "ReadOnlyShared" or type == "WriteShared" + ) { + vtAssertExpr(comm["from"]["type"] == "shared_id"); + vtAssertExpr(comm["to"]["type"] == "node"); + + CommVolume vol{bytes, messages}; + auto to_node = comm["to"]["id"]; + vtAssertExpr(to_node.is_number()); + + auto from_shared_id = comm["from"]["id"]; + vtAssertExpr(from_shared_id.is_number()); + + if (type == "ReadOnlyShared") { + CommKey key( + CommKey::ReadOnlySharedTag{}, + static_cast(to_node), + static_cast(from_shared_id) + ); + this->node_comm_[id][key] = vol; + } else { + CommKey key( + CommKey::WriteSharedTag{}, + static_cast(to_node), + static_cast(from_shared_id) + ); + this->node_comm_[id][key] = vol; + } } } } diff --git a/src/vt/vrt/collection/balance/node_lb_data.cc b/src/vt/vrt/collection/balance/node_lb_data.cc index 6805f1e933..71e72e4aed 100644 --- a/src/vt/vrt/collection/balance/node_lb_data.cc +++ b/src/vt/vrt/collection/balance/node_lb_data.cc @@ -273,6 +273,9 @@ getRecvSendDirection(elm::CommKeyType const& comm) { case elm::CommCategory::CollectiveToCollectionBcast: case elm::CommCategory::LocalInvoke: return std::make_pair(ElementIDType{}, ElementIDType{}); + case elm::CommCategory::WriteShared: + case elm::CommCategory::ReadOnlyShared: + return std::make_pair(ElementIDType{}, ElementIDType{}); } vtAssert(false, "Invalid balance::CommCategory enum value"); From befcef5429f998ee48c11b6a3a1023510deee0cf Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Tue, 16 Jan 2024 16:31:37 -0800 Subject: [PATCH 059/126] #2201: temperedlb: add getter for rank-based LB data and elm ID for communicating to it --- src/vt/messaging/active.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/vt/messaging/active.h b/src/vt/messaging/active.h index 554b89b12a..73db04f58b 100644 --- a/src/vt/messaging/active.h +++ b/src/vt/messaging/active.h @@ -1722,6 +1722,18 @@ struct ActiveMessenger : runtime::component::PollableComponent MsgSizeType const msg_size ); + /** + * \brief Get the rank-based LB data along with element ID for rank-based work + * + * \return tuple with pointers to each one + */ + auto getRankLBData() { + return std::make_tuple( + &bare_handler_dummy_elm_id_for_lb_data_, + &bare_handler_lb_data_ + ); + } + private: # if vt_check_enabled(trace_enabled) trace::UserEventIDType trace_irecv = trace::no_user_event_id; From 66f2e323dc97d9a34bf1b214f69d8ec346c611c1 Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Tue, 16 Jan 2024 16:45:13 -0800 Subject: [PATCH 
060/126] #2201: temperedlb: add computation for work model for given distribution --- src/vt/elm/elm_id.cc | 5 + src/vt/elm/elm_id.h | 1 + .../balance/temperedlb/temperedlb.cc | 132 +++++++++++++++--- .../balance/temperedlb/temperedlb.h | 26 +++- 4 files changed, 146 insertions(+), 18 deletions(-) diff --git a/src/vt/elm/elm_id.cc b/src/vt/elm/elm_id.cc index 089ed2953e..a9303a2f3e 100644 --- a/src/vt/elm/elm_id.cc +++ b/src/vt/elm/elm_id.cc @@ -41,6 +41,7 @@ //@HEADER */ +#include "vt/context/context.h" #include "vt/elm/elm_id.h" #include "vt/elm/elm_id_bits.h" @@ -58,4 +59,8 @@ NodeType ElementIDStruct::getCurrNode() const { return curr_node; } +bool ElementIDStruct::isLocatedOnThisNode() const { + return theContext()->getNode() == curr_node and not isMigratable(); +} + }} /* end namespace vt::elm */ diff --git a/src/vt/elm/elm_id.h b/src/vt/elm/elm_id.h index d4baf3446a..17fd2a6db3 100644 --- a/src/vt/elm/elm_id.h +++ b/src/vt/elm/elm_id.h @@ -78,6 +78,7 @@ struct ElementIDStruct { bool isMigratable() const; NodeType getHomeNode() const; NodeType getCurrNode() const; + bool isLocatedOnThisNode() const; }; diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 99ae04027a..3894253dcf 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -288,7 +288,7 @@ respected if memory information is present in the user-defined data. R"( Values: Defaut: 1.0 -Description: α in the work model +Description: α in the work model (load in work model) )" }, { @@ -296,7 +296,7 @@ Description: α in the work model R"( Values: Defaut: 1.0 -Description: β in the work model +Description: β in the work model (inter-node communication in work model) )" }, { @@ -304,7 +304,15 @@ Description: β in the work model R"( Values: Defaut: 1.0 -Description: γ in the work model +Description: γ in the work model (constant in work model) +)" + }, + { + "delta", + R"( +Values: +Defaut: 1.0 +Description: δ in the work model (intra-node communication in work model) )" } }; @@ -408,6 +416,7 @@ void TemperedLB::inputParams(balance::ConfigEntry* config) { α = config->getOrDefault("alpha", α); β = config->getOrDefault("beta", β); γ = config->getOrDefault("gamma", γ); + δ = config->getOrDefault("delta", δ); num_iters_ = config->getOrDefault("iters", num_iters_); num_trials_ = config->getOrDefault("trials", num_trials_); @@ -546,7 +555,7 @@ void TemperedLB::runLB(LoadType total_load) { theTrace()->disableTracing(); #endif - runInEpochCollective("doLBStaged", [&,this]{ + runInEpochCollective("doLBStages", [&,this]{ auto this_node = theContext()->getNode(); proxy_[this_node].template send<&TemperedLB::doLBStages>(imb); }); @@ -673,6 +682,79 @@ std::set TemperedLB::getSharedBlocksHere() const { return blocks_here; } +void TemperedLB::workStatsHandler(std::vector const& vec) { + auto const& work = vec[1]; + work_mean_ = work.avg(); + work_max_ = work.max(); + new_work_imbalance_ = work.I(); +} + +double TemperedLB::computeWork( + double load, double inter_comm_bytes, double intra_comm_bytes +) const { + return α * load + β * inter_comm_bytes + δ * intra_comm_bytes + γ; +} + +double TemperedLB::computeRankWork( + std::set exclude, std::set include +) { + auto const load = this_new_load_; + + // Communication bytes sent/recv'ed within the rank + double intra_rank_bytes_sent = 0, intra_rank_bytes_recv = 0; + // Communication bytes sent/recv'ed off rank + double inter_rank_bytes_sent = 0, 
inter_rank_bytes_recv = 0; + + for (auto const& [obj, _] : cur_objs_) { + if (auto it = send_edges_.find(obj); it != send_edges_.end()) { + for (auto const& [target, volume] : it->second) { + vt_debug_print( + verbose, temperedlb, + "computeRankWork: send obj={}, target={}\n", + obj, target + ); + if ( + cur_objs_.find(target) != cur_objs_.end() or + target.isLocatedOnThisNode() + ) { + intra_rank_bytes_sent += volume; + } else { + inter_rank_bytes_sent += volume; + } + } + } + if (auto it = recv_edges_.find(obj); it != recv_edges_.end()) { + for (auto const& [target, volume] : it->second) { + vt_debug_print( + verbose, temperedlb, + "computeRankWork: recv obj={}, target={}\n", + obj, target + ); + if ( + cur_objs_.find(target) != cur_objs_.end() or + target.isLocatedOnThisNode() + ) { + intra_rank_bytes_recv += volume; + } else { + inter_rank_bytes_recv += volume; + } + } + } + } + + vt_print( + temperedlb, + "computeRankWork: intra sent={}, recv={}, inter sent={}, recv={}\n", + intra_rank_bytes_sent, intra_rank_bytes_recv, + inter_rank_bytes_sent, inter_rank_bytes_recv + ); + + auto const inter_vol = std::max(inter_rank_bytes_sent, inter_rank_bytes_recv); + auto const intra_vol = std::max(intra_rank_bytes_sent, intra_rank_bytes_recv); + + return computeWork(load, inter_vol, intra_vol); +} + void TemperedLB::doLBStages(LoadType start_imb) { decltype(this->cur_objs_) best_objs; LoadType best_load = 0; @@ -708,9 +790,7 @@ void TemperedLB::doLBStages(LoadType start_imb) { // Copy this node's object assignments to a local, mutable copy cur_objs_.clear(); for (auto obj : *load_model_) { - if (obj.isMigratable()) { - cur_objs_[obj] = getModeledValue(obj); - } + cur_objs_[obj] = getModeledValue(obj); } send_edges_.clear(); @@ -772,6 +852,18 @@ void TemperedLB::doLBStages(LoadType start_imb) { } this_new_load_ = this_load; + this_work = this_new_work_ = computeRankWork(); + + runInEpochCollective("TemperedLB::doLBStages -> Rank_load_modeled", [=] { + // Perform the reduction for Rank_load_modeled -> processor load only + proxy_.allreduce<&TemperedLB::workStatsHandler, collective::PlusOp>( + std::vector{ + {balance::LoadData{Statistic::Rank_load_modeled, this_new_load_}}, + {balance::LoadData{Statistic::Rank_strategy_specific_load_modeled, this_new_work_}} + } + ); + }); + } else { // Clear out data structures from previous iteration selected_.clear(); @@ -884,11 +976,13 @@ void TemperedLB::doLBStages(LoadType start_imb) { (iter_ == num_iters_ - 1) || transfer_type_ == TransferTypeEnum::SwapClusters ) { + this_new_work_ = computeRankWork(); runInEpochCollective("TemperedLB::doLBStages -> Rank_load_modeled", [=] { // Perform the reduction for Rank_load_modeled -> processor load only proxy_.allreduce<&TemperedLB::loadStatsHandler, collective::PlusOp>( std::vector{ - {balance::LoadData{Statistic::Rank_load_modeled, this_new_load_}} + {balance::LoadData{Statistic::Rank_load_modeled, this_new_load_}}, + {balance::LoadData{Statistic::Rank_strategy_specific_load_modeled, this_new_work_}} } ); }); @@ -963,22 +1057,32 @@ void TemperedLB::hasCommAny(bool has_comm_any) { void TemperedLB::loadStatsHandler(std::vector const& vec) { auto const& in = vec[0]; + auto const& work = vec[1]; new_imbalance_ = in.I(); + work_mean_ = work.avg(); + work_max_ = work.max(); + new_work_imbalance_ = work.I(); + max_load_over_iters_.push_back(in.max()); auto this_node = theContext()->getNode(); if (this_node == 0) { vt_debug_print( terse, temperedlb, - "TemperedLB::loadStatsHandler: trial={} iter={} max={} min={} " - "avg={} 
pole={} imb={:0.4f}\n", - trial_, iter_, LoadType(in.max()), + "TemperedLB::loadStatsHandler: trial={} iter={}" + " Load[max={:0.2f} min={:0.2f} avg={:0.2f} pole={:0.2f} imb={:0.4f}] " + " Work[max={:0.2f} min={:0.2f} avg={:0.2f} imb={:0.4f}]\n", + trial_, iter_, + LoadType(in.max()), LoadType(in.min()), LoadType(in.avg()), LoadType(stats.at( lb::Statistic::Object_load_modeled ).at(lb::StatisticQuantity::max)), - in.I() + in.I(), + LoadType(work.max()), + LoadType(work.min()), LoadType(work.avg()), + work.I() ); } } @@ -1769,10 +1873,6 @@ auto TemperedLB::removeClusterToSend( ); } -double TemperedLB::computeWork(double load, double comm_bytes) const { - return α * load + β * comm_bytes + γ; -} - bool TemperedLB::memoryTransferCriterion(double try_total_bytes, double src_bytes) { // FIXME: incomplete implementation that ignores memory regrouping auto const src_after_mem = this->current_memory_usage_; diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h index f8899a5451..facaa734b3 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h @@ -118,6 +118,7 @@ struct TemperedLB : BaseLB { void lazyMigrateObjsTo(EpochType epoch, NodeType node, ObjsType const& objs); void inLazyMigrations(balance::LazyMigrationMsg* msg); void loadStatsHandler(std::vector const& vec); + void workStatsHandler(std::vector const& vec); void rejectionStatsHandler(int n_rejected, int n_transfers); void thunkMigrations(); @@ -241,14 +242,29 @@ struct TemperedLB : BaseLB { /** * \brief Compute the amount of work based on the work model * - * \note Model: α * load + β * comm_bytes + γ + * \note Model: α * load + β * inter_comm_bytes + δ * intra_comm_bytes + γ * * \param[in] load the load for a rank * \param[in] comm_bytes the external communication * * \return the amount of work */ - double computeWork(double load, double comm_bytes) const; + double computeWork( + double load, double inter_comm_bytes, double intra_comm_bytes + ) const; + + /** + * \brief Compute the rank's work + * + * \param[in] exclude a set of objects to exclude that are in cur_objs_ + * \param[in] include a set of objects to include that are not in cur_objs_ + * + * \return the amount of work currently for the set of objects + */ + double computeRankWork( + std::set exclude = {}, + std::set include = {} + ); /** * \brief Consider possible swaps with all the up-to-date info from a rank @@ -356,7 +372,11 @@ struct TemperedLB : BaseLB { EdgeMapType send_edges_; EdgeMapType recv_edges_; LoadType this_new_load_ = 0.0; + LoadType this_new_work_ = 0.0; LoadType new_imbalance_ = 0.0; + LoadType new_work_imbalance_ = 0.0; + LoadType work_mean_ = 0.0; + LoadType work_max_ = 0.0; LoadType target_max_load_ = 0.0; CriterionEnum criterion_ = CriterionEnum::ModifiedGrapevine; InformTypeEnum inform_type_ = InformTypeEnum::AsyncInform; @@ -370,11 +390,13 @@ struct TemperedLB : BaseLB { double α = 1.0; double β = 0.0; double γ = 0.0; + double δ = 0.0; std::vector propagated_k_; std::mt19937 gen_propagate_; std::mt19937 gen_sample_; StatisticMapType stats; LoadType this_load = 0.0f; + LoadType this_work = 0.0f; /// Whether any node has communication data bool has_comm_any_ = false; From 479ec2a1607f0f9d31a18a82db83346a743a5bcd Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Tue, 16 Jan 2024 16:54:40 -0800 Subject: [PATCH 061/126] #2201: temperedlb: add rank working bytes to the inform to improve approximate criterion --- 
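For reference, the work model wired up in the two preceding patches (PATCH 057 and 060 in this series) reduces to
W = alpha * load + beta * inter_rank_bytes + delta * intra_rank_bytes + gamma,
with the coefficients read from the LB config keys "alpha", "beta", "delta", and "gamma" in inputParams. The standalone sketch below only mirrors that combination for illustration; WorkModelSketch and the numeric values in the usage comment are made up and are not code from this patch series. The zeta term for shared-memory edges only enters later, in PATCH 065/126.

// Illustrative mock of the per-rank work evaluation introduced in PATCH
// 057/060; not the TemperedLB implementation itself.
struct WorkModelSketch {
  double alpha = 1.0;  // weight on computational load
  double beta  = 0.0;  // weight on off-rank (inter-rank) communication bytes
  double gamma = 0.0;  // constant offset per rank
  double delta = 0.0;  // weight on on-rank (intra-rank) communication bytes

  double operator()(double load, double inter_bytes, double intra_bytes) const {
    return alpha * load + beta * inter_bytes + delta * intra_bytes + gamma;
  }
};

// Example (hypothetical values): a rank with 12.5 units of load, 2e6 bytes
// crossing the rank boundary, and 8e6 bytes staying on-rank; with beta=1e-7
// and delta=1e-8 the work evaluates to 12.5 + 0.2 + 0.08 = 12.78.
//   double w = WorkModelSketch{1.0, 1e-7, 0.0, 1e-8}(12.5, 2e6, 8e6);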
.../collection/balance/temperedlb/tempered_msgs.h | 11 ++++++++--- .../vrt/collection/balance/temperedlb/temperedlb.cc | 12 ++++++++---- .../vrt/collection/balance/temperedlb/temperedlb.h | 2 ++ 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/tempered_msgs.h b/src/vt/vrt/collection/balance/temperedlb/tempered_msgs.h index 526bc2bd9b..505156d936 100644 --- a/src/vt/vrt/collection/balance/temperedlb/tempered_msgs.h +++ b/src/vt/vrt/collection/balance/temperedlb/tempered_msgs.h @@ -55,6 +55,7 @@ using SharedIDType = int; using BytesType = double; using ClusterSummaryType = std::unordered_map>; +using RankSummaryType = std::tuple; } /* end namespace vt::vrt::collection::lb */ @@ -66,7 +67,7 @@ struct LoadMsg : vt::Message { using NodeLoadType = std::unordered_map; using NodeClusterSummaryType = - std::unordered_map; + std::unordered_map; LoadMsg() = default; LoadMsg(NodeType in_from_node, NodeLoadType const& in_node_load) @@ -85,8 +86,12 @@ struct LoadMsg : vt::Message { node_load_[node] = load; } - void addNodeClusters(NodeType node, lb::ClusterSummaryType summary) { - node_cluster_summary_[node] = summary; + void addNodeClusters( + NodeType node, + lb::BytesType rank_working_bytes, + lb::ClusterSummaryType summary + ) { + node_cluster_summary_[node] = std::make_tuple(rank_working_bytes, summary); } NodeType getFromNode() const { return from_node_; } diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 3894253dcf..abbad8d336 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -1283,7 +1283,7 @@ void TemperedLB::propagateRound(uint8_t k_cur, bool sync, EpochType epoch) { } msg->addNodeLoad(this_node, this_new_load_); if (has_memory_data_) { - msg->addNodeClusters(this_node, cur_clusters_); + msg->addNodeClusters(this_node, rank_bytes_, cur_clusters_); } proxy_[random_node].sendMsg< LoadMsgSync, &TemperedLB::propagateIncomingSync @@ -1296,7 +1296,7 @@ void TemperedLB::propagateRound(uint8_t k_cur, bool sync, EpochType epoch) { } msg->addNodeLoad(this_node, this_new_load_); if (has_memory_data_) { - msg->addNodeClusters(this_node, cur_clusters_); + msg->addNodeClusters(this_node, rank_bytes_, cur_clusters_); } proxy_[random_node].sendMsg< LoadMsgAsync, &TemperedLB::propagateIncomingAsync @@ -1317,12 +1317,14 @@ void TemperedLB::propagateIncomingAsync(LoadMsgAsync* msg) { ); auto const this_node = theContext()->getNode(); - for (auto const& [node, clusters] : msg->getNodeClusterSummary()) { + for (auto const& [node, rank_summary] : msg->getNodeClusterSummary()) { if ( node != this_node and other_rank_clusters_.find(node) == other_rank_clusters_.end() ) { + auto const& [rank_working_bytes, clusters] = rank_summary; other_rank_clusters_[node] = clusters; + other_rank_working_bytes_[node] = rank_working_bytes; } } @@ -1361,12 +1363,14 @@ void TemperedLB::propagateIncomingSync(LoadMsgSync* msg) { ); auto const this_node = theContext()->getNode(); - for (auto const& [node, clusters] : msg->getNodeClusterSummary()) { + for (auto const& [node, rank_summary] : msg->getNodeClusterSummary()) { if ( node != this_node and other_rank_clusters_.find(node) == other_rank_clusters_.end() ) { + auto const& [rank_working_bytes, clusters] = rank_summary; other_rank_clusters_[node] = clusters; + other_rank_working_bytes_[node] = rank_working_bytes; } } diff --git 
a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h index facaa734b3..eb2a0613b8 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h @@ -450,6 +450,8 @@ struct TemperedLB : BaseLB { ClusterSummaryType cur_clusters_; /// Clusters that we know of on other ranks (might be out of date) std::unordered_map other_rank_clusters_; + /// Working bytes for ranks we know about (never out of date) + std::unordered_map other_rank_working_bytes_; /// User-defined memory threshold BytesType mem_thresh_ = 0; /// The max working bytes for an object currently residing here From 4a07f8c6feb9980cddb4cee60bb0851a90e3dc6b Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Tue, 16 Jan 2024 16:56:40 -0800 Subject: [PATCH 062/126] #2201: temperedlb: use exact working bytes for approximation of memory usage --- src/vt/vrt/collection/balance/temperedlb/temperedlb.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index abbad8d336..95865d862f 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -2507,8 +2507,9 @@ void TemperedLB::swapClusters() { for (auto const& [try_rank, try_clusters] : other_rank_clusters_) { bool found_potential_good_swap = false; - // Approximate roughly the memory usage on the target - BytesType try_approx_mem_usage = rank_bytes_; + // Approximate the memory usage on the target + BytesType try_approx_mem_usage = + other_rank_working_bytes_.find(try_rank)->second; for (auto const& [try_shared_id, try_cluster] : try_clusters) { auto const& [try_cluster_bytes, _] = try_cluster; try_approx_mem_usage += try_cluster_bytes; From 986d08e76d9e1abc14588d86477134e92d3f50fb Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Wed, 17 Jan 2024 10:27:40 -0800 Subject: [PATCH 063/126] #2201: temperedlb: make getRankLBData public --- src/vt/messaging/active.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/vt/messaging/active.h b/src/vt/messaging/active.h index 73db04f58b..8721080054 100644 --- a/src/vt/messaging/active.h +++ b/src/vt/messaging/active.h @@ -1722,6 +1722,7 @@ struct ActiveMessenger : runtime::component::PollableComponent MsgSizeType const msg_size ); +public: /** * \brief Get the rank-based LB data along with element ID for rank-based work * From 77d738031aff7ec5a9283df4dc13eadd2941dc53 Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Wed, 17 Jan 2024 10:29:37 -0800 Subject: [PATCH 064/126] #2201: temperedlb: fix copy-paste error in json type --- src/vt/vrt/collection/balance/lb_data_holder.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vt/vrt/collection/balance/lb_data_holder.cc b/src/vt/vrt/collection/balance/lb_data_holder.cc index 9a7797eff6..74bdf92aa1 100644 --- a/src/vt/vrt/collection/balance/lb_data_holder.cc +++ b/src/vt/vrt/collection/balance/lb_data_holder.cc @@ -305,7 +305,7 @@ std::unique_ptr LBDataHolder::toJson(PhaseType phase) const { break; } case elm::CommCategory::WriteShared: { - j["communications"][i]["type"] = "ReadOnlyShared"; + j["communications"][i]["type"] = "WriteShared"; j["communications"][i]["to"]["type"] = "node"; j["communications"][i]["to"]["id"] = key.toNode(); j["communications"][i]["from"]["type"] = "shared_id"; From 89fc45838d1b0b52957d2b4dd5666269e1406462 Mon Sep 17 
00:00:00 2001 From: Jonathan Lifflander Date: Wed, 17 Jan 2024 15:19:18 -0800 Subject: [PATCH 065/126] #2201: temperedlb: fully implement the new work model for cluster swaps --- .../balance/temperedlb/tempered_msgs.h | 95 ++- .../balance/temperedlb/temperedlb.cc | 595 +++++++++++++----- .../balance/temperedlb/temperedlb.h | 70 ++- 3 files changed, 555 insertions(+), 205 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/tempered_msgs.h b/src/vt/vrt/collection/balance/temperedlb/tempered_msgs.h index 505156d936..5bcadb9458 100644 --- a/src/vt/vrt/collection/balance/temperedlb/tempered_msgs.h +++ b/src/vt/vrt/collection/balance/temperedlb/tempered_msgs.h @@ -46,6 +46,8 @@ #include "vt/config.h" +#include + #include #include @@ -53,37 +55,104 @@ namespace vt::vrt::collection::lb { using SharedIDType = int; using BytesType = double; -using ClusterSummaryType = - std::unordered_map>; + +struct ClusterInfo { + LoadType load = 0; + BytesType bytes = 0; + double intra_send_vol = 0, intra_recv_vol = 0; + std::unordered_map inter_send_vol, inter_recv_vol; + NodeType home_node = uninitialized_destination; + BytesType edge_weight = 0; + + template + void serialize(SerializerT& s) { + s | load | bytes | intra_send_vol | intra_recv_vol; + s | inter_send_vol | inter_recv_vol; + s | home_node | edge_weight; + } +}; + +struct NodeInfo { + LoadType load = 0; + LoadType work = 0; + double inter_send_vol = 0, inter_recv_vol = 0; + double intra_send_vol = 0, intra_recv_vol = 0; + double shared_vol = 0; + + template + void serialize(SerializerT& s) { + s | load | work; + s | inter_send_vol | inter_recv_vol; + s | intra_send_vol | intra_recv_vol; + s | shared_vol; + } +}; + +using ClusterSummaryType = std::unordered_map; using RankSummaryType = std::tuple; } /* end namespace vt::vrt::collection::lb */ +namespace fmt { + +/// Custom fmt formatter/print for \c vt::vrt::collection::lb::ClusterInfo +template <> +struct formatter<::vt::vrt::collection::lb::ClusterInfo> { + /// Parses format specifications of the form ['x' | 'd' | 'b']. + auto parse(format_parse_context& ctx) -> decltype(ctx.begin()) { + // Parse the presentation format and store it in the formatter: + auto it = ctx.begin(), end = ctx.end(); + + // Check if reached the end of the range: + if (it != end && *it != '}') { + throw format_error("invalid format"); + } + + // Return an iterator past the end of the parsed range: + return it; + } + + /// Formats the epoch using the parsed format specification (presentation) + /// stored in this formatter. 
+ template + auto format( + ::vt::vrt::collection::lb::ClusterInfo const& e, FormatContext& ctx + ) { + auto fmt_str = "(load={},bytes={},intra=({},{})),home={},edge={}"; + return format_to( + ctx.out(), fmt_str, e.load, e.bytes, e.intra_send_vol, e.intra_recv_vol, + e.home_node, e.edge_weight + ); + } +}; + +} /* end namespace fmt */ + namespace vt { namespace vrt { namespace collection { namespace balance { struct LoadMsg : vt::Message { using MessageParentType = vt::Message; vt_msg_serialize_required(); // node_load_ - using NodeLoadType = std::unordered_map; using NodeClusterSummaryType = std::unordered_map; + using NodeInfoType = std::unordered_map; LoadMsg() = default; - LoadMsg(NodeType in_from_node, NodeLoadType const& in_node_load) - : from_node_(in_from_node), node_load_(in_node_load) + LoadMsg(NodeType in_from_node, NodeInfoType const& in_node_info) + : from_node_(in_from_node), node_info_(in_node_info) { } - NodeLoadType const& getNodeLoad() const { - return node_load_; + NodeInfoType const& getNodeInfo() const { + return node_info_; } NodeClusterSummaryType const& getNodeClusterSummary() const { return node_cluster_summary_; } - void addNodeLoad(NodeType node, LoadType load) { - node_load_[node] = load; + void addNodeInfo(NodeType node, lb::NodeInfo info) { + node_info_[node] = info; } void addNodeClusters( @@ -100,13 +169,13 @@ struct LoadMsg : vt::Message { void serialize(SerializerT& s) { MessageParentType::serialize(s); s | from_node_; - s | node_load_; + s | node_info_; s | node_cluster_summary_; } private: NodeType from_node_ = uninitialized_destination; - NodeLoadType node_load_ = {}; + NodeInfoType node_info_ = {}; NodeClusterSummaryType node_cluster_summary_ = {}; }; @@ -116,9 +185,9 @@ struct LoadMsgAsync : LoadMsg { LoadMsgAsync() = default; LoadMsgAsync( - NodeType in_from_node, NodeLoadType const& in_node_load, int round + NodeType in_from_node, NodeInfoType const& in_node_info, int round ) - : LoadMsg(in_from_node, in_node_load), round_(round) + : LoadMsg(in_from_node, in_node_info), round_(round) { } uint8_t getRound() const { diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 95865d862f..a1aff60462 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -313,6 +313,14 @@ Description: γ in the work model (constant in work model) Values: Defaut: 1.0 Description: δ in the work model (intra-node communication in work model) +)" + }, + { + "zeta", + R"( +Values: +Defaut: 1.0 +Description: ζ in the work model (shared-memory-edges in work model) )" } }; @@ -413,10 +421,11 @@ void TemperedLB::inputParams(balance::ConfigEntry* config) { vtAbort(s); } - α = config->getOrDefault("alpha", α); - β = config->getOrDefault("beta", β); - γ = config->getOrDefault("gamma", γ); - δ = config->getOrDefault("delta", δ); + α = config->getOrDefault("alpha", α); + β = config->getOrDefault("beta", β); + γ = config->getOrDefault("gamma", γ); + δ = config->getOrDefault("delta", δ); + ζ = config->getOrDefault("zeta", ζ); num_iters_ = config->getOrDefault("iters", num_iters_); num_trials_ = config->getOrDefault("trials", num_trials_); @@ -567,7 +576,8 @@ void TemperedLB::runLB(LoadType total_load) { } void TemperedLB::readClustersMemoryData() { - if (load_model_->hasUserData()) { + auto const this_node = theContext()->getNode(); + if (load_model_->hasUserData()) { for (auto obj : *load_model_) { if (obj.isMigratable()) { auto data_map = 
load_model_->getUserData( @@ -621,6 +631,9 @@ void TemperedLB::readClustersMemoryData() { obj_shared_block_[obj] = shared_id; obj_working_bytes_[obj] = working_bytes; shared_block_size_[shared_id] = shared_bytes; + + // @todo: remove this hack once we have good data + shared_block_edge_[shared_id] = std::make_tuple(this_node, shared_bytes); } } } @@ -628,17 +641,77 @@ void TemperedLB::readClustersMemoryData() { void TemperedLB::computeClusterSummary() { cur_clusters_.clear(); + + auto const this_node = theContext()->getNode(); + for (auto const& [shared_id, shared_bytes] : shared_block_size_) { - LoadType cluster_load = 0; + auto const& [home_node, shared_volume] = shared_block_edge_[shared_id]; + + ClusterInfo info; + info.bytes = shared_bytes; + info.home_node = home_node; + info.edge_weight = shared_volume; + + std::set cluster_objs; for (auto const& [obj_id, obj_load] : cur_objs_) { if (auto iter = obj_shared_block_.find(obj_id); iter != obj_shared_block_.end()) { if (iter->second == shared_id) { - cluster_load += obj_load; + cluster_objs.insert(obj_id); + info.load += obj_load; } } } - if (cluster_load != 0) { - cur_clusters_[shared_id] = std::make_tuple(shared_bytes, cluster_load); + + if (info.load != 0) { + for (auto&& obj : cluster_objs) { + if (auto it = send_edges_.find(obj); it != send_edges_.end()) { + for (auto const& [target, volume] : it->second) { + vt_debug_print( + verbose, temperedlb, + "computeClusterSummary: send obj={}, target={}\n", + obj, target + ); + + if (cluster_objs.find(target) != cluster_objs.end()) { + // intra-cluster edge + info.intra_send_vol += volume; + } else if ( + cur_objs_.find(target) != cur_objs_.end() or + target.isLocatedOnThisNode() + ) { + // intra-rank edge + info.inter_send_vol[this_node] += volume; + } else { + // inter-rank edge + info.inter_send_vol[target.getCurrNode()] += volume; + } + } + } + if (auto it = recv_edges_.find(obj); it != recv_edges_.end()) { + for (auto const& [target, volume] : it->second) { + vt_debug_print( + verbose, temperedlb, + "computeClusterSummary: recv obj={}, target={}\n", + obj, target + ); + if (cluster_objs.find(target) != cluster_objs.end()) { + // intra-cluster edge + info.intra_recv_vol += volume; + } else if ( + cur_objs_.find(target) != cur_objs_.end() or + target.isLocatedOnThisNode() + ) { + // intra-rank edge + info.inter_recv_vol[this_node] += volume; + } else { + // inter-rank edge + info.inter_recv_vol[target.getCurrNode()] += volume; + } + } + } + } + + cur_clusters_.emplace(shared_id, std::move(info)); } } } @@ -653,9 +726,6 @@ BytesType TemperedLB::computeMemoryUsage() { } // Compute max object size - // @todo: Slight issue here that this will only count migratable objects - // (those contained in cur_objs), for our current use case this is not a - // problem, but it should include the max of non-migratable double max_object_working_bytes = 0; for (auto const& [obj_id, _] : cur_objs_) { if (obj_working_bytes_.find(obj_id) != obj_working_bytes_.end()) { @@ -690,69 +760,189 @@ void TemperedLB::workStatsHandler(std::vector const& vec) { } double TemperedLB::computeWork( - double load, double inter_comm_bytes, double intra_comm_bytes + double load, double inter_comm_bytes, double intra_comm_bytes, + double shared_comm_bytes ) const { - return α * load + β * inter_comm_bytes + δ * intra_comm_bytes + γ; + // The work model based on input parameters + return + α * load + + β * inter_comm_bytes + + δ * intra_comm_bytes + + ζ * shared_comm_bytes + + γ; } -double TemperedLB::computeRankWork( - std::set 
exclude, std::set include +WorkBreakdown TemperedLB::computeWorkBreakdown( + NodeType node, + std::unordered_map const& objs, + std::set const& exclude, + std::unordered_map const& include ) { - auto const load = this_new_load_; + double load = 0; // Communication bytes sent/recv'ed within the rank double intra_rank_bytes_sent = 0, intra_rank_bytes_recv = 0; // Communication bytes sent/recv'ed off rank double inter_rank_bytes_sent = 0, inter_rank_bytes_recv = 0; - for (auto const& [obj, _] : cur_objs_) { - if (auto it = send_edges_.find(obj); it != send_edges_.end()) { - for (auto const& [target, volume] : it->second) { - vt_debug_print( - verbose, temperedlb, - "computeRankWork: send obj={}, target={}\n", - obj, target - ); - if ( - cur_objs_.find(target) != cur_objs_.end() or - target.isLocatedOnThisNode() - ) { - intra_rank_bytes_sent += volume; - } else { - inter_rank_bytes_sent += volume; + auto computeEdgeVolumesAndLoad = [&](ObjIDType obj, LoadType obj_load) { + if (exclude.find(obj) == exclude.end()) { + if (auto it = send_edges_.find(obj); it != send_edges_.end()) { + for (auto const& [target, volume] : it->second) { + vt_debug_print( + verbose, temperedlb, + "computeWorkBreakdown: send obj={}, target={}\n", + obj, target + ); + if ( + cur_objs_.find(target) != cur_objs_.end() or + target.isLocatedOnThisNode() + ) { + intra_rank_bytes_sent += volume; + } else { + inter_rank_bytes_sent += volume; + } } } - } - if (auto it = recv_edges_.find(obj); it != recv_edges_.end()) { - for (auto const& [target, volume] : it->second) { - vt_debug_print( - verbose, temperedlb, - "computeRankWork: recv obj={}, target={}\n", - obj, target - ); - if ( - cur_objs_.find(target) != cur_objs_.end() or - target.isLocatedOnThisNode() - ) { - intra_rank_bytes_recv += volume; - } else { - inter_rank_bytes_recv += volume; + if (auto it = recv_edges_.find(obj); it != recv_edges_.end()) { + for (auto const& [target, volume] : it->second) { + vt_debug_print( + verbose, temperedlb, + "computeWorkBreakdown: recv obj={}, target={}\n", + obj, target + ); + if ( + cur_objs_.find(target) != cur_objs_.end() or + target.isLocatedOnThisNode() + ) { + intra_rank_bytes_recv += volume; + } else { + inter_rank_bytes_recv += volume; + } } } } + + load += obj_load; + }; + + for (auto const& [obj, obj_load] : objs) { + computeEdgeVolumesAndLoad(obj, obj_load); } + for (auto const& [obj, obj_load] : include) { + computeEdgeVolumesAndLoad(obj, obj_load); + } + + double shared_volume = 0; + auto const& shared_blocks_here = getSharedBlocksHere(); + + for (auto const& sid : shared_blocks_here) { + if (auto it = shared_block_edge_.find(sid); it != shared_block_edge_.end()) { + auto const& [home_node, volume] = it->second; + if (home_node != node) { + shared_volume += volume; + } + } else { + vtAbort("Could not find shared edge volume!"); + } + } + + auto const inter_vol = std::max(inter_rank_bytes_sent, inter_rank_bytes_recv); + auto const intra_vol = std::max(intra_rank_bytes_sent, intra_rank_bytes_recv); + + WorkBreakdown w; + w.work = computeWork(load, inter_vol, intra_vol, shared_volume); + w.intra_send_vol = intra_rank_bytes_sent; + w.intra_recv_vol = intra_rank_bytes_recv; + w.inter_send_vol = inter_rank_bytes_sent; + w.inter_recv_vol = inter_rank_bytes_recv; + w.shared_vol = shared_volume; + vt_print( temperedlb, - "computeRankWork: intra sent={}, recv={}, inter sent={}, recv={}\n", + "computeWorkBreakdown: load={}, intra sent={}, recv={}," + " inter sent={}, recv={}, shared_vol={}, work={}\n", + load, 
intra_rank_bytes_sent, intra_rank_bytes_recv, - inter_rank_bytes_sent, inter_rank_bytes_recv + inter_rank_bytes_sent, inter_rank_bytes_recv, + shared_volume, w.work ); - auto const inter_vol = std::max(inter_rank_bytes_sent, inter_rank_bytes_recv); - auto const intra_vol = std::max(intra_rank_bytes_sent, intra_rank_bytes_recv); + return w; +} + +double TemperedLB::computeWorkAfterClusterSwap( + NodeType node, NodeInfo const& info, ClusterInfo const& to_remove, + ClusterInfo const& to_add +) { + // Start with the existing work for the node and work backwards to compute the + // new work with the cluster removed + double node_work = info.work; + + // Remove/add clusters' load factor from work model + node_work -= α * to_remove.load; + node_work += α * to_add.load; + + // Remove/add clusters' intra-comm + double const node_intra_send = info.intra_send_vol; + double const node_intra_recv = info.intra_recv_vol; + node_work -= δ * std::max(node_intra_send, node_intra_recv); + node_work += δ * std::max( + node_intra_send - to_remove.intra_send_vol + to_add.intra_send_vol, + node_intra_recv - to_remove.intra_recv_vol + to_add.intra_recv_vol + ); + + // Uninitialized destination means that the cluster is empty + // If to_remove it was remote, remove that component from the work + if ( + to_remove.home_node != node and + to_remove.home_node != uninitialized_destination + ) { + node_work -= ζ * to_remove.edge_weight; + } + + // If to_add is now remote, add that component to the work + if ( + to_add.home_node != node and + to_add.home_node != uninitialized_destination + ) { + node_work += ζ * to_add.edge_weight; + } + + double node_inter_send = info.inter_send_vol; + double node_inter_recv = info.inter_recv_vol; + node_work -= β * std::max(node_inter_send, node_inter_recv); + + // All edges outside the to_remove cluster that are also off the node need to + // be removed from the inter-node volumes + for (auto const& [target, volume] : to_remove.inter_send_vol) { + if (target != node) { + node_inter_send -= volume; + } + } + for (auto const& [target, volume] : to_remove.inter_recv_vol) { + if (target != node) { + node_inter_recv -= volume; + } + } + + // All edges outside the to_add cluster that are now off the node need to + // be added from the inter-node volumes + for (auto const& [target, volume] : to_add.inter_send_vol) { + if (target != node) { + node_inter_send += volume; + } + } + for (auto const& [target, volume] : to_add.inter_recv_vol) { + if (target != node) { + node_inter_recv += volume; + } + } - return computeWork(load, inter_vol, intra_vol); + node_work += β * std::max(node_inter_send, node_inter_recv); + + return node_work; } void TemperedLB::doLBStages(LoadType start_imb) { @@ -816,6 +1006,18 @@ void TemperedLB::doLBStages(LoadType start_imb) { send_edges_[from_obj].emplace_back(to_obj, bytes); recv_edges_[to_obj].emplace_back(from_obj, bytes); has_comm = true; + } else if (key.commCategory() == elm::CommCategory::WriteShared) { + auto const to_node = key.toNode(); + auto const shared_id = key.sharedID(); + auto const bytes = volume.bytes; + shared_block_edge_[shared_id] = std::make_tuple(to_node, bytes); + has_comm = true; + } else if (key.commCategory() == elm::CommCategory::ReadOnlyShared) { + auto const to_node = key.toNode(); + auto const shared_id = key.sharedID(); + auto const bytes = volume.bytes; + shared_block_edge_[shared_id] = std::make_tuple(to_node, bytes); + has_comm = true; } } @@ -852,7 +1054,8 @@ void TemperedLB::doLBStages(LoadType start_imb) { } this_new_load_ = 
this_load; - this_work = this_new_work_ = computeRankWork(); + this_new_breakdown_ = computeWorkBreakdown(this_node, cur_objs_); + this_work = this_new_work_ = this_new_breakdown_.work; runInEpochCollective("TemperedLB::doLBStages -> Rank_load_modeled", [=] { // Perform the reduction for Rank_load_modeled -> processor load only @@ -897,12 +1100,11 @@ void TemperedLB::doLBStages(LoadType start_imb) { computeClusterSummary(); // Verbose printing about local clusters - for (auto const& [shared_id, value] : cur_clusters_) { - auto const& [shared_bytes, cluster_load] = value; + for (auto const& [shared_id, cluster_info] : cur_clusters_) { vt_debug_print( verbose, temperedlb, - "Local cluster: id={}, bytes={}, load={}\n", - shared_id, shared_bytes, cluster_load + "Local cluster: id={}: {}\n", + shared_id, cluster_info ); } } @@ -928,12 +1130,11 @@ void TemperedLB::doLBStages(LoadType start_imb) { // Some very verbose printing about all remote clusters we know about that // we can shut off later for (auto const& [node, clusters] : other_rank_clusters_) { - for (auto const& [shared_id, value] : clusters) { - auto const& [shared_bytes, cluster_load] = value; + for (auto const& [shared_id, cluster_info] : clusters) { vt_debug_print( verbose, temperedlb, - "Remote cluster: node={}, id={}, bytes={}, load={}\n", - node, shared_id, shared_bytes, cluster_load + "Remote cluster: node={}, id={}, {}\n", + node, shared_id, cluster_info ); } } @@ -941,9 +1142,10 @@ void TemperedLB::doLBStages(LoadType start_imb) { // Move remove cluster information to shared_block_size_ so we have all // the sizes in the same place for (auto const& [node, clusters] : other_rank_clusters_) { - for (auto const& [shared_id, value] : clusters) { - auto const& [shared_bytes, _] = value; - shared_block_size_[shared_id] = shared_bytes; + for (auto const& [shared_id, cluster_info] : clusters) { + shared_block_size_[shared_id] = cluster_info.bytes; + shared_block_edge_[shared_id] = + std::make_tuple(cluster_info.home_node, cluster_info.edge_weight); } } @@ -976,7 +1178,8 @@ void TemperedLB::doLBStages(LoadType start_imb) { (iter_ == num_iters_ - 1) || transfer_type_ == TransferTypeEnum::SwapClusters ) { - this_new_work_ = computeRankWork(); + this_new_breakdown_ = computeWorkBreakdown(this_node, cur_objs_); + this_new_work_ = this_new_breakdown_.work; runInEpochCollective("TemperedLB::doLBStages -> Rank_load_modeled", [=] { // Perform the reduction for Rank_load_modeled -> processor load only proxy_.allreduce<&TemperedLB::loadStatsHandler, collective::PlusOp>( @@ -1087,7 +1290,9 @@ void TemperedLB::loadStatsHandler(std::vector const& vec) { } } -void TemperedLB::rejectionStatsHandler(int n_rejected, int n_transfers) { +void TemperedLB::rejectionStatsHandler( + int n_rejected, int n_transfers, int n_unhomed_blocks +) { double rej = static_cast(n_rejected) / static_cast(n_rejected + n_transfers) * 100.0; @@ -1095,9 +1300,10 @@ void TemperedLB::rejectionStatsHandler(int n_rejected, int n_transfers) { if (this_node == 0) { vt_debug_print( terse, temperedlb, - "TemperedLB::rejectionStatsHandler: n_transfers={} n_rejected={} " + "TemperedLB::rejectionStatsHandler: n_transfers={} n_unhomed_blocks={}" + " n_rejected={} " "rejection_rate={:0.1f}%\n", - n_transfers, n_rejected, rej + n_transfers, n_unhomed_blocks, n_rejected, rej ); } } @@ -1281,7 +1487,13 @@ void TemperedLB::propagateRound(uint8_t k_cur, bool sync, EpochType epoch) { if (epoch != no_epoch) { envelopeSetEpoch(msg->env, epoch); } - msg->addNodeLoad(this_node, this_new_load_); + 
NodeInfo info{ + this_new_load_, this_new_work_, + this_new_breakdown_.inter_send_vol, this_new_breakdown_.inter_recv_vol, + this_new_breakdown_.intra_send_vol, this_new_breakdown_.intra_recv_vol, + this_new_breakdown_.shared_vol + }; + msg->addNodeInfo(this_node, info); if (has_memory_data_) { msg->addNodeClusters(this_node, rank_bytes_, cur_clusters_); } @@ -1294,7 +1506,13 @@ void TemperedLB::propagateRound(uint8_t k_cur, bool sync, EpochType epoch) { if (epoch != no_epoch) { envelopeSetEpoch(msg->env, epoch); } - msg->addNodeLoad(this_node, this_new_load_); + NodeInfo info{ + this_new_load_, this_new_work_, + this_new_breakdown_.inter_send_vol, this_new_breakdown_.inter_recv_vol, + this_new_breakdown_.intra_send_vol, this_new_breakdown_.intra_recv_vol, + this_new_breakdown_.shared_vol + }; + msg->addNodeInfo(this_node, info); if (has_memory_data_) { msg->addNodeClusters(this_node, rank_bytes_, cur_clusters_); } @@ -1313,7 +1531,7 @@ void TemperedLB::propagateIncomingAsync(LoadMsgAsync* msg) { normal, temperedlb, "TemperedLB::propagateIncomingAsync: trial={}, iter={}, k_max={}, " "k_cur={}, from_node={}, load info size={}\n", - trial_, iter_, k_max_, k_cur_async, from_node, msg->getNodeLoad().size() + trial_, iter_, k_max_, k_cur_async, from_node, msg->getNodeInfo().size() ); auto const this_node = theContext()->getNode(); @@ -1328,11 +1546,11 @@ void TemperedLB::propagateIncomingAsync(LoadMsgAsync* msg) { } } - for (auto&& elm : msg->getNodeLoad()) { + for (auto&& elm : msg->getNodeInfo()) { if (load_info_.find(elm.first) == load_info_.end()) { load_info_[elm.first] = elm.second; - if (isUnderloaded(elm.second)) { + if (isUnderloaded(elm.second.load)) { underloaded_.insert(elm.first); } } @@ -1359,7 +1577,7 @@ void TemperedLB::propagateIncomingSync(LoadMsgSync* msg) { normal, temperedlb, "TemperedLB::propagateIncomingSync: trial={}, iter={}, k_max={}, " "k_cur={}, from_node={}, load info size={}\n", - trial_, iter_, k_max_, k_cur_, from_node, msg->getNodeLoad().size() + trial_, iter_, k_max_, k_cur_, from_node, msg->getNodeInfo().size() ); auto const this_node = theContext()->getNode(); @@ -1374,11 +1592,11 @@ void TemperedLB::propagateIncomingSync(LoadMsgSync* msg) { } } - for (auto&& elm : msg->getNodeLoad()) { + for (auto&& elm : msg->getNodeInfo()) { if (new_load_info_.find(elm.first) == new_load_info_.end()) { new_load_info_[elm.first] = elm.second; - if (isUnderloaded(elm.second)) { + if (isUnderloaded(elm.second.load)) { new_underloaded_.insert(elm.first); } } @@ -1413,7 +1631,7 @@ std::vector TemperedLB::createCMF(NodeSetType const& under) { for (auto&& pe : under) { auto iter = load_info_.find(pe); vtAssert(iter != load_info_.end(), "Node must be in load_info_"); - auto load = iter->second; + auto load = iter->second.load; if (load > l_max) { l_max = load; } @@ -1429,7 +1647,7 @@ std::vector TemperedLB::createCMF(NodeSetType const& under) { auto iter = load_info_.find(pe); vtAssert(iter != load_info_.end(), "Node must be in load_info_"); - auto load = iter->second; + auto load = iter->second.load; sum_p += 1. 
- factor * load; cmf.push_back(sum_p); } @@ -1473,7 +1691,7 @@ NodeType TemperedLB::sampleFromCMF( std::vector TemperedLB::makeUnderloaded() const { std::vector under = {}; for (auto&& elm : load_info_) { - if (isUnderloaded(elm.second)) { + if (isUnderloaded(elm.second.load)) { under.push_back(elm.first); } } @@ -1489,7 +1707,7 @@ std::vector TemperedLB::makeSufficientlyUnderloaded( std::vector sufficiently_under = {}; for (auto&& elm : load_info_) { bool eval = Criterion(criterion_)( - this_new_load_, elm.second, load_to_accommodate, target_max_load_ + this_new_load_, elm.second.load, load_to_accommodate, target_max_load_ ); if (eval) { sufficiently_under.push_back(elm.first); @@ -1725,7 +1943,7 @@ void TemperedLB::originalTransfer() { // Find load of selected node auto load_iter = load_info_.find(selected_node); vtAssert(load_iter != load_info_.end(), "Selected node not found"); - auto& selected_load = load_iter->second; + auto& selected_load = load_iter->second.load; // Evaluate criterion for proposed transfer bool eval = Criterion(criterion_)( @@ -1791,7 +2009,7 @@ void TemperedLB::originalTransfer() { // compute rejection rate because it will be printed runInEpochCollective("TemperedLB::originalTransfer -> compute rejection", [=] { proxy_.allreduce<&TemperedLB::rejectionStatsHandler, collective::PlusOp>( - n_rejected, n_transfers + n_rejected, n_transfers, 0 ); }); } @@ -1883,22 +2101,24 @@ bool TemperedLB::memoryTransferCriterion(double try_total_bytes, double src_byte auto const try_after_mem = try_total_bytes + src_bytes; return not (src_after_mem > this->mem_thresh_ or try_after_mem > this->mem_thresh_); -} // bool memoryTransferCriterion +} -double TemperedLB::loadTransferCriterion(double before_w_src, double before_w_dst, double src_l, double dst_l) { +double TemperedLB::loadTransferCriterion( + double before_w_src, double before_w_dst, double after_w_src, + double after_w_dst +) { // Compute maximum work of original arrangement auto const w_max_0 = std::max(before_w_src, before_w_dst); // Compute maximum work of arrangement after proposed load transfer - auto const after_w_src = before_w_src - src_l + dst_l; - auto const after_w_dst = before_w_dst + src_l - dst_l; auto const w_max_new = std::max(after_w_src, after_w_dst); // Return criterion value return w_max_0 - w_max_new; -} // double loadTransferCriterion +} void TemperedLB::considerSubClustersAfterLock(MsgSharedPtr msg) { +#if 0 is_swapping_ = true; auto criterion = [&,this](auto src_cluster, auto try_cluster) -> double { @@ -2056,6 +2276,8 @@ void TemperedLB::considerSubClustersAfterLock(MsgSharedPtr msg) { }); computeClusterSummary(); + this_new_breakdown_ = computeWorkBreakdown(this_node, cur_objs_); + this_new_work_ = this_new_breakdown_.work; vt_debug_print( normal, temperedlb, @@ -2073,83 +2295,82 @@ void TemperedLB::considerSubClustersAfterLock(MsgSharedPtr msg) { pending_actions_.pop_back(); action(); } +#endif } void TemperedLB::considerSwapsAfterLock(MsgSharedPtr msg) { is_swapping_ = true; - auto criterion = [&,this](auto src_cluster, auto try_cluster) -> double { - auto const& [src_id, src_bytes, src_load] = src_cluster; - auto const& [try_rank, try_total_load, try_total_bytes, - try_id, try_bytes, try_load] = try_cluster; + auto const this_node = theContext()->getNode(); + + NodeInfo this_info{ + this_new_load_, this_new_work_, + this_new_breakdown_.inter_send_vol, this_new_breakdown_.inter_recv_vol, + this_new_breakdown_.intra_send_vol, this_new_breakdown_.intra_recv_vol, + this_new_breakdown_.shared_vol + 
}; - auto const src_after_mem = current_memory_usage_ - src_bytes + try_bytes; - auto const try_after_mem = try_total_bytes + src_bytes - try_bytes; + auto criterion = [&,this]( + auto try_rank, auto const& try_info, auto try_mem, + auto const& src_cluster, auto const& try_cluster + ) -> double { + if (try_mem - try_cluster.bytes + src_cluster.bytes > mem_thresh_) { + return - std::numeric_limits::infinity(); + } - // Check whether strict bounds on memory are satisfied - if (src_after_mem > mem_thresh_ or try_after_mem > mem_thresh_) { + auto const src_mem = current_memory_usage_; + if (src_mem + try_cluster.bytes - src_cluster.bytes > mem_thresh_) { return - std::numeric_limits::infinity(); } + double const src_new_work = + computeWorkAfterClusterSwap(this_node, this_info, src_cluster, try_cluster); + double const dest_new_work = + computeWorkAfterClusterSwap(try_rank, try_info, try_cluster, src_cluster); + // Return load transfer criterion - return loadTransferCriterion(this_new_load_, try_total_load, src_load, try_load); + return loadTransferCriterion( + this_new_work_, try_info.work, src_new_work, dest_new_work + ); }; auto const& try_clusters = msg->locked_clusters; auto const& try_rank = msg->locked_node; - auto const& try_load = msg->locked_load; auto const& try_total_bytes = msg->locked_bytes; + auto const& try_info = msg->locked_info; double best_c_try = -1.0; std::tuple best_swap = {-1,-1}; for (auto const& [src_shared_id, src_cluster] : cur_clusters_) { - auto const& [src_cluster_bytes, src_cluster_load] = src_cluster; - // try swapping with empty cluster first { - double c_try = criterion( - std::make_tuple(src_shared_id, src_cluster_bytes, src_cluster_load), - std::make_tuple( - try_rank, - try_load, - try_total_bytes, - -1, - 0, - 0 - ) - ); - if (c_try > 0.0) { - if (c_try > best_c_try) { - best_c_try = c_try; - best_swap = std::make_tuple(src_shared_id, -1); - } + ClusterInfo empty_cluster; + double c_try = criterion( + try_rank, try_info, try_total_bytes, src_cluster, empty_cluster + ); + if (c_try > 0.0) { + if (c_try > best_c_try) { + best_c_try = c_try; + best_swap = std::make_tuple(src_shared_id, -1); } + } } for (auto const& [try_shared_id, try_cluster] : try_clusters) { - auto const& [try_cluster_bytes, try_cluster_load] = try_cluster; - double c_try = criterion( - std::make_tuple(src_shared_id, src_cluster_bytes, src_cluster_load), - std::make_tuple( - try_rank, - try_load, - try_total_bytes, - try_shared_id, - try_cluster_bytes, - try_cluster_load - ) - ); - vt_debug_print( - verbose, temperedlb, - "testing a possible swap (rank {}): {} {} c_try={}\n", - try_rank, src_shared_id, try_shared_id, c_try - ); - if (c_try > 0.0) { - if (c_try > best_c_try) { - best_c_try = c_try; - best_swap = std::make_tuple(src_shared_id, try_shared_id); - } + double c_try = criterion( + try_rank, try_info, try_total_bytes, src_cluster, try_cluster + ); + vt_debug_print( + verbose, temperedlb, + "testing a possible swap (rank {}): {} {} c_try={}\n", + try_rank, src_shared_id, try_shared_id, c_try + ); + if (c_try > 0.0) { + if (c_try > best_c_try) { + best_c_try = c_try; + best_swap = std::make_tuple(src_shared_id, try_shared_id); } + } } } @@ -2169,8 +2390,6 @@ void TemperedLB::considerSwapsAfterLock(MsgSharedPtr msg) { give_obj_working_bytes ] = removeClusterToSend(src_shared_id); - auto const this_node = theContext()->getNode(); - runInEpochRooted("giveCluster", [&]{ proxy_[try_rank].template send<&TemperedLB::giveCluster>( this_node, @@ -2183,6 +2402,8 @@ void 
TemperedLB::considerSwapsAfterLock(MsgSharedPtr msg) { }); computeClusterSummary(); + this_new_breakdown_ = computeWorkBreakdown(this_node, cur_objs_); + this_new_work_ = this_new_breakdown_.work; vt_debug_print( normal, temperedlb, @@ -2210,6 +2431,8 @@ void TemperedLB::giveCluster( std::unordered_map const& give_obj_working_bytes, SharedIDType take_cluster ) { + auto const this_node = theContext()->getNode(); + n_transfers_swap_++; vtAssert(give_shared_blocks_size.size() == 1, "Must be one block right now"); @@ -2229,8 +2452,6 @@ void TemperedLB::giveCluster( } if (take_cluster != -1) { - auto const this_node = theContext()->getNode(); - auto const& [ take_objs, take_obj_shared_block, @@ -2249,6 +2470,8 @@ void TemperedLB::giveCluster( } computeClusterSummary(); + this_new_breakdown_ = computeWorkBreakdown(this_node, cur_objs_); + this_new_work_ = this_new_breakdown_.work; vt_debug_print( normal, temperedlb, @@ -2339,9 +2562,16 @@ void TemperedLB::satisfyLockRequest() { lock.requesting_node ); + NodeInfo this_info{ + this_new_load_, this_new_work_, + this_new_breakdown_.inter_send_vol, this_new_breakdown_.inter_recv_vol, + this_new_breakdown_.intra_send_vol, this_new_breakdown_.intra_recv_vol, + this_new_breakdown_.shared_vol + }; + proxy_[lock.requesting_node].template send<&TemperedLB::lockObtained>( - this_node, this_new_load_, cur_clusters_, current_memory_usage_, - max_object_working_bytes_, lock.c_try + this_node, cur_clusters_, current_memory_usage_, + max_object_working_bytes_, lock.c_try, this_info ); is_locked_ = true; @@ -2350,6 +2580,7 @@ void TemperedLB::satisfyLockRequest() { } void TemperedLB::trySubClustering() { +#if 0 is_subclustering_ = true; n_transfers_swap_ = 0; @@ -2396,7 +2627,7 @@ void TemperedLB::trySubClustering() { // cluster size that this rank has if (total_clusters_bytes + avg_cluster_bytes < mem_thresh_) { if ( - auto target_rank_load = load_info_.find(try_rank)->second; + auto target_rank_load = load_info_.find(try_rank)->second.load; target_rank_load < target_max_load_ ) { @@ -2452,10 +2683,11 @@ void TemperedLB::trySubClustering() { if (theConfig()->vt_debug_temperedlb) { runInEpochCollective("TemperedLB::swapClusters -> compute rejection", [=] { proxy_.allreduce<&TemperedLB::rejectionStatsHandler, collective::PlusOp>( - n_rejected, n_transfers_swap_ + n_rejected, n_transfers_swap_, 0 ); }); } +#endif } void TemperedLB::swapClusters() { @@ -2484,47 +2716,62 @@ void TemperedLB::swapClusters() { n_transfers_swap_ = 0; + auto const this_node = theContext()->getNode(); + + NodeInfo this_info{ + this_new_load_, this_new_work_, + this_new_breakdown_.inter_send_vol, this_new_breakdown_.inter_recv_vol, + this_new_breakdown_.intra_send_vol, this_new_breakdown_.intra_recv_vol, + this_new_breakdown_.shared_vol + }; + auto lazy_epoch = theTerm()->makeEpochCollective("TemperedLB: swapClusters"); theTerm()->pushEpoch(lazy_epoch); - auto criterion = [this](auto src_cluster, auto try_cluster) -> double { - // FIXME: this does not swaps with an empty cluster - auto const& [src_id, src_bytes, src_load] = src_cluster; - auto const& [try_rank, try_id, try_bytes, try_load, try_mem] = try_cluster; + auto criterion = [&,this]( + auto try_rank, auto try_mem, auto const& src_cluster, auto const& try_cluster + ) -> double { // Necessary but not sufficient check regarding memory bounds - if (try_mem - try_bytes + src_bytes > mem_thresh_) { + if (try_mem - try_cluster.bytes + src_cluster.bytes > mem_thresh_) { + return - std::numeric_limits::infinity(); + } + + auto const src_mem 
= current_memory_usage_; + if (src_mem + try_cluster.bytes - src_cluster.bytes > mem_thresh_) { return - std::numeric_limits::infinity(); } + auto const& try_info = load_info_.find(try_rank)->second; + + double const src_new_work = + computeWorkAfterClusterSwap(this_node, this_info, src_cluster, try_cluster); + double const dest_new_work = + computeWorkAfterClusterSwap(try_rank, try_info, try_cluster, src_cluster); + // Return load transfer criterion - return loadTransferCriterion(this_new_load_, load_info_.find(try_rank)->second, src_load, try_load); + return loadTransferCriterion( + this_new_work_, try_info.work, src_new_work, dest_new_work + ); }; - auto const this_node = theContext()->getNode(); - // Identify and message beneficial cluster swaps for (auto const& [try_rank, try_clusters] : other_rank_clusters_) { bool found_potential_good_swap = false; // Approximate the memory usage on the target - BytesType try_approx_mem_usage = + BytesType try_mem = other_rank_working_bytes_.find(try_rank)->second; for (auto const& [try_shared_id, try_cluster] : try_clusters) { - auto const& [try_cluster_bytes, _] = try_cluster; - try_approx_mem_usage += try_cluster_bytes; + try_mem += try_cluster.bytes; } // Iterate over source clusters for (auto const& [src_shared_id, src_cluster] : cur_clusters_) { - auto const& [src_cluster_bytes, src_cluster_load] = src_cluster; - // Compute approximation swap criterion for empty cluster "swap" case { - double c_try = criterion( - std::make_tuple(src_shared_id, src_cluster_bytes, src_cluster_load), - std::make_tuple(try_rank, 0, 0, 0, try_approx_mem_usage) - ); + ClusterInfo empty_cluster; + double c_try = criterion(try_rank, try_mem, src_cluster, empty_cluster); if (c_try > 0.0) { // Try to obtain lock for feasible swap found_potential_good_swap = true; @@ -2535,15 +2782,8 @@ void TemperedLB::swapClusters() { // Iterate over target clusters for (auto const& [try_shared_id, try_cluster] : try_clusters) { - auto const& [try_cluster_bytes, try_cluster_load] = try_cluster; // Decide whether swap is beneficial - double c_try = criterion( - std::make_tuple(src_shared_id, src_cluster_bytes, src_cluster_load), - std::make_tuple( - try_rank, try_shared_id, try_cluster_bytes, try_cluster_load, - try_approx_mem_usage - ) - ); + double c_try = criterion(try_rank, try_mem, src_cluster, try_cluster); if (c_try > 0.0) { // Try to obtain lock for feasible swap found_potential_good_swap = true; @@ -2583,13 +2823,26 @@ void TemperedLB::swapClusters() { getSharedBlocksHere().size(), mem_thresh_, this_new_load_ ); + auto const& shared_blocks_here = getSharedBlocksHere(); + int remote_block_count = 0; + for (auto const& sid : shared_blocks_here) { + if (auto it = shared_block_edge_.find(sid); it != shared_block_edge_.end()) { + auto const& [home_node, volume] = it->second; + if (home_node != this_node) { + remote_block_count++; + } + } else { + vtAbort("Could not find shared edge volume!"); + } + } + // Report on rejection rate in debug mode int n_rejected = 0; if (theConfig()->vt_debug_temperedlb) { runInEpochCollective("TemperedLB::swapClusters -> compute rejection", [=] { proxy_.allreduce<&TemperedLB::rejectionStatsHandler, collective::PlusOp>( - n_rejected, n_transfers_swap_ + n_rejected, n_transfers_swap_, remote_block_count ); }); } diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h index eb2a0613b8..d28b759839 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h +++ 
b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h @@ -59,6 +59,13 @@ namespace vt { namespace vrt { namespace collection { namespace lb { +struct WorkBreakdown { + double work = 0; + double intra_send_vol = 0, intra_recv_vol = 0; + double inter_send_vol = 0, inter_recv_vol = 0; + double shared_vol = 0; +}; + struct TemperedLB : BaseLB { using LoadMsgAsync = balance::LoadMsgAsync; using LoadMsgSync = balance::LoadMsg; @@ -119,7 +126,9 @@ struct TemperedLB : BaseLB { void inLazyMigrations(balance::LazyMigrationMsg* msg); void loadStatsHandler(std::vector const& vec); void workStatsHandler(std::vector const& vec); - void rejectionStatsHandler(int n_rejected, int n_transfers); + void rejectionStatsHandler( + int n_rejected, int n_transfers, int n_unhomed_blocks + ); void thunkMigrations(); void setupDone(); @@ -171,33 +180,32 @@ struct TemperedLB : BaseLB { LockedInfoMsg() = default; LockedInfoMsg( - NodeType in_locked_node, LoadType in_locked_load, + NodeType in_locked_node, ClusterSummaryType in_locked_clusters, BytesType in_locked_bytes, BytesType in_locked_max_object_working_bytes, - double in_locked_c_try + double in_locked_c_try, + NodeInfo in_locked_info ) : locked_node(in_locked_node), - locked_load(in_locked_load), locked_clusters(in_locked_clusters), locked_bytes(in_locked_bytes), locked_max_object_working_bytes(in_locked_max_object_working_bytes), - locked_c_try(in_locked_c_try) + locked_c_try(in_locked_c_try), + locked_info(in_locked_info) { } template void serialize(SerializerT& s) { MessageParentType::serialize(s); s | locked_node; - s | locked_load; s | locked_clusters; s | locked_bytes; s | locked_max_object_working_bytes; s | locked_c_try; + s | locked_info; } /// The node that is locked NodeType locked_node = uninitialized_destination; - /// The total load of the locked node - LoadType locked_load = 0; /// The up-to-date summary of the clusters ClusterSummaryType locked_clusters = {}; /// The total bytes for the locked node @@ -207,6 +215,8 @@ struct TemperedLB : BaseLB { /// The approximate criterion value at the time it was locked with possible /// out-of-date info double locked_c_try = 0; + /// All the node info + NodeInfo locked_info; }; /** @@ -234,15 +244,19 @@ struct TemperedLB : BaseLB { * * \param[in] before_w_src: original work on source rank * \param[in] before_w_dst: original work on destination rank - * \param[in] src_l: sum of object loads to be transferred from source - * \param[in] dst_l: sum of object loads to be transferred from destination + * \param[in] after_w_src: new work on source rank + * \param[in] after_w_dst: new work on destination rank */ - double loadTransferCriterion(double before_w_src, double before_w_dst, double src_l, double dst_l); + double loadTransferCriterion( + double before_w_src, double before_w_dst, double after_w_src, + double after_w_dst + ); /** * \brief Compute the amount of work based on the work model * - * \note Model: α * load + β * inter_comm_bytes + δ * intra_comm_bytes + γ + * \note Model: α * load + β * inter_comm_bytes + δ * intra_comm_bytes + + * ζ * shared_comm_bytes + γ * * \param[in] load the load for a rank * \param[in] comm_bytes the external communication @@ -250,20 +264,30 @@ struct TemperedLB : BaseLB { * \return the amount of work */ double computeWork( - double load, double inter_comm_bytes, double intra_comm_bytes + double load, double inter_comm_bytes, double intra_comm_bytes, + double shared_comm_bytes ) const; /** - * \brief Compute the rank's work + * \brief Compute work based on a a set of objects 
* - * \param[in] exclude a set of objects to exclude that are in cur_objs_ - * \param[in] include a set of objects to include that are not in cur_objs_ + * \param[in] node the node these objects are mapped to + * \param[in] objs input set of objects + * \param[in] exclude a set of objects to exclude that are in objs + * \param[in] include a map of objects to include that are not in objs * * \return the amount of work currently for the set of objects */ - double computeRankWork( - std::set exclude = {}, - std::set include = {} + WorkBreakdown computeWorkBreakdown( + NodeType node, + std::unordered_map const& objs, + std::set const& exclude = {}, + std::unordered_map const& include = {} + ); + + double computeWorkAfterClusterSwap( + NodeType node, NodeInfo const& info, ClusterInfo const& to_remove, + ClusterInfo const& to_add ); /** @@ -360,8 +384,8 @@ struct TemperedLB : BaseLB { */ bool target_pole_ = false; std::random_device seed_; - std::unordered_map load_info_ = {}; - std::unordered_map new_load_info_ = {}; + std::unordered_map load_info_ = {}; + std::unordered_map new_load_info_ = {}; objgroup::proxy::Proxy proxy_ = {}; bool is_overloaded_ = false; bool is_underloaded_ = false; @@ -373,6 +397,7 @@ struct TemperedLB : BaseLB { EdgeMapType recv_edges_; LoadType this_new_load_ = 0.0; LoadType this_new_work_ = 0.0; + WorkBreakdown this_new_breakdown_; LoadType new_imbalance_ = 0.0; LoadType new_work_imbalance_ = 0.0; LoadType work_mean_ = 0.0; @@ -391,6 +416,7 @@ struct TemperedLB : BaseLB { double β = 0.0; double γ = 0.0; double δ = 0.0; + double ζ = 0.0; std::vector propagated_k_; std::mt19937 gen_propagate_; std::mt19937 gen_sample_; @@ -444,6 +470,8 @@ struct TemperedLB : BaseLB { std::unordered_map obj_shared_block_; /// Shared block size in bytes std::unordered_map shared_block_size_; + /// Shared block edges + std::unordered_map> shared_block_edge_; /// Working bytes for each object std::unordered_map obj_working_bytes_; /// Cluster summary based on current local assignment From 0cf60878807994ad2340f5531ababece3649cf30 Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Wed, 17 Jan 2024 17:01:58 -0800 Subject: [PATCH 066/126] #2201: temperedlb: compute working bytes correctly in criterion --- .../balance/temperedlb/tempered_msgs.h | 4 ++ .../balance/temperedlb/temperedlb.cc | 54 +++++++++++++++++-- 2 files changed, 53 insertions(+), 5 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/tempered_msgs.h b/src/vt/vrt/collection/balance/temperedlb/tempered_msgs.h index 5bcadb9458..ed6bb1a5cf 100644 --- a/src/vt/vrt/collection/balance/temperedlb/tempered_msgs.h +++ b/src/vt/vrt/collection/balance/temperedlb/tempered_msgs.h @@ -63,12 +63,16 @@ struct ClusterInfo { std::unordered_map inter_send_vol, inter_recv_vol; NodeType home_node = uninitialized_destination; BytesType edge_weight = 0; + BytesType max_object_working_bytes = 0; + BytesType max_object_working_bytes_outside = 0; template void serialize(SerializerT& s) { s | load | bytes | intra_send_vol | intra_recv_vol; s | inter_send_vol | inter_recv_vol; s | home_node | edge_weight; + s | max_object_working_bytes; + s | max_object_working_bytes_outside; } }; diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index a1aff60462..d3298cd2d1 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -653,15 +653,38 @@ void TemperedLB::computeClusterSummary() { 
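  // A minimal worked illustration (hypothetical byte counts, purely
  // illustrative) of why both per-cluster maxima tracked below matter: if
  // this cluster's objects have working bytes {5, 9} and the rank's other
  // objects have {7}, then
  //   max_object_working_bytes         = 9
  //   max_object_working_bytes_outside = 7
  // After swapping this cluster away for one whose largest object is 6, the
  // rank's working-bytes term becomes std::max(7, 6) = 7, which matches how
  // the swap criterion later in this patch combines these two fields.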
info.edge_weight = shared_volume; std::set cluster_objs; + BytesType max_object_working_bytes = 0; + BytesType max_object_working_bytes_outside = 0; + for (auto const& [obj_id, obj_load] : cur_objs_) { if (auto iter = obj_shared_block_.find(obj_id); iter != obj_shared_block_.end()) { if (iter->second == shared_id) { cluster_objs.insert(obj_id); info.load += obj_load; + if ( + auto it = obj_working_bytes_.find(obj_id); + it != obj_working_bytes_.end() + ) { + max_object_working_bytes = std::max( + max_object_working_bytes, it->second + ); + } + } else { + if ( + auto it = obj_working_bytes_.find(obj_id); + it != obj_working_bytes_.end() + ) { + max_object_working_bytes_outside = std::max( + max_object_working_bytes_outside, it->second + ); + } } } } + info.max_object_working_bytes = max_object_working_bytes; + info.max_object_working_bytes_outside = max_object_working_bytes_outside; + if (info.load != 0) { for (auto&& obj : cluster_objs) { if (auto it = send_edges_.find(obj); it != send_edges_.end()) { @@ -2312,14 +2335,32 @@ void TemperedLB::considerSwapsAfterLock(MsgSharedPtr msg) { auto criterion = [&,this]( auto try_rank, auto const& try_info, auto try_mem, + auto try_max_object_working_bytes, auto const& src_cluster, auto const& try_cluster ) -> double { - if (try_mem - try_cluster.bytes + src_cluster.bytes > mem_thresh_) { + BytesType try_new_mem = try_mem; + try_new_mem -= try_cluster.bytes; + try_new_mem += src_cluster.bytes; + try_new_mem -= try_max_object_working_bytes; + try_new_mem += std::max( + try_cluster.max_object_working_bytes_outside, + src_cluster.max_object_working_bytes + ); + + if (try_new_mem > mem_thresh_) { return - std::numeric_limits::infinity(); } - auto const src_mem = current_memory_usage_; - if (src_mem + try_cluster.bytes - src_cluster.bytes > mem_thresh_) { + BytesType src_new_mem = current_memory_usage_; + src_new_mem -= src_cluster.bytes; + src_new_mem += try_cluster.bytes; + src_new_mem -= max_object_working_bytes_; + src_new_mem += std::max( + src_cluster.max_object_working_bytes_outside, + try_cluster.max_object_working_bytes + ); + + if (src_new_mem > mem_thresh_) { return - std::numeric_limits::infinity(); } @@ -2337,6 +2378,7 @@ void TemperedLB::considerSwapsAfterLock(MsgSharedPtr msg) { auto const& try_clusters = msg->locked_clusters; auto const& try_rank = msg->locked_node; auto const& try_total_bytes = msg->locked_bytes; + auto const& try_max_owm = msg->locked_max_object_working_bytes; auto const& try_info = msg->locked_info; double best_c_try = -1.0; @@ -2346,7 +2388,8 @@ void TemperedLB::considerSwapsAfterLock(MsgSharedPtr msg) { { ClusterInfo empty_cluster; double c_try = criterion( - try_rank, try_info, try_total_bytes, src_cluster, empty_cluster + try_rank, try_info, try_total_bytes, try_max_owm, + src_cluster, empty_cluster ); if (c_try > 0.0) { if (c_try > best_c_try) { @@ -2358,7 +2401,8 @@ void TemperedLB::considerSwapsAfterLock(MsgSharedPtr msg) { for (auto const& [try_shared_id, try_cluster] : try_clusters) { double c_try = criterion( - try_rank, try_info, try_total_bytes, src_cluster, try_cluster + try_rank, try_info, try_total_bytes, try_max_owm, + src_cluster, try_cluster ); vt_debug_print( verbose, temperedlb, From f32091049b71f107cb8c27914c3aabfef1e6fe0b Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Thu, 18 Jan 2024 09:13:35 -0800 Subject: [PATCH 067/126] #2201: lb_manager: fix offset when run on phase 0 --- src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) 
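The fix below anchors the load-balancing interval with a modulo offset instead of special-casing phase 0. A minimal standalone sketch of the new predicate (the helper name and bare parameters are illustrative, not vt's API):

    #include <cstdint>

    bool wouldRunLB(
      std::uint64_t phase, std::uint64_t interval, bool run_lb_first_phase
    ) {
      std::uint64_t const offset = run_lb_first_phase ? 0 : 1;
      return phase % interval == offset || (interval == 1 && phase != 0);
    }

With an interval of 3 this selects phases 1, 4, 7, ... by default and 0, 3, 6, ... when the run-on-first-phase flag is set; the previous condition kept the phase 0 special case alongside the test against 1, so enabling the first-phase option appears to have run the balancer at phases 0, 1, 4, 7, ... instead.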
diff --git a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc index 751c9dd235..de4705aa5e 100644 --- a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc +++ b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc @@ -130,10 +130,10 @@ LBType LBManager::decideLBToRun(PhaseType phase, bool try_file) { } else { auto interval = theConfig()->vt_lb_interval; vtAssert(interval != 0, "LB Interval must not be 0"); + vt::PhaseType offset = theConfig()->vt_lb_run_lb_first_phase ? 0 : 1; if ( - phase % interval == 1 || - (interval == 1 && phase != 0) || - (phase == 0 && theConfig()->vt_lb_run_lb_first_phase) + phase % interval == offset || + (interval == 1 && phase != 0) ) { bool name_match = false; for (auto&& elm : get_lb_names()) { From 8f2b153531d2cc82db1c1c1c26e42b20311daee0 Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Thu, 18 Jan 2024 09:38:50 -0800 Subject: [PATCH 068/126] #2201: temperedlb: add the rest of the memory model --- .../balance/temperedlb/tempered_msgs.h | 6 + .../balance/temperedlb/temperedlb.cc | 122 ++++++++++++++++-- .../balance/temperedlb/temperedlb.h | 11 ++ 3 files changed, 125 insertions(+), 14 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/tempered_msgs.h b/src/vt/vrt/collection/balance/temperedlb/tempered_msgs.h index ed6bb1a5cf..b710b74d8e 100644 --- a/src/vt/vrt/collection/balance/temperedlb/tempered_msgs.h +++ b/src/vt/vrt/collection/balance/temperedlb/tempered_msgs.h @@ -65,6 +65,9 @@ struct ClusterInfo { BytesType edge_weight = 0; BytesType max_object_working_bytes = 0; BytesType max_object_working_bytes_outside = 0; + BytesType max_object_serialized_bytes = 0; + BytesType max_object_serialized_bytes_outside = 0; + BytesType cluster_footprint = 0; template void serialize(SerializerT& s) { @@ -73,6 +76,9 @@ struct ClusterInfo { s | home_node | edge_weight; s | max_object_working_bytes; s | max_object_working_bytes_outside; + s | max_object_serialized_bytes; + s | max_object_serialized_bytes_outside; + s | cluster_footprint; } }; diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index d3298cd2d1..d4eb5e5219 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -587,6 +587,8 @@ void TemperedLB::readClustersMemoryData() { SharedIDType shared_id = -1; BytesType shared_bytes = 0; BytesType working_bytes = 0; + BytesType footprint_bytes = 0; + BytesType serialized_bytes = 0; for (auto const& [key, variant] : data_map) { if (key == "shared_id") { // Because of how JSON is stored this is always a double, even @@ -608,28 +610,47 @@ void TemperedLB::readClustersMemoryData() { if (BytesType const* val = std::get_if(&variant)) { working_bytes = *val; } else { - vtAbort("\"working_bytes\" in variant does not match double"); + vtAbort("\"task_working_bytes\" in variant does not match double"); + } + } + if (key == "task_footprint_bytes") { + if (BytesType const* val = std::get_if(&variant)) { + footprint_bytes = *val; + } else { + vtAbort( + "\"task_footprint_bytes\" in variant does not match double" + ); + } + } + if (key == "task_serialized_bytes") { + if (BytesType const* val = std::get_if(&variant)) { + serialized_bytes = *val; + } else { + vtAbort( + "\"task_serialized_bytes\" in variant does not match double" + ); } } if (key == "rank_working_bytes") { if (BytesType const* val = std::get_if(&variant)) { rank_bytes_ = *val; } else 
{ - vtAbort("\"rank_bytes\" in variant does not match double"); + vtAbort("\"rank_working_bytes\" in variant does not match double"); } } - // @todo: for now, skip "task_serialized_bytes" and - // "task_footprint_bytes" } vt_debug_print( verbose, temperedlb, - "obj={} shared_block={} bytes={}\n", - obj, shared_id, shared_bytes + "obj={} sid={} bytes={} footprint={} serialized={}, working={}\n", + obj, shared_id, shared_bytes, footprint_bytes, serialized_bytes, + working_bytes ); obj_shared_block_[obj] = shared_id; obj_working_bytes_[obj] = working_bytes; + obj_footprint_bytes_[obj] = footprint_bytes; + obj_serialized_bytes_[obj] = serialized_bytes; shared_block_size_[shared_id] = shared_bytes; // @todo: remove this hack once we have good data @@ -655,6 +676,9 @@ void TemperedLB::computeClusterSummary() { std::set cluster_objs; BytesType max_object_working_bytes = 0; BytesType max_object_working_bytes_outside = 0; + BytesType max_object_serialized_bytes = 0; + BytesType max_object_serialized_bytes_outside = 0; + BytesType cluster_footprint = 0; for (auto const& [obj_id, obj_load] : cur_objs_) { if (auto iter = obj_shared_block_.find(obj_id); iter != obj_shared_block_.end()) { @@ -669,6 +693,20 @@ void TemperedLB::computeClusterSummary() { max_object_working_bytes, it->second ); } + if ( + auto it = obj_serialized_bytes_.find(obj_id); + it != obj_serialized_bytes_.end() + ) { + max_object_serialized_bytes = std::max( + max_object_serialized_bytes, it->second + ); + } + if ( + auto it = obj_footprint_bytes_.find(obj_id); + it != obj_footprint_bytes_.end() + ) { + cluster_footprint += it->second; + } } else { if ( auto it = obj_working_bytes_.find(obj_id); @@ -678,12 +716,23 @@ void TemperedLB::computeClusterSummary() { max_object_working_bytes_outside, it->second ); } + if ( + auto it = obj_serialized_bytes_.find(obj_id); + it != obj_serialized_bytes_.end() + ) { + max_object_serialized_bytes_outside = std::max( + max_object_serialized_bytes_outside, it->second + ); + } } } } + info.cluster_footprint = cluster_footprint; info.max_object_working_bytes = max_object_working_bytes; info.max_object_working_bytes_outside = max_object_working_bytes_outside; + info.max_object_serialized_bytes = max_object_serialized_bytes; + info.max_object_serialized_bytes_outside = max_object_serialized_bytes_outside; if (info.load != 0) { for (auto&& obj : cluster_objs) { @@ -748,12 +797,21 @@ BytesType TemperedLB::computeMemoryUsage() { total_shared_bytes += shared_block_size_.find(block_id)->second; } - // Compute max object size - double max_object_working_bytes = 0; + // Compute max object working and serialized bytes for (auto const& [obj_id, _] : cur_objs_) { - if (obj_working_bytes_.find(obj_id) != obj_working_bytes_.end()) { + if ( + auto it = obj_serialized_bytes_.find(obj_id); + it != obj_serialized_bytes_.end() + ) { + max_object_serialized_bytes_ = + std::max(max_object_serialized_bytes_, it->second); + } + if ( + auto it = obj_working_bytes_.find(obj_id); + it != obj_working_bytes_.end() + ) { max_object_working_bytes_ = - std::max(max_object_working_bytes, obj_working_bytes_.find(obj_id)->second); + std::max(max_object_working_bytes_, it->second); } else { vt_debug_print( verbose, temperedlb, @@ -761,8 +819,24 @@ BytesType TemperedLB::computeMemoryUsage() { ); } } + + // Sum up all footprint bytes + double object_footprint_bytes = 0; + for (auto const& [obj_id, _] : cur_objs_) { + if ( + auto it = obj_footprint_bytes_.find(obj_id); + it != obj_footprint_bytes_.end() + ) { + object_footprint_bytes += 
it->second; + } + } + return current_memory_usage_ = - rank_bytes_ + total_shared_bytes + max_object_working_bytes_; + rank_bytes_ + + total_shared_bytes + + max_object_working_bytes_ + + object_footprint_bytes + + max_object_serialized_bytes_; } std::set TemperedLB::getSharedBlocksHere() const { @@ -2301,6 +2375,7 @@ void TemperedLB::considerSubClustersAfterLock(MsgSharedPtr msg) { computeClusterSummary(); this_new_breakdown_ = computeWorkBreakdown(this_node, cur_objs_); this_new_work_ = this_new_breakdown_.work; + computeMemoryUsage(); vt_debug_print( normal, temperedlb, @@ -2336,6 +2411,7 @@ void TemperedLB::considerSwapsAfterLock(MsgSharedPtr msg) { auto criterion = [&,this]( auto try_rank, auto const& try_info, auto try_mem, auto try_max_object_working_bytes, + auto try_max_object_serialized_bytes, auto const& src_cluster, auto const& try_cluster ) -> double { BytesType try_new_mem = try_mem; @@ -2346,6 +2422,13 @@ void TemperedLB::considerSwapsAfterLock(MsgSharedPtr msg) { try_cluster.max_object_working_bytes_outside, src_cluster.max_object_working_bytes ); + try_new_mem -= try_max_object_serialized_bytes; + try_new_mem += std::max( + try_cluster.max_object_serialized_bytes_outside, + src_cluster.max_object_serialized_bytes + ); + try_new_mem -= try_cluster.cluster_footprint; + try_new_mem += src_cluster.cluster_footprint; if (try_new_mem > mem_thresh_) { return - std::numeric_limits::infinity(); @@ -2359,6 +2442,13 @@ void TemperedLB::considerSwapsAfterLock(MsgSharedPtr msg) { src_cluster.max_object_working_bytes_outside, try_cluster.max_object_working_bytes ); + src_new_mem -= max_object_serialized_bytes_; + src_new_mem += std::max( + src_cluster.max_object_serialized_bytes_outside, + try_cluster.max_object_serialized_bytes + ); + src_new_mem += try_cluster.cluster_footprint; + src_new_mem -= src_cluster.cluster_footprint; if (src_new_mem > mem_thresh_) { return - std::numeric_limits::infinity(); @@ -2379,6 +2469,7 @@ void TemperedLB::considerSwapsAfterLock(MsgSharedPtr msg) { auto const& try_rank = msg->locked_node; auto const& try_total_bytes = msg->locked_bytes; auto const& try_max_owm = msg->locked_max_object_working_bytes; + auto const& try_max_osm = msg->locked_max_object_serialized_bytes; auto const& try_info = msg->locked_info; double best_c_try = -1.0; @@ -2388,7 +2479,7 @@ void TemperedLB::considerSwapsAfterLock(MsgSharedPtr msg) { { ClusterInfo empty_cluster; double c_try = criterion( - try_rank, try_info, try_total_bytes, try_max_owm, + try_rank, try_info, try_total_bytes, try_max_owm, try_max_osm, src_cluster, empty_cluster ); if (c_try > 0.0) { @@ -2401,7 +2492,7 @@ void TemperedLB::considerSwapsAfterLock(MsgSharedPtr msg) { for (auto const& [try_shared_id, try_cluster] : try_clusters) { double c_try = criterion( - try_rank, try_info, try_total_bytes, try_max_owm, + try_rank, try_info, try_total_bytes, try_max_owm, try_max_osm, src_cluster, try_cluster ); vt_debug_print( @@ -2448,6 +2539,7 @@ void TemperedLB::considerSwapsAfterLock(MsgSharedPtr msg) { computeClusterSummary(); this_new_breakdown_ = computeWorkBreakdown(this_node, cur_objs_); this_new_work_ = this_new_breakdown_.work; + computeMemoryUsage(); vt_debug_print( normal, temperedlb, @@ -2516,6 +2608,7 @@ void TemperedLB::giveCluster( computeClusterSummary(); this_new_breakdown_ = computeWorkBreakdown(this_node, cur_objs_); this_new_work_ = this_new_breakdown_.work; + computeMemoryUsage(); vt_debug_print( normal, temperedlb, @@ -2615,7 +2708,8 @@ void TemperedLB::satisfyLockRequest() { 
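  // A small worked example (hypothetical values, purely illustrative) of the
  // memory estimate assembled by computeMemoryUsage() above:
  //   rank_working_bytes                        = 100
  //   sum of shared block bytes resident here   = 400
  //   max task_working_bytes over local objects =  50
  //   sum of task_footprint_bytes               =  30
  //   max task_serialized_bytes                 =  20
  //   current_memory_usage_ = 100 + 400 + 50 + 30 + 20 = 600
  // A cluster swap is only considered when this estimate stays at or below
  // mem_thresh_ on both ranks involved.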
proxy_[lock.requesting_node].template send<&TemperedLB::lockObtained>( this_node, cur_clusters_, current_memory_usage_, - max_object_working_bytes_, lock.c_try, this_info + max_object_working_bytes_, max_object_serialized_bytes_, + lock.c_try, this_info ); is_locked_ = true; diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h index d28b759839..f93df475d5 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h @@ -183,12 +183,14 @@ struct TemperedLB : BaseLB { NodeType in_locked_node, ClusterSummaryType in_locked_clusters, BytesType in_locked_bytes, BytesType in_locked_max_object_working_bytes, + BytesType in_locked_max_object_serialized_bytes, double in_locked_c_try, NodeInfo in_locked_info ) : locked_node(in_locked_node), locked_clusters(in_locked_clusters), locked_bytes(in_locked_bytes), locked_max_object_working_bytes(in_locked_max_object_working_bytes), + locked_max_object_serialized_bytes(in_locked_max_object_serialized_bytes), locked_c_try(in_locked_c_try), locked_info(in_locked_info) { } @@ -200,6 +202,7 @@ struct TemperedLB : BaseLB { s | locked_clusters; s | locked_bytes; s | locked_max_object_working_bytes; + s | locked_max_object_serialized_bytes; s | locked_c_try; s | locked_info; } @@ -212,6 +215,8 @@ struct TemperedLB : BaseLB { BytesType locked_bytes = 0; /// The largest working bytes for the locked node BytesType locked_max_object_working_bytes = 0; + /// The largest serialized bytes for the locked node + BytesType locked_max_object_serialized_bytes = 0; /// The approximate criterion value at the time it was locked with possible /// out-of-date info double locked_c_try = 0; @@ -474,6 +479,10 @@ struct TemperedLB : BaseLB { std::unordered_map> shared_block_edge_; /// Working bytes for each object std::unordered_map obj_working_bytes_; + /// Serialized bytes for each object + std::unordered_map obj_serialized_bytes_; + /// Footprint bytes for each object + std::unordered_map obj_footprint_bytes_; /// Cluster summary based on current local assignment ClusterSummaryType cur_clusters_; /// Clusters that we know of on other ranks (might be out of date) @@ -484,6 +493,8 @@ struct TemperedLB : BaseLB { BytesType mem_thresh_ = 0; /// The max working bytes for an object currently residing here BytesType max_object_working_bytes_ = 0; + /// The max serialized bytes for an object currently residing here + BytesType max_object_serialized_bytes_ = 0; /// Current memory usage based on distribution BytesType current_memory_usage_ = 0; /// Whether this rank is locked or now From bb97080b09eb50a7f6298957180c44d950392b9d Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Thu, 18 Jan 2024 10:04:25 -0800 Subject: [PATCH 069/126] #2201: temperedlb: switch work breakdown print to debug print --- src/vt/vrt/collection/balance/temperedlb/temperedlb.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index d4eb5e5219..26cbfbebbe 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -956,8 +956,8 @@ WorkBreakdown TemperedLB::computeWorkBreakdown( w.inter_recv_vol = inter_rank_bytes_recv; w.shared_vol = shared_volume; - vt_print( - temperedlb, + vt_debug_print( + normal, temperedlb, "computeWorkBreakdown: load={}, intra sent={}, recv={}," " inter 
sent={}, recv={}, shared_vol={}, work={}\n", load, From 25bb18dca8d9129d35dcf5cd8b724127ac2bd4e3 Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Mon, 22 Jan 2024 10:56:02 -0800 Subject: [PATCH 070/126] #2201: temperedlb: add abort if we go over the threshold --- src/vt/vrt/collection/balance/temperedlb/temperedlb.cc | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 26cbfbebbe..1f8000f613 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -1187,13 +1187,19 @@ void TemperedLB::doLBStages(LoadType start_imb) { ); if (has_memory_data_) { + double const memory_usage = computeMemoryUsage(); + vt_debug_print( terse, temperedlb, "Current memory info: total memory usage={}, shared blocks here={}, " - "memory_threshold={}\n", computeMemoryUsage(), + "memory_threshold={}\n", memory_usage, getSharedBlocksHere().size(), mem_thresh_ ); + if (memory_usage > mem_thresh_) { + vtAbort("This should never be possible to go over the threshold\n"); + } + computeClusterSummary(); // Verbose printing about local clusters From 94fcac3f45131ff61a24464503c27740a032d0d8 Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Mon, 22 Jan 2024 13:15:57 -0800 Subject: [PATCH 071/126] #2201: temperedlb: add a bunch of prints for debugging --- .../balance/temperedlb/temperedlb.cc | 35 ++++++++++++++++--- .../balance/temperedlb/temperedlb.h | 1 + 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 1f8000f613..22855e524b 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -2403,8 +2403,15 @@ void TemperedLB::considerSubClustersAfterLock(MsgSharedPtr msg) { } void TemperedLB::considerSwapsAfterLock(MsgSharedPtr msg) { + consider_swaps_counter_++; is_swapping_ = true; + vt_debug_print( + verbose, temperedlb, + "considerSwapsAfterLock: consider_swaps_counter_={} start\n", + consider_swaps_counter_ + ); + auto const this_node = theContext()->getNode(); NodeInfo this_info{ @@ -2532,6 +2539,11 @@ void TemperedLB::considerSwapsAfterLock(MsgSharedPtr msg) { ] = removeClusterToSend(src_shared_id); runInEpochRooted("giveCluster", [&]{ + vt_debug_print( + verbose, temperedlb, + "considerSwapsAfterLock: giveCluster swapping {} for {}, epoch={:x}\n", + src_shared_id, try_shared_id, theMsg()->getEpoch() + ); proxy_[try_rank].template send<&TemperedLB::giveCluster>( this_node, give_shared_blocks_size, @@ -2556,7 +2568,14 @@ void TemperedLB::considerSwapsAfterLock(MsgSharedPtr msg) { proxy_[try_rank].template send<&TemperedLB::releaseLock>(); + vt_debug_print( + verbose, temperedlb, + "considerSwapsAfterLock: consider_swaps_counter_={} finish\n", + consider_swaps_counter_ + ); + is_swapping_ = false; + consider_swaps_counter_--; if (pending_actions_.size() > 0) { auto action = pending_actions_.back(); @@ -2618,8 +2637,10 @@ void TemperedLB::giveCluster( vt_debug_print( normal, temperedlb, - "giveCluster: total memory usage={}, shared blocks here={}, " - "memory_threshold={}, give_cluster={}, take_cluster={}\n", computeMemoryUsage(), + "giveCluster: from_rank={}, epoch={:x} total memory usage={}, shared blocks here={}, " + "memory_threshold={}, give_cluster={}, take_cluster={}\n", + from_rank, 
theMsg()->getEpoch(), + computeMemoryUsage(), getSharedBlocksHere().size(), mem_thresh_, give_shared_blocks_size.begin()->first, take_cluster ); @@ -2650,8 +2671,8 @@ void TemperedLB::lockObtained(LockedInfoMsg* in_msg) { vt_debug_print( normal, temperedlb, - "lockObtained: is_locked_={}, is_subclustering_={}\n", - is_locked_, is_subclustering_ + "lockObtained: is_locked_={}, is_subclustering_={}, is_swapping_={}\n", + is_locked_, is_subclustering_, is_swapping_ ); auto cur_epoch = theMsg()->getEpoch(); @@ -2678,6 +2699,12 @@ void TemperedLB::lockObtained(LockedInfoMsg* in_msg) { } else if (is_swapping_) { pending_actions_.push_back(action); } else { + vt_debug_print( + normal, temperedlb, + "lockObtained: running action immediately\n" + ); + + action(); } } diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h index f93df475d5..31a7cf5d23 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h @@ -515,6 +515,7 @@ struct TemperedLB : BaseLB { bool is_subclustering_ = false; /// Ready to satify looks bool ready_to_satisfy_locks_ = false; + int consider_swaps_counter_ = 0; }; }}}} /* end namespace vt::vrt::collection::lb */ From 65bb487a4534ab762efc38b59303ae55571020ca Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Mon, 22 Jan 2024 13:39:21 -0800 Subject: [PATCH 072/126] #2201: temperedlb: set locked while it has a lock to avoid giving a lock while swapping --- src/vt/vrt/collection/balance/temperedlb/temperedlb.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 22855e524b..dc785cf262 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -2405,6 +2405,7 @@ void TemperedLB::considerSubClustersAfterLock(MsgSharedPtr msg) { void TemperedLB::considerSwapsAfterLock(MsgSharedPtr msg) { consider_swaps_counter_++; is_swapping_ = true; + is_locked_ = true; vt_debug_print( verbose, temperedlb, @@ -2575,6 +2576,7 @@ void TemperedLB::considerSwapsAfterLock(MsgSharedPtr msg) { ); is_swapping_ = false; + is_locked_ = false; consider_swaps_counter_--; if (pending_actions_.size() > 0) { From 870abb9d2c0cebbb45388d28b5ef1eb6394b8d5c Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Thu, 21 Mar 2024 16:16:48 -0700 Subject: [PATCH 073/126] #2201: temperedlb: make greek symbols line up with paper --- .../balance/temperedlb/temperedlb.cc | 18 +++++++++--------- .../collection/balance/temperedlb/temperedlb.h | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index dc785cf262..ed3afa99c1 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -300,11 +300,11 @@ Description: β in the work model (inter-node communication in work model) )" }, { - "gamma", + "epislon", R"( Values: Defaut: 1.0 -Description: γ in the work model (constant in work model) +Description: ε in the work model (constant in work model) )" }, { @@ -312,15 +312,15 @@ Description: γ in the work model (constant in work model) R"( Values: Defaut: 1.0 -Description: δ in the work model (intra-node communication in work model) +Description: δ in the work model (shared-memory-edges in work model) )" 
}, { - "zeta", + "gamma", R"( Values: Defaut: 1.0 -Description: ζ in the work model (shared-memory-edges in work model) +Description: γ in the work model (intra-node communication in work model) )" } }; @@ -425,7 +425,7 @@ void TemperedLB::inputParams(balance::ConfigEntry* config) { β = config->getOrDefault("beta", β); γ = config->getOrDefault("gamma", γ); δ = config->getOrDefault("delta", δ); - ζ = config->getOrDefault("zeta", ζ); + ε = config->getOrDefault("epsilon", ε); num_iters_ = config->getOrDefault("iters", num_iters_); num_trials_ = config->getOrDefault("trials", num_trials_); @@ -864,9 +864,9 @@ double TemperedLB::computeWork( return α * load + β * inter_comm_bytes + - δ * intra_comm_bytes + - ζ * shared_comm_bytes + - γ; + γ * intra_comm_bytes + + δ * shared_comm_bytes + + ε; } WorkBreakdown TemperedLB::computeWorkBreakdown( diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h index 31a7cf5d23..2ebf43b26c 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h @@ -421,7 +421,7 @@ struct TemperedLB : BaseLB { double β = 0.0; double γ = 0.0; double δ = 0.0; - double ζ = 0.0; + double ε = 0.0; std::vector propagated_k_; std::mt19937 gen_propagate_; std::mt19937 gen_sample_; From 734e005c52234d2689f03ff1f47693552025ce92 Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Tue, 9 Apr 2024 11:26:31 -0700 Subject: [PATCH 074/126] #2201: temperedlb: fix some typos --- src/vt/vrt/collection/balance/temperedlb/temperedlb.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index ed3afa99c1..c6b06013aa 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -300,7 +300,7 @@ Description: β in the work model (inter-node communication in work model) )" }, { - "epislon", + "epsilon", R"( Values: Defaut: 1.0 @@ -996,7 +996,7 @@ double TemperedLB::computeWorkAfterClusterSwap( to_remove.home_node != node and to_remove.home_node != uninitialized_destination ) { - node_work -= ζ * to_remove.edge_weight; + node_work -= δ * to_remove.edge_weight; } // If to_add is now remote, add that component to the work @@ -1004,7 +1004,7 @@ double TemperedLB::computeWorkAfterClusterSwap( to_add.home_node != node and to_add.home_node != uninitialized_destination ) { - node_work += ζ * to_add.edge_weight; + node_work += δ * to_add.edge_weight; } double node_inter_send = info.inter_send_vol; From f6b484cdbec3064f05ad98324ae00ec5d9a0fa9b Mon Sep 17 00:00:00 2001 From: Nicole Lemaster Slattengren Date: Tue, 9 Apr 2024 13:12:15 -0600 Subject: [PATCH 075/126] #2201: temperedlb: read shared block home ranks from json file --- .../vrt/collection/balance/temperedlb/temperedlb.cc | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index c6b06013aa..494e5acba9 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -599,6 +599,15 @@ void TemperedLB::readClustersMemoryData() { vtAbort("\"shared_id\" in variant does not match double"); } } + if (key == "home_rank") { + // Because of how JSON is stored this is always a double, even + // though it should be an integer 
+ if (double const* val = std::get_if(&variant)) { + home_rank = static_cast(*val); + } else { + vtAbort("\"home_rank\" in variant does not match double"); + } + } if (key == "shared_bytes") { if (BytesType const* val = std::get_if(&variant)) { shared_bytes = *val; @@ -652,9 +661,7 @@ void TemperedLB::readClustersMemoryData() { obj_footprint_bytes_[obj] = footprint_bytes; obj_serialized_bytes_[obj] = serialized_bytes; shared_block_size_[shared_id] = shared_bytes; - - // @todo: remove this hack once we have good data - shared_block_edge_[shared_id] = std::make_tuple(this_node, shared_bytes); + shared_block_edge_[shared_id] = std::make_tuple(home_rank, shared_bytes); } } } From 09fff920aed23a578d2d9a83dd0542dfbd90cf45 Mon Sep 17 00:00:00 2001 From: Nicole Lemaster Slattengren Date: Tue, 9 Apr 2024 13:49:15 -0600 Subject: [PATCH 076/126] #2201: temperedlb: fix compile error and warning --- src/vt/vrt/collection/balance/temperedlb/temperedlb.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 494e5acba9..fcc2eae4e2 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -576,7 +576,6 @@ void TemperedLB::runLB(LoadType total_load) { } void TemperedLB::readClustersMemoryData() { - auto const this_node = theContext()->getNode(); if (load_model_->hasUserData()) { for (auto obj : *load_model_) { if (obj.isMigratable()) { @@ -585,6 +584,7 @@ void TemperedLB::readClustersMemoryData() { ); SharedIDType shared_id = -1; + vt::NodeType home_rank = vt::uninitialized_destination; BytesType shared_bytes = 0; BytesType working_bytes = 0; BytesType footprint_bytes = 0; From af7c35735767bd5331914cae3b930af071855ffc Mon Sep 17 00:00:00 2001 From: Nicole Lemaster Slattengren Date: Tue, 9 Apr 2024 14:11:33 -0600 Subject: [PATCH 077/126] #2201: temperedlb: make symmedges prints debug verbose --- src/vt/vrt/collection/balance/temperedlb/temperedlb.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index fcc2eae4e2..2f88728b6f 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -1135,8 +1135,8 @@ void TemperedLB::doLBStages(LoadType start_imb) { for (auto const& [from_obj, to_edges] : send_edges_) { for (auto const& [to_obj, volume] : to_edges) { - vt_print( - temperedlb, + vt_debug_print( + verbose, temperedlb, "SymmEdges: from={}, to={}, volume={}\n", from_obj, to_obj, volume ); From e719cb753677adc797d7ab3b6fd8fa1064560c53 Mon Sep 17 00:00:00 2001 From: Nicole Lemaster Slattengren Date: Tue, 9 Apr 2024 15:06:47 -0600 Subject: [PATCH 078/126] #2201: temperedlb: print final unhomed blocks without debug --- .../balance/temperedlb/temperedlb.cc | 55 +++++++++++++------ .../balance/temperedlb/temperedlb.h | 11 +++- 2 files changed, 48 insertions(+), 18 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 2f88728b6f..e5fab321bc 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -856,6 +856,23 @@ std::set TemperedLB::getSharedBlocksHere() const { return blocks_here; } +int TemperedLB::getRemoteBlockCountHere() const { + auto this_node 
= theContext()->getNode(); + auto const& shared_blocks_here = getSharedBlocksHere(); + int remote_block_count = 0; + for (auto const& sid : shared_blocks_here) { + if (auto it = shared_block_edge_.find(sid); it != shared_block_edge_.end()) { + auto const& [home_node, volume] = it->second; + if (home_node != this_node) { + remote_block_count++; + } + } else { + vtAbort("Could not find shared edge volume!"); + } + } + return remote_block_count; +} + void TemperedLB::workStatsHandler(std::vector const& vec) { auto const& work = vec[1]; work_mean_ = work.avg(); @@ -1343,6 +1360,13 @@ void TemperedLB::doLBStages(LoadType start_imb) { best_trial, new_imbalance_ ); } + + auto remote_block_count = getRemoteBlockCountHere(); + runInEpochCollective("TemperedLB::doLBStages -> compute unhomed", [=] { + proxy_.allreduce<&TemperedLB::remoteBlockCountHandler, collective::PlusOp>( + remote_block_count + ); + }); } else if (this_node == 0) { vt_debug_print( terse, temperedlb, @@ -1418,6 +1442,17 @@ void TemperedLB::rejectionStatsHandler( } } +void TemperedLB::remoteBlockCountHandler(int n_unhomed_blocks) { + auto this_node = theContext()->getNode(); + if (this_node == 0) { + vt_print( + temperedlb, + "After load balancing, {} blocks will be off their home ranks\n", + n_unhomed_blocks + ); + } +} + void TemperedLB::informAsync() { propagated_k_.assign(k_max_, false); @@ -2857,10 +2892,9 @@ void TemperedLB::trySubClustering() { getSharedBlocksHere().size(), mem_thresh_, this_new_load_ ); - int n_rejected = 0; - // Report on rejection rate in debug mode if (theConfig()->vt_debug_temperedlb) { + int n_rejected = 0; runInEpochCollective("TemperedLB::swapClusters -> compute rejection", [=] { proxy_.allreduce<&TemperedLB::rejectionStatsHandler, collective::PlusOp>( n_rejected, n_transfers_swap_, 0 @@ -3003,23 +3037,10 @@ void TemperedLB::swapClusters() { getSharedBlocksHere().size(), mem_thresh_, this_new_load_ ); - auto const& shared_blocks_here = getSharedBlocksHere(); - int remote_block_count = 0; - for (auto const& sid : shared_blocks_here) { - if (auto it = shared_block_edge_.find(sid); it != shared_block_edge_.end()) { - auto const& [home_node, volume] = it->second; - if (home_node != this_node) { - remote_block_count++; - } - } else { - vtAbort("Could not find shared edge volume!"); - } - } - - // Report on rejection rate in debug mode - int n_rejected = 0; if (theConfig()->vt_debug_temperedlb) { + int n_rejected = 0; + auto remote_block_count = getRemoteBlockCountHere(); runInEpochCollective("TemperedLB::swapClusters -> compute rejection", [=] { proxy_.allreduce<&TemperedLB::rejectionStatsHandler, collective::PlusOp>( n_rejected, n_transfers_swap_, remote_block_count diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h index 2ebf43b26c..ee71a77447 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h @@ -129,6 +129,7 @@ struct TemperedLB : BaseLB { void rejectionStatsHandler( int n_rejected, int n_transfers, int n_unhomed_blocks ); + void remoteBlockCountHandler(int n_unhomed_blocks); void thunkMigrations(); void setupDone(); @@ -150,10 +151,18 @@ struct TemperedLB : BaseLB { * \brief Get the shared blocks that are located on this node with the current * object assignment * - * \return the number of shared blocks here + * \return the set of shared blocks here */ std::set getSharedBlocksHere() const; + /** + * \brief Get the number of shared blocks that are located 
on this node with + * the current object assignment but are not homed here + * + * \return the number of unhomed shared blocks here + */ + int getRemoteBlockCountHere() const; + /** * \brief Compute the current cluster assignment summary for this rank */ From 2ba5ec095ed8fcb838ae8392c51b418cc1ed4f6d Mon Sep 17 00:00:00 2001 From: Nicole Lemaster Slattengren Date: Tue, 9 Apr 2024 15:19:35 -0600 Subject: [PATCH 079/126] #2201: tools: NOT to merge: add paper reproducer input and script --- tools/1959-tasks/ccm-lb-delta-1e-11.config | 1 + tools/1959-tasks/simulate.sh | 12 ++++++++++++ 2 files changed, 13 insertions(+) create mode 100644 tools/1959-tasks/ccm-lb-delta-1e-11.config create mode 100755 tools/1959-tasks/simulate.sh diff --git a/tools/1959-tasks/ccm-lb-delta-1e-11.config b/tools/1959-tasks/ccm-lb-delta-1e-11.config new file mode 100644 index 0000000000..cbe1354eef --- /dev/null +++ b/tools/1959-tasks/ccm-lb-delta-1e-11.config @@ -0,0 +1 @@ +0 TemperedLB transfer=SwapClusters knowledge=Complete rollback=false iters=12 memory_threshold=66000000000.0 delta=0.00000000001 diff --git a/tools/1959-tasks/simulate.sh b/tools/1959-tasks/simulate.sh new file mode 100755 index 0000000000..88c0827112 --- /dev/null +++ b/tools/1959-tasks/simulate.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +if [ -z $2 ]; then + echo "Error: missing arguments" + echo "Syntax: $0 " + exit 1 +fi + +vt_src_dir=$1 +vt_build_dir=$2 + +mpiexec --n 14 ${vt_build_dir}/tools/workload_replay/simulate_replay 0 1 --vt_lb --vt_lb_file_name="${vt_src_dir}/tools/1959-tasks/ccm-lb-delta-1e-11.config" --vt_lb_data_in --vt_lb_data_dir_in="${vt_src_dir}/tools/1959-tasks" --vt_debug_level=terse --vt_debug_phase From 9e56bcfa99a684e41bf7cffccf52468da3c7de43 Mon Sep 17 00:00:00 2001 From: Nicole Lemaster Slattengren Date: Mon, 15 Apr 2024 13:53:54 -0600 Subject: [PATCH 080/126] #2201: temperedlb: fix typos in comments and strings --- src/vt/vrt/collection/balance/temperedlb/tempered_enums.h | 2 +- src/vt/vrt/collection/balance/temperedlb/temperedlb.cc | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/tempered_enums.h b/src/vt/vrt/collection/balance/temperedlb/tempered_enums.h index 260db5fec0..fd6daa9f83 100644 --- a/src/vt/vrt/collection/balance/temperedlb/tempered_enums.h +++ b/src/vt/vrt/collection/balance/temperedlb/tempered_enums.h @@ -96,7 +96,7 @@ enum struct TransferTypeEnum : uint8_t { * of entire clusters, including the nullset, between ranks are attempted. * This is especially useful when shared memory constraints are present, * as breaking shared memory clusters results in higher overall memory - * footprint, in constrast with whole cluster swaps. + * footprint, in contrast with whole cluster swaps. */ SwapClusters = 2, }; diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index e5fab321bc..8186ac8f42 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -187,7 +187,7 @@ Default: Original of entire clusters, according the nullset, between ranks are attempted. This is especially useful when shared memory constraints are present, as breaking shared memory clusters results in higher overall memory - footprint, in constrast with whole cluster swaps. + footprint, in contrast with whole cluster swaps. 
)" }, { @@ -1015,7 +1015,7 @@ double TemperedLB::computeWorkAfterClusterSwap( ); // Uninitialized destination means that the cluster is empty - // If to_remove it was remote, remove that component from the work + // If to_remove was remote, remove that component from the work if ( to_remove.home_node != node and to_remove.home_node != uninitialized_destination @@ -1074,7 +1074,7 @@ void TemperedLB::doLBStages(LoadType start_imb) { auto this_node = theContext()->getNode(); - // Read in memory information if it's available before be do any trials + // Read in memory information if it's available before we do any trials readClustersMemoryData(); if (transfer_type_ == TransferTypeEnum::SwapClusters) { @@ -1266,7 +1266,7 @@ void TemperedLB::doLBStages(LoadType start_imb) { } } - // Move remove cluster information to shared_block_size_ so we have all + // Move remote cluster information to shared_block_size_ so we have all // the sizes in the same place for (auto const& [node, clusters] : other_rank_clusters_) { for (auto const& [shared_id, cluster_info] : clusters) { From 76b47921894c88a08af069f5b1862ade6b559398 Mon Sep 17 00:00:00 2001 From: Nicole Lemaster Slattengren Date: Mon, 15 Apr 2024 13:57:29 -0600 Subject: [PATCH 081/126] #2201: tools: NOT to merge: update user-defined toy problem readme --- tools/user-defined-memory-toy-problem/README | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/user-defined-memory-toy-problem/README b/tools/user-defined-memory-toy-problem/README index 920c57f7e9..51fe2420b2 100644 --- a/tools/user-defined-memory-toy-problem/README +++ b/tools/user-defined-memory-toy-problem/README @@ -61,7 +61,7 @@ home rank after the relevant tasks complete. Each of four ranks has three shared blocks. The memory constrains dictate that at most four unique shared_id values can coexist on each rank. Under these -memory constraints, it is possible to perfectly balance the load (time). There +memory constraints, it is possible to balance the load (time) well. There is more than one way to do so. The communication cost to migrate a task off-rank is extremely low, but the cost to communicate back the result should be significant enough to discourage migrating shared_ids to other ranks without it @@ -73,9 +73,9 @@ rank-averaged load. The sum of the loads for the task corresponding to one of its shared_id values is more than the rank-averaged load, so the tasks for that shared_id will need to be split across two ranks to achieve good balance. The tasks for the other shared_ids across all ranks do not need to be split across -multiple ranks to perfectly balance the load (time). +multiple ranks to balance the load (time). -Below is one solution with a perfectly balanced load and decent communication. +Below is one solution with a well balanced load and decent communication. I have not evaluated whether it is optimal. 
Rank 0: @@ -97,4 +97,4 @@ Rank 2: Rank 3: [3,0,0],[3,0,1],[3,0,2] (home) [3,1,0],[3,1,1],[3,1,2] (home) -[3,2,0],[3,2,1],[3,2,2] (home) \ No newline at end of file +[3,2,0],[3,2,1],[3,2,2] (home) From 0c0773217ef3b4b54eff5aea59b9360935593f24 Mon Sep 17 00:00:00 2001 From: Nicole Lemaster Slattengren Date: Mon, 15 Apr 2024 14:00:26 -0600 Subject: [PATCH 082/126] #2201: tools: NOT to merge: add alternative in paper reproducer script --- tools/1959-tasks/simulate.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/1959-tasks/simulate.sh b/tools/1959-tasks/simulate.sh index 88c0827112..81b2d03f2d 100755 --- a/tools/1959-tasks/simulate.sh +++ b/tools/1959-tasks/simulate.sh @@ -10,3 +10,6 @@ vt_src_dir=$1 vt_build_dir=$2 mpiexec --n 14 ${vt_build_dir}/tools/workload_replay/simulate_replay 0 1 --vt_lb --vt_lb_file_name="${vt_src_dir}/tools/1959-tasks/ccm-lb-delta-1e-11.config" --vt_lb_data_in --vt_lb_data_dir_in="${vt_src_dir}/tools/1959-tasks" --vt_debug_level=terse --vt_debug_phase + +# Or, if you don't want to use an LB config file: +#mpiexec --n 14 ${vt_build_dir}/tools/workload_replay/simulate_replay 0 1 --vt_lb --vt_lb_run_lb_first_phase --vt_lb_name="TemperedLB" --vt_lb_args="transfer=SwapClusters knowledge=Complete rollback=false iters=12 memory_threshold=66000000000.0 delta=0.00000000001" --vt_lb_data_in --vt_lb_data_dir_in="${vt_src_dir}/tools/1959-tasks" --vt_debug_level=terse --vt_debug_phase From 1824c8eb9e951824fb325a388ea5ef9adef93f16 Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Tue, 30 Apr 2024 17:21:19 -0700 Subject: [PATCH 083/126] #2201: temperedlb: remove subclustering for now --- .../balance/temperedlb/temperedlb.cc | 327 +----------------- .../balance/temperedlb/temperedlb.h | 15 - 2 files changed, 3 insertions(+), 339 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 8186ac8f42..20540d5e60 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -1089,7 +1089,6 @@ void TemperedLB::doLBStages(LoadType start_imb) { other_rank_clusters_.clear(); max_load_over_iters_.clear(); is_overloaded_ = is_underloaded_ = false; - is_subclustering_ = false; ready_to_satisfy_locks_ = false; LoadType best_imb_this_trial = start_imb + 10; @@ -1194,7 +1193,6 @@ void TemperedLB::doLBStages(LoadType start_imb) { underloaded_.clear(); load_info_.clear(); is_overloaded_ = is_underloaded_ = false; - is_subclustering_ = false; ready_to_satisfy_locks_ = false; other_rank_clusters_.clear(); @@ -2262,188 +2260,6 @@ double TemperedLB::loadTransferCriterion( return w_max_0 - w_max_new; } -void TemperedLB::considerSubClustersAfterLock(MsgSharedPtr msg) { -#if 0 - is_swapping_ = true; - - auto criterion = [&,this](auto src_cluster, auto try_cluster) -> double { - auto const& [src_id, src_bytes, src_load] = src_cluster; - auto const& [try_rank, try_total_load, try_total_bytes] = try_cluster; - - - // Check whether strict bounds on memory are satisfied - if (not memoryTransferCriterion(try_total_bytes, src_bytes)) { - return - std::numeric_limits::infinity(); - } - - // Return load transfer criterion - return loadTransferCriterion(this_new_load_, try_total_load, src_load, 0.); - }; - - auto const& try_clusters = msg->locked_clusters; - auto const& try_rank = msg->locked_node; - auto const& try_load = msg->locked_load; - auto const& try_total_bytes = msg->locked_bytes; - - vt_print( - temperedlb, - 
"considerSubClustersAfterLock: try_rank={} try_load={}\n", try_rank, try_load - ); - - // get the shared blocks current residing on this rank - auto shared_blocks_here = getSharedBlocksHere(); - - // Shared IDs when added to this rank don't put it over the limit - std::set possible_transfers; - - for (auto const& shared_id : shared_blocks_here) { - // Allow shared blocks that don't put it over memory or already exist on - // try_rank - if (try_clusters.find(shared_id) == try_clusters.end()) { - if (try_total_bytes + shared_block_size_[shared_id] < mem_thresh_) { - possible_transfers.insert(shared_id); - } - } else { - possible_transfers.insert(shared_id); - } - } - - vt_print( - temperedlb, - "considerSubClustersAfterLock: possible_transfers={}\n", - possible_transfers.size() - ); - - // Now, we will greedily try to find a combo of objects that will reduce our - // max - - // We can prune some clusters out of this mix based on the requirements that - // this is beneficial - auto const amount_over_average = this_new_load_ - target_max_load_; - auto const amount_under_average = target_max_load_ - try_load; - - // Any sub-cluster that is smaller than amount_over_average or smaller than - // amount_under_average we can just skip. We start by skipping all entire - // clusters that don't fit this criteria since sub-clusters will also be - // eliminated from those - - vt_print( - temperedlb, - "considerSubClustersAfterLock: over={}, under={}\n", amount_over_average, - amount_under_average - ); - - std::set clusters_to_split; - - for (auto const& [src_shared_id, src_cluster] : cur_clusters_) { - auto const& [src_cluster_bytes, src_cluster_load] = src_cluster; - if ( - src_cluster_load < amount_over_average or - src_cluster_load < amount_under_average - ) { - // skip it - } else { - clusters_to_split.insert(src_shared_id); - } - } - - double best_c_try = -1.0; - std::set best_selected; - SharedIDType best_id = -1; - for (auto const& shared_id : clusters_to_split) { - auto const& [src_cluster_bytes, src_cluster_load] = cur_clusters_[shared_id]; - - std::set objs; - for (auto const& [obj_id, shared_id_obj] : obj_shared_block_) { - if (shared_id_obj == shared_id) { - objs.emplace(obj_id, cur_objs_[obj_id]); - } - } - - std::set selected; - LoadType load_sum = 0; - for (auto const& [obj_id, load] : objs) { - load_sum += load; - selected.insert(obj_id); - - // We will not consider empty cluster "swaps" here. - if (selected.size() != objs.size()) { - auto src_cluster_bytes_add = - try_clusters.find(shared_id) == try_clusters.end() ? 
src_cluster_bytes : 0; - - double c_try = criterion( - std::make_tuple(shared_id, src_cluster_bytes_add, load_sum), - std::make_tuple(try_rank, try_load, try_total_bytes) - ); - - vt_debug_print( - terse, temperedlb, - "testing a possible sub-cluster (rank {}): id={} load={} c_try={}, " - "amount over average={}, amount under average={}\n", - try_rank, shared_id, load_sum, c_try, amount_over_average, - amount_under_average - ); - - if (c_try > 0.0) { - best_c_try = c_try; - best_selected = selected; - best_id = shared_id; - } - } - } - } - - if (best_c_try > 0.0) { - vt_debug_print( - normal, temperedlb, - "best_c_try={}, picked subcluster with id={} for rank ={}\n", - best_c_try, best_id, try_rank - ); - - auto const& [ - give_objs, - give_obj_shared_block, - give_shared_blocks_size, - give_obj_working_bytes - ] = removeClusterToSend(best_id, best_selected); - - auto const this_node = theContext()->getNode(); - - runInEpochRooted("giveSubCluster", [&]{ - proxy_[try_rank].template send<&TemperedLB::giveCluster>( - this_node, - give_shared_blocks_size, - give_objs, - give_obj_shared_block, - give_obj_working_bytes, - -1 - ); - }); - - computeClusterSummary(); - this_new_breakdown_ = computeWorkBreakdown(this_node, cur_objs_); - this_new_work_ = this_new_breakdown_.work; - computeMemoryUsage(); - - vt_debug_print( - normal, temperedlb, - "best_c_try={}, sub-cluster sent to rank={}\n", - best_c_try, try_rank - ); - } - - proxy_[try_rank].template send<&TemperedLB::releaseLock>(); - - is_swapping_ = false; - - if (pending_actions_.size() > 0) { - auto action = pending_actions_.back(); - pending_actions_.pop_back(); - action(); - } -#endif -} - void TemperedLB::considerSwapsAfterLock(MsgSharedPtr msg) { consider_swaps_counter_++; is_swapping_ = true; @@ -2715,8 +2531,8 @@ void TemperedLB::lockObtained(LockedInfoMsg* in_msg) { vt_debug_print( normal, temperedlb, - "lockObtained: is_locked_={}, is_subclustering_={}, is_swapping_={}\n", - is_locked_, is_subclustering_, is_swapping_ + "lockObtained: is_locked_={}, is_swapping_={}\n", + is_locked_, is_swapping_ ); auto cur_epoch = theMsg()->getEpoch(); @@ -2724,11 +2540,7 @@ void TemperedLB::lockObtained(LockedInfoMsg* in_msg) { auto action = [this, msg, cur_epoch]{ theMsg()->pushEpoch(cur_epoch); - if (is_subclustering_) { - considerSubClustersAfterLock(msg); - } else { - considerSwapsAfterLock(msg); - } + considerSwapsAfterLock(msg); theMsg()->popEpoch(cur_epoch); theTerm()->consume(cur_epoch); }; @@ -2794,140 +2606,7 @@ void TemperedLB::satisfyLockRequest() { } } -void TemperedLB::trySubClustering() { -#if 0 - is_subclustering_ = true; - n_transfers_swap_ = 0; - - auto lazy_epoch = theTerm()->makeEpochCollective("TemperedLB: subCluster"); - theTerm()->pushEpoch(lazy_epoch); - - auto const this_node = theContext()->getNode(); - - vt_print( - temperedlb, - "SUBcluster: load={} max_load={}\n", - this_new_load_, max_load_over_iters_.back() - ); - - // Only ranks that are close to max should do this...otherwise its a waste - // Very aggressive to start. 
- if ( - auto n_iters = max_load_over_iters_.size(); - this_new_load_ / max_load_over_iters_[n_iters - 1] > 0.80 - ) { - BytesType avg_cluster_bytes = 0; - for (auto const& [src_shared_id, src_cluster] : cur_clusters_) { - auto const& [src_cluster_bytes, src_cluster_load] = src_cluster; - avg_cluster_bytes += src_cluster_bytes; - } - avg_cluster_bytes /= cur_clusters_.size(); - - for (auto const& [try_rank, try_clusters] : other_rank_clusters_) { - - BytesType total_clusters_bytes = 0; - for (auto const& [try_shared_id, try_cluster] : try_clusters) { - auto const& [try_cluster_bytes, try_cluster_load] = try_cluster; - total_clusters_bytes += try_cluster_bytes; - } - - vt_print( - temperedlb, - "SUBcluster: load={} max_load={}, try_rank={}\n", - this_new_load_, max_load_over_iters_.back(), try_rank - ); - - - // Only target ranks where the target rank has room for the average - // cluster size that this rank has - if (total_clusters_bytes + avg_cluster_bytes < mem_thresh_) { - if ( - auto target_rank_load = load_info_.find(try_rank)->second.load; - target_rank_load < target_max_load_ - ) { - - vt_print( - temperedlb, - "SUBcluster: load={} max_load={}, try_rank={} sending lock\n", - this_new_load_, max_load_over_iters_.back(), try_rank - ); - - // c-value is now the ratio of load compared to this rank. prefer - // ranks that have less load and have fewer clusters. - proxy_[try_rank].template send<&TemperedLB::tryLock>( - this_node, this_new_load_ / target_rank_load - ); - } - } - - } - - } else { - // do nothing--not loaded enough, may be a target to put load - } - - // We have to be very careful here since we will allow some reentrancy here. - constexpr int turn_scheduler_times = 10; - for (int i = 0; i < turn_scheduler_times; i++) { - theSched()->runSchedulerOnceImpl(); - } - - while (not theSched()->workQueueEmpty()) { - theSched()->runSchedulerOnceImpl(); - } - - ready_to_satisfy_locks_ = true; - satisfyLockRequest(); - - // Finalize epoch, we have sent our initial round of messages - // from here everything is message driven - theTerm()->finishedEpoch(lazy_epoch); - theTerm()->popEpoch(lazy_epoch); - vt::runSchedulerThrough(lazy_epoch); - - vt_debug_print( - normal, temperedlb, - "After subclustering iteration: total memory usage={}, shared blocks here={}, " - "memory_threshold={}, load={}\n", computeMemoryUsage(), - getSharedBlocksHere().size(), mem_thresh_, this_new_load_ - ); - - // Report on rejection rate in debug mode - if (theConfig()->vt_debug_temperedlb) { - int n_rejected = 0; - runInEpochCollective("TemperedLB::swapClusters -> compute rejection", [=] { - proxy_.allreduce<&TemperedLB::rejectionStatsHandler, collective::PlusOp>( - n_rejected, n_transfers_swap_, 0 - ); - }); - } -#endif -} - void TemperedLB::swapClusters() { -#if 0 - // Do the test to see if we should start sub-clustering. This is probably far - // too aggressive. We could check as an conservative check that requires more - // computation to see if a cluster is blocking progress. 
- if (auto const len = max_load_over_iters_.size(); len > 2) { - double const i1 = max_load_over_iters_[len-1]; - double const i2 = max_load_over_iters_[len-2]; - - vt_debug_print( - terse, temperedlb, - "swapClusters: check for subclustering: i1={}, i2={}," - " criteria=abs={} tol={}\n", - i1, i2, std::abs(i1 - i2), 0.01*i1 - ); - - // the max is mostly stable - if (std::abs(i1 - i2) < 0.01*i1) { - trySubClustering(); - return; - } - } -#endif - n_transfers_swap_ = 0; auto const this_node = theContext()->getNode(); diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h index ee71a77447..c3f83d2d17 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h @@ -311,24 +311,11 @@ struct TemperedLB : BaseLB { */ void considerSwapsAfterLock(MsgSharedPtr msg); - /** - * \brief Consider possible subcluster transfers with all the up-to-date info - * from a rank - * - * \param[in] msg update message with all the info - */ - void considerSubClustersAfterLock(MsgSharedPtr msg); - /** * \brief Release a lock on a rank */ void releaseLock(); - /** - * \brief Try sub-clustering---i.e., breaking up clusters to improve LB - */ - void trySubClustering(); - /** * \brief Give a cluster to a rank * @@ -520,8 +507,6 @@ struct TemperedLB : BaseLB { bool is_swapping_ = false; /// Max-load over ranks vector std::vector max_load_over_iters_; - /// Whether we are sub-clustering - bool is_subclustering_ = false; /// Ready to satify looks bool ready_to_satisfy_locks_ = false; int consider_swaps_counter_ = 0; From a26af8aa5c37a68c73960c6327c55eab746e1559 Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Wed, 1 May 2024 15:01:53 -0700 Subject: [PATCH 084/126] #2201: temperedlb: stop using greek letters to avoid making some compilers unhappy --- .../balance/temperedlb/temperedlb.cc | 38 +++++++++---------- .../balance/temperedlb/temperedlb.h | 10 ++--- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 20540d5e60..fb1005b1dc 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -304,7 +304,7 @@ Description: β in the work model (inter-node communication in work model) R"( Values: Defaut: 1.0 -Description: ε in the work model (constant in work model) +Description: ε in the work model (memory term in work model) )" }, { @@ -421,11 +421,11 @@ void TemperedLB::inputParams(balance::ConfigEntry* config) { vtAbort(s); } - α = config->getOrDefault("alpha", α); - β = config->getOrDefault("beta", β); - γ = config->getOrDefault("gamma", γ); - δ = config->getOrDefault("delta", δ); - ε = config->getOrDefault("epsilon", ε); + alpha = config->getOrDefault("alpha", alpha); + beta = config->getOrDefault("beta", beta); + gamma = config->getOrDefault("gamma", gamma); + delta = config->getOrDefault("delta", delta); + epsilon = config->getOrDefault("epsilon", epsilon); num_iters_ = config->getOrDefault("iters", num_iters_); num_trials_ = config->getOrDefault("trials", num_trials_); @@ -886,11 +886,11 @@ double TemperedLB::computeWork( ) const { // The work model based on input parameters return - α * load + - β * inter_comm_bytes + - γ * intra_comm_bytes + - δ * shared_comm_bytes + - ε; + alpha * load + + beta * inter_comm_bytes + + gamma * intra_comm_bytes + + delta * shared_comm_bytes + + 
epsilon; } WorkBreakdown TemperedLB::computeWorkBreakdown( @@ -1002,14 +1002,14 @@ double TemperedLB::computeWorkAfterClusterSwap( double node_work = info.work; // Remove/add clusters' load factor from work model - node_work -= α * to_remove.load; - node_work += α * to_add.load; + node_work -= alpha * to_remove.load; + node_work += alpha * to_add.load; // Remove/add clusters' intra-comm double const node_intra_send = info.intra_send_vol; double const node_intra_recv = info.intra_recv_vol; - node_work -= δ * std::max(node_intra_send, node_intra_recv); - node_work += δ * std::max( + node_work -= delta * std::max(node_intra_send, node_intra_recv); + node_work += delta * std::max( node_intra_send - to_remove.intra_send_vol + to_add.intra_send_vol, node_intra_recv - to_remove.intra_recv_vol + to_add.intra_recv_vol ); @@ -1020,7 +1020,7 @@ double TemperedLB::computeWorkAfterClusterSwap( to_remove.home_node != node and to_remove.home_node != uninitialized_destination ) { - node_work -= δ * to_remove.edge_weight; + node_work -= delta * to_remove.edge_weight; } // If to_add is now remote, add that component to the work @@ -1028,12 +1028,12 @@ double TemperedLB::computeWorkAfterClusterSwap( to_add.home_node != node and to_add.home_node != uninitialized_destination ) { - node_work += δ * to_add.edge_weight; + node_work += delta * to_add.edge_weight; } double node_inter_send = info.inter_send_vol; double node_inter_recv = info.inter_recv_vol; - node_work -= β * std::max(node_inter_send, node_inter_recv); + node_work -= beta * std::max(node_inter_send, node_inter_recv); // All edges outside the to_remove cluster that are also off the node need to // be removed from the inter-node volumes @@ -1061,7 +1061,7 @@ double TemperedLB::computeWorkAfterClusterSwap( } } - node_work += β * std::max(node_inter_send, node_inter_recv); + node_work += beta * std::max(node_inter_send, node_inter_recv); return node_work; } diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h index c3f83d2d17..b30cd66499 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h @@ -413,11 +413,11 @@ struct TemperedLB : BaseLB { KnowledgeEnum knowledge_ = KnowledgeEnum::Log; bool setup_done_ = false; bool propagate_next_round_ = false; - double α = 1.0; - double β = 0.0; - double γ = 0.0; - double δ = 0.0; - double ε = 0.0; + double alpha = 1.0; + double beta = 0.0; + double gamma = 0.0; + double delta = 0.0; + double epsilon = 0.0; std::vector propagated_k_; std::mt19937 gen_propagate_; std::mt19937 gen_sample_; From 54873d2df9dc44e5b045ae5a8cc3330ed6f4937f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Tue, 7 May 2024 21:48:43 +0200 Subject: [PATCH 085/126] #2201: temperedlb: do not capture structured bindings Captured structured bindings are only legal since C++20. 
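A minimal illustrative sketch of the rule (assumed example, not code from
this change): under C++17 a lambda cannot capture a structured binding,
whereas naming the tuple elements through std::get<N> first is fine.

    #include <tuple>

    int main() {
      auto best = std::make_tuple(1, 2);
      auto const& [a, b] = best;               // structured binding (C++17)
      // auto f = [a, b] { return a + b; };    // ill-formed before C++20
      auto const a2 = std::get<0>(best);       // C++17-friendly spelling,
      auto const b2 = std::get<1>(best);       // mirroring this patch
      auto g = [a2, b2] { return a2 + b2; };
      return g() + a + b;
    }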
--- .../collection/balance/temperedlb/temperedlb.cc | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index fb1005b1dc..ddd1fd2c1e 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -2382,7 +2382,8 @@ void TemperedLB::considerSwapsAfterLock(MsgSharedPtr msg) { } if (best_c_try > 0) { - auto const& [src_shared_id, try_shared_id] = best_swap; + auto const src_shared_id = std::get<0>(best_swap); + auto const try_shared_id = std::get<1>(best_swap); vt_debug_print( normal, temperedlb, @@ -2390,12 +2391,11 @@ void TemperedLB::considerSwapsAfterLock(MsgSharedPtr msg) { best_c_try, src_shared_id, try_shared_id, try_rank ); - auto const& [ - give_objs, - give_obj_shared_block, - give_shared_blocks_size, - give_obj_working_bytes - ] = removeClusterToSend(src_shared_id); + auto const& give_data = removeClusterToSend(src_shared_id); + auto const& give_objs = std::get<0>(give_data); + auto const& give_obj_shared_block = std::get<1>(give_data); + auto const& give_shared_blocks_size = std::get<2>(give_data); + auto const& give_obj_working_bytes = std::get<3>(give_data); runInEpochRooted("giveCluster", [&]{ vt_debug_print( From 81076b2985ce12468372855bf5e6178571c87090 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Thu, 9 May 2024 21:36:08 +0200 Subject: [PATCH 086/126] #2201: tools: fix shellcheck complaints --- tools/1959-tasks/simulate.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/1959-tasks/simulate.sh b/tools/1959-tasks/simulate.sh index 81b2d03f2d..9d6a147deb 100755 --- a/tools/1959-tasks/simulate.sh +++ b/tools/1959-tasks/simulate.sh @@ -1,6 +1,6 @@ #!/bin/bash -if [ -z $2 ]; then +if [ -z "$2" ]; then echo "Error: missing arguments" echo "Syntax: $0 " exit 1 @@ -9,7 +9,7 @@ fi vt_src_dir=$1 vt_build_dir=$2 -mpiexec --n 14 ${vt_build_dir}/tools/workload_replay/simulate_replay 0 1 --vt_lb --vt_lb_file_name="${vt_src_dir}/tools/1959-tasks/ccm-lb-delta-1e-11.config" --vt_lb_data_in --vt_lb_data_dir_in="${vt_src_dir}/tools/1959-tasks" --vt_debug_level=terse --vt_debug_phase +mpiexec --n 14 "${vt_build_dir}"/tools/workload_replay/simulate_replay 0 1 --vt_lb --vt_lb_file_name="${vt_src_dir}/tools/1959-tasks/ccm-lb-delta-1e-11.config" --vt_lb_data_in --vt_lb_data_dir_in="${vt_src_dir}/tools/1959-tasks" --vt_debug_level=terse --vt_debug_phase # Or, if you don't want to use an LB config file: #mpiexec --n 14 ${vt_build_dir}/tools/workload_replay/simulate_replay 0 1 --vt_lb --vt_lb_run_lb_first_phase --vt_lb_name="TemperedLB" --vt_lb_args="transfer=SwapClusters knowledge=Complete rollback=false iters=12 memory_threshold=66000000000.0 delta=0.00000000001" --vt_lb_data_in --vt_lb_data_dir_in="${vt_src_dir}/tools/1959-tasks" --vt_debug_level=terse --vt_debug_phase From cd51729912a29682a347f1e279d7233750388bd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Thu, 9 May 2024 21:41:58 +0200 Subject: [PATCH 087/126] #2201: temperedlb: add FIXME --- src/vt/vrt/collection/balance/temperedlb/temperedlb.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index ddd1fd2c1e..4e09018800 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ 
b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -2382,6 +2382,7 @@ void TemperedLB::considerSwapsAfterLock(MsgSharedPtr msg) { } if (best_c_try > 0) { + // FIXME C++20: use structured binding auto const src_shared_id = std::get<0>(best_swap); auto const try_shared_id = std::get<1>(best_swap); @@ -2391,6 +2392,7 @@ void TemperedLB::considerSwapsAfterLock(MsgSharedPtr msg) { best_c_try, src_shared_id, try_shared_id, try_rank ); + // FIXME C++20: use structured binding auto const& give_data = removeClusterToSend(src_shared_id); auto const& give_objs = std::get<0>(give_data); auto const& give_obj_shared_block = std::get<1>(give_data); From a668eb9c69a47905a18da4b3191559c36b130ce0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Thu, 9 May 2024 21:51:34 +0200 Subject: [PATCH 088/126] #2201: temperedlb: reduce duplication --- src/vt/elm/elm_lb_data.cc | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/vt/elm/elm_lb_data.cc b/src/vt/elm/elm_lb_data.cc index 4387d8c3fd..dfa7bd82e1 100644 --- a/src/vt/elm/elm_lb_data.cc +++ b/src/vt/elm/elm_lb_data.cc @@ -89,19 +89,17 @@ void ElementLBData::sendToEntity( void ElementLBData::addWritableSharedID( NodeType home, int shared_id, double bytes ) { - elm::CommKey key(elm::CommKey::WriteSharedTag{}, home, shared_id); - phase_comm_[cur_phase_][key].sendMsg(bytes); - subphase_comm_[cur_phase_].resize(cur_subphase_ + 1); - subphase_comm_[cur_phase_].at(cur_subphase_)[key].sendMsg(bytes); + sendComm( + elm::CommKey{elm::CommKey::WriteSharedTag{}, home, shared_id}, bytes + ); } void ElementLBData::addReadOnlySharedID( NodeType home, int shared_id, double bytes ) { - elm::CommKey key(elm::CommKey::ReadOnlySharedTag{}, home, shared_id); - phase_comm_[cur_phase_][key].sendMsg(bytes); - subphase_comm_[cur_phase_].resize(cur_subphase_ + 1); - subphase_comm_[cur_phase_].at(cur_subphase_)[key].sendMsg(bytes); + sendComm( + elm::CommKey{elm::CommKey::ReadOnlySharedTag{}, home, shared_id}, bytes + ); } void ElementLBData::sendComm(elm::CommKey key, double bytes) { From 002c532e099cfb58199733c9027bc30b2fde2914 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Fri, 10 May 2024 19:08:14 +0200 Subject: [PATCH 089/126] #2201: temperedlb: include fmt correctly --- src/vt/vrt/collection/balance/temperedlb/tempered_msgs.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/tempered_msgs.h b/src/vt/vrt/collection/balance/temperedlb/tempered_msgs.h index b710b74d8e..b59dfc0afd 100644 --- a/src/vt/vrt/collection/balance/temperedlb/tempered_msgs.h +++ b/src/vt/vrt/collection/balance/temperedlb/tempered_msgs.h @@ -46,9 +46,8 @@ #include "vt/config.h" -#include +#include INCLUDE_FMT_FORMAT -#include #include namespace vt::vrt::collection::lb { @@ -103,7 +102,7 @@ using RankSummaryType = std::tuple; } /* end namespace vt::vrt::collection::lb */ -namespace fmt { +VT_FMT_NAMESPACE_BEGIN /// Custom fmt formatter/print for \c vt::vrt::collection::lb::ClusterInfo template <> @@ -136,7 +135,7 @@ struct formatter<::vt::vrt::collection::lb::ClusterInfo> { } }; -} /* end namespace fmt */ +VT_FMT_NAMESPACE_END namespace vt { namespace vrt { namespace collection { namespace balance { From b13b62592866839498909363269f326071e1c990 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Mon, 20 May 2024 21:02:33 +0200 Subject: [PATCH 090/126] #2201: temperedlb: filter by `isMigratable` --- 
src/vt/vrt/collection/balance/temperedlb/temperedlb.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 4e09018800..53d63f63bd 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -1100,7 +1100,9 @@ void TemperedLB::doLBStages(LoadType start_imb) { // Copy this node's object assignments to a local, mutable copy cur_objs_.clear(); for (auto obj : *load_model_) { - cur_objs_[obj] = getModeledValue(obj); + if (obj.isMigratable()) { + cur_objs_[obj] = getModeledValue(obj); + } } send_edges_.clear(); From 66a7294c3da14ac6f02626f9ce47084bd2bf6a87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Wed, 22 May 2024 22:41:50 +0200 Subject: [PATCH 091/126] #2201: revert "temperedlb: filter by `isMigratable`" This reverts commit 25a307d84c4145498f9fa6a7671d33d0c0f1852b. --- src/vt/vrt/collection/balance/temperedlb/temperedlb.cc | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 53d63f63bd..4e09018800 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -1100,9 +1100,7 @@ void TemperedLB::doLBStages(LoadType start_imb) { // Copy this node's object assignments to a local, mutable copy cur_objs_.clear(); for (auto obj : *load_model_) { - if (obj.isMigratable()) { - cur_objs_[obj] = getModeledValue(obj); - } + cur_objs_[obj] = getModeledValue(obj); } send_edges_.clear(); From 098aed9bc7a20b7f2a6d100fb4ec2d7fd0fd9673 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Fri, 24 May 2024 22:41:01 +0200 Subject: [PATCH 092/126] #2201: baselb: filter by `isMigratable` --- src/vt/vrt/collection/balance/baselb/baselb.cc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/vt/vrt/collection/balance/baselb/baselb.cc b/src/vt/vrt/collection/balance/baselb/baselb.cc index 4f19fc9648..194d24893d 100644 --- a/src/vt/vrt/collection/balance/baselb/baselb.cc +++ b/src/vt/vrt/collection/balance/baselb/baselb.cc @@ -143,6 +143,12 @@ std::shared_ptr BaseLB::normalizeReassignments() { auto const new_node = std::get<1>(transfer); auto const current_node = obj_id.curr_node; + if (not obj_id.isMigratable()) { + vt_debug_print( + verbose, lb, "BaseLB::normalizeReassignments(): obj not migratable\n" + ); + continue; + } if (current_node == new_node) { vt_debug_print( verbose, lb, "BaseLB::normalizeReassignments(): self migration\n" From a0a6688574ccbc2a9a5d242f7b0d9ee7af80f97f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Tue, 28 May 2024 23:08:29 +0200 Subject: [PATCH 093/126] #2201: temperedlb: check if the obj is migratable during transfer stage --- .../vrt/collection/balance/temperedlb/temperedlb.cc | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 4e09018800..0646cb2188 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -2088,16 +2088,17 @@ void TemperedLB::originalTransfer() { vtAssert(load_iter != load_info_.end(), "Selected node not found"); auto& selected_load = load_iter->second.load; - // 
Evaluate criterion for proposed transfer - bool eval = Criterion(criterion_)( + // Check if object is migratable and evaluate criterion for proposed transfer + bool is_migratable = obj_id.isMigratable(); + bool eval = obj_id.isMigratable() && Criterion(criterion_)( this_new_load_, selected_load, obj_load, target_max_load_ ); vt_debug_print( verbose, temperedlb, "TemperedLB::originalTransfer: trial={}, iter={}, under.size()={}, " "selected_node={}, selected_load={:e}, obj_id={:x}, home={}, " - "obj_load={}, target_max_load={}, this_new_load_={}, " - "criterion={}\n", + "is_migratable()={}, obj_load={}, target_max_load={}, " + "this_new_load_={}, criterion={}\n", trial_, iter_, under.size(), @@ -2105,6 +2106,7 @@ void TemperedLB::originalTransfer() { selected_load, obj_id.id, obj_id.getHomeNode(), + is_migratable, LoadType(obj_load), LoadType(target_max_load_), LoadType(this_new_load_), @@ -2112,7 +2114,7 @@ void TemperedLB::originalTransfer() { ); // Decide about proposed migration based on criterion evaluation - if (eval) { + if (is_migratable and eval) { ++n_transfers; // Transfer the object load in seconds // to match the object load units on the receiving end From 1b1d05875252bb4d89ae9d821fab86d0ec44bc9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Tue, 28 May 2024 23:24:04 +0200 Subject: [PATCH 094/126] #2201: temperedlb: remove redundant code --- src/vt/vrt/collection/balance/temperedlb/temperedlb.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 0646cb2188..5e89c33335 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -2107,9 +2107,9 @@ void TemperedLB::originalTransfer() { obj_id.id, obj_id.getHomeNode(), is_migratable, - LoadType(obj_load), - LoadType(target_max_load_), - LoadType(this_new_load_), + obj_load, + target_max_load_, + this_new_load_, eval ); From 1cfe2e95b553de86e91b785b3f5ad93d1006e6f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Wed, 29 May 2024 19:02:29 +0200 Subject: [PATCH 095/126] #2201: fix unused variable warning --- src/vt/vrt/collection/balance/model/load_model.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vt/vrt/collection/balance/model/load_model.h b/src/vt/vrt/collection/balance/model/load_model.h index 99d2172c62..535d7f339b 100644 --- a/src/vt/vrt/collection/balance/model/load_model.h +++ b/src/vt/vrt/collection/balance/model/load_model.h @@ -222,7 +222,7 @@ struct LoadModel * * \return the comm info */ - virtual CommMapType getComm(PhaseOffset when) const { + virtual CommMapType getComm([[maybe_unused]] PhaseOffset when) const { return CommMapType{}; } From abdf1efcec83db664266acceb95c7e3d7d5249ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Tue, 4 Jun 2024 18:12:30 +0200 Subject: [PATCH 096/126] #2201: temperedlb: keep `eval` and `is_migratable` separate --- src/vt/vrt/collection/balance/temperedlb/temperedlb.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 5e89c33335..38bfae37e9 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -2090,7 +2090,7 @@ void TemperedLB::originalTransfer() { // Check if object is 
migratable and evaluate criterion for proposed transfer bool is_migratable = obj_id.isMigratable(); - bool eval = obj_id.isMigratable() && Criterion(criterion_)( + bool eval = Criterion(criterion_)( this_new_load_, selected_load, obj_load, target_max_load_ ); vt_debug_print( From cbdf71a3bf99c4cd761e24f2431428a874ea25d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Tue, 4 Jun 2024 21:21:15 +0200 Subject: [PATCH 097/126] #2201: baselb: abort when not migratable --- src/vt/vrt/collection/balance/baselb/baselb.cc | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/vt/vrt/collection/balance/baselb/baselb.cc b/src/vt/vrt/collection/balance/baselb/baselb.cc index 194d24893d..dcbb3f4f3a 100644 --- a/src/vt/vrt/collection/balance/baselb/baselb.cc +++ b/src/vt/vrt/collection/balance/baselb/baselb.cc @@ -143,12 +143,9 @@ std::shared_ptr BaseLB::normalizeReassignments() { auto const new_node = std::get<1>(transfer); auto const current_node = obj_id.curr_node; - if (not obj_id.isMigratable()) { - vt_debug_print( - verbose, lb, "BaseLB::normalizeReassignments(): obj not migratable\n" - ); - continue; - } + vtAbortIf( + not obj_id.isMigratable(), "Transfering object that is not migratable" + ); if (current_node == new_node) { vt_debug_print( verbose, lb, "BaseLB::normalizeReassignments(): self migration\n" From cbf6f1be9270de410be0f74bea5e82d9470d8e24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Tue, 4 Jun 2024 21:23:21 +0200 Subject: [PATCH 098/126] #2201: temperedlb: use gamma as coefficient --- src/vt/vrt/collection/balance/temperedlb/temperedlb.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 38bfae37e9..cdfcfe5081 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -1008,8 +1008,8 @@ double TemperedLB::computeWorkAfterClusterSwap( // Remove/add clusters' intra-comm double const node_intra_send = info.intra_send_vol; double const node_intra_recv = info.intra_recv_vol; - node_work -= delta * std::max(node_intra_send, node_intra_recv); - node_work += delta * std::max( + node_work -= gamma * std::max(node_intra_send, node_intra_recv); + node_work += gamma * std::max( node_intra_send - to_remove.intra_send_vol + to_add.intra_send_vol, node_intra_recv - to_remove.intra_recv_vol + to_add.intra_recv_vol ); From 19df7f8291536e160ab076114dcd9f3bf65a57c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Tue, 4 Jun 2024 21:26:46 +0200 Subject: [PATCH 099/126] #2201: temperedlb: remove NormBySelf --- .../vrt/collection/balance/temperedlb/tempered_enums.h | 10 +--------- src/vt/vrt/collection/balance/temperedlb/temperedlb.cc | 10 +--------- 2 files changed, 2 insertions(+), 18 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/tempered_enums.h b/src/vt/vrt/collection/balance/temperedlb/tempered_enums.h index fd6daa9f83..9313c6241a 100644 --- a/src/vt/vrt/collection/balance/temperedlb/tempered_enums.h +++ b/src/vt/vrt/collection/balance/temperedlb/tempered_enums.h @@ -152,14 +152,6 @@ enum struct CMFTypeEnum : uint8_t { * target load and the load of the most loaded processor in the CMF. 
*/ NormByMax = 1, - /** - * \brief Compute the CMF factor using the load of this processor - * - * Do not remove processors from the CMF that exceed the target load until the - * next iteration. Use a CMF factor of 1.0/x, where x is the load of the - * processor that is computing the CMF. - */ - NormBySelf = 2, /** * \brief Narrow the CMF to only include processors that can accommodate the * transfer @@ -169,7 +161,7 @@ enum struct CMFTypeEnum : uint8_t { * in the CMF that will pass the chosen Criterion for the object being * considered for transfer. */ - NormByMaxExcludeIneligible = 3, + NormByMaxExcludeIneligible = 2, }; /// Enum for determining fanout and rounds diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index cdfcfe5081..1500ca5b47 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -215,7 +215,7 @@ Default: FewestMigrations { "cmf", R"( -Values: {Original, NormByMax, NormBySelf, NormByMaxExcludeIneligible} +Values: {Original, NormByMax, NormByMaxExcludeIneligible} Default: NormByMax Description: Approach for computing the CMF used to pick an object to transfer. Options @@ -229,10 +229,6 @@ Default: NormByMax until the next iteration. Use a CMF factor of 1.0/x, where x is the greater of the target load and the load of the most loaded processor in the CMF. - NormBySelf: compute the CMF factor using the load of this processor. Do not - remove processors from the CMF that exceed the target load until the next - iteration. Use a CMF factor of 1.0/x, where x is the load of the processor - that is computing the CMF. NormByMaxExcludeIneligible: narrow the CMF to only include processors that can accommodate the transfer. 
Use a CMF factor of 1.0/x, where x is the greater of the target load and the load of the most loaded processor in @@ -475,7 +471,6 @@ void TemperedLB::inputParams(balance::ConfigEntry* config) { "cmf", "CMFTypeEnum", { {CMFTypeEnum::Original, "Original"}, {CMFTypeEnum::NormByMax, "NormByMax"}, - {CMFTypeEnum::NormBySelf, "NormBySelf"}, {CMFTypeEnum::NormByMaxExcludeIneligible, "NormByMaxExcludeIneligible"} } ); @@ -1764,9 +1759,6 @@ std::vector TemperedLB::createCMF(NodeSetType const& under) { case CMFTypeEnum::Original: factor = 1.0 / target_max_load_; break; - case CMFTypeEnum::NormBySelf: - factor = 1.0 / this_new_load_; - break; case CMFTypeEnum::NormByMax: case CMFTypeEnum::NormByMaxExcludeIneligible: { From 7a16983171f1e7f29c1f2111dbf3300150b7a9e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Tue, 11 Jun 2024 22:42:06 +0200 Subject: [PATCH 100/126] #2201: temperedlb: add comment for inter-node comm --- src/vt/vrt/collection/balance/temperedlb/temperedlb.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 1500ca5b47..b0e6b137ef 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -1026,6 +1026,7 @@ double TemperedLB::computeWorkAfterClusterSwap( node_work += delta * to_add.edge_weight; } + // Update formulae for inter-node communication double node_inter_send = info.inter_send_vol; double node_inter_recv = info.inter_recv_vol; node_work -= beta * std::max(node_inter_send, node_inter_recv); From 5735cc314a9cabba976cd7180256537196c3a4ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Wed, 12 Jun 2024 17:16:44 +0200 Subject: [PATCH 101/126] #2201: test running LB on first phase --- tests/unit/collection/test_lb.extended.cc | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/unit/collection/test_lb.extended.cc b/tests/unit/collection/test_lb.extended.cc index 2c6379cc38..626b31d764 100644 --- a/tests/unit/collection/test_lb.extended.cc +++ b/tests/unit/collection/test_lb.extended.cc @@ -150,6 +150,11 @@ TEST_P(TestLoadBalancerOther, test_load_balancer_other_keep_last_elm) { runTest(GetParam(), "test_load_balancer_other_keep_last_elm"); } +TEST_P(TestLoadBalancerOther, test_load_balancer_other_run_lb_first_phase) { + vt::theConfig()->vt_lb_run_lb_first_phase = true; + runTest(GetParam(), "test_load_balancer_other_keep_last_elm"); +} + TEST_P(TestLoadBalancerGreedy, test_load_balancer_greedy_2) { runTest(GetParam(), "test_load_balancer_greedy_2"); } @@ -159,6 +164,11 @@ TEST_P(TestLoadBalancerGreedy, test_load_balancer_greedy_keep_last_elm) { runTest(GetParam(), "test_load_balancer_greedy_keep_last_elm"); } +TEST_P(TestLoadBalancerGreedy, test_load_balancer_greedy_run_lb_first_phase) { + vt::theConfig()->vt_lb_run_lb_first_phase = true; + runTest(GetParam(), "test_load_balancer_greedy_keep_last_elm"); +} + TEST_F(TestLoadBalancerOther, test_make_graph_symmetric) { // setup auto const this_node = theContext()->getNode(); From ad35330cff098e3bb8c4489d58db3d94d1c114ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Wed, 12 Jun 2024 17:47:18 +0200 Subject: [PATCH 102/126] #2201: reduce code duplication --- src/vt/vrt/collection/balance/lb_data_holder.cc | 17 +++++------------ src/vt/vrt/collection/balance/lb_data_holder.h | 3 --- .../vrt/collection/balance/model/load_model.h | 4 +--- 
src/vt/vrt/collection/balance/node_lb_data.cc | 1 - 4 files changed, 6 insertions(+), 19 deletions(-) diff --git a/src/vt/vrt/collection/balance/lb_data_holder.cc b/src/vt/vrt/collection/balance/lb_data_holder.cc index 74bdf92aa1..dd8a3dfb34 100644 --- a/src/vt/vrt/collection/balance/lb_data_holder.cc +++ b/src/vt/vrt/collection/balance/lb_data_holder.cc @@ -252,9 +252,7 @@ std::unique_ptr LBDataHolder::toJson(PhaseType phase) const { i = 0; if (node_comm_.find(phase) != node_comm_.end()) { - for (auto&& elm : node_comm_.at(phase)) { - auto volume = elm.second; - auto const& key = elm.first; + for (auto const& [key, volume] : node_comm_.at(phase)) { j["communications"][i]["bytes"] = volume.bytes; j["communications"][i]["messages"] = volume.messages; @@ -296,16 +294,11 @@ std::unique_ptr LBDataHolder::toJson(PhaseType phase) const { outputEntity(j["communications"][i]["from"], key.fromObj()); break; } - case elm::CommCategory::ReadOnlyShared: { - j["communications"][i]["type"] = "ReadOnlyShared"; - j["communications"][i]["to"]["type"] = "node"; - j["communications"][i]["to"]["id"] = key.toNode(); - j["communications"][i]["from"]["type"] = "shared_id"; - j["communications"][i]["from"]["id"] = key.sharedID(); - break; - } + case elm::CommCategory::ReadOnlyShared: case elm::CommCategory::WriteShared: { - j["communications"][i]["type"] = "WriteShared"; + j["communications"][i]["type"] = + (key.cat_ == elm::CommCategory::ReadOnlyShared) ? + "ReadOnlyShared" : "WriteShared"; j["communications"][i]["to"]["type"] = "node"; j["communications"][i]["to"]["id"] = key.toNode(); j["communications"][i]["from"]["type"] = "shared_id"; diff --git a/src/vt/vrt/collection/balance/lb_data_holder.h b/src/vt/vrt/collection/balance/lb_data_holder.h index fb2c9fce48..729949693f 100644 --- a/src/vt/vrt/collection/balance/lb_data_holder.h +++ b/src/vt/vrt/collection/balance/lb_data_holder.h @@ -46,12 +46,9 @@ #include "vt/config.h" #include "vt/vrt/collection/balance/lb_common.h" -#include "vt/elm/elm_comm.h" #include #include -#include -#include #include diff --git a/src/vt/vrt/collection/balance/model/load_model.h b/src/vt/vrt/collection/balance/model/load_model.h index 535d7f339b..e737662f60 100644 --- a/src/vt/vrt/collection/balance/model/load_model.h +++ b/src/vt/vrt/collection/balance/model/load_model.h @@ -222,9 +222,7 @@ struct LoadModel * * \return the comm info */ - virtual CommMapType getComm([[maybe_unused]] PhaseOffset when) const { - return CommMapType{}; - } + virtual CommMapType getComm(PhaseOffset when) const = 0; /** * \brief Provide an estimate of the given object's load during a specified interval diff --git a/src/vt/vrt/collection/balance/node_lb_data.cc b/src/vt/vrt/collection/balance/node_lb_data.cc index 71e72e4aed..e6efc89807 100644 --- a/src/vt/vrt/collection/balance/node_lb_data.cc +++ b/src/vt/vrt/collection/balance/node_lb_data.cc @@ -272,7 +272,6 @@ getRecvSendDirection(elm::CommKeyType const& comm) { // this case is just to avoid warning of not handled enum case elm::CommCategory::CollectiveToCollectionBcast: case elm::CommCategory::LocalInvoke: - return std::make_pair(ElementIDType{}, ElementIDType{}); case elm::CommCategory::WriteShared: case elm::CommCategory::ReadOnlyShared: return std::make_pair(ElementIDType{}, ElementIDType{}); From 5e5eb8512d3c3a47efeaf41abfd8a82123dbd0bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Thu, 13 Jun 2024 14:54:36 +0200 Subject: [PATCH 103/126] #2201: provide basic implementation for `getComm` --- 
src/vt/vrt/collection/balance/model/load_model.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/vt/vrt/collection/balance/model/load_model.h b/src/vt/vrt/collection/balance/model/load_model.h index e737662f60..535d7f339b 100644 --- a/src/vt/vrt/collection/balance/model/load_model.h +++ b/src/vt/vrt/collection/balance/model/load_model.h @@ -222,7 +222,9 @@ struct LoadModel * * \return the comm info */ - virtual CommMapType getComm(PhaseOffset when) const = 0; + virtual CommMapType getComm([[maybe_unused]] PhaseOffset when) const { + return CommMapType{}; + } /** * \brief Provide an estimate of the given object's load during a specified interval From ef5c9b8bc5418fec881c7b8be29e1726d251c3c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Thu, 27 Jun 2024 16:25:40 +0200 Subject: [PATCH 104/126] #2201: Revert "tools: NOT to merge: add alternative in paper reproducer script" This reverts commit 0a967017c07f31110a81be17413ffe4eef2e4ba0. --- tools/1959-tasks/simulate.sh | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tools/1959-tasks/simulate.sh b/tools/1959-tasks/simulate.sh index 9d6a147deb..3a0c4b6197 100755 --- a/tools/1959-tasks/simulate.sh +++ b/tools/1959-tasks/simulate.sh @@ -9,7 +9,4 @@ fi vt_src_dir=$1 vt_build_dir=$2 -mpiexec --n 14 "${vt_build_dir}"/tools/workload_replay/simulate_replay 0 1 --vt_lb --vt_lb_file_name="${vt_src_dir}/tools/1959-tasks/ccm-lb-delta-1e-11.config" --vt_lb_data_in --vt_lb_data_dir_in="${vt_src_dir}/tools/1959-tasks" --vt_debug_level=terse --vt_debug_phase - -# Or, if you don't want to use an LB config file: -#mpiexec --n 14 ${vt_build_dir}/tools/workload_replay/simulate_replay 0 1 --vt_lb --vt_lb_run_lb_first_phase --vt_lb_name="TemperedLB" --vt_lb_args="transfer=SwapClusters knowledge=Complete rollback=false iters=12 memory_threshold=66000000000.0 delta=0.00000000001" --vt_lb_data_in --vt_lb_data_dir_in="${vt_src_dir}/tools/1959-tasks" --vt_debug_level=terse --vt_debug_phase +mpiexec --n 14 ${vt_build_dir}/tools/workload_replay/simulate_replay 0 1 --vt_lb --vt_lb_file_name="${vt_src_dir}/tools/1959-tasks/ccm-lb-delta-1e-11.config" --vt_lb_data_in --vt_lb_data_dir_in="${vt_src_dir}/tools/1959-tasks" --vt_debug_level=terse --vt_debug_phase From 1e3e8cfcbea1794f7851b990b8118de5644cb92c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Thu, 27 Jun 2024 16:25:56 +0200 Subject: [PATCH 105/126] #2201: Revert "tools: NOT to merge: update user-defined toy problem readme" This reverts commit af8b4130d91ba9dfa77c6b3ac76545d9d5b8e2f6. --- tools/user-defined-memory-toy-problem/README | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/user-defined-memory-toy-problem/README b/tools/user-defined-memory-toy-problem/README index 51fe2420b2..920c57f7e9 100644 --- a/tools/user-defined-memory-toy-problem/README +++ b/tools/user-defined-memory-toy-problem/README @@ -61,7 +61,7 @@ home rank after the relevant tasks complete. Each of four ranks has three shared blocks. The memory constrains dictate that at most four unique shared_id values can coexist on each rank. Under these -memory constraints, it is possible to balance the load (time) well. There +memory constraints, it is possible to perfectly balance the load (time). There is more than one way to do so. 
The communication cost to migrate a task off-rank is extremely low, but the cost to communicate back the result should be significant enough to discourage migrating shared_ids to other ranks without it @@ -73,9 +73,9 @@ rank-averaged load. The sum of the loads for the task corresponding to one of its shared_id values is more than the rank-averaged load, so the tasks for that shared_id will need to be split across two ranks to achieve good balance. The tasks for the other shared_ids across all ranks do not need to be split across -multiple ranks to balance the load (time). +multiple ranks to perfectly balance the load (time). -Below is one solution with a well balanced load and decent communication. +Below is one solution with a perfectly balanced load and decent communication. I have not evaluated whether it is optimal. Rank 0: @@ -97,4 +97,4 @@ Rank 2: Rank 3: [3,0,0],[3,0,1],[3,0,2] (home) [3,1,0],[3,1,1],[3,1,2] (home) -[3,2,0],[3,2,1],[3,2,2] (home) +[3,2,0],[3,2,1],[3,2,2] (home) \ No newline at end of file From 42a81acda29633b903468d014d83a89de6d96816 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Thu, 27 Jun 2024 16:27:13 +0200 Subject: [PATCH 106/126] #2201: Revert "tools: NOT to merge: add paper reproducer input and script" This reverts commit 4d4238c0d59bd12d5b00f02982c279599973b0d2. --- tools/1959-tasks/ccm-lb-delta-1e-11.config | 1 - tools/1959-tasks/simulate.sh | 12 ------------ 2 files changed, 13 deletions(-) delete mode 100644 tools/1959-tasks/ccm-lb-delta-1e-11.config delete mode 100755 tools/1959-tasks/simulate.sh diff --git a/tools/1959-tasks/ccm-lb-delta-1e-11.config b/tools/1959-tasks/ccm-lb-delta-1e-11.config deleted file mode 100644 index cbe1354eef..0000000000 --- a/tools/1959-tasks/ccm-lb-delta-1e-11.config +++ /dev/null @@ -1 +0,0 @@ -0 TemperedLB transfer=SwapClusters knowledge=Complete rollback=false iters=12 memory_threshold=66000000000.0 delta=0.00000000001 diff --git a/tools/1959-tasks/simulate.sh b/tools/1959-tasks/simulate.sh deleted file mode 100755 index 3a0c4b6197..0000000000 --- a/tools/1959-tasks/simulate.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash - -if [ -z "$2" ]; then - echo "Error: missing arguments" - echo "Syntax: $0 " - exit 1 -fi - -vt_src_dir=$1 -vt_build_dir=$2 - -mpiexec --n 14 ${vt_build_dir}/tools/workload_replay/simulate_replay 0 1 --vt_lb --vt_lb_file_name="${vt_src_dir}/tools/1959-tasks/ccm-lb-delta-1e-11.config" --vt_lb_data_in --vt_lb_data_dir_in="${vt_src_dir}/tools/1959-tasks" --vt_debug_level=terse --vt_debug_phase From 9805d9d2644e65777d7ac756787a8fe98d30ba00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Thu, 27 Jun 2024 16:28:36 +0200 Subject: [PATCH 107/126] #2201: Revert "tools: NOT to merge: for now, add the user-defined problem to repo to make it easy to run" This reverts commit d1ab995e3f6a2bd4784dec07cdb3f4f4fd94449b. 
--- tools/user-defined-memory-toy-problem/README | 100 ------ .../toy_mem.0.json | 285 ------------------ .../toy_mem.1.json | 1 - .../toy_mem.2.json | 1 - .../toy_mem.3.json | 1 - 5 files changed, 388 deletions(-) delete mode 100644 tools/user-defined-memory-toy-problem/README delete mode 100644 tools/user-defined-memory-toy-problem/toy_mem.0.json delete mode 100644 tools/user-defined-memory-toy-problem/toy_mem.1.json delete mode 100644 tools/user-defined-memory-toy-problem/toy_mem.2.json delete mode 100644 tools/user-defined-memory-toy-problem/toy_mem.3.json diff --git a/tools/user-defined-memory-toy-problem/README b/tools/user-defined-memory-toy-problem/README deleted file mode 100644 index 920c57f7e9..0000000000 --- a/tools/user-defined-memory-toy-problem/README +++ /dev/null @@ -1,100 +0,0 @@ -These files describe a toy problem for testing whether a memory-aware load -balancer is achieving a sensible solution. - -The 3D vt index in these files is: - (rank_index, decomp_index_on_rank, task_index_on_decomp). - -Each task appears in the JSON files on its home rank (rank_index) where -communication costs will be zero, so no communication edges were included. -However, see the final paragraph for details about communication patterns that -will emerge when the tasks are migrated off the home rank. - -The "user-defined" section of the JSON data contains the following fields: -- "task_serialized_bytes": This is the serialized size of the task, which can be - used for modeling the migration cost of the task. It should not be included - when computing the memory usage on a rank. -- "shared_id": This uniquely identifies a block of data on which multiple tasks - will operate. While not important, the shared_id was computing using: - shared_id = decomp_index_on_rank * num_ranks + rank_index -- "shared_bytes": This is the size of the block of data being operated on by the - relevant set of tasks. This memory cost will be incurred exactly once on each - MPI rank on which a task with this shared_id exists. -- "task_footprint_bytes": This is the footprinted size of the task in its - non-running state. We will incur this memory cost once for each individual - task, even if there are other tasks on this rank with the same shared_id. This - can be greater than task_serialized_bytes when the task has data members that - have greater capacity than is being used at serialization time. -- "task_working_bytes": This is the high water mark of the additional working - memory required by the individual task, such as temporary memory needed for - intermediate computation. This value does not include memory shared with other - tasks (i.e., shared_bytes), nor does it include the task_footprint bytes or - task_serialized_bytes. This cost is incurred for each individual task, but - only one at a time because tasks will not run concurrently. -- "rank_working_bytes": This is the amount of memory that the particular rank - needs while processing tasks. This may include global data, constants, and - completely unrelated data pre-allocated by the application. It is assumed to - be constant over time but may vary from rank to rank. This value does not - include shared_bytes, task_working_bytes, task_footprint_bytes, or - task_serialized_bytes. - -The maximum memory usage for determining if task placement is feasible will be: - max_memory_usage = rank_working_bytes + shared_level_memory + max_task_level_memory - -Computing shared_level_memory: Let S be the set of unique shared_id values on -the rank being considered. 
Then shared_level_memory is simply the sum of -shared_bytes values for each shared_id in S. - -Computing max_task_level_memory: Let T be the set of all tasks on a rank, -regardless of the shared_id on which they operate. Then max_task_level_memory -is the sum of task_footprint_bytes values for each task in T plus the maximum -over the task_working_bytes values for each task in T. - -Any communication-aware load balancer should also consider the communication -implied by this memory data. The task_serialized_bytes is the serialized size -of the task, so migrating it will require a communication of at least that size -from the home rank to the target rank. For applications where the shared memory -corresponding shared_id is writeable, at least shared_bytes per unique shared_id -on a target rank will need to be communicated from the target rank back to the -home rank after the relevant tasks complete. - -***Spoilers*** - -Each of four ranks has three shared blocks. The memory constrains dictate that -at most four unique shared_id values can coexist on each rank. Under these -memory constraints, it is possible to perfectly balance the load (time). There -is more than one way to do so. The communication cost to migrate a task off-rank -is extremely low, but the cost to communicate back the result should be -significant enough to discourage migrating shared_ids to other ranks without it -resulting in a better balanced load. - -One of the ranks has exactly the rank-averaged load, so it is best if the tasks -on that rank are left in place. Another rank has more than twice the -rank-averaged load. The sum of the loads for the task corresponding to one of -its shared_id values is more than the rank-averaged load, so the tasks for that -shared_id will need to be split across two ranks to achieve good balance. The -tasks for the other shared_ids across all ranks do not need to be split across -multiple ranks to perfectly balance the load (time). - -Below is one solution with a perfectly balanced load and decent communication. -I have not evaluated whether it is optimal. 
- -Rank 0: -[0,1,1],[0,1,3],[0,1,4] (part of block home) -[1,0,0],[1,0,1] (whole block not home) -[2,2,0],[2,2,1] (whole block not home) - -Rank 1: -[1,1,0],[1,1,1] (home) -[1,2,0] (home) -[0,0,0],[0,0,1],[0,0,2] (whole block not home) -[0,1,0],[0,1,2] (part of block not home) - -Rank 2: -[2,0,0],[2,0,1],[2,0,2] (home) -[2,1,0],[2,1,1] (home) -[0,2,0],[0,2,1],[0,2,2] (whole block not home) - -Rank 3: -[3,0,0],[3,0,1],[3,0,2] (home) -[3,1,0],[3,1,1],[3,1,2] (home) -[3,2,0],[3,2,1],[3,2,2] (home) \ No newline at end of file diff --git a/tools/user-defined-memory-toy-problem/toy_mem.0.json b/tools/user-defined-memory-toy-problem/toy_mem.0.json deleted file mode 100644 index 36de223728..0000000000 --- a/tools/user-defined-memory-toy-problem/toy_mem.0.json +++ /dev/null @@ -1,285 +0,0 @@ -{ - "type": "LBDatafile", - "phases": [ - { - "id": 0, - "tasks": [ - { - "entity": { - "collection_id": 7, - "home": 0, - "id": 2883587, - "index": [ - 0, - 1, - 4 - ], - "migratable": true, - "type": "object" - }, - "node": 0, - "resource": "cpu", - "time": 10.0, - "user_defined": { - "rank_working_bytes": 980000000.0, - "shared_bytes": 1600000000.0, - "shared_id": 4, - "task_footprint_bytes": 1024.0, - "task_serialized_bytes": 1024.0, - "task_working_bytes": 110000000.0 - } - }, - { - "entity": { - "collection_id": 7, - "home": 0, - "id": 2621443, - "index": [ - 0, - 1, - 3 - ], - "migratable": true, - "type": "object" - }, - "node": 0, - "resource": "cpu", - "time": 35.0, - "user_defined": { - "rank_working_bytes": 980000000.0, - "shared_bytes": 1600000000.0, - "shared_id": 4, - "task_footprint_bytes": 1024.0, - "task_serialized_bytes": 1024.0, - "task_working_bytes": 110000000.0 - } - }, - { - "entity": { - "collection_id": 7, - "home": 0, - "id": 2359299, - "index": [ - 0, - 2, - 2 - ], - "migratable": true, - "type": "object" - }, - "node": 0, - "resource": "cpu", - "time": 10.0, - "user_defined": { - "rank_working_bytes": 980000000.0, - "shared_bytes": 1600000000.0, - "shared_id": 8, - "task_footprint_bytes": 1024.0, - "task_serialized_bytes": 1024.0, - "task_working_bytes": 110000000.0 - } - }, - { - "entity": { - "collection_id": 7, - "home": 0, - "id": 2097155, - "index": [ - 0, - 1, - 2 - ], - "migratable": true, - "type": "object" - }, - "node": 0, - "resource": "cpu", - "time": 25.0, - "user_defined": { - "rank_working_bytes": 980000000.0, - "shared_bytes": 1600000000.0, - "shared_id": 4, - "task_footprint_bytes": 1024.0, - "task_serialized_bytes": 1024.0, - "task_working_bytes": 110000000.0 - } - }, - { - "entity": { - "collection_id": 7, - "home": 0, - "id": 1835011, - "index": [ - 0, - 0, - 2 - ], - "migratable": true, - "type": "object" - }, - "node": 0, - "resource": "cpu", - "time": 10.0, - "user_defined": { - "rank_working_bytes": 980000000.0, - "shared_bytes": 1600000000.0, - "shared_id": 0, - "task_footprint_bytes": 1024.0, - "task_serialized_bytes": 1024.0, - "task_working_bytes": 110000000.0 - } - }, - { - "entity": { - "collection_id": 7, - "home": 0, - "id": 524291, - "index": [ - 0, - 1, - 0 - ], - "migratable": true, - "type": "object" - }, - "node": 0, - "resource": "cpu", - "time": 20.0, - "user_defined": { - "rank_working_bytes": 980000000.0, - "shared_bytes": 1600000000.0, - "shared_id": 4, - "task_footprint_bytes": 1024.0, - "task_serialized_bytes": 1024.0, - "task_working_bytes": 110000000.0 - } - }, - { - "entity": { - "collection_id": 7, - "home": 0, - "id": 262147, - "index": [ - 0, - 0, - 0 - ], - "migratable": true, - "type": "object" - }, - "node": 0, - "resource": 
"cpu", - "time": 10.0, - "user_defined": { - "rank_working_bytes": 980000000.0, - "shared_bytes": 1600000000.0, - "shared_id": 0, - "task_footprint_bytes": 1024.0, - "task_serialized_bytes": 1024.0, - "task_working_bytes": 110000000.0 - } - }, - { - "entity": { - "collection_id": 7, - "home": 0, - "id": 786435, - "index": [ - 0, - 2, - 0 - ], - "migratable": true, - "type": "object" - }, - "node": 0, - "resource": "cpu", - "time": 20.0, - "user_defined": { - "rank_working_bytes": 980000000.0, - "shared_bytes": 1600000000.0, - "shared_id": 8, - "task_footprint_bytes": 1024.0, - "task_serialized_bytes": 1024.0, - "task_working_bytes": 110000000.0 - } - }, - { - "entity": { - "collection_id": 7, - "home": 0, - "id": 1048579, - "index": [ - 0, - 0, - 1 - ], - "migratable": true, - "type": "object" - }, - "node": 0, - "resource": "cpu", - "time": 15.0, - "user_defined": { - "rank_working_bytes": 980000000.0, - "shared_bytes": 1600000000.0, - "shared_id": 0, - "task_footprint_bytes": 1024.0, - "task_serialized_bytes": 1024.0, - "task_working_bytes": 110000000.0 - } - }, - { - "entity": { - "collection_id": 7, - "home": 0, - "id": 1310723, - "index": [ - 0, - 1, - 1 - ], - "migratable": true, - "type": "object" - }, - "node": 0, - "resource": "cpu", - "time": 30.0, - "user_defined": { - "rank_working_bytes": 980000000.0, - "shared_bytes": 1600000000.0, - "shared_id": 4, - "task_footprint_bytes": 1024.0, - "task_serialized_bytes": 1024.0, - "task_working_bytes": 110000000.0 - } - }, - { - "entity": { - "collection_id": 7, - "home": 0, - "id": 1572867, - "index": [ - 0, - 2, - 1 - ], - "migratable": true, - "type": "object" - }, - "node": 0, - "resource": "cpu", - "time": 5.0, - "user_defined": { - "rank_working_bytes": 980000000.0, - "shared_bytes": 1600000000.0, - "shared_id": 8, - "task_footprint_bytes": 1024.0, - "task_serialized_bytes": 1024.0, - "task_working_bytes": 110000000.0 - } - } - ] - } - ] -} diff --git a/tools/user-defined-memory-toy-problem/toy_mem.1.json b/tools/user-defined-memory-toy-problem/toy_mem.1.json deleted file mode 100644 index 875b316c0b..0000000000 --- a/tools/user-defined-memory-toy-problem/toy_mem.1.json +++ /dev/null @@ -1 +0,0 @@ 
-{"type":"LBDatafile","phases":[{"id":0,"tasks":[{"entity":{"collection_id":7,"home":1,"id":1310727,"index":[1,1,1],"migratable":true,"type":"object"},"node":1,"resource":"cpu","time":2.5,"user_defined":{"rank_working_bytes":980000000.0,"shared_bytes":1600000000.0,"shared_id":5,"task_footprint_bytes":1024.0,"task_serialized_bytes":1024.0,"task_working_bytes":110000000.0}},{"entity":{"collection_id":7,"home":1,"id":1048583,"index":[1,0,1],"migratable":true,"type":"object"},"node":1,"resource":"cpu","time":5.0,"user_defined":{"rank_working_bytes":980000000.0,"shared_bytes":1600000000.0,"shared_id":1,"task_footprint_bytes":1024.0,"task_serialized_bytes":1024.0,"task_working_bytes":110000000.0}},{"entity":{"collection_id":7,"home":1,"id":786439,"index":[1,2,0],"migratable":true,"type":"object"},"node":1,"resource":"cpu","time":5.0,"user_defined":{"rank_working_bytes":980000000.0,"shared_bytes":1600000000.0,"shared_id":9,"task_footprint_bytes":1024.0,"task_serialized_bytes":1024.0,"task_working_bytes":110000000.0}},{"entity":{"collection_id":7,"home":1,"id":262151,"index":[1,0,0],"migratable":true,"type":"object"},"node":1,"resource":"cpu","time":5.0,"user_defined":{"rank_working_bytes":980000000.0,"shared_bytes":1600000000.0,"shared_id":1,"task_footprint_bytes":1024.0,"task_serialized_bytes":1024.0,"task_working_bytes":110000000.0}},{"entity":{"collection_id":7,"home":1,"id":524295,"index":[1,1,0],"migratable":true,"type":"object"},"node":1,"resource":"cpu","time":2.5,"user_defined":{"rank_working_bytes":980000000.0,"shared_bytes":1600000000.0,"shared_id":5,"task_footprint_bytes":1024.0,"task_serialized_bytes":1024.0,"task_working_bytes":110000000.0}}]}]} \ No newline at end of file diff --git a/tools/user-defined-memory-toy-problem/toy_mem.2.json b/tools/user-defined-memory-toy-problem/toy_mem.2.json deleted file mode 100644 index e77e11c03a..0000000000 --- a/tools/user-defined-memory-toy-problem/toy_mem.2.json +++ /dev/null @@ -1 +0,0 @@ 
-{"type":"LBDatafile","phases":[{"id":0,"tasks":[{"entity":{"collection_id":7,"home":2,"id":1835019,"index":[2,0,2],"migratable":true,"type":"object"},"node":2,"resource":"cpu","time":15.0,"user_defined":{"rank_working_bytes":980000000.0,"shared_bytes":1600000000.0,"shared_id":2,"task_footprint_bytes":1024.0,"task_serialized_bytes":1024.0,"task_working_bytes":110000000.0}},{"entity":{"collection_id":7,"home":2,"id":524299,"index":[2,1,0],"migratable":true,"type":"object"},"node":2,"resource":"cpu","time":10.0,"user_defined":{"rank_working_bytes":980000000.0,"shared_bytes":1600000000.0,"shared_id":6,"task_footprint_bytes":1024.0,"task_serialized_bytes":1024.0,"task_working_bytes":110000000.0}},{"entity":{"collection_id":7,"home":2,"id":262155,"index":[2,0,0],"migratable":true,"type":"object"},"node":2,"resource":"cpu","time":5.0,"user_defined":{"rank_working_bytes":980000000.0,"shared_bytes":1600000000.0,"shared_id":2,"task_footprint_bytes":1024.0,"task_serialized_bytes":1024.0,"task_working_bytes":110000000.0}},{"entity":{"collection_id":7,"home":2,"id":786443,"index":[2,2,0],"migratable":true,"type":"object"},"node":2,"resource":"cpu","time":2.5,"user_defined":{"rank_working_bytes":980000000.0,"shared_bytes":1600000000.0,"shared_id":10,"task_footprint_bytes":1024.0,"task_serialized_bytes":1024.0,"task_working_bytes":110000000.0}},{"entity":{"collection_id":7,"home":2,"id":1048587,"index":[2,0,1],"migratable":true,"type":"object"},"node":2,"resource":"cpu","time":10.0,"user_defined":{"rank_working_bytes":980000000.0,"shared_bytes":1600000000.0,"shared_id":2,"task_footprint_bytes":1024.0,"task_serialized_bytes":1024.0,"task_working_bytes":110000000.0}},{"entity":{"collection_id":7,"home":2,"id":1310731,"index":[2,1,1],"migratable":true,"type":"object"},"node":2,"resource":"cpu","time":5.0,"user_defined":{"rank_working_bytes":980000000.0,"shared_bytes":1600000000.0,"shared_id":6,"task_footprint_bytes":1024.0,"task_serialized_bytes":1024.0,"task_working_bytes":110000000.0}},{"entity":{"collection_id":7,"home":2,"id":1572875,"index":[2,2,1],"migratable":true,"type":"object"},"node":2,"resource":"cpu","time":2.5,"user_defined":{"rank_working_bytes":980000000.0,"shared_bytes":1600000000.0,"shared_id":10,"task_footprint_bytes":1024.0,"task_serialized_bytes":1024.0,"task_working_bytes":110000000.0}}]}]} \ No newline at end of file diff --git a/tools/user-defined-memory-toy-problem/toy_mem.3.json b/tools/user-defined-memory-toy-problem/toy_mem.3.json deleted file mode 100644 index b0ef40536a..0000000000 --- a/tools/user-defined-memory-toy-problem/toy_mem.3.json +++ /dev/null @@ -1 +0,0 @@ 
-{"type":"LBDatafile","phases":[{"id":0,"tasks":[{"entity":{"collection_id":7,"home":3,"id":2359311,"index":[3,2,2],"migratable":true,"type":"object"},"node":3,"resource":"cpu","time":10.0,"user_defined":{"rank_working_bytes":980000000.0,"shared_bytes":1600000000.0,"shared_id":11,"task_footprint_bytes":1024.0,"task_serialized_bytes":1024.0,"task_working_bytes":110000000.0}},{"entity":{"collection_id":7,"home":3,"id":2097167,"index":[3,1,2],"migratable":true,"type":"object"},"node":3,"resource":"cpu","time":15.0,"user_defined":{"rank_working_bytes":980000000.0,"shared_bytes":1600000000.0,"shared_id":7,"task_footprint_bytes":1024.0,"task_serialized_bytes":1024.0,"task_working_bytes":110000000.0}},{"entity":{"collection_id":7,"home":3,"id":1835023,"index":[3,0,2],"migratable":true,"type":"object"},"node":3,"resource":"cpu","time":5.0,"user_defined":{"rank_working_bytes":980000000.0,"shared_bytes":1600000000.0,"shared_id":3,"task_footprint_bytes":1024.0,"task_serialized_bytes":1024.0,"task_working_bytes":110000000.0}},{"entity":{"collection_id":7,"home":3,"id":524303,"index":[3,1,0],"migratable":true,"type":"object"},"node":3,"resource":"cpu","time":10.0,"user_defined":{"rank_working_bytes":980000000.0,"shared_bytes":1600000000.0,"shared_id":7,"task_footprint_bytes":1024.0,"task_serialized_bytes":1024.0,"task_working_bytes":110000000.0}},{"entity":{"collection_id":7,"home":3,"id":262159,"index":[3,0,0],"migratable":true,"type":"object"},"node":3,"resource":"cpu","time":10.0,"user_defined":{"rank_working_bytes":980000000.0,"shared_bytes":1600000000.0,"shared_id":3,"task_footprint_bytes":1024.0,"task_serialized_bytes":1024.0,"task_working_bytes":110000000.0}},{"entity":{"collection_id":7,"home":3,"id":786447,"index":[3,2,0],"migratable":true,"type":"object"},"node":3,"resource":"cpu","time":5.0,"user_defined":{"rank_working_bytes":980000000.0,"shared_bytes":1600000000.0,"shared_id":11,"task_footprint_bytes":1024.0,"task_serialized_bytes":1024.0,"task_working_bytes":110000000.0}},{"entity":{"collection_id":7,"home":3,"id":1048591,"index":[3,0,1],"migratable":true,"type":"object"},"node":3,"resource":"cpu","time":5.0,"user_defined":{"rank_working_bytes":980000000.0,"shared_bytes":1600000000.0,"shared_id":3,"task_footprint_bytes":1024.0,"task_serialized_bytes":1024.0,"task_working_bytes":110000000.0}},{"entity":{"collection_id":7,"home":3,"id":1310735,"index":[3,1,1],"migratable":true,"type":"object"},"node":3,"resource":"cpu","time":20.0,"user_defined":{"rank_working_bytes":980000000.0,"shared_bytes":1600000000.0,"shared_id":7,"task_footprint_bytes":1024.0,"task_serialized_bytes":1024.0,"task_working_bytes":110000000.0}},{"entity":{"collection_id":7,"home":3,"id":1572879,"index":[3,2,1],"migratable":true,"type":"object"},"node":3,"resource":"cpu","time":10.0,"user_defined":{"rank_working_bytes":980000000.0,"shared_bytes":1600000000.0,"shared_id":11,"task_footprint_bytes":1024.0,"task_serialized_bytes":1024.0,"task_working_bytes":110000000.0}}]}]} \ No newline at end of file From 2d07a6e214ad10791630dd110979fbe78d86fd8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Thu, 27 Jun 2024 18:34:39 +0200 Subject: [PATCH 108/126] #2201: remove obsolete comment --- src/vt/vrt/collection/balance/temperedlb/temperedlb.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index b0e6b137ef..dfc4eb6316 100644 --- 
a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -2723,7 +2723,7 @@ void TemperedLB::swapClusters() { ); }); } -} // void TemperedLB::originalTransfer() +} void TemperedLB::thunkMigrations() { vt_debug_print( From d99cdb465304fcd2c687a6814512a11f7780c347 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Thu, 27 Jun 2024 19:44:06 +0200 Subject: [PATCH 109/126] #2201: lb: reduce code duplication --- .../balance/temperedlb/temperedlb.cc | 49 ++++--------------- 1 file changed, 10 insertions(+), 39 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index dfc4eb6316..7eadc9adcc 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -585,62 +585,33 @@ void TemperedLB::readClustersMemoryData() { BytesType footprint_bytes = 0; BytesType serialized_bytes = 0; for (auto const& [key, variant] : data_map) { + auto val = std::get_if(&variant); + vtAbortIf(!val, '"' + key + "\" in variant does not match double"); + if (key == "shared_id") { // Because of how JSON is stored this is always a double, even // though it should be an integer - if (double const* val = std::get_if(&variant)) { - shared_id = static_cast(*val); - } else { - vtAbort("\"shared_id\" in variant does not match double"); - } + shared_id = static_cast(*val); } if (key == "home_rank") { // Because of how JSON is stored this is always a double, even // though it should be an integer - if (double const* val = std::get_if(&variant)) { - home_rank = static_cast(*val); - } else { - vtAbort("\"home_rank\" in variant does not match double"); - } + home_rank = static_cast(*val); } if (key == "shared_bytes") { - if (BytesType const* val = std::get_if(&variant)) { - shared_bytes = *val; - } else { - vtAbort("\"shared_bytes\" in variant does not match double"); - } + shared_bytes = *val; } if (key == "task_working_bytes") { - if (BytesType const* val = std::get_if(&variant)) { - working_bytes = *val; - } else { - vtAbort("\"task_working_bytes\" in variant does not match double"); - } + working_bytes = *val; } if (key == "task_footprint_bytes") { - if (BytesType const* val = std::get_if(&variant)) { - footprint_bytes = *val; - } else { - vtAbort( - "\"task_footprint_bytes\" in variant does not match double" - ); - } + footprint_bytes = *val; } if (key == "task_serialized_bytes") { - if (BytesType const* val = std::get_if(&variant)) { - serialized_bytes = *val; - } else { - vtAbort( - "\"task_serialized_bytes\" in variant does not match double" - ); - } + serialized_bytes = *val; } if (key == "rank_working_bytes") { - if (BytesType const* val = std::get_if(&variant)) { - rank_bytes_ = *val; - } else { - vtAbort("\"rank_working_bytes\" in variant does not match double"); - } + rank_bytes_ = *val; } } From d243bbedd939232ecce609e0ebe63557c3e174cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Skrzy=C5=84ski?= Date: Thu, 27 Jun 2024 20:16:32 +0200 Subject: [PATCH 110/126] #2201: lb: use named constant for uninitialized --- src/vt/configs/types/types_sentinels.h | 1 + src/vt/configs/types/types_type.h | 2 ++ src/vt/elm/elm_comm.h | 3 ++- .../collection/balance/temperedlb/tempered_msgs.h | 1 - .../vrt/collection/balance/temperedlb/temperedlb.cc | 13 +++++++------ 5 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/vt/configs/types/types_sentinels.h 
b/src/vt/configs/types/types_sentinels.h index 9bcd343d25..57d38c151f 100644 --- a/src/vt/configs/types/types_sentinels.h +++ b/src/vt/configs/types/types_sentinels.h @@ -89,6 +89,7 @@ static constexpr SequentialIDType const first_seq_id = 1; static constexpr PriorityType const no_priority = 0; static constexpr PriorityLevelType const no_priority_level = 0; static constexpr ThreadIDType const no_thread_id = 0; +static constexpr SharedIDType const no_shared_id = -1; } // end namespace vt diff --git a/src/vt/configs/types/types_type.h b/src/vt/configs/types/types_type.h index d09d9fe4ca..9962261752 100644 --- a/src/vt/configs/types/types_type.h +++ b/src/vt/configs/types/types_type.h @@ -117,6 +117,8 @@ using PriorityLevelType = uint8_t; using ComponentIDType = uint32_t; /// Used to hold a unique ID for a user-level thread on a particular node using ThreadIDType = uint64_t; +/// Used to hold a shared ID +using SharedIDType = int; // Action types for attaching a closure to a runtime function /// Used for generically store an action to perform diff --git a/src/vt/elm/elm_comm.h b/src/vt/elm/elm_comm.h index cdc2b2fc97..44f610a269 100644 --- a/src/vt/elm/elm_comm.h +++ b/src/vt/elm/elm_comm.h @@ -44,6 +44,7 @@ #if !defined INCLUDED_VT_ELM_ELM_COMM_H #define INCLUDED_VT_ELM_ELM_COMM_H +#include "vt/configs/types/types_type.h" #include "vt/elm/elm_id.h" #include @@ -129,7 +130,7 @@ struct CommKey { ElementIDStruct edge_id_ = {}; NodeType nfrom_ = uninitialized_destination; NodeType nto_ = uninitialized_destination; - int shared_id_ = -1; + SharedIDType shared_id_ = no_shared_id; CommCategory cat_ = CommCategory::SendRecv; ElementIDStruct fromObj() const { return from_; } diff --git a/src/vt/vrt/collection/balance/temperedlb/tempered_msgs.h b/src/vt/vrt/collection/balance/temperedlb/tempered_msgs.h index b59dfc0afd..65c5168cd3 100644 --- a/src/vt/vrt/collection/balance/temperedlb/tempered_msgs.h +++ b/src/vt/vrt/collection/balance/temperedlb/tempered_msgs.h @@ -52,7 +52,6 @@ namespace vt::vrt::collection::lb { -using SharedIDType = int; using BytesType = double; struct ClusterInfo { diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 7eadc9adcc..7311d4daad 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -578,7 +578,7 @@ void TemperedLB::readClustersMemoryData() { obj, {balance::PhaseOffset::NEXT_PHASE, balance::PhaseOffset::WHOLE_PHASE} ); - SharedIDType shared_id = -1; + SharedIDType shared_id = vt::no_shared_id; vt::NodeType home_rank = vt::uninitialized_destination; BytesType shared_bytes = 0; BytesType working_bytes = 0; @@ -2146,7 +2146,7 @@ auto TemperedLB::removeClusterToSend( shared_id ); - if (shared_id != -1) { + if (shared_id != no_shared_id) { give_shared_blocks_size[shared_id] = shared_block_size_[shared_id]; } @@ -2311,7 +2311,8 @@ void TemperedLB::considerSwapsAfterLock(MsgSharedPtr msg) { auto const& try_info = msg->locked_info; double best_c_try = -1.0; - std::tuple best_swap = {-1,-1}; + std::tuple best_swap = + {no_shared_id, no_shared_id}; for (auto const& [src_shared_id, src_cluster] : cur_clusters_) { // try swapping with empty cluster first { @@ -2323,7 +2324,7 @@ void TemperedLB::considerSwapsAfterLock(MsgSharedPtr msg) { if (c_try > 0.0) { if (c_try > best_c_try) { best_c_try = c_try; - best_swap = std::make_tuple(src_shared_id, -1); + best_swap = std::make_tuple(src_shared_id, no_shared_id); } } } @@ -2440,7 
+2441,7 @@ void TemperedLB::giveCluster( obj_working_bytes_.emplace(elm); } - if (take_cluster != -1) { + if (take_cluster != no_shared_id) { auto const& [ take_objs, take_obj_shared_block, @@ -2454,7 +2455,7 @@ void TemperedLB::giveCluster( take_objs, take_obj_shared_block, take_obj_working_bytes, - -1 + no_shared_id ); } From a3b5233c903a777fb0a42c407acfe148fc7ee687 Mon Sep 17 00:00:00 2001 From: Caleb Schilly Date: Tue, 13 Aug 2024 13:52:06 -0700 Subject: [PATCH 111/126] #2201: tests: add tests for temperedLB with load, load+memory, and load+memory+homing cases --- tests/CMakeLists.txt | 9 ++ .../synthetic-dataset-blocks.0.json | 1 + .../synthetic-dataset-blocks.1.json | 1 + .../synthetic-dataset-blocks.2.json | 1 + .../synthetic-dataset-blocks.3.json | 1 + tests/unit/lb/test_temperedlb.cc | 87 +++++++++++++++++++ 6 files changed, 100 insertions(+) create mode 100644 tests/data/synthetic-blocks/synthetic-dataset-blocks.0.json create mode 100644 tests/data/synthetic-blocks/synthetic-dataset-blocks.1.json create mode 100644 tests/data/synthetic-blocks/synthetic-dataset-blocks.2.json create mode 100644 tests/data/synthetic-blocks/synthetic-dataset-blocks.3.json create mode 100644 tests/unit/lb/test_temperedlb.cc diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index e0d10b060e..8cf9666ada 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -215,6 +215,15 @@ foreach(SUB_DIR ${UNIT_TEST_SUBDIRS_LIST}) endif() endforeach() +# Copy synthetic blocks data files to /tests/synthetic-blocks-data +set(SYNTHETIC_BLOCKS_DATA_DEST "${CMAKE_BINARY_DIR}/tests/synthetic-blocks-data") +file(MAKE_DIRECTORY ${SYNTHETIC_BLOCKS_DATA_DEST}) +file(GLOB SYNTHETIC_BLOCKS_DATA_FILES "${CMAKE_SOURCE_DIR}/tests/data/synthetic-blocks/*") +foreach(SYNTHETIC_BLOCKS_DATA_FILE ${SYNTHETIC_BLOCKS_DATA_FILES}) + get_filename_component(FILE_NAME ${SYNTHETIC_BLOCKS_DATA_FILE} NAME) + configure_file(${SYNTHETIC_BLOCKS_DATA_FILE} ${SYNTHETIC_BLOCKS_DATA_DEST} COPYONLY) +endforeach() + # # Performance Tests # diff --git a/tests/data/synthetic-blocks/synthetic-dataset-blocks.0.json b/tests/data/synthetic-blocks/synthetic-dataset-blocks.0.json new file mode 100644 index 0000000000..26afaaea2c --- /dev/null +++ b/tests/data/synthetic-blocks/synthetic-dataset-blocks.0.json @@ -0,0 +1 @@ +{"metadata":{"type":"LBDatafile","rank":0},"phases":[{"id":0,"tasks":[{"entity":{"home":0,"id":1,"migratable":true,"type":"object"},"node":0,"resource":"cpu","time":0.5,"user_defined":{"shared_id":0,"shared_bytes":9.0,"home_rank":0}},{"entity":{"home":0,"id":3,"migratable":true,"type":"object"},"node":0,"resource":"cpu","time":0.5,"user_defined":{"shared_id":1,"shared_bytes":9.0,"home_rank":0}},{"entity":{"home":0,"id":2,"migratable":true,"type":"object"},"node":0,"resource":"cpu","time":0.5,"user_defined":{"shared_id":1,"shared_bytes":9.0,"home_rank":0}},{"entity":{"home":0,"id":0,"migratable":true,"type":"object"},"node":0,"resource":"cpu","time":1.0,"user_defined":{"shared_id":0,"shared_bytes":9.0,"home_rank":0}}],"communications":[{"type":"SendRecv","to":{"type":"object","id":5},"messages":1,"from":{"type":"object","id":0},"bytes":2.0},{"type":"SendRecv","to":{"type":"object","id":4},"messages":1,"from":{"type":"object","id":1},"bytes":1.0},{"type":"SendRecv","to":{"type":"object","id":2},"messages":1,"from":{"type":"object","id":3},"bytes":1.0},{"type":"SendRecv","to":{"type":"object","id":8},"messages":1,"from":{"type":"object","id":3},"bytes":0.5}]}]} diff --git a/tests/data/synthetic-blocks/synthetic-dataset-blocks.1.json 
b/tests/data/synthetic-blocks/synthetic-dataset-blocks.1.json new file mode 100644 index 0000000000..160cf422d5 --- /dev/null +++ b/tests/data/synthetic-blocks/synthetic-dataset-blocks.1.json @@ -0,0 +1 @@ +{"metadata":{"type":"LBDatafile","rank":1},"phases":[{"id":0,"tasks":[{"entity":{"home":1,"id":5,"migratable":true,"type":"object"},"node":1,"resource":"cpu","time":2.0,"user_defined":{"shared_id":2,"shared_bytes":9.0,"home_rank":1}},{"entity":{"home":1,"id":4,"migratable":true,"type":"object"},"node":1,"resource":"cpu","time":0.5,"user_defined":{"shared_id":2,"shared_bytes":9.0,"home_rank":1}},{"entity":{"home":1,"id":7,"migratable":true,"type":"object"},"node":1,"resource":"cpu","time":0.5,"user_defined":{"shared_id":3,"shared_bytes":9.0,"home_rank":1}},{"entity":{"home":1,"id":6,"migratable":true,"type":"object"},"node":1,"resource":"cpu","time":1.0,"user_defined":{"shared_id":3,"shared_bytes":9.0,"home_rank":1}}],"communications":[{"type":"SendRecv","to":{"type":"object","id":1},"messages":1,"from":{"type":"object","id":4},"bytes":2.0},{"type":"SendRecv","to":{"type":"object","id":8},"messages":1,"from":{"type":"object","id":5},"bytes":2.0},{"type":"SendRecv","to":{"type":"object","id":6},"messages":1,"from":{"type":"object","id":7},"bytes":1.0}]}]} diff --git a/tests/data/synthetic-blocks/synthetic-dataset-blocks.2.json b/tests/data/synthetic-blocks/synthetic-dataset-blocks.2.json new file mode 100644 index 0000000000..5b1e88a01d --- /dev/null +++ b/tests/data/synthetic-blocks/synthetic-dataset-blocks.2.json @@ -0,0 +1 @@ +{"metadata":{"type":"LBDatafile","rank":2},"phases":[{"id":0,"tasks":[{"entity":{"home":2,"id":8,"migratable":true,"type":"object"},"node":2,"resource":"cpu","time":1.5,"user_defined":{"shared_id":4,"shared_bytes":9.0,"home_rank":2}}],"communications":[{"type":"SendRecv","to":{"type":"object","id":6},"messages":1,"from":{"type":"object","id":8},"bytes":1.5}]}]} diff --git a/tests/data/synthetic-blocks/synthetic-dataset-blocks.3.json b/tests/data/synthetic-blocks/synthetic-dataset-blocks.3.json new file mode 100644 index 0000000000..f2868aa3bd --- /dev/null +++ b/tests/data/synthetic-blocks/synthetic-dataset-blocks.3.json @@ -0,0 +1 @@ +{"metadata":{"type":"LBDatafile","rank":3},"phases":[{"id":0,"tasks":[]}]} diff --git a/tests/unit/lb/test_temperedlb.cc b/tests/unit/lb/test_temperedlb.cc new file mode 100644 index 0000000000..c54a610d5e --- /dev/null +++ b/tests/unit/lb/test_temperedlb.cc @@ -0,0 +1,87 @@ +#include +#include +#include + +#include "test_helpers.h" +#include "test_parallel_harness.h" + +namespace vt { namespace tests { namespace unit { namespace lb { + +#if vt_check_enabled(lblite) + +using TestTemperedLB = TestParallelHarness; + +std::string writeTemperedLBConfig(std::string transfer_strategy, + bool mem_constraints, + double delta = 0.0, + double beta = 0.0, + double gamma = 0.0) { + int this_rank; + MPI_Comm_rank(MPI_COMM_WORLD, &this_rank); + auto config_file = getUniqueFilename(); + if (this_rank == 0) { + std::ofstream cfg_file_{config_file.c_str(), std::ofstream::out | std::ofstream::trunc}; + cfg_file_ << "0 TemperedLB transfer=" << transfer_strategy << + " alpha=1.0" << + " beta=" << beta << + " gamma=" << gamma << + " delta=" << delta; + if (mem_constraints) { + cfg_file_ << " memory_threshold=20.0"; + } + cfg_file_.close(); + } + return config_file; +} + +void runTemperedLBTest(std::string config_file, double expected_imb = 0.0) { + // Set configuration + theConfig()->vt_lb = true; + theConfig()->vt_lb_name = "TemperedLB"; + 
theConfig()->vt_lb_data_in = true; + theConfig()->vt_lb_file_name = config_file; + theConfig()->vt_lb_data_file_in="synthetic-dataset-blocks.%p.json"; + theConfig()->vt_lb_data_dir_in="synthetic-blocks-data"; + + // Replay load balancing + int initial_phase = 0; + int phases_to_run = 1; + int phase_mod = 0; + vt::vrt::collection::balance::replay::replayWorkloads( + initial_phase, phases_to_run, phase_mod); + + // Get information for the last phase (this problem only has one) + auto phase_info = theLBManager()->getPhaseInfo(); + + // Assert that temperedLB found the correct imbalance + auto imb = (phase_info->max_load / phase_info->avg_load) - 1; + EXPECT_EQ(imb, expected_imb); + + // Clear the LB config ahead of next test + vrt::collection::balance::ReadLBConfig::clear(); +} + +TEST_F(TestTemperedLB, test_load_only) { + auto cfg = writeTemperedLBConfig("Original", false); + runTemperedLBTest(cfg); +} + +TEST_F(TestTemperedLB, test_load_and_memory_swapclusters) { + auto cfg = writeTemperedLBConfig("SwapClusters", true); + runTemperedLBTest(cfg); +} + +TEST_F(TestTemperedLB, test_load_memory_homing_swapclusters) { + auto cfg = writeTemperedLBConfig("SwapClusters", true, 1.0); + runTemperedLBTest(cfg); +} + +TEST_F(TestTemperedLB, test_load_memory_homing_comms) { + auto cfg = writeTemperedLBConfig("SwapClusters", true, 1.0, 1.0); + double expected_imbalance = 0.25; // placeholder for value from MILP + runTemperedLBTest(cfg, expected_imbalance); +} + +#endif + +}}}} /* end namespace vt::tests::unit::lb */ From 8bb402b0f1c05a7b2ebdd030135728e4f7d8dc78 Mon Sep 17 00:00:00 2001 From: Caleb Schilly Date: Tue, 13 Aug 2024 16:57:07 -0400 Subject: [PATCH 112/126] #2201: tests: remove trailing whitespace --- tests/unit/lb/test_temperedlb.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/unit/lb/test_temperedlb.cc b/tests/unit/lb/test_temperedlb.cc index c54a610d5e..3636c08eb3 100644 --- a/tests/unit/lb/test_temperedlb.cc +++ b/tests/unit/lb/test_temperedlb.cc @@ -21,10 +21,10 @@ std::string writeTemperedLBConfig(std::string transfer_strategy, auto config_file = getUniqueFilename(); if (this_rank == 0) { std::ofstream cfg_file_{config_file.c_str(), std::ofstream::out | std::ofstream::trunc}; - cfg_file_ << "0 TemperedLB transfer=" << transfer_strategy << - " alpha=1.0" << + cfg_file_ << "0 TemperedLB transfer=" << transfer_strategy << + " alpha=1.0" << " beta=" << beta << - " gamma=" << gamma << + " gamma=" << gamma << " delta=" << delta; if (mem_constraints) { cfg_file_ << " memory_threshold=20.0"; From 7aa6f6e8446b21f6db6628f7456e7e91d6d1cdd2 Mon Sep 17 00:00:00 2001 From: Caleb Schilly Date: Wed, 14 Aug 2024 14:08:03 -0400 Subject: [PATCH 113/126] #2201: tests: avoid calculating imbalance manually --- tests/unit/lb/test_temperedlb.cc | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/unit/lb/test_temperedlb.cc b/tests/unit/lb/test_temperedlb.cc index 3636c08eb3..019c2bd57e 100644 --- a/tests/unit/lb/test_temperedlb.cc +++ b/tests/unit/lb/test_temperedlb.cc @@ -29,6 +29,9 @@ std::string writeTemperedLBConfig(std::string transfer_strategy, if (mem_constraints) { cfg_file_ << " memory_threshold=20.0"; } + if (transfer_strategy == "SwapClusters") { + cfg_file_ << " rollback=false"; + } cfg_file_.close(); } return config_file; @@ -37,7 +40,6 @@ std::string writeTemperedLBConfig(std::string transfer_strategy, void runTemperedLBTest(std::string config_file, double expected_imb = 0.0) { // Set configuration theConfig()->vt_lb = true; - 
theConfig()->vt_lb_name = "TemperedLB"; theConfig()->vt_lb_data_in = true; theConfig()->vt_lb_file_name = config_file; theConfig()->vt_lb_data_file_in="synthetic-dataset-blocks.%p.json"; @@ -54,8 +56,7 @@ void runTemperedLBTest(std::string config_file, double expected_imb = 0.0) { auto phase_info = theLBManager()->getPhaseInfo(); // Assert that temperedLB found the correct imbalance - auto imb = (phase_info->max_load / phase_info->avg_load) - 1; - EXPECT_EQ(imb, expected_imb); + EXPECT_EQ(phase_info->imb_load_post_lb, expected_imb); // Clear the LB config ahead of next test vrt::collection::balance::ReadLBConfig::clear(); @@ -78,7 +79,7 @@ TEST_F(TestTemperedLB, test_load_memory_homing_swapclusters) { TEST_F(TestTemperedLB, test_load_memory_homing_comms) { auto cfg = writeTemperedLBConfig("SwapClusters", true, 1.0, 1.0); - double expected_imbalance = 0.25; // placeholder for value from MILP + double expected_imbalance = 0.0; // placeholder for value from MILP runTemperedLBTest(cfg, expected_imbalance); } From 6840a7554cb8728b472505f00b6e1f5b774263d1 Mon Sep 17 00:00:00 2001 From: Caleb Schilly Date: Wed, 14 Aug 2024 14:59:16 -0400 Subject: [PATCH 114/126] #2201: tests: only run tests on four nodes; renamed shared_id to shared_block_id in synthetic-blocks json data" --- tests/data/synthetic-blocks/synthetic-dataset-blocks.0.json | 2 +- tests/data/synthetic-blocks/synthetic-dataset-blocks.1.json | 2 +- tests/data/synthetic-blocks/synthetic-dataset-blocks.2.json | 2 +- tests/unit/lb/test_temperedlb.cc | 4 ++++ 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/data/synthetic-blocks/synthetic-dataset-blocks.0.json b/tests/data/synthetic-blocks/synthetic-dataset-blocks.0.json index 26afaaea2c..349ac9d033 100644 --- a/tests/data/synthetic-blocks/synthetic-dataset-blocks.0.json +++ b/tests/data/synthetic-blocks/synthetic-dataset-blocks.0.json @@ -1 +1 @@ -{"metadata":{"type":"LBDatafile","rank":0},"phases":[{"id":0,"tasks":[{"entity":{"home":0,"id":1,"migratable":true,"type":"object"},"node":0,"resource":"cpu","time":0.5,"user_defined":{"shared_id":0,"shared_bytes":9.0,"home_rank":0}},{"entity":{"home":0,"id":3,"migratable":true,"type":"object"},"node":0,"resource":"cpu","time":0.5,"user_defined":{"shared_id":1,"shared_bytes":9.0,"home_rank":0}},{"entity":{"home":0,"id":2,"migratable":true,"type":"object"},"node":0,"resource":"cpu","time":0.5,"user_defined":{"shared_id":1,"shared_bytes":9.0,"home_rank":0}},{"entity":{"home":0,"id":0,"migratable":true,"type":"object"},"node":0,"resource":"cpu","time":1.0,"user_defined":{"shared_id":0,"shared_bytes":9.0,"home_rank":0}}],"communications":[{"type":"SendRecv","to":{"type":"object","id":5},"messages":1,"from":{"type":"object","id":0},"bytes":2.0},{"type":"SendRecv","to":{"type":"object","id":4},"messages":1,"from":{"type":"object","id":1},"bytes":1.0},{"type":"SendRecv","to":{"type":"object","id":2},"messages":1,"from":{"type":"object","id":3},"bytes":1.0},{"type":"SendRecv","to":{"type":"object","id":8},"messages":1,"from":{"type":"object","id":3},"bytes":0.5}]}]} 
+{"metadata":{"type":"LBDatafile","rank":0},"phases":[{"id":0,"tasks":[{"entity":{"home":0,"id":1,"migratable":true,"type":"object"},"node":0,"resource":"cpu","time":0.5,"user_defined":{"shared_block_id":0,"shared_bytes":9.0,"home_rank":0}},{"entity":{"home":0,"id":3,"migratable":true,"type":"object"},"node":0,"resource":"cpu","time":0.5,"user_defined":{"shared_block_id":1,"shared_bytes":9.0,"home_rank":0}},{"entity":{"home":0,"id":2,"migratable":true,"type":"object"},"node":0,"resource":"cpu","time":0.5,"user_defined":{"shared_block_id":1,"shared_bytes":9.0,"home_rank":0}},{"entity":{"home":0,"id":0,"migratable":true,"type":"object"},"node":0,"resource":"cpu","time":1.0,"user_defined":{"shared_block_id":0,"shared_bytes":9.0,"home_rank":0}}],"communications":[{"type":"SendRecv","to":{"type":"object","id":5},"messages":1,"from":{"type":"object","id":0},"bytes":2.0},{"type":"SendRecv","to":{"type":"object","id":4},"messages":1,"from":{"type":"object","id":1},"bytes":1.0},{"type":"SendRecv","to":{"type":"object","id":2},"messages":1,"from":{"type":"object","id":3},"bytes":1.0},{"type":"SendRecv","to":{"type":"object","id":8},"messages":1,"from":{"type":"object","id":3},"bytes":0.5}]}]} diff --git a/tests/data/synthetic-blocks/synthetic-dataset-blocks.1.json b/tests/data/synthetic-blocks/synthetic-dataset-blocks.1.json index 160cf422d5..9c4486b8c8 100644 --- a/tests/data/synthetic-blocks/synthetic-dataset-blocks.1.json +++ b/tests/data/synthetic-blocks/synthetic-dataset-blocks.1.json @@ -1 +1 @@ -{"metadata":{"type":"LBDatafile","rank":1},"phases":[{"id":0,"tasks":[{"entity":{"home":1,"id":5,"migratable":true,"type":"object"},"node":1,"resource":"cpu","time":2.0,"user_defined":{"shared_id":2,"shared_bytes":9.0,"home_rank":1}},{"entity":{"home":1,"id":4,"migratable":true,"type":"object"},"node":1,"resource":"cpu","time":0.5,"user_defined":{"shared_id":2,"shared_bytes":9.0,"home_rank":1}},{"entity":{"home":1,"id":7,"migratable":true,"type":"object"},"node":1,"resource":"cpu","time":0.5,"user_defined":{"shared_id":3,"shared_bytes":9.0,"home_rank":1}},{"entity":{"home":1,"id":6,"migratable":true,"type":"object"},"node":1,"resource":"cpu","time":1.0,"user_defined":{"shared_id":3,"shared_bytes":9.0,"home_rank":1}}],"communications":[{"type":"SendRecv","to":{"type":"object","id":1},"messages":1,"from":{"type":"object","id":4},"bytes":2.0},{"type":"SendRecv","to":{"type":"object","id":8},"messages":1,"from":{"type":"object","id":5},"bytes":2.0},{"type":"SendRecv","to":{"type":"object","id":6},"messages":1,"from":{"type":"object","id":7},"bytes":1.0}]}]} 
+{"metadata":{"type":"LBDatafile","rank":1},"phases":[{"id":0,"tasks":[{"entity":{"home":1,"id":5,"migratable":true,"type":"object"},"node":1,"resource":"cpu","time":2.0,"user_defined":{"shared_block_id":2,"shared_bytes":9.0,"home_rank":1}},{"entity":{"home":1,"id":4,"migratable":true,"type":"object"},"node":1,"resource":"cpu","time":0.5,"user_defined":{"shared_block_id":2,"shared_bytes":9.0,"home_rank":1}},{"entity":{"home":1,"id":7,"migratable":true,"type":"object"},"node":1,"resource":"cpu","time":0.5,"user_defined":{"shared_block_id":3,"shared_bytes":9.0,"home_rank":1}},{"entity":{"home":1,"id":6,"migratable":true,"type":"object"},"node":1,"resource":"cpu","time":1.0,"user_defined":{"shared_block_id":3,"shared_bytes":9.0,"home_rank":1}}],"communications":[{"type":"SendRecv","to":{"type":"object","id":1},"messages":1,"from":{"type":"object","id":4},"bytes":2.0},{"type":"SendRecv","to":{"type":"object","id":8},"messages":1,"from":{"type":"object","id":5},"bytes":2.0},{"type":"SendRecv","to":{"type":"object","id":6},"messages":1,"from":{"type":"object","id":7},"bytes":1.0}]}]} diff --git a/tests/data/synthetic-blocks/synthetic-dataset-blocks.2.json b/tests/data/synthetic-blocks/synthetic-dataset-blocks.2.json index 5b1e88a01d..dc7bd5e4d3 100644 --- a/tests/data/synthetic-blocks/synthetic-dataset-blocks.2.json +++ b/tests/data/synthetic-blocks/synthetic-dataset-blocks.2.json @@ -1 +1 @@ -{"metadata":{"type":"LBDatafile","rank":2},"phases":[{"id":0,"tasks":[{"entity":{"home":2,"id":8,"migratable":true,"type":"object"},"node":2,"resource":"cpu","time":1.5,"user_defined":{"shared_id":4,"shared_bytes":9.0,"home_rank":2}}],"communications":[{"type":"SendRecv","to":{"type":"object","id":6},"messages":1,"from":{"type":"object","id":8},"bytes":1.5}]}]} +{"metadata":{"type":"LBDatafile","rank":2},"phases":[{"id":0,"tasks":[{"entity":{"home":2,"id":8,"migratable":true,"type":"object"},"node":2,"resource":"cpu","time":1.5,"user_defined":{"shared_block_id":4,"shared_bytes":9.0,"home_rank":2}}],"communications":[{"type":"SendRecv","to":{"type":"object","id":6},"messages":1,"from":{"type":"object","id":8},"bytes":1.5}]}]} diff --git a/tests/unit/lb/test_temperedlb.cc b/tests/unit/lb/test_temperedlb.cc index 019c2bd57e..d66bc8161e 100644 --- a/tests/unit/lb/test_temperedlb.cc +++ b/tests/unit/lb/test_temperedlb.cc @@ -63,21 +63,25 @@ void runTemperedLBTest(std::string config_file, double expected_imb = 0.0) { } TEST_F(TestTemperedLB, test_load_only) { + SET_NUM_NODES_CONSTRAINT(4); auto cfg = writeTemperedLBConfig("Original", false); runTemperedLBTest(cfg); } TEST_F(TestTemperedLB, test_load_and_memory_swapclusters) { + SET_NUM_NODES_CONSTRAINT(4); auto cfg = writeTemperedLBConfig("SwapClusters", true); runTemperedLBTest(cfg); } TEST_F(TestTemperedLB, test_load_memory_homing_swapclusters) { + SET_NUM_NODES_CONSTRAINT(4); auto cfg = writeTemperedLBConfig("SwapClusters", true, 1.0); runTemperedLBTest(cfg); } TEST_F(TestTemperedLB, test_load_memory_homing_comms) { + SET_NUM_NODES_CONSTRAINT(4); auto cfg = writeTemperedLBConfig("SwapClusters", true, 1.0, 1.0); double expected_imbalance = 0.0; // placeholder for value from MILP runTemperedLBTest(cfg, expected_imbalance); From 84acbb2bc4cb9c789eaa8fd2f3139535c39bee65 Mon Sep 17 00:00:00 2001 From: Caleb Schilly Date: Mon, 19 Aug 2024 14:10:38 -0400 Subject: [PATCH 115/126] #2201: update test cases; restore shared_id key to json data files; add option for SwapClusters without memory constraints --- .../balance/temperedlb/temperedlb.cc | 2 +- 
.../synthetic-dataset-blocks.0.json | 2 +- .../synthetic-dataset-blocks.1.json | 2 +- .../synthetic-dataset-blocks.2.json | 2 +- tests/unit/lb/test_temperedlb.cc | 26 ++++++++++++++----- 5 files changed, 24 insertions(+), 10 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 7311d4daad..a26e8ee362 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -492,7 +492,7 @@ void TemperedLB::inputParams(balance::ConfigEntry* config) { terse, temperedlb, "TemperedLB::inputParams: using knowledge={}, fanout={}, rounds={}, " "iters={}, criterion={}, trials={}, deterministic={}, inform={}, " - "ordering={}, cmf={}, rollback={}, targetpole={}\n", + "transfer={}, ordering={}, cmf={}, rollback={}, targetpole={}\n", knowledge_converter_.getString(knowledge_), f_, k_max_, num_iters_, criterion_converter_.getString(criterion_), num_trials_, deterministic_, inform_type_converter_.getString(inform_type_), diff --git a/tests/data/synthetic-blocks/synthetic-dataset-blocks.0.json b/tests/data/synthetic-blocks/synthetic-dataset-blocks.0.json index 349ac9d033..26afaaea2c 100644 --- a/tests/data/synthetic-blocks/synthetic-dataset-blocks.0.json +++ b/tests/data/synthetic-blocks/synthetic-dataset-blocks.0.json @@ -1 +1 @@ -{"metadata":{"type":"LBDatafile","rank":0},"phases":[{"id":0,"tasks":[{"entity":{"home":0,"id":1,"migratable":true,"type":"object"},"node":0,"resource":"cpu","time":0.5,"user_defined":{"shared_block_id":0,"shared_bytes":9.0,"home_rank":0}},{"entity":{"home":0,"id":3,"migratable":true,"type":"object"},"node":0,"resource":"cpu","time":0.5,"user_defined":{"shared_block_id":1,"shared_bytes":9.0,"home_rank":0}},{"entity":{"home":0,"id":2,"migratable":true,"type":"object"},"node":0,"resource":"cpu","time":0.5,"user_defined":{"shared_block_id":1,"shared_bytes":9.0,"home_rank":0}},{"entity":{"home":0,"id":0,"migratable":true,"type":"object"},"node":0,"resource":"cpu","time":1.0,"user_defined":{"shared_block_id":0,"shared_bytes":9.0,"home_rank":0}}],"communications":[{"type":"SendRecv","to":{"type":"object","id":5},"messages":1,"from":{"type":"object","id":0},"bytes":2.0},{"type":"SendRecv","to":{"type":"object","id":4},"messages":1,"from":{"type":"object","id":1},"bytes":1.0},{"type":"SendRecv","to":{"type":"object","id":2},"messages":1,"from":{"type":"object","id":3},"bytes":1.0},{"type":"SendRecv","to":{"type":"object","id":8},"messages":1,"from":{"type":"object","id":3},"bytes":0.5}]}]} 
+{"metadata":{"type":"LBDatafile","rank":0},"phases":[{"id":0,"tasks":[{"entity":{"home":0,"id":1,"migratable":true,"type":"object"},"node":0,"resource":"cpu","time":0.5,"user_defined":{"shared_id":0,"shared_bytes":9.0,"home_rank":0}},{"entity":{"home":0,"id":3,"migratable":true,"type":"object"},"node":0,"resource":"cpu","time":0.5,"user_defined":{"shared_id":1,"shared_bytes":9.0,"home_rank":0}},{"entity":{"home":0,"id":2,"migratable":true,"type":"object"},"node":0,"resource":"cpu","time":0.5,"user_defined":{"shared_id":1,"shared_bytes":9.0,"home_rank":0}},{"entity":{"home":0,"id":0,"migratable":true,"type":"object"},"node":0,"resource":"cpu","time":1.0,"user_defined":{"shared_id":0,"shared_bytes":9.0,"home_rank":0}}],"communications":[{"type":"SendRecv","to":{"type":"object","id":5},"messages":1,"from":{"type":"object","id":0},"bytes":2.0},{"type":"SendRecv","to":{"type":"object","id":4},"messages":1,"from":{"type":"object","id":1},"bytes":1.0},{"type":"SendRecv","to":{"type":"object","id":2},"messages":1,"from":{"type":"object","id":3},"bytes":1.0},{"type":"SendRecv","to":{"type":"object","id":8},"messages":1,"from":{"type":"object","id":3},"bytes":0.5}]}]} diff --git a/tests/data/synthetic-blocks/synthetic-dataset-blocks.1.json b/tests/data/synthetic-blocks/synthetic-dataset-blocks.1.json index 9c4486b8c8..160cf422d5 100644 --- a/tests/data/synthetic-blocks/synthetic-dataset-blocks.1.json +++ b/tests/data/synthetic-blocks/synthetic-dataset-blocks.1.json @@ -1 +1 @@ -{"metadata":{"type":"LBDatafile","rank":1},"phases":[{"id":0,"tasks":[{"entity":{"home":1,"id":5,"migratable":true,"type":"object"},"node":1,"resource":"cpu","time":2.0,"user_defined":{"shared_block_id":2,"shared_bytes":9.0,"home_rank":1}},{"entity":{"home":1,"id":4,"migratable":true,"type":"object"},"node":1,"resource":"cpu","time":0.5,"user_defined":{"shared_block_id":2,"shared_bytes":9.0,"home_rank":1}},{"entity":{"home":1,"id":7,"migratable":true,"type":"object"},"node":1,"resource":"cpu","time":0.5,"user_defined":{"shared_block_id":3,"shared_bytes":9.0,"home_rank":1}},{"entity":{"home":1,"id":6,"migratable":true,"type":"object"},"node":1,"resource":"cpu","time":1.0,"user_defined":{"shared_block_id":3,"shared_bytes":9.0,"home_rank":1}}],"communications":[{"type":"SendRecv","to":{"type":"object","id":1},"messages":1,"from":{"type":"object","id":4},"bytes":2.0},{"type":"SendRecv","to":{"type":"object","id":8},"messages":1,"from":{"type":"object","id":5},"bytes":2.0},{"type":"SendRecv","to":{"type":"object","id":6},"messages":1,"from":{"type":"object","id":7},"bytes":1.0}]}]} 
+{"metadata":{"type":"LBDatafile","rank":1},"phases":[{"id":0,"tasks":[{"entity":{"home":1,"id":5,"migratable":true,"type":"object"},"node":1,"resource":"cpu","time":2.0,"user_defined":{"shared_id":2,"shared_bytes":9.0,"home_rank":1}},{"entity":{"home":1,"id":4,"migratable":true,"type":"object"},"node":1,"resource":"cpu","time":0.5,"user_defined":{"shared_id":2,"shared_bytes":9.0,"home_rank":1}},{"entity":{"home":1,"id":7,"migratable":true,"type":"object"},"node":1,"resource":"cpu","time":0.5,"user_defined":{"shared_id":3,"shared_bytes":9.0,"home_rank":1}},{"entity":{"home":1,"id":6,"migratable":true,"type":"object"},"node":1,"resource":"cpu","time":1.0,"user_defined":{"shared_id":3,"shared_bytes":9.0,"home_rank":1}}],"communications":[{"type":"SendRecv","to":{"type":"object","id":1},"messages":1,"from":{"type":"object","id":4},"bytes":2.0},{"type":"SendRecv","to":{"type":"object","id":8},"messages":1,"from":{"type":"object","id":5},"bytes":2.0},{"type":"SendRecv","to":{"type":"object","id":6},"messages":1,"from":{"type":"object","id":7},"bytes":1.0}]}]} diff --git a/tests/data/synthetic-blocks/synthetic-dataset-blocks.2.json b/tests/data/synthetic-blocks/synthetic-dataset-blocks.2.json index dc7bd5e4d3..5b1e88a01d 100644 --- a/tests/data/synthetic-blocks/synthetic-dataset-blocks.2.json +++ b/tests/data/synthetic-blocks/synthetic-dataset-blocks.2.json @@ -1 +1 @@ -{"metadata":{"type":"LBDatafile","rank":2},"phases":[{"id":0,"tasks":[{"entity":{"home":2,"id":8,"migratable":true,"type":"object"},"node":2,"resource":"cpu","time":1.5,"user_defined":{"shared_block_id":4,"shared_bytes":9.0,"home_rank":2}}],"communications":[{"type":"SendRecv","to":{"type":"object","id":6},"messages":1,"from":{"type":"object","id":8},"bytes":1.5}]}]} +{"metadata":{"type":"LBDatafile","rank":2},"phases":[{"id":0,"tasks":[{"entity":{"home":2,"id":8,"migratable":true,"type":"object"},"node":2,"resource":"cpu","time":1.5,"user_defined":{"shared_id":4,"shared_bytes":9.0,"home_rank":2}}],"communications":[{"type":"SendRecv","to":{"type":"object","id":6},"messages":1,"from":{"type":"object","id":8},"bytes":1.5}]}]} diff --git a/tests/unit/lb/test_temperedlb.cc b/tests/unit/lb/test_temperedlb.cc index d66bc8161e..9dbea33901 100644 --- a/tests/unit/lb/test_temperedlb.cc +++ b/tests/unit/lb/test_temperedlb.cc @@ -26,11 +26,13 @@ std::string writeTemperedLBConfig(std::string transfer_strategy, " beta=" << beta << " gamma=" << gamma << " delta=" << delta; - if (mem_constraints) { - cfg_file_ << " memory_threshold=20.0"; - } if (transfer_strategy == "SwapClusters") { cfg_file_ << " rollback=false"; + if (mem_constraints) { + cfg_file_ << " memory_threshold=20.0"; + } else { + cfg_file_ << " memory_threshold=1e8"; + } } cfg_file_.close(); } @@ -64,7 +66,7 @@ void runTemperedLBTest(std::string config_file, double expected_imb = 0.0) { TEST_F(TestTemperedLB, test_load_only) { SET_NUM_NODES_CONSTRAINT(4); - auto cfg = writeTemperedLBConfig("Original", false); + auto cfg = writeTemperedLBConfig("SwapClusters", false); runTemperedLBTest(cfg); } @@ -74,15 +76,27 @@ TEST_F(TestTemperedLB, test_load_and_memory_swapclusters) { runTemperedLBTest(cfg); } +TEST_F(TestTemperedLB, test_load_no_memory_delta_10) { + SET_NUM_NODES_CONSTRAINT(4); + auto cfg = writeTemperedLBConfig("SwapClusters", false, 1.0); + runTemperedLBTest(cfg); +} + +TEST_F(TestTemperedLB, test_load_no_memory_delta_01) { + SET_NUM_NODES_CONSTRAINT(4); + auto cfg = writeTemperedLBConfig("SwapClusters", false, 0.1); + runTemperedLBTest(cfg); +} + TEST_F(TestTemperedLB, 
test_load_memory_homing_swapclusters) { SET_NUM_NODES_CONSTRAINT(4); - auto cfg = writeTemperedLBConfig("SwapClusters", true, 1.0); + auto cfg = writeTemperedLBConfig("SwapClusters", true, 0.1); runTemperedLBTest(cfg); } TEST_F(TestTemperedLB, test_load_memory_homing_comms) { SET_NUM_NODES_CONSTRAINT(4); - auto cfg = writeTemperedLBConfig("SwapClusters", true, 1.0, 1.0); + auto cfg = writeTemperedLBConfig("SwapClusters", true, 0.1, 1.0); double expected_imbalance = 0.0; // placeholder for value from MILP runTemperedLBTest(cfg, expected_imbalance); } From 29ecfd56986da64ce559646d84e3b93029f68c6f Mon Sep 17 00:00:00 2001 From: Caleb Schilly Date: Thu, 5 Sep 2024 09:25:54 -0400 Subject: [PATCH 116/126] #2201: wip: fix review comments; add collection_id to synthetic data --- .../balance/temperedlb/temperedlb.cc | 57 ++++++++++++------- .../balance/temperedlb/temperedlb.h | 2 +- .../synthetic-dataset-blocks.0.json | 2 +- .../synthetic-dataset-blocks.1.json | 2 +- .../synthetic-dataset-blocks.2.json | 2 +- tests/unit/lb/test_temperedlb.cc | 7 +-- 6 files changed, 45 insertions(+), 27 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index a26e8ee362..73385921a6 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -257,6 +257,7 @@ Default: true Description: If the final iteration of a trial has a worse imbalance than any earlier iteration, it will roll back to the iteration with the best imbalance. + If transfer_strategy is SwapClusters, rollback is automatically set to false. )" }, { @@ -291,32 +292,32 @@ Description: α in the work model (load in work model) "beta", R"( Values: -Defaut: 1.0 +Defaut: 0.0 Description: β in the work model (inter-node communication in work model) )" }, { - "epsilon", + "gamma", R"( Values: -Defaut: 1.0 -Description: ε in the work model (memory term in work model) +Defaut: 0.0 +Description: γ in the work model (intra-node communication in work model) )" }, { "delta", R"( Values: -Defaut: 1.0 +Defaut: 0.0 Description: δ in the work model (shared-memory-edges in work model) )" }, { - "gamma", + "epsilon", R"( Values: -Defaut: 1.0 -Description: γ in the work model (intra-node communication in work model) +Defaut: infinity +Description: ε in the work model (memory term in work model) )" } }; @@ -456,6 +457,10 @@ void TemperedLB::inputParams(balance::ConfigEntry* config) { ); transfer_type_ = transfer_type_converter_.getFromConfig(config, transfer_type_); + if (transfer_type_ == TransferTypeEnum::SwapClusters) { + rollback_ = false; + } + balance::LBArgsEnumConverter obj_ordering_converter_( "ordering", "ObjectOrderEnum", { {ObjectOrderEnum::Arbitrary, "Arbitrary"}, @@ -1066,10 +1071,22 @@ void TemperedLB::doLBStages(LoadType start_imb) { if (first_iter) { // Copy this node's object assignments to a local, mutable copy cur_objs_.clear(); + int total_num_objs = 0; + int num_migratable_objs = 0; for (auto obj : *load_model_) { - cur_objs_[obj] = getModeledValue(obj); + total_num_objs++; + if (obj.isMigratable()) { + num_migratable_objs++; + cur_objs_[obj] = getModeledValue(obj); + } } + vt_debug_print( + normal, temperedlb, + "TemperedLB::doLBStages: Found {} migratable objects out of {}.\n", + num_migratable_objs, total_num_objs + ); + send_edges_.clear(); recv_edges_.clear(); bool has_comm = false; @@ -1326,12 +1343,14 @@ void TemperedLB::doLBStages(LoadType start_imb) { ); } - auto remote_block_count = 
getRemoteBlockCountHere(); - runInEpochCollective("TemperedLB::doLBStages -> compute unhomed", [=] { - proxy_.allreduce<&TemperedLB::remoteBlockCountHandler, collective::PlusOp>( - remote_block_count - ); - }); + // Skip this block when not using SwapClusters + if (transfer_type_ == TransferTypeEnum::SwapClusters) { + auto remote_block_count = getRemoteBlockCountHere(); + runInEpochCollective("TemperedLB::doLBStages -> compute unhomed", [=] { + proxy_.allreduce<&TemperedLB::remoteBlockCountHandler, + collective::PlusOp>(remote_block_count); + }); + } } else if (this_node == 0) { vt_debug_print( terse, temperedlb, @@ -2269,7 +2288,7 @@ void TemperedLB::considerSwapsAfterLock(MsgSharedPtr msg) { try_new_mem += src_cluster.cluster_footprint; if (try_new_mem > mem_thresh_) { - return - std::numeric_limits<double>::infinity(); + return - epsilon; } BytesType src_new_mem = current_memory_usage_; @@ -2289,7 +2308,7 @@ void TemperedLB::considerSwapsAfterLock(MsgSharedPtr msg) { src_new_mem -= src_cluster.cluster_footprint; if (src_new_mem > mem_thresh_) { - return - std::numeric_limits<double>::infinity(); + return - epsilon; } double const src_new_work = @@ -2596,12 +2615,12 @@ void TemperedLB::swapClusters() { // Necessary but not sufficient check regarding memory bounds if (try_mem - try_cluster.bytes + src_cluster.bytes > mem_thresh_) { - return - std::numeric_limits<double>::infinity(); + return - epsilon; } auto const src_mem = current_memory_usage_; if (src_mem + try_cluster.bytes - src_cluster.bytes > mem_thresh_) { - return - std::numeric_limits<double>::infinity(); + return - epsilon; } auto const& try_info = load_info_.find(try_rank)->second; diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h index b30cd66499..f28084973f 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.h +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.h @@ -417,7 +417,7 @@ struct TemperedLB : BaseLB { double beta = 0.0; double gamma = 0.0; double delta = 0.0; - double epsilon = 0.0; + double epsilon = std::numeric_limits<double>::infinity(); std::vector propagated_k_; std::mt19937 gen_propagate_; std::mt19937 gen_sample_; diff --git a/tests/data/synthetic-blocks/synthetic-dataset-blocks.0.json b/tests/data/synthetic-blocks/synthetic-dataset-blocks.0.json index 26afaaea2c..792f750954 100644 --- a/tests/data/synthetic-blocks/synthetic-dataset-blocks.0.json +++ b/tests/data/synthetic-blocks/synthetic-dataset-blocks.0.json @@ -1 +1 @@
-{"metadata":{"type":"LBDatafile","rank":0},"phases":[{"id":0,"tasks":[{"entity":{"home":0,"id":1,"migratable":true,"type":"object"},"node":0,"resource":"cpu","time":0.5,"user_defined":{"shared_id":0,"shared_bytes":9.0,"home_rank":0}},{"entity":{"home":0,"id":3,"migratable":true,"type":"object"},"node":0,"resource":"cpu","time":0.5,"user_defined":{"shared_id":1,"shared_bytes":9.0,"home_rank":0}},{"entity":{"home":0,"id":2,"migratable":true,"type":"object"},"node":0,"resource":"cpu","time":0.5,"user_defined":{"shared_id":1,"shared_bytes":9.0,"home_rank":0}},{"entity":{"home":0,"id":0,"migratable":true,"type":"object"},"node":0,"resource":"cpu","time":1.0,"user_defined":{"shared_id":0,"shared_bytes":9.0,"home_rank":0}}],"communications":[{"type":"SendRecv","to":{"type":"object","id":5},"messages":1,"from":{"type":"object","id":0},"bytes":2.0},{"type":"SendRecv","to":{"type":"object","id":4},"messages":1,"from":{"type":"object","id":1},"bytes":1.0},{"type":"SendRecv","to":{"type":"object","id":2},"messages":1,"from":{"type":"object","id":3},"bytes":1.0},{"type":"SendRecv","to":{"type":"object","id":8},"messages":1,"from":{"type":"object","id":3},"bytes":0.5}]}]} +{"metadata":{"type":"LBDatafile","rank":0},"phases":[{"id":0,"tasks":[{"entity":{"home":0,"id":1,"migratable":true,"collection_id":7,"type":"object"},"node":0,"resource":"cpu","time":0.5,"user_defined":{"shared_id":0,"shared_bytes":9.0,"home_rank":0}},{"entity":{"home":0,"id":3,"migratable":true,"collection_id":7,"type":"object"},"node":0,"resource":"cpu","time":0.5,"user_defined":{"shared_id":1,"shared_bytes":9.0,"home_rank":0}},{"entity":{"home":0,"id":2,"migratable":true,"collection_id":7,"type":"object"},"node":0,"resource":"cpu","time":0.5,"user_defined":{"shared_id":1,"shared_bytes":9.0,"home_rank":0}},{"entity":{"home":0,"id":0,"migratable":true,"collection_id":7,"type":"object"},"node":0,"resource":"cpu","time":1.0,"user_defined":{"shared_id":0,"shared_bytes":9.0,"home_rank":0}}],"communications":[{"type":"SendRecv","to":{"type":"object","id":5},"messages":1,"from":{"type":"object","id":0},"bytes":2.0},{"type":"SendRecv","to":{"type":"object","id":4},"messages":1,"from":{"type":"object","id":1},"bytes":1.0},{"type":"SendRecv","to":{"type":"object","id":2},"messages":1,"from":{"type":"object","id":3},"bytes":1.0},{"type":"SendRecv","to":{"type":"object","id":8},"messages":1,"from":{"type":"object","id":3},"bytes":0.5}]}]} diff --git a/tests/data/synthetic-blocks/synthetic-dataset-blocks.1.json b/tests/data/synthetic-blocks/synthetic-dataset-blocks.1.json index 160cf422d5..99f476a215 100644 --- a/tests/data/synthetic-blocks/synthetic-dataset-blocks.1.json +++ b/tests/data/synthetic-blocks/synthetic-dataset-blocks.1.json @@ -1 +1 @@ 
-{"metadata":{"type":"LBDatafile","rank":1},"phases":[{"id":0,"tasks":[{"entity":{"home":1,"id":5,"migratable":true,"type":"object"},"node":1,"resource":"cpu","time":2.0,"user_defined":{"shared_id":2,"shared_bytes":9.0,"home_rank":1}},{"entity":{"home":1,"id":4,"migratable":true,"type":"object"},"node":1,"resource":"cpu","time":0.5,"user_defined":{"shared_id":2,"shared_bytes":9.0,"home_rank":1}},{"entity":{"home":1,"id":7,"migratable":true,"type":"object"},"node":1,"resource":"cpu","time":0.5,"user_defined":{"shared_id":3,"shared_bytes":9.0,"home_rank":1}},{"entity":{"home":1,"id":6,"migratable":true,"type":"object"},"node":1,"resource":"cpu","time":1.0,"user_defined":{"shared_id":3,"shared_bytes":9.0,"home_rank":1}}],"communications":[{"type":"SendRecv","to":{"type":"object","id":1},"messages":1,"from":{"type":"object","id":4},"bytes":2.0},{"type":"SendRecv","to":{"type":"object","id":8},"messages":1,"from":{"type":"object","id":5},"bytes":2.0},{"type":"SendRecv","to":{"type":"object","id":6},"messages":1,"from":{"type":"object","id":7},"bytes":1.0}]}]} +{"metadata":{"type":"LBDatafile","rank":1},"phases":[{"id":0,"tasks":[{"entity":{"home":1,"id":5,"migratable":true,"collection_id":7,"type":"object"},"node":1,"resource":"cpu","time":2.0,"user_defined":{"shared_id":2,"shared_bytes":9.0,"home_rank":1}},{"entity":{"home":1,"id":4,"migratable":true,"collection_id":7,"type":"object"},"node":1,"resource":"cpu","time":0.5,"user_defined":{"shared_id":2,"shared_bytes":9.0,"home_rank":1}},{"entity":{"home":1,"id":7,"migratable":true,"collection_id":7,"type":"object"},"node":1,"resource":"cpu","time":0.5,"user_defined":{"shared_id":3,"shared_bytes":9.0,"home_rank":1}},{"entity":{"home":1,"id":6,"migratable":true,"collection_id":7,"type":"object"},"node":1,"resource":"cpu","time":1.0,"user_defined":{"shared_id":3,"shared_bytes":9.0,"home_rank":1}}],"communications":[{"type":"SendRecv","to":{"type":"object","id":1},"messages":1,"from":{"type":"object","id":4},"bytes":2.0},{"type":"SendRecv","to":{"type":"object","id":8},"messages":1,"from":{"type":"object","id":5},"bytes":2.0},{"type":"SendRecv","to":{"type":"object","id":6},"messages":1,"from":{"type":"object","id":7},"bytes":1.0}]}]} diff --git a/tests/data/synthetic-blocks/synthetic-dataset-blocks.2.json b/tests/data/synthetic-blocks/synthetic-dataset-blocks.2.json index 5b1e88a01d..a09b3dba90 100644 --- a/tests/data/synthetic-blocks/synthetic-dataset-blocks.2.json +++ b/tests/data/synthetic-blocks/synthetic-dataset-blocks.2.json @@ -1 +1 @@ -{"metadata":{"type":"LBDatafile","rank":2},"phases":[{"id":0,"tasks":[{"entity":{"home":2,"id":8,"migratable":true,"type":"object"},"node":2,"resource":"cpu","time":1.5,"user_defined":{"shared_id":4,"shared_bytes":9.0,"home_rank":2}}],"communications":[{"type":"SendRecv","to":{"type":"object","id":6},"messages":1,"from":{"type":"object","id":8},"bytes":1.5}]}]} +{"metadata":{"type":"LBDatafile","rank":2},"phases":[{"id":0,"tasks":[{"entity":{"home":2,"id":8,"migratable":true,"collection_id":7,"type":"object"},"node":2,"resource":"cpu","time":1.5,"user_defined":{"shared_id":4,"shared_bytes":9.0,"home_rank":2}}],"communications":[{"type":"SendRecv","to":{"type":"object","id":6},"messages":1,"from":{"type":"object","id":8},"bytes":1.5}]}]} diff --git a/tests/unit/lb/test_temperedlb.cc b/tests/unit/lb/test_temperedlb.cc index 9dbea33901..7496df99a9 100644 --- a/tests/unit/lb/test_temperedlb.cc +++ b/tests/unit/lb/test_temperedlb.cc @@ -27,7 +27,6 @@ std::string writeTemperedLBConfig(std::string transfer_strategy, 
" gamma=" << gamma << " delta=" << delta; if (transfer_strategy == "SwapClusters") { - cfg_file_ << " rollback=false"; if (mem_constraints) { cfg_file_ << " memory_threshold=20.0"; } else { @@ -40,6 +39,9 @@ std::string writeTemperedLBConfig(std::string transfer_strategy, } void runTemperedLBTest(std::string config_file, double expected_imb = 0.0) { + // Clear the LB config + vrt::collection::balance::ReadLBConfig::clear(); + // Set configuration theConfig()->vt_lb = true; theConfig()->vt_lb_data_in = true; @@ -59,9 +61,6 @@ void runTemperedLBTest(std::string config_file, double expected_imb = 0.0) { // Assert that temperedLB found the correct imbalance EXPECT_EQ(phase_info->imb_load_post_lb, expected_imb); - - // Clear the LB config ahead of next test - vrt::collection::balance::ReadLBConfig::clear(); } TEST_F(TestTemperedLB, test_load_only) { From 31472d497bd73e8afa0313e95ac1f066102d3726 Mon Sep 17 00:00:00 2001 From: Caleb Schilly Date: Wed, 11 Sep 2024 14:00:14 -0400 Subject: [PATCH 117/126] #2201: loosen strict inequalities for criterion; remove epsilon from computeWork --- .../collection/balance/temperedlb/criterion.h | 2 +- .../balance/temperedlb/temperedlb.cc | 18 +- .../synthetic-dataset-blocks.0.json | 160 +++++++++++++++++- .../synthetic-dataset-blocks.1.json | 141 ++++++++++++++- .../synthetic-dataset-blocks.2.json | 52 +++++- tests/unit/lb/test_temperedlb.cc | 13 +- 6 files changed, 370 insertions(+), 16 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/criterion.h b/src/vt/vrt/collection/balance/temperedlb/criterion.h index 42e8b7befe..dfbc79f380 100644 --- a/src/vt/vrt/collection/balance/temperedlb/criterion.h +++ b/src/vt/vrt/collection/balance/temperedlb/criterion.h @@ -63,7 +63,7 @@ struct GrapevineCriterion { struct ModifiedGrapevineCriterion { bool operator()(LoadType over, LoadType under, LoadType obj, LoadType) const { - return obj < over - under; + return obj <= over - under; } }; diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index 73385921a6..e809ec3204 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -193,7 +193,7 @@ Default: Original { "ordering", R"( -Values: {Arbitrary, ElmID, FewestMigrations, SmallObject, LargestObjects} +Values: {Arbitrary, ElmID, FewestMigrations, SmallObjects, LargestObjects} Default: FewestMigrations Description: The order in which local objects are considered for transfer. 
Options are: @@ -860,8 +860,8 @@ double TemperedLB::computeWork( alpha * load + beta * inter_comm_bytes + gamma * intra_comm_bytes + - delta * shared_comm_bytes + - epsilon; + delta * shared_comm_bytes; + // epsilon; } WorkBreakdown TemperedLB::computeWorkBreakdown( @@ -1894,7 +1894,7 @@ std::vector TemperedLB::orderObjects( auto single_obj_load = this_new_load; for (auto &obj : cur_objs) { auto obj_load = obj.second; - if (obj_load > over_avg && obj_load < single_obj_load) { + if (obj_load >= over_avg && obj_load < single_obj_load) { single_obj_load = obj_load; } } @@ -2340,7 +2340,7 @@ void TemperedLB::considerSwapsAfterLock(MsgSharedPtr msg) { try_rank, try_info, try_total_bytes, try_max_owm, try_max_osm, src_cluster, empty_cluster ); - if (c_try > 0.0) { + if (c_try >= 0.0) { if (c_try > best_c_try) { best_c_try = c_try; best_swap = std::make_tuple(src_shared_id, no_shared_id); @@ -2358,7 +2358,7 @@ void TemperedLB::considerSwapsAfterLock(MsgSharedPtr msg) { "testing a possible swap (rank {}): {} {} c_try={}\n", try_rank, src_shared_id, try_shared_id, c_try ); - if (c_try > 0.0) { + if (c_try >= 0.0) { if (c_try > best_c_try) { best_c_try = c_try; best_swap = std::make_tuple(src_shared_id, try_shared_id); @@ -2367,7 +2367,7 @@ void TemperedLB::considerSwapsAfterLock(MsgSharedPtr msg) { } } - if (best_c_try > 0) { + if (best_c_try >= 0) { // FIXME C++20: use structured binding auto const src_shared_id = std::get<0>(best_swap); auto const try_shared_id = std::get<1>(best_swap); @@ -2653,7 +2653,7 @@ void TemperedLB::swapClusters() { { ClusterInfo empty_cluster; double c_try = criterion(try_rank, try_mem, src_cluster, empty_cluster); - if (c_try > 0.0) { + if (c_try >= 0.0) { // Try to obtain lock for feasible swap found_potential_good_swap = true; proxy_[try_rank].template send<&TemperedLB::tryLock>(this_node, c_try); @@ -2665,7 +2665,7 @@ void TemperedLB::swapClusters() { for (auto const& [try_shared_id, try_cluster] : try_clusters) { // Decide whether swap is beneficial double c_try = criterion(try_rank, try_mem, src_cluster, try_cluster); - if (c_try > 0.0) { + if (c_try >= 0.0) { // Try to obtain lock for feasible swap found_potential_good_swap = true; proxy_[try_rank].template send<&TemperedLB::tryLock>(this_node, c_try); diff --git a/tests/data/synthetic-blocks/synthetic-dataset-blocks.0.json b/tests/data/synthetic-blocks/synthetic-dataset-blocks.0.json index 792f750954..83cd135ae1 100644 --- a/tests/data/synthetic-blocks/synthetic-dataset-blocks.0.json +++ b/tests/data/synthetic-blocks/synthetic-dataset-blocks.0.json @@ -1 +1,159 @@ 
-{"metadata":{"type":"LBDatafile","rank":0},"phases":[{"id":0,"tasks":[{"entity":{"home":0,"id":1,"migratable":true,"collection_id":7,"type":"object"},"node":0,"resource":"cpu","time":0.5,"user_defined":{"shared_id":0,"shared_bytes":9.0,"home_rank":0}},{"entity":{"home":0,"id":3,"migratable":true,"collection_id":7,"type":"object"},"node":0,"resource":"cpu","time":0.5,"user_defined":{"shared_id":1,"shared_bytes":9.0,"home_rank":0}},{"entity":{"home":0,"id":2,"migratable":true,"collection_id":7,"type":"object"},"node":0,"resource":"cpu","time":0.5,"user_defined":{"shared_id":1,"shared_bytes":9.0,"home_rank":0}},{"entity":{"home":0,"id":0,"migratable":true,"collection_id":7,"type":"object"},"node":0,"resource":"cpu","time":1.0,"user_defined":{"shared_id":0,"shared_bytes":9.0,"home_rank":0}}],"communications":[{"type":"SendRecv","to":{"type":"object","id":5},"messages":1,"from":{"type":"object","id":0},"bytes":2.0},{"type":"SendRecv","to":{"type":"object","id":4},"messages":1,"from":{"type":"object","id":1},"bytes":1.0},{"type":"SendRecv","to":{"type":"object","id":2},"messages":1,"from":{"type":"object","id":3},"bytes":1.0},{"type":"SendRecv","to":{"type":"object","id":8},"messages":1,"from":{"type":"object","id":3},"bytes":0.5}]}]} +{ + "metadata": { + "rank": 0, + "type": "LBDatafile" + }, + "phases": [ + { + "communications": [ + { + "bytes": 2.0, + "from": { + "collection_id": 7, + "home": 0, + "seq_id": 0, + "migratable": true, + "type": "object" + }, + "messages": 1, + "to": { + "collection_id": 7, + "home": 1, + "seq_id": 5, + "migratable": true, + "type": "object" + }, + "type": "SendRecv" + }, + { + "bytes": 1.0, + "from": { + "collection_id": 7, + "home": 0, + "seq_id": 1, + "migratable": true, + "type": "object" + }, + "messages": 1, + "to": { + "collection_id": 7, + "home": 1, + "seq_id": 4, + "migratable": true, + "type": "object" + }, + "type": "SendRecv" + }, + { + "bytes": 1.0, + "from": { + "collection_id": 7, + "home": 0, + "seq_id": 3, + "migratable": true, + "type": "object" + }, + "messages": 1, + "to": { + "collection_id": 7, + "home": 0, + "seq_id": 2, + "migratable": true, + "type": "object" + }, + "type": "SendRecv" + }, + { + "bytes": 0.5, + "from": { + "collection_id": 7, + "home": 0, + "seq_id": 3, + "migratable": true, + "type": "object" + }, + "messages": 1, + "to": { + "collection_id": 7, + "home": 2, + "seq_id": 8, + "migratable": true, + "type": "object" + }, + "type": "SendRecv" + } + ], + "id": 0, + "tasks": [ + { + "entity": { + "collection_id": 7, + "home": 0, + "seq_id": 1, + "migratable": true, + "type": "object" + }, + "node": 0, + "resource": "cpu", + "time": 0.5, + "user_defined": { + "home_rank": 0, + "shared_bytes": 9.0, + "shared_id": 0 + } + }, + { + "entity": { + "collection_id": 7, + "home": 0, + "seq_id": 3, + "migratable": true, + "type": "object" + }, + "node": 0, + "resource": "cpu", + "time": 0.5, + "user_defined": { + "home_rank": 0, + "shared_bytes": 9.0, + "shared_id": 1 + } + }, + { + "entity": { + "collection_id": 7, + "home": 0, + "seq_id": 2, + "migratable": true, + "type": "object" + }, + "node": 0, + "resource": "cpu", + "time": 0.5, + "user_defined": { + "home_rank": 0, + "shared_bytes": 9.0, + "shared_id": 1 + } + }, + { + "entity": { + "collection_id": 7, + "home": 0, + "seq_id": 0, + "migratable": true, + "type": "object" + }, + "node": 0, + "resource": "cpu", + "time": 1.0, + "user_defined": { + "home_rank": 0, + "shared_bytes": 9.0, + "shared_id": 0 + } + } + ] + } + ] +} \ No newline at end of file diff --git 
a/tests/data/synthetic-blocks/synthetic-dataset-blocks.1.json b/tests/data/synthetic-blocks/synthetic-dataset-blocks.1.json index 99f476a215..050fd1b1a5 100644 --- a/tests/data/synthetic-blocks/synthetic-dataset-blocks.1.json +++ b/tests/data/synthetic-blocks/synthetic-dataset-blocks.1.json @@ -1 +1,140 @@ -{"metadata":{"type":"LBDatafile","rank":1},"phases":[{"id":0,"tasks":[{"entity":{"home":1,"id":5,"migratable":true,"collection_id":7,"type":"object"},"node":1,"resource":"cpu","time":2.0,"user_defined":{"shared_id":2,"shared_bytes":9.0,"home_rank":1}},{"entity":{"home":1,"id":4,"migratable":true,"collection_id":7,"type":"object"},"node":1,"resource":"cpu","time":0.5,"user_defined":{"shared_id":2,"shared_bytes":9.0,"home_rank":1}},{"entity":{"home":1,"id":7,"migratable":true,"collection_id":7,"type":"object"},"node":1,"resource":"cpu","time":0.5,"user_defined":{"shared_id":3,"shared_bytes":9.0,"home_rank":1}},{"entity":{"home":1,"id":6,"migratable":true,"collection_id":7,"type":"object"},"node":1,"resource":"cpu","time":1.0,"user_defined":{"shared_id":3,"shared_bytes":9.0,"home_rank":1}}],"communications":[{"type":"SendRecv","to":{"type":"object","id":1},"messages":1,"from":{"type":"object","id":4},"bytes":2.0},{"type":"SendRecv","to":{"type":"object","id":8},"messages":1,"from":{"type":"object","id":5},"bytes":2.0},{"type":"SendRecv","to":{"type":"object","id":6},"messages":1,"from":{"type":"object","id":7},"bytes":1.0}]}]} +{ + "metadata": { + "rank": 1, + "type": "LBDatafile" + }, + "phases": [ + { + "communications": [ + { + "bytes": 2.0, + "from": { + "collection_id": 7, + "home": 1, + "seq_id": 4, + "migratable": true, + "type": "object" + }, + "messages": 1, + "to": { + "collection_id": 7, + "home": 0, + "seq_id": 1, + "migratable": true, + "type": "object" + }, + "type": "SendRecv" + }, + { + "bytes": 2.0, + "from": { + "collection_id": 7, + "home": 1, + "seq_id": 5, + "migratable": true, + "type": "object" + }, + "messages": 1, + "to": { + "collection_id": 7, + "home": 2, + "seq_id": 8, + "migratable": true, + "type": "object" + }, + "type": "SendRecv" + }, + { + "bytes": 1.0, + "from": { + "collection_id": 7, + "home": 1, + "seq_id": 7, + "migratable": true, + "type": "object" + }, + "messages": 1, + "to": { + "collection_id": 7, + "home": 1, + "seq_id": 6, + "migratable": true, + "type": "object" + }, + "type": "SendRecv" + } + ], + "id": 0, + "tasks": [ + { + "entity": { + "collection_id": 7, + "home": 1, + "seq_id": 5, + "migratable": true, + "type": "object" + }, + "node": 1, + "resource": "cpu", + "time": 2.0, + "user_defined": { + "home_rank": 1, + "shared_bytes": 9.0, + "shared_id": 2 + } + }, + { + "entity": { + "collection_id": 7, + "home": 1, + "seq_id": 4, + "migratable": true, + "type": "object" + }, + "node": 1, + "resource": "cpu", + "time": 0.5, + "user_defined": { + "home_rank": 1, + "shared_bytes": 9.0, + "shared_id": 2 + } + }, + { + "entity": { + "collection_id": 7, + "home": 1, + "seq_id": 7, + "migratable": true, + "type": "object" + }, + "node": 1, + "resource": "cpu", + "time": 0.5, + "user_defined": { + "home_rank": 1, + "shared_bytes": 9.0, + "shared_id": 3 + } + }, + { + "entity": { + "collection_id": 7, + "home": 1, + "seq_id": 6, + "migratable": true, + "type": "object" + }, + "node": 1, + "resource": "cpu", + "time": 1.0, + "user_defined": { + "home_rank": 1, + "shared_bytes": 9.0, + "shared_id": 3 + } + } + ] + } + ] +} \ No newline at end of file diff --git a/tests/data/synthetic-blocks/synthetic-dataset-blocks.2.json 
b/tests/data/synthetic-blocks/synthetic-dataset-blocks.2.json index a09b3dba90..39dfa10e8c 100644 --- a/tests/data/synthetic-blocks/synthetic-dataset-blocks.2.json +++ b/tests/data/synthetic-blocks/synthetic-dataset-blocks.2.json @@ -1 +1,51 @@ -{"metadata":{"type":"LBDatafile","rank":2},"phases":[{"id":0,"tasks":[{"entity":{"home":2,"id":8,"migratable":true,"collection_id":7,"type":"object"},"node":2,"resource":"cpu","time":1.5,"user_defined":{"shared_id":4,"shared_bytes":9.0,"home_rank":2}}],"communications":[{"type":"SendRecv","to":{"type":"object","id":6},"messages":1,"from":{"type":"object","id":8},"bytes":1.5}]}]} +{ + "metadata": { + "rank": 2, + "type": "LBDatafile" + }, + "phases": [ + { + "communications": [ + { + "bytes": 1.5, + "from": { + "collection_id": 7, + "home": 2, + "seq_id": 8, + "migratable": true, + "type": "object" + }, + "messages": 1, + "to": { + "collection_id": 7, + "home": 1, + "seq_id": 6, + "migratable": true, + "type": "object" + }, + "type": "SendRecv" + } + ], + "id": 0, + "tasks": [ + { + "entity": { + "collection_id": 7, + "home": 2, + "seq_id": 8, + "migratable": true, + "type": "object" + }, + "node": 2, + "resource": "cpu", + "time": 1.5, + "user_defined": { + "home_rank": 2, + "shared_bytes": 9.0, + "shared_id": 4 + } + } + ] + } + ] +} \ No newline at end of file diff --git a/tests/unit/lb/test_temperedlb.cc b/tests/unit/lb/test_temperedlb.cc index 7496df99a9..f8427e0b5a 100644 --- a/tests/unit/lb/test_temperedlb.cc +++ b/tests/unit/lb/test_temperedlb.cc @@ -63,16 +63,23 @@ void runTemperedLBTest(std::string config_file, double expected_imb = 0.0) { EXPECT_EQ(phase_info->imb_load_post_lb, expected_imb); } -TEST_F(TestTemperedLB, test_load_only) { +TEST_F(TestTemperedLB, test_load_only_original_transfer) { SET_NUM_NODES_CONSTRAINT(4); - auto cfg = writeTemperedLBConfig("SwapClusters", false); + auto cfg = writeTemperedLBConfig("Original", false); runTemperedLBTest(cfg); } +TEST_F(TestTemperedLB, test_load_only_swapclusters) { + SET_NUM_NODES_CONSTRAINT(4); + auto cfg = writeTemperedLBConfig("SwapClusters", false); + // Expect 0.25 in this case because vt does not subcluster + runTemperedLBTest(cfg, 0.25); +} + TEST_F(TestTemperedLB, test_load_and_memory_swapclusters) { SET_NUM_NODES_CONSTRAINT(4); auto cfg = writeTemperedLBConfig("SwapClusters", true); - runTemperedLBTest(cfg); + runTemperedLBTest(cfg, 0.25); } TEST_F(TestTemperedLB, test_load_no_memory_delta_10) { From c5a4a8f2f7eb406951bab76837f31c4786b69a7d Mon Sep 17 00:00:00 2001 From: Caleb Schilly Date: Thu, 12 Sep 2024 14:26:19 -0400 Subject: [PATCH 118/126] #2201: add test for delta=0.3 --- tests/unit/lb/test_temperedlb.cc | 43 +++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/tests/unit/lb/test_temperedlb.cc b/tests/unit/lb/test_temperedlb.cc index f8427e0b5a..91977559c3 100644 --- a/tests/unit/lb/test_temperedlb.cc +++ b/tests/unit/lb/test_temperedlb.cc @@ -13,16 +13,17 @@ using TestTemperedLB = TestParallelHarness; std::string writeTemperedLBConfig(std::string transfer_strategy, bool mem_constraints, - double delta = 0.0, + double alpha = 1.0, double beta = 0.0, - double gamma = 0.0) { + double gamma = 0.0, + double delta = 0.0) { int this_rank; MPI_Comm_rank(MPI_COMM_WORLD, &this_rank); auto config_file = getUniqueFilename(); if (this_rank == 0) { std::ofstream cfg_file_{config_file.c_str(), std::ofstream::out | std::ofstream::trunc}; cfg_file_ << "0 TemperedLB transfer=" << transfer_strategy << - " alpha=1.0" << + " alpha=" << alpha << " 
beta=" << beta << " gamma=" << gamma << " delta=" << delta; @@ -56,13 +57,15 @@ void runTemperedLBTest(std::string config_file, double expected_imb = 0.0) { vt::vrt::collection::balance::replay::replayWorkloads( initial_phase, phases_to_run, phase_mod); - // Get information for the last phase (this problem only has one) + // Get information for the last phase auto phase_info = theLBManager()->getPhaseInfo(); // Assert that temperedLB found the correct imbalance EXPECT_EQ(phase_info->imb_load_post_lb, expected_imb); } +// The following tests use expected values found by the MILP + TEST_F(TestTemperedLB, test_load_only_original_transfer) { SET_NUM_NODES_CONSTRAINT(4); auto cfg = writeTemperedLBConfig("Original", false); @@ -72,7 +75,6 @@ TEST_F(TestTemperedLB, test_load_only_original_transfer) { TEST_F(TestTemperedLB, test_load_only_swapclusters) { SET_NUM_NODES_CONSTRAINT(4); auto cfg = writeTemperedLBConfig("SwapClusters", false); - // Expect 0.25 in this case because vt does not subcluster runTemperedLBTest(cfg, 0.25); } @@ -84,27 +86,38 @@ TEST_F(TestTemperedLB, test_load_and_memory_swapclusters) { TEST_F(TestTemperedLB, test_load_no_memory_delta_10) { SET_NUM_NODES_CONSTRAINT(4); - auto cfg = writeTemperedLBConfig("SwapClusters", false, 1.0); - runTemperedLBTest(cfg); + auto cfg = writeTemperedLBConfig("SwapClusters", false, 1, 0, 0, 1); + runTemperedLBTest(cfg, 1.0); } TEST_F(TestTemperedLB, test_load_no_memory_delta_01) { SET_NUM_NODES_CONSTRAINT(4); - auto cfg = writeTemperedLBConfig("SwapClusters", false, 0.1); - runTemperedLBTest(cfg); + auto cfg = writeTemperedLBConfig("SwapClusters", false, 1, 0, 0, 0.1); + runTemperedLBTest(cfg, 0.25); } -TEST_F(TestTemperedLB, test_load_memory_homing_swapclusters) { +TEST_F(TestTemperedLB, test_load_memory_delta_01) { SET_NUM_NODES_CONSTRAINT(4); - auto cfg = writeTemperedLBConfig("SwapClusters", true, 0.1); - runTemperedLBTest(cfg); + auto cfg = writeTemperedLBConfig("SwapClusters", true, 1, 0, 0, 0.1); + runTemperedLBTest(cfg, 0.25); +} + +TEST_F(TestTemperedLB, test_load_no_memory_delta_03) { + SET_NUM_NODES_CONSTRAINT(4); + auto cfg = writeTemperedLBConfig("SwapClusters", false, 1, 0, 0, 0.3); + runTemperedLBTest(cfg, 1.0); +} + +TEST_F(TestTemperedLB, test_load_memory_delta_03) { + SET_NUM_NODES_CONSTRAINT(4); + auto cfg = writeTemperedLBConfig("SwapClusters", true, 1, 0, 0, 0.3); + runTemperedLBTest(cfg, 1.0); } TEST_F(TestTemperedLB, test_load_memory_homing_comms) { SET_NUM_NODES_CONSTRAINT(4); - auto cfg = writeTemperedLBConfig("SwapClusters", true, 0.1, 1.0); - double expected_imbalance = 0.0; // placeholder for value from MILP - runTemperedLBTest(cfg, expected_imbalance); + auto cfg = writeTemperedLBConfig("SwapClusters", true, 1, 1, 0, 0.1); + runTemperedLBTest(cfg, 0.25); } #endif From 562ccbba4510dff14e738b2e793c2e4d2ddb1d3d Mon Sep 17 00:00:00 2001 From: Caleb Schilly Date: Thu, 12 Sep 2024 16:44:11 -0400 Subject: [PATCH 119/126] #2201: remove comms test for now --- tests/unit/lb/test_temperedlb.cc | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/unit/lb/test_temperedlb.cc b/tests/unit/lb/test_temperedlb.cc index 91977559c3..985da6940d 100644 --- a/tests/unit/lb/test_temperedlb.cc +++ b/tests/unit/lb/test_temperedlb.cc @@ -114,12 +114,6 @@ TEST_F(TestTemperedLB, test_load_memory_delta_03) { runTemperedLBTest(cfg, 1.0); } -TEST_F(TestTemperedLB, test_load_memory_homing_comms) { - SET_NUM_NODES_CONSTRAINT(4); - auto cfg = writeTemperedLBConfig("SwapClusters", true, 1, 1, 0, 0.1); - runTemperedLBTest(cfg, 0.25); -} - 
#endif }}}} /* end namespace vt::tests::unit::lb */ From 26ba0e601988fcd17e96bf739addb3700ae568a8 Mon Sep 17 00:00:00 2001 From: Caleb Schilly Date: Fri, 13 Sep 2024 11:01:58 -0400 Subject: [PATCH 120/126] #2201: remove commented out epsilon --- src/vt/vrt/collection/balance/temperedlb/temperedlb.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index e809ec3204..dcb58737dd 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -855,13 +855,12 @@ double TemperedLB::computeWork( double load, double inter_comm_bytes, double intra_comm_bytes, double shared_comm_bytes ) const { - // The work model based on input parameters + // The work model based on input parameters (excluding epsilon) return alpha * load + beta * inter_comm_bytes + gamma * intra_comm_bytes + delta * shared_comm_bytes; - // epsilon; } WorkBreakdown TemperedLB::computeWorkBreakdown( From e5a8e11244d3cc70ee4ed531ec12a748fb67a11c Mon Sep 17 00:00:00 2001 From: Caleb Schilly Date: Fri, 13 Sep 2024 12:32:38 -0400 Subject: [PATCH 121/126] #2201: fix bug in schema; require collection_id for migratable objects --- scripts/JSON_data_files_validator.py | 9 +++++---- scripts/LBDatafile_schema.py | 17 ++++++++++++----- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/scripts/JSON_data_files_validator.py b/scripts/JSON_data_files_validator.py index 72a5c51c38..a14c083fc6 100644 --- a/scripts/JSON_data_files_validator.py +++ b/scripts/JSON_data_files_validator.py @@ -434,13 +434,14 @@ def validate_comm_links(all_jsons): for data in all_jsons: tasks = data["phases"][n]["tasks"] - id_key = "id" if "id" in tasks[0]["entity"] else "seq_id" - task_ids.update({int(task["entity"][id_key]) for task in tasks}) + task_ids.update( + {int(task["entity"].get("id", task["entity"].get("seq_id"))) for task in tasks} + ) if data["phases"][n].get("communications") is not None: comms = data["phases"][n]["communications"] - comm_ids.update({int(comm["from"][id_key]) for comm in comms}) - comm_ids.update({int(comm["to"][id_key]) for comm in comms}) + comm_ids.update({int(comm["from"].get("id", comm["from"].get("seq_id"))) for comm in comms}) + comm_ids.update({int(comm["to"].get("id", comm["to"].get("seq_id"))) for comm in comms}) if not comm_ids.issubset(task_ids): logging.error( diff --git a/scripts/LBDatafile_schema.py b/scripts/LBDatafile_schema.py index 743fff574e..d11b64fa0e 100644 --- a/scripts/LBDatafile_schema.py +++ b/scripts/LBDatafile_schema.py @@ -1,9 +1,16 @@ from schema import And, Optional, Schema -def validate_id_and_seq_id(field): - """Ensure that either seq_id or id is provided.""" +def validate_ids(field): + """ + Ensure that 1) either seq_id or id is provided, + and 2) if an object is migratable, collection_id has been set. 
+ """ if 'seq_id' not in field and 'id' not in field: raise ValueError('Either id (bit-encoded) or seq_id must be provided.') + + if field['migratable'] and 'collection_id' not in field: + raise ValueError('If an entity is migratable, it must have a collection_id') + return field LBDatafile_schema = Schema( @@ -45,7 +52,7 @@ def validate_id_and_seq_id(field): 'type': str, 'migratable': bool, Optional('objgroup_id'): int - }, validate_id_and_seq_id), + }, validate_ids), 'node': int, 'resource': str, Optional('subphases'): [ @@ -71,7 +78,7 @@ def validate_id_and_seq_id(field): Optional('migratable'): bool, Optional('index'): [int], Optional('objgroup_id'): int, - }, validate_id_and_seq_id), + }, validate_ids), 'messages': int, 'from': And({ 'type': str, @@ -82,7 +89,7 @@ def validate_id_and_seq_id(field): Optional('migratable'): bool, Optional('index'): [int], Optional('objgroup_id'): int, - }, validate_id_and_seq_id), + }, validate_ids), 'bytes': float } ], From d216c4b642e7aa8b3f64a9e473d66c88c037cad1 Mon Sep 17 00:00:00 2001 From: Caleb Schilly Date: Fri, 13 Sep 2024 14:02:31 -0400 Subject: [PATCH 122/126] #2201: add collection_id and index to initialization test --- tests/unit/runtime/test_initialization.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/unit/runtime/test_initialization.cc b/tests/unit/runtime/test_initialization.cc index 7f2a923da4..465683f226 100644 --- a/tests/unit/runtime/test_initialization.cc +++ b/tests/unit/runtime/test_initialization.cc @@ -541,6 +541,9 @@ void prepareLBDataFiles(const std::string file_name_without_ext) { for (PhaseType i = 0; i < num_phases; i++) { for (auto&& elm : ids[i]) { dh.node_data_[i][elm] = LoadSummary{3}; + std::vector arr = {1}; + VirtualProxyType proxy = 7; + dh.node_idx_[elm] = std::make_tuple(proxy, arr); } } From 894ea64f8b4a22e2203328e75b87227009ffe729 Mon Sep 17 00:00:00 2001 From: Jonathan Lifflander Date: Fri, 13 Sep 2024 15:12:59 -0700 Subject: [PATCH 123/126] #2201: tests: reformat to follow style guidelines and use theContext --- tests/unit/lb/test_temperedlb.cc | 140 +++++++++++++++---------------- 1 file changed, 69 insertions(+), 71 deletions(-) diff --git a/tests/unit/lb/test_temperedlb.cc b/tests/unit/lb/test_temperedlb.cc index 985da6940d..1ec5950acf 100644 --- a/tests/unit/lb/test_temperedlb.cc +++ b/tests/unit/lb/test_temperedlb.cc @@ -11,107 +11,105 @@ namespace vt { namespace tests { namespace unit { namespace lb { using TestTemperedLB = TestParallelHarness; -std::string writeTemperedLBConfig(std::string transfer_strategy, - bool mem_constraints, - double alpha = 1.0, - double beta = 0.0, - double gamma = 0.0, - double delta = 0.0) { - int this_rank; - MPI_Comm_rank(MPI_COMM_WORLD, &this_rank); - auto config_file = getUniqueFilename(); - if (this_rank == 0) { - std::ofstream cfg_file_{config_file.c_str(), std::ofstream::out | std::ofstream::trunc}; - cfg_file_ << "0 TemperedLB transfer=" << transfer_strategy << - " alpha=" << alpha << - " beta=" << beta << - " gamma=" << gamma << - " delta=" << delta; - if (transfer_strategy == "SwapClusters") { - if (mem_constraints) { - cfg_file_ << " memory_threshold=20.0"; - } else { - cfg_file_ << " memory_threshold=1e8"; - } +std::string writeTemperedLBConfig( + std::string transfer_strategy, bool mem_constraints, double alpha = 1.0, + double beta = 0.0, double gamma = 0.0, double delta = 0.0 +) { + auto const this_node = theContext()->getNode(); + auto config_file = getUniqueFilename(); + if (this_node == 0) { + std::ofstream cfg_file_{config_file.c_str(),
std::ofstream::out | std::ofstream::trunc}; + cfg_file_ << "0 TemperedLB transfer=" << transfer_strategy << + " alpha=" << alpha << + " beta=" << beta << + " gamma=" << gamma << + " delta=" << delta; + if (transfer_strategy == "SwapClusters") { + if (mem_constraints) { + cfg_file_ << " memory_threshold=20.0"; + } else { + cfg_file_ << " memory_threshold=1e8"; + } } - cfg_file_.close(); - } - return config_file; + cfg_file_.close(); + } + return config_file; } void runTemperedLBTest(std::string config_file, double expected_imb = 0.0) { - // Clear the LB config - vrt::collection::balance::ReadLBConfig::clear(); - - // Set configuration - theConfig()->vt_lb = true; - theConfig()->vt_lb_data_in = true; - theConfig()->vt_lb_file_name = config_file; - theConfig()->vt_lb_data_file_in="synthetic-dataset-blocks.%p.json"; - theConfig()->vt_lb_data_dir_in="synthetic-blocks-data"; - - // Replay load balancing - int initial_phase = 0; - int phases_to_run = 1; - int phase_mod = 0; - vt::vrt::collection::balance::replay::replayWorkloads( - initial_phase, phases_to_run, phase_mod); - - // Get information for the last phase - auto phase_info = theLBManager()->getPhaseInfo(); - - // Assert that temperedLB found the correct imbalance - EXPECT_EQ(phase_info->imb_load_post_lb, expected_imb); + // Clear the LB config + vrt::collection::balance::ReadLBConfig::clear(); + + // Set configuration + theConfig()->vt_lb = true; + theConfig()->vt_lb_data_in = true; + theConfig()->vt_lb_file_name = config_file; + theConfig()->vt_lb_data_file_in="synthetic-dataset-blocks.%p.json"; + theConfig()->vt_lb_data_dir_in="synthetic-blocks-data"; + + // Replay load balancing + int initial_phase = 0; + int phases_to_run = 1; + int phase_mod = 0; + vt::vrt::collection::balance::replay::replayWorkloads( + initial_phase, phases_to_run, phase_mod + ); + + // Get information for the last phase + auto phase_info = theLBManager()->getPhaseInfo(); + + // Assert that temperedLB found the correct imbalance + EXPECT_EQ(phase_info->imb_load_post_lb, expected_imb); } // The following tests use expected values found by the MILP TEST_F(TestTemperedLB, test_load_only_original_transfer) { - SET_NUM_NODES_CONSTRAINT(4); - auto cfg = writeTemperedLBConfig("Original", false); - runTemperedLBTest(cfg); + SET_NUM_NODES_CONSTRAINT(4); + auto cfg = writeTemperedLBConfig("Original", false); + runTemperedLBTest(cfg); } TEST_F(TestTemperedLB, test_load_only_swapclusters) { - SET_NUM_NODES_CONSTRAINT(4); - auto cfg = writeTemperedLBConfig("SwapClusters", false); - runTemperedLBTest(cfg, 0.25); + SET_NUM_NODES_CONSTRAINT(4); + auto cfg = writeTemperedLBConfig("SwapClusters", false); + runTemperedLBTest(cfg, 0.25); } TEST_F(TestTemperedLB, test_load_and_memory_swapclusters) { - SET_NUM_NODES_CONSTRAINT(4); - auto cfg = writeTemperedLBConfig("SwapClusters", true); - runTemperedLBTest(cfg, 0.25); + SET_NUM_NODES_CONSTRAINT(4); + auto cfg = writeTemperedLBConfig("SwapClusters", true); + runTemperedLBTest(cfg, 0.25); } TEST_F(TestTemperedLB, test_load_no_memory_delta_10) { - SET_NUM_NODES_CONSTRAINT(4); - auto cfg = writeTemperedLBConfig("SwapClusters", false, 1, 0, 0, 1); - runTemperedLBTest(cfg, 1.0); + SET_NUM_NODES_CONSTRAINT(4); + auto cfg = writeTemperedLBConfig("SwapClusters", false, 1, 0, 0, 1); + runTemperedLBTest(cfg, 1.0); } TEST_F(TestTemperedLB, test_load_no_memory_delta_01) { - SET_NUM_NODES_CONSTRAINT(4); - auto cfg = writeTemperedLBConfig("SwapClusters", false, 1, 0, 0, 0.1); - runTemperedLBTest(cfg, 0.25); + SET_NUM_NODES_CONSTRAINT(4); + auto cfg 
= writeTemperedLBConfig("SwapClusters", false, 1, 0, 0, 0.1); + runTemperedLBTest(cfg, 0.25); } TEST_F(TestTemperedLB, test_load_memory_delta_01) { - SET_NUM_NODES_CONSTRAINT(4); - auto cfg = writeTemperedLBConfig("SwapClusters", true, 1, 0, 0, 0.1); - runTemperedLBTest(cfg, 0.25); + SET_NUM_NODES_CONSTRAINT(4); + auto cfg = writeTemperedLBConfig("SwapClusters", true, 1, 0, 0, 0.1); + runTemperedLBTest(cfg, 0.25); } TEST_F(TestTemperedLB, test_load_no_memory_delta_03) { - SET_NUM_NODES_CONSTRAINT(4); - auto cfg = writeTemperedLBConfig("SwapClusters", false, 1, 0, 0, 0.3); - runTemperedLBTest(cfg, 1.0); + SET_NUM_NODES_CONSTRAINT(4); + auto cfg = writeTemperedLBConfig("SwapClusters", false, 1, 0, 0, 0.3); + runTemperedLBTest(cfg, 1.0); } TEST_F(TestTemperedLB, test_load_memory_delta_03) { - SET_NUM_NODES_CONSTRAINT(4); - auto cfg = writeTemperedLBConfig("SwapClusters", true, 1, 0, 0, 0.3); - runTemperedLBTest(cfg, 1.0); + SET_NUM_NODES_CONSTRAINT(4); + auto cfg = writeTemperedLBConfig("SwapClusters", true, 1, 0, 0, 0.3); + runTemperedLBTest(cfg, 1.0); } #endif From 8504c33579273187c7a6b08d3e6166bc76c51c71 Mon Sep 17 00:00:00 2001 From: Caleb Schilly Date: Mon, 16 Sep 2024 16:22:54 -0400 Subject: [PATCH 124/126] #2201: fix remaining review comments; loosen collection_id requirement in schema --- scripts/LBDatafile_schema.py | 2 +- src/vt/vrt/collection/balance/temperedlb/temperedlb.cc | 8 -------- tests/unit/collection/test_lb.extended.cc | 4 ++-- tests/unit/lb/test_temperedlb.cc | 6 +++--- 4 files changed, 6 insertions(+), 14 deletions(-) diff --git a/scripts/LBDatafile_schema.py b/scripts/LBDatafile_schema.py index d11b64fa0e..9dca0276c5 100644 --- a/scripts/LBDatafile_schema.py +++ b/scripts/LBDatafile_schema.py @@ -8,7 +8,7 @@ def validate_ids(field): if 'seq_id' not in field and 'id' not in field: raise ValueError('Either id (bit-encoded) or seq_id must be provided.') - if field['migratable'] and 'collection_id' not in field: + if field['migratable'] and 'seq_id' in field and 'collection_id' not in field: raise ValueError('If an entity is migratable, it must have a collection_id') return field diff --git a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc index dcb58737dd..b0d73651ce 100644 --- a/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc +++ b/src/vt/vrt/collection/balance/temperedlb/temperedlb.cc @@ -2222,14 +2222,6 @@ auto TemperedLB::removeClusterToSend( ); } -bool TemperedLB::memoryTransferCriterion(double try_total_bytes, double src_bytes) { - // FIXME: incomplete implementation that ignores memory regrouping - auto const src_after_mem = this->current_memory_usage_; - auto const try_after_mem = try_total_bytes + src_bytes; - - return not (src_after_mem > this->mem_thresh_ or try_after_mem > this->mem_thresh_); -} - double TemperedLB::loadTransferCriterion( double before_w_src, double before_w_dst, double after_w_src, double after_w_dst diff --git a/tests/unit/collection/test_lb.extended.cc b/tests/unit/collection/test_lb.extended.cc index 626b31d764..15511d158b 100644 --- a/tests/unit/collection/test_lb.extended.cc +++ b/tests/unit/collection/test_lb.extended.cc @@ -152,7 +152,7 @@ TEST_P(TestLoadBalancerOther, test_load_balancer_other_keep_last_elm) { TEST_P(TestLoadBalancerOther, test_load_balancer_other_run_lb_first_phase) { vt::theConfig()->vt_lb_run_lb_first_phase = true; - runTest(GetParam(), "test_load_balancer_other_keep_last_elm"); + runTest(GetParam(), 
"test_load_balancer_other_run_lb_first_phase"); } TEST_P(TestLoadBalancerGreedy, test_load_balancer_greedy_2) { @@ -166,7 +166,7 @@ TEST_P(TestLoadBalancerGreedy, test_load_balancer_greedy_keep_last_elm) { TEST_P(TestLoadBalancerGreedy, test_load_balancer_greedy_run_lb_first_phase) { vt::theConfig()->vt_lb_run_lb_first_phase = true; - runTest(GetParam(), "test_load_balancer_greedy_keep_last_elm"); + runTest(GetParam(), "test_load_balancer_greedy_run_lb_first_phase"); } TEST_F(TestLoadBalancerOther, test_make_graph_symmetric) { diff --git a/tests/unit/lb/test_temperedlb.cc b/tests/unit/lb/test_temperedlb.cc index 1ec5950acf..a8f3cb3788 100644 --- a/tests/unit/lb/test_temperedlb.cc +++ b/tests/unit/lb/test_temperedlb.cc @@ -19,7 +19,7 @@ std::string writeTemperedLBConfig( auto config_file = getUniqueFilename(); if (this_node == 0) { std::ofstream cfg_file_{config_file.c_str(), std::ofstream::out | std::ofstream::trunc}; - cfg_file_ << "0 TemperedLB transfer=" << transfer_strategy << + cfg_file_ << "0 TemperedLB iters=10 transfer=" << transfer_strategy << " alpha=" << alpha << " beta=" << beta << " gamma=" << gamma << @@ -30,7 +30,7 @@ std::string writeTemperedLBConfig( } else { cfg_file_ << " memory_threshold=1e8"; } - } + } cfg_file_.close(); } return config_file; @@ -64,7 +64,7 @@ void runTemperedLBTest(std::string config_file, double expected_imb = 0.0) { // The following tests use expected values found by the MILP -TEST_F(TestTemperedLB, test_load_only_original_transfer) { +TEST_F(TestTemperedLB, test_load_only_original) { SET_NUM_NODES_CONSTRAINT(4); auto cfg = writeTemperedLBConfig("Original", false); runTemperedLBTest(cfg); From 3a7d7073fa85d2dcf17cdb0d7fd7522bc6d5aeba Mon Sep 17 00:00:00 2001 From: Caleb Schilly Date: Mon, 16 Sep 2024 16:32:32 -0400 Subject: [PATCH 125/126] #2201: pass memory_threshold to config generator --- tests/unit/lb/test_temperedlb.cc | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/tests/unit/lb/test_temperedlb.cc b/tests/unit/lb/test_temperedlb.cc index a8f3cb3788..5c591fd82d 100644 --- a/tests/unit/lb/test_temperedlb.cc +++ b/tests/unit/lb/test_temperedlb.cc @@ -12,7 +12,7 @@ namespace vt { namespace tests { namespace unit { namespace lb { using TestTemperedLB = TestParallelHarness; std::string writeTemperedLBConfig( - std::string transfer_strategy, bool mem_constraints, double alpha = 1.0, + std::string transfer_strategy, double memory_threshold, double alpha = 1.0, double beta = 0.0, double gamma = 0.0, double delta = 0.0 ) { auto const this_node = theContext()->getNode(); @@ -25,11 +25,7 @@ std::string writeTemperedLBConfig( " gamma=" << gamma << " delta=" << delta; if (transfer_strategy == "SwapClusters") { - if (mem_constraints) { - cfg_file_ << " memory_threshold=20.0"; - } else { - cfg_file_ << " memory_threshold=1e8"; - } + cfg_file_ << " memory_threshold=" << memory_threshold; } cfg_file_.close(); } @@ -66,49 +62,49 @@ void runTemperedLBTest(std::string config_file, double expected_imb = 0.0) { TEST_F(TestTemperedLB, test_load_only_original) { SET_NUM_NODES_CONSTRAINT(4); - auto cfg = writeTemperedLBConfig("Original", false); + auto cfg = writeTemperedLBConfig("Original", 1e8); runTemperedLBTest(cfg); } TEST_F(TestTemperedLB, test_load_only_swapclusters) { SET_NUM_NODES_CONSTRAINT(4); - auto cfg = writeTemperedLBConfig("SwapClusters", false); + auto cfg = writeTemperedLBConfig("SwapClusters", 1e8); runTemperedLBTest(cfg, 0.25); } TEST_F(TestTemperedLB, test_load_and_memory_swapclusters) { 
SET_NUM_NODES_CONSTRAINT(4); - auto cfg = writeTemperedLBConfig("SwapClusters", true); + auto cfg = writeTemperedLBConfig("SwapClusters", 20); runTemperedLBTest(cfg, 0.25); } TEST_F(TestTemperedLB, test_load_no_memory_delta_10) { SET_NUM_NODES_CONSTRAINT(4); - auto cfg = writeTemperedLBConfig("SwapClusters", false, 1, 0, 0, 1); + auto cfg = writeTemperedLBConfig("SwapClusters", 1e8, 1, 0, 0, 1); runTemperedLBTest(cfg, 1.0); } TEST_F(TestTemperedLB, test_load_no_memory_delta_01) { SET_NUM_NODES_CONSTRAINT(4); - auto cfg = writeTemperedLBConfig("SwapClusters", false, 1, 0, 0, 0.1); + auto cfg = writeTemperedLBConfig("SwapClusters", 1e8, 1, 0, 0, 0.1); runTemperedLBTest(cfg, 0.25); } TEST_F(TestTemperedLB, test_load_memory_delta_01) { SET_NUM_NODES_CONSTRAINT(4); - auto cfg = writeTemperedLBConfig("SwapClusters", true, 1, 0, 0, 0.1); + auto cfg = writeTemperedLBConfig("SwapClusters", 20, 1, 0, 0, 0.1); runTemperedLBTest(cfg, 0.25); } TEST_F(TestTemperedLB, test_load_no_memory_delta_03) { SET_NUM_NODES_CONSTRAINT(4); - auto cfg = writeTemperedLBConfig("SwapClusters", false, 1, 0, 0, 0.3); + auto cfg = writeTemperedLBConfig("SwapClusters", 1e8, 1, 0, 0, 0.3); runTemperedLBTest(cfg, 1.0); } TEST_F(TestTemperedLB, test_load_memory_delta_03) { SET_NUM_NODES_CONSTRAINT(4); - auto cfg = writeTemperedLBConfig("SwapClusters", true, 1, 0, 0, 0.3); + auto cfg = writeTemperedLBConfig("SwapClusters", 20, 1, 0, 0, 0.3); runTemperedLBTest(cfg, 1.0); } From a83c66a62ec75a8d203fc5c6be5678405be63b0c Mon Sep 17 00:00:00 2001 From: Caleb Schilly Date: Tue, 17 Sep 2024 09:37:54 -0400 Subject: [PATCH 126/126] #2201: run tests with three trials --- tests/unit/lb/test_temperedlb.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/lb/test_temperedlb.cc b/tests/unit/lb/test_temperedlb.cc index 5c591fd82d..bb9dd2e652 100644 --- a/tests/unit/lb/test_temperedlb.cc +++ b/tests/unit/lb/test_temperedlb.cc @@ -19,7 +19,7 @@ std::string writeTemperedLBConfig( auto config_file = getUniqueFilename(); if (this_node == 0) { std::ofstream cfg_file_{config_file.c_str(), std::ofstream::out | std::ofstream::trunc}; - cfg_file_ << "0 TemperedLB iters=10 transfer=" << transfer_strategy << + cfg_file_ << "0 TemperedLB iters=10 trials=3 transfer=" << transfer_strategy << " alpha=" << alpha << " beta=" << beta << " gamma=" << gamma <<