From 74af51181f9f0ea0f81567a47920a22f8343ef61 Mon Sep 17 00:00:00 2001 From: Caleb Schilly Date: Fri, 10 Jan 2025 12:16:14 -0500 Subject: [PATCH] #2388: add lb_stats_freq flag and mark output with a trace event --- src/vt/collective/collective_ops.cc | 1 + src/vt/configs/arguments/app_config.h | 2 ++ src/vt/configs/arguments/args.cc | 6 ++++++ src/vt/runtime/runtime_banner.cc | 5 +++++ .../collection/balance/lb_invoke/lb_manager.cc | 15 +++++++++++++-- tests/unit/runtime/test_initialization.cc | 1 + 6 files changed, 28 insertions(+), 2 deletions(-) diff --git a/src/vt/collective/collective_ops.cc b/src/vt/collective/collective_ops.cc index 83e03ce9dc..09d897abc5 100644 --- a/src/vt/collective/collective_ops.cc +++ b/src/vt/collective/collective_ops.cc @@ -140,6 +140,7 @@ void printOverwrittens( printIfOverwritten(vt_lb_statistics_compress); printIfOverwritten(vt_lb_statistics_file); printIfOverwritten(vt_lb_statistics_dir); + printIfOverwritten(vt_lb_statistics_freq); printIfOverwritten(vt_lb_self_migration); printIfOverwritten(vt_help_lb_args); printIfOverwritten(vt_no_detect_hang); diff --git a/src/vt/configs/arguments/app_config.h b/src/vt/configs/arguments/app_config.h index 6548dc2fd1..e165cf6383 100644 --- a/src/vt/configs/arguments/app_config.h +++ b/src/vt/configs/arguments/app_config.h @@ -156,6 +156,7 @@ struct AppConfig { bool vt_lb_statistics_compress = true; std::string vt_lb_statistics_file = "vt_lb_statistics.%t.json"; std::string vt_lb_statistics_dir = ""; + int64_t vt_lb_statistics_freq = 100; bool vt_help_lb_args = false; bool vt_lb_self_migration = false; bool vt_lb_spec = false; @@ -334,6 +335,7 @@ struct AppConfig { | vt_lb_statistics_compress | vt_lb_statistics_file | vt_lb_statistics_dir + | vt_lb_statistics_freq | vt_help_lb_args | vt_lb_self_migration diff --git a/src/vt/configs/arguments/args.cc b/src/vt/configs/arguments/args.cc index 2cde3f1710..1a797ab73e 100644 --- a/src/vt/configs/arguments/args.cc +++ 
b/src/vt/configs/arguments/args.cc @@ -169,6 +169,7 @@ static const std::string vt_lb_statistics_label = "Enabled"; static const std::string vt_lb_statistics_compress_label = "Enable Compression"; static const std::string vt_lb_statistics_file_label = "File"; static const std::string vt_lb_statistics_dir_label = "Directory"; +static const std::string vt_lb_statistics_freq_label = "Frequency"; static const std::string vt_lb_self_migration_label = "Enable Self Migration"; static const std::string vt_lb_spec_label = "Enable Specification"; static const std::string vt_lb_spec_file_label = "Specification File"; @@ -577,6 +578,7 @@ void parseYaml(AppConfig& appConfig, std::string const& inputFile) { update_config(appConfig.vt_lb_statistics_compress, vt_lb_statistics_compress_label, lb_stats); update_config(appConfig.vt_lb_statistics_file, vt_lb_statistics_file_label, lb_stats); update_config(appConfig.vt_lb_statistics_dir, vt_lb_statistics_dir_label, lb_stats); + update_config(appConfig.vt_lb_statistics_freq, vt_lb_statistics_freq_label, lb_stats); // Diagnostics YAML::Node diagnostics = yaml_input["Diagnostics"]; @@ -919,6 +921,7 @@ void addLbArgs(CLI::App& app, AppConfig& appConfig) { auto lb_statistics_comp = "Compress load balancing statistics file with brotli"; auto lb_statistics_file = "Load balancing statistics output file name"; auto lb_statistics_dir = "Load balancing statistics output directory name"; + auto lb_statistics_freq = "Number of phases between load balancing statistics output"; auto lb_self_migration = "Allow load balancer to migrate objects to the same node"; auto lb_spec = "Enable LB spec file (defines which phases output LB data)"; auto lb_spec_file = "File containing LB spec; --vt_lb_spec to enable"; @@ -942,6 +945,7 @@ void addLbArgs(CLI::App& app, AppConfig& appConfig) { auto yy = app.add_flag("--vt_lb_statistics_compress", appConfig.vt_lb_statistics_compress, lb_statistics_comp); auto yz = app.add_option("--vt_lb_statistics_file", 
appConfig.vt_lb_statistics_file, lb_statistics_file)->capture_default_str(); auto zz = app.add_option("--vt_lb_statistics_dir", appConfig.vt_lb_statistics_dir, lb_statistics_dir)->capture_default_str(); + auto zy = app.add_option("--vt_lb_statistics_freq", appConfig.vt_lb_statistics_freq, lb_statistics_freq); auto lbasm = app.add_flag("--vt_lb_self_migration", appConfig.vt_lb_self_migration, lb_self_migration); auto lbspec = app.add_flag("--vt_lb_spec", appConfig.vt_lb_spec, lb_spec); auto lbspecfile = app.add_option("--vt_lb_spec_file", appConfig.vt_lb_spec_file, lb_spec_file)->capture_default_str()->check(CLI::ExistingFile); @@ -971,6 +975,7 @@ void addLbArgs(CLI::App& app, AppConfig& appConfig) { yy->group(debugLB); yz->group(debugLB); zz->group(debugLB); + zy->group(debugLB); lbasm->group(debugLB); lbspec->group(debugLB); lbspecfile->group(debugLB); @@ -1283,6 +1288,7 @@ std::string convertConfigToYamlString(AppConfig& appConfig) { {"Load Balancing/LB Statistics", vt_lb_statistics_compress_label, static_cast(appConfig.vt_lb_statistics_compress)}, {"Load Balancing/LB Statistics", vt_lb_statistics_file_label, static_cast(appConfig.vt_lb_statistics_file)}, {"Load Balancing/LB Statistics", vt_lb_statistics_dir_label, static_cast(appConfig.vt_lb_statistics_dir)}, + {"Load Balancing/LB Statistics", vt_lb_statistics_freq_label, static_cast(appConfig.vt_lb_statistics_freq)}, {"Load Balancing", vt_lb_self_migration_label, static_cast(appConfig.vt_lb_self_migration)}, {"Load Balancing", vt_lb_spec_label, static_cast(appConfig.vt_lb_spec)}, {"Load Balancing", vt_lb_spec_file_label, static_cast(appConfig.vt_lb_spec_file)}, diff --git a/src/vt/runtime/runtime_banner.cc b/src/vt/runtime/runtime_banner.cc index b42cc59c52..f975695ac9 100644 --- a/src/vt/runtime/runtime_banner.cc +++ b/src/vt/runtime/runtime_banner.cc @@ -381,6 +381,11 @@ void Runtime::printStartupBanner() { auto f12 = opt_on("--vt_lb_statistics_file", f11); fmt::print("{}\t{}{}", vt_pre, f12, reset); } + + 
auto f13 = opt_on_value("--vt_lb_statistics_freq", + std::to_string(getAppConfig()->vt_lb_statistics_freq), + "Phases between LB statistics output"); + fmt::print("{}\t{}{}", vt_pre, f13, reset); } #if !vt_check_enabled(trace_enabled) diff --git a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc index ce20121e22..bb4cd1bdd0 100644 --- a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc +++ b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc @@ -658,11 +658,18 @@ void LBManager::stagePostLBStatistics( } void LBManager::commitPhaseStatistics(PhaseType phase) { - // Statistics output when LB is enabled and appropriate flag is enabled - if (theContext()->getNode() != 0 or !theConfig()->vt_lb_statistics) { + // Output only on node 0, when vt_lb_statistics is set, and on phases that are + // a multiple of vt_lb_statistics_freq; freq <= 1 means every phase (no % by 0) + auto const freq = theConfig()->vt_lb_statistics_freq; + if (theContext()->getNode() != 0 or !theConfig()->vt_lb_statistics + or (freq > 1 and phase % freq != 0)) { return; } + #if vt_check_enabled(trace_enabled) + theTrace()->addUserEventBracketedBegin(1); + #endif + vt_debug_print( terse, lb, "LBManager::outputStatisticsForPhase: phase={}\n", phase @@ -679,6 +686,10 @@ void LBManager::commitPhaseStatistics(PhaseType phase) { auto writer = static_cast(statistics_writer_.get()); writer->stageObject(j); writer->commitStaged(); + + #if vt_check_enabled(trace_enabled) + theTrace()->addUserEventBracketedEnd(1); + #endif } balance::LoadData reduceVec( diff --git a/tests/unit/runtime/test_initialization.cc b/tests/unit/runtime/test_initialization.cc index 132321fe2e..e3d35b7e1f 100644 --- a/tests/unit/runtime/test_initialization.cc +++ b/tests/unit/runtime/test_initialization.cc @@ -457,6 +457,7 @@ TEST_F(TestInitialization, test_initialize_with_yaml) { EXPECT_EQ(theConfig()->vt_lb_statistics_compress, true); EXPECT_EQ(theConfig()->vt_lb_statistics_file, "vt_lb_statistics.%t.json"); 
EXPECT_EQ(theConfig()->vt_lb_statistics_dir, ""); + EXPECT_EQ(theConfig()->vt_lb_statistics_freq, 100); EXPECT_EQ(theConfig()->vt_lb_self_migration, false); EXPECT_EQ(theConfig()->vt_lb_spec, false); EXPECT_EQ(theConfig()->vt_lb_spec_file, "");