From 1477d502faeea366a7c7020c4863ba23b4bc9381 Mon Sep 17 00:00:00 2001 From: Nicole Lemaster Slattengren Date: Tue, 14 Jun 2022 11:22:49 -0700 Subject: [PATCH 1/3] #1850: allow setting dir where lb statistics are dumped --- src/vt/collective/collective_ops.cc | 1 + src/vt/configs/arguments/app_config.h | 2 ++ src/vt/configs/arguments/args.cc | 8 ++++++++ src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc | 7 +++++++ src/vt/vrt/collection/balance/lb_invoke/lb_manager.h | 5 ++++- 5 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/vt/collective/collective_ops.cc b/src/vt/collective/collective_ops.cc index ff550f8acb..f24c680ab3 100644 --- a/src/vt/collective/collective_ops.cc +++ b/src/vt/collective/collective_ops.cc @@ -140,6 +140,7 @@ void printOverwrittens( printIfOverwritten(vt_lb_statistics); printIfOverwritten(vt_lb_statistics_compress); printIfOverwritten(vt_lb_statistics_file); + printIfOverwritten(vt_lb_statistics_dir); printIfOverwritten(vt_lb_self_migration); printIfOverwritten(vt_help_lb_args); printIfOverwritten(vt_no_detect_hang); diff --git a/src/vt/configs/arguments/app_config.h b/src/vt/configs/arguments/app_config.h index 7aef235612..29d99cf591 100644 --- a/src/vt/configs/arguments/app_config.h +++ b/src/vt/configs/arguments/app_config.h @@ -152,6 +152,7 @@ struct AppConfig { bool vt_lb_statistics = true; bool vt_lb_statistics_compress = true; std::string vt_lb_statistics_file = "vt_lb_statistics.%t.json"; + std::string vt_lb_statistics_dir = ""; bool vt_help_lb_args = false; bool vt_lb_self_migration = false; @@ -322,6 +323,7 @@ struct AppConfig { | vt_lb_statistics | vt_lb_statistics_compress | vt_lb_statistics_file + | vt_lb_statistics_dir | vt_help_lb_args | vt_lb_self_migration diff --git a/src/vt/configs/arguments/args.cc b/src/vt/configs/arguments/args.cc index d3d5551f13..c3b2ded168 100644 --- a/src/vt/configs/arguments/args.cc +++ b/src/vt/configs/arguments/args.cc @@ -482,6 +482,7 @@ void addLbArgs(CLI::App& app, AppConfig& appConfig) { auto lb_statistics = "Dump load balancing statistics to file"; auto lb_statistics_comp = "Compress load balancing statistics file with brotli"; auto lb_statistics_file = "Load balancing statistics output file name"; + auto lb_statistics_dir = "Load balancing statistics output directory name"; auto lb_self_migration = "Allow load balancer to migrate objects to the same node"; auto lbn = "NoLB"; auto lbi = 1; @@ -490,6 +491,7 @@ void addLbArgs(CLI::App& app, AppConfig& appConfig) { auto lbs = "data"; auto lba = ""; auto lbq = "vt_lb_statistics.%t.json"; + auto lbqq = ""; auto s = app.add_flag("--vt_lb", appConfig.vt_lb, lb); auto t1 = app.add_flag("--vt_lb_quiet", appConfig.vt_lb_quiet, lb_quiet); auto u = app.add_option("--vt_lb_file_name", appConfig.vt_lb_file_name, lb_file_name, lbf)->check(CLI::ExistingFile); @@ -507,6 +509,7 @@ void addLbArgs(CLI::App& app, AppConfig& appConfig) { auto yx = app.add_flag("--vt_lb_statistics", appConfig.vt_lb_statistics, lb_statistics); auto yy = app.add_flag("--vt_lb_statistics_compress", appConfig.vt_lb_statistics_compress, lb_statistics_comp); auto yz = app.add_option("--vt_lb_statistics_file", appConfig.vt_lb_statistics_file, lb_statistics_file,lbq); + auto zz = app.add_option("--vt_lb_statistics_dir", appConfig.vt_lb_statistics_dir, lb_statistics_dir,lbqq); auto lbasm = app.add_flag("--vt_lb_self_migration", appConfig.vt_lb_self_migration, lb_self_migration); auto debugLB = "Load Balancing"; @@ -527,6 +530,7 @@ void addLbArgs(CLI::App& app, AppConfig& appConfig) { yx->group(debugLB); yy->group(debugLB); yz->group(debugLB); + zz->group(debugLB); lbasm->group(debugLB); // help options deliberately omitted from the debugLB group above so that @@ -836,6 +840,10 @@ std::string AppConfig::getLBDataFileIn() const { std::string AppConfig::getLBStatisticsFile() const { std::string name = vt_lb_statistics_file; + std::string dir = vt_lb_statistics_dir; + if (dir.size() > 0) { + name = dir + "/" + name; + } std::size_t timestamp = name.find("%t"); if (timestamp != std::string::npos) { std::time_t t = std::time(nullptr); diff --git a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc index 597a9e8c18..b2a179037c 100644 --- a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc +++ b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc @@ -717,6 +717,13 @@ void LBManager::createStatisticsFile() { "LBManager::createStatsFile: file={}\n", file_name ); + auto const dir = theConfig()->vt_lb_statistics_dir; + // Node 0 creates the directory + if (not created_lbstats_dir_ and theContext()->getNode() == 0) { + mkdir(dir.c_str(), S_IRWXU); + created_lbstats_dir_ = true; + } + using JSONAppender = util::json::Appender; if (not statistics_writer_) { diff --git a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.h b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.h index 2ff1df80a2..03eb63c751 100644 --- a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.h +++ b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.h @@ -212,7 +212,8 @@ struct LBManager : runtime::component::Component { | base_model_ | model_ | lb_instances_ - | stats; + | stats + | created_lbstats_dir_; } void stagePreLBStatistics(const StatisticMapType &statistics); @@ -291,6 +292,8 @@ struct LBManager : runtime::component::Component { bool before_lb_stats_ = true; /// The appender for outputting statistics in JSON format std::unique_ptr statistics_writer_ = nullptr; + /// Whether the LB statistics directory has been created + bool created_lbstats_dir_ = false; }; void makeGraphSymmetric( From e61221850dbd51272abcb99a10e9dab00fed364d Mon Sep 17 00:00:00 2001 From: Nicole Lemaster Slattengren Date: Wed, 15 Jun 2022 11:46:49 -0700 Subject: [PATCH 2/3] #1850: check for no directory specified --- src/vt/configs/arguments/args.cc | 2 +- src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/vt/configs/arguments/args.cc b/src/vt/configs/arguments/args.cc index c3b2ded168..0126b08a97 100644 --- a/src/vt/configs/arguments/args.cc +++ b/src/vt/configs/arguments/args.cc @@ -841,7 +841,7 @@ std::string AppConfig::getLBDataFileIn() const { std::string AppConfig::getLBStatisticsFile() const { std::string name = vt_lb_statistics_file; std::string dir = vt_lb_statistics_dir; - if (dir.size() > 0) { + if (not dir.empty()) { name = dir + "/" + name; } std::size_t timestamp = name.find("%t"); diff --git a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc index b2a179037c..0ffc454090 100644 --- a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc +++ b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc @@ -719,7 +719,10 @@ void LBManager::createStatisticsFile() { auto const dir = theConfig()->vt_lb_statistics_dir; // Node 0 creates the directory - if (not created_lbstats_dir_ and theContext()->getNode() == 0) { + if ( + theContext()->getNode() == 0 and + not dir.empty() and not created_lbstats_dir_ + ) { mkdir(dir.c_str(), S_IRWXU); created_lbstats_dir_ = true; } From 5e5a6ca4a21a7c48c54d5ee0c54a7b6e11da4079 Mon Sep 17 00:00:00 2001 From: Nicole Lemaster Slattengren Date: Wed, 15 Jun 2022 11:47:08 -0700 Subject: [PATCH 3/3] #1850: check result of mkdir operation --- src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc | 5 ++++- src/vt/vrt/collection/balance/node_lb_data.cc | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc index 0ffc454090..752d81d4a2 100644 --- a/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc +++ b/src/vt/vrt/collection/balance/lb_invoke/lb_manager.cc @@ -723,7 +723,10 @@ void LBManager::createStatisticsFile() { theContext()->getNode() == 0 and not dir.empty() and not created_lbstats_dir_ ) { - mkdir(dir.c_str(), S_IRWXU); + int flag = mkdir(dir.c_str(), S_IRWXU); + if (flag < 0 && errno != EEXIST) { + throw std::runtime_error("Failed to create directory: " + dir); + } created_lbstats_dir_ = true; } diff --git a/src/vt/vrt/collection/balance/node_lb_data.cc b/src/vt/vrt/collection/balance/node_lb_data.cc index b7c2cb5746..64d872be3c 100644 --- a/src/vt/vrt/collection/balance/node_lb_data.cc +++ b/src/vt/vrt/collection/balance/node_lb_data.cc @@ -155,7 +155,10 @@ void NodeLBData::createLBDataFile() { auto const dir = theConfig()->vt_lb_data_dir; // Node 0 creates the directory if (not created_dir_ and theContext()->getNode() == 0) { - mkdir(dir.c_str(), S_IRWXU); + int flag = mkdir(dir.c_str(), S_IRWXU); + if (flag < 0 && errno != EEXIST) { + throw std::runtime_error("Failed to create directory: " + dir); + } created_dir_ = true; }