Skip to content

Commit

Permalink
Merge pull request #281 from darma-mpi-backend/feature-280-lb-sim-out…
Browse files Browse the repository at this point in the history
…put-only
  • Loading branch information
lifflander authored Feb 27, 2019
2 parents 10bd87f + ea1e42a commit 82fb410
Show file tree
Hide file tree
Showing 12 changed files with 191 additions and 23 deletions.
34 changes: 24 additions & 10 deletions src/vt/configs/arguments/args.cc
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ namespace vt { namespace arguments {
/*static*/ std::string ArgConfig::vt_lb_file_name = "balance.in";
/*static*/ std::string ArgConfig::vt_lb_name = "NoLB";
/*static*/ int32_t ArgConfig::vt_lb_interval = 1;
/*static*/ bool ArgConfig::vt_lb_stats = false;
/*static*/ std::string ArgConfig::vt_lb_stats_dir = "vt_lb_stats";
/*static*/ std::string ArgConfig::vt_lb_stats_file = "stats";

/*static*/ bool ArgConfig::vt_no_detect_hang = false;
/*static*/ int64_t ArgConfig::vt_hang_freq = 1024;
Expand Down Expand Up @@ -324,25 +327,36 @@ namespace vt { namespace arguments {
* Flags for enabling load balancing and configuring it
*/

auto lb = "Enable load balancing";
auto lb_file = "Enable reading LB configuration from file";
auto lb_file_name = "LB configuration file to read";
auto lb_name = "Name of the load balancer to use";
auto lb_interval = "Load balancing interval";
auto lb = "Enable load balancing";
auto lb_file = "Enable reading LB configuration from file";
auto lb_file_name = "LB configuration file to read";
auto lb_name = "Name of the load balancer to use";
auto lb_interval = "Load balancing interval";
auto lb_stats = "Enable load balancing statistics";
auto lb_stats_dir = "Load balancing statistics output directory";
auto lb_stats_file = "Load balancing statistics output file name";
auto lbn = "NoLB";
auto lbi = 1;
auto lbf = "balance.in";
auto s = app.add_flag("--vt_lb", vt_lb, lb);
auto t = app.add_flag("--vt_lb_file", vt_lb_file, lb_file);
auto u = app.add_option("--vt_lb_file_name", vt_lb_file_name, lb_file_name, lbf);
auto v = app.add_option("--vt_lb_name", vt_lb_name, lb_name, lbn);
auto w = app.add_option("--vt_lb_interval", vt_lb_interval, lb_interval, lbi);
auto lbd = "vt_lb_stats";
auto lbs = "stats";
auto s = app.add_flag("--vt_lb", vt_lb, lb);
auto t = app.add_flag("--vt_lb_file", vt_lb_file, lb_file);
auto u = app.add_option("--vt_lb_file_name", vt_lb_file_name, lb_file_name, lbf);
auto v = app.add_option("--vt_lb_name", vt_lb_name, lb_name, lbn);
auto w = app.add_option("--vt_lb_interval", vt_lb_interval, lb_interval, lbi);
auto ww = app.add_flag("--vt_lb_stats", vt_lb_stats, lb_stats);
auto wx = app.add_option("--vt_lb_stats_dir", vt_lb_stats_dir, lb_stats_dir, lbd);
auto wy = app.add_option("--vt_lb_stats_file", vt_lb_stats_file, lb_stats_file,lbs);
auto debugLB = "Load Balancing";
s->group(debugLB);
t->group(debugLB);
u->group(debugLB);
v->group(debugLB);
w->group(debugLB);
ww->group(debugLB);
wx->group(debugLB);
wy->group(debugLB);

/*
* Flags for controlling termination
Expand Down
3 changes: 3 additions & 0 deletions src/vt/configs/arguments/args.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,9 @@ struct ArgConfig {
static std::string vt_lb_file_name;
static std::string vt_lb_name;
static int32_t vt_lb_interval;
static bool vt_lb_stats;
static std::string vt_lb_stats_dir;
static std::string vt_lb_stats_file;

static bool vt_no_detect_hang;
static int64_t vt_hang_freq;
Expand Down
24 changes: 24 additions & 0 deletions src/vt/runtime/runtime.cc
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,10 @@ void Runtime::printStartupBanner() {
auto f9 = warn_cr("--vt_lb", "lblite");
fmt::print("{}\t{}{}", vt_pre, f9, reset);
}
if (ArgType::vt_lb_stats) {
auto f9 = warn_cr("--vt_lb_stats", "lblite");
fmt::print("{}\t{}{}", vt_pre, f9, reset);
}
#endif

if (ArgType::vt_lb) {
Expand All @@ -390,6 +394,26 @@ void Runtime::printStartupBanner() {
}
}

if (ArgType::vt_lb_stats) {
auto f9 = opt_on("--vt_lb_stats", "Load balancing statistics collection");
fmt::print("{}\t{}{}", vt_pre, f9, reset);

auto const fname = ArgType::vt_lb_stats_file;
if (fname != "") {
auto f11 = fmt::format("LB stats file name \"{}.0.out\"", fname);
auto f12 = opt_on("--vt_lb_stats_file", f11);
fmt::print("{}\t{}{}", vt_pre, f12, reset);
}

auto const fdir = ArgType::vt_lb_stats_dir;
if (fdir != "") {
auto f11 = fmt::format("LB stats directory \"{}\"", fdir);
auto f12 = opt_on("--vt_lb_stats_dir", f11);
fmt::print("{}\t{}{}", vt_pre, f12, reset);
}
}


#if !backend_check_enabled(trace_enabled)
if (ArgType::vt_trace) {
auto f9 = warn_cr("--vt_trace", "trace_enabled");
Expand Down
2 changes: 2 additions & 0 deletions src/vt/runtime/runtime.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,8 @@ struct Runtime {

RuntimeInstType getInstanceID() const { return instance_; }

void systemSync() { sync(); }

private:
RuntimeInstType const instance_;

Expand Down
67 changes: 67 additions & 0 deletions src/vt/vrt/collection/balance/proc_stats.cc
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,16 @@
#include "vt/vrt/collection/balance/proc_stats.h"
#include "vt/vrt/collection/manager.h"
#include "vt/timing/timing.h"
#include "vt/configs/arguments/args.h"
#include "vt/runtime/runtime.h"

#include <vector>
#include <unordered_map>
#include <string>
#include <cstdio>
#include <unistd.h>

#include <fmt/format.h>

namespace vt { namespace vrt { namespace collection { namespace balance {

Expand All @@ -64,6 +70,10 @@ std::unordered_map<ProcStats::ElementIDType,ProcStats::MigrateFnType>

/*static*/ ProcStats::ElementIDType ProcStats::next_elm_ = 1;

/*static*/ FILE* ProcStats::stats_file_ = nullptr;

/*static*/ bool ProcStats::created_dir_ = false;

/*static*/ void ProcStats::clearStats() {
ProcStats::proc_data_.clear();
ProcStats::proc_migrate_.clear();
Expand All @@ -83,4 +93,61 @@ std::unordered_map<ProcStats::ElementIDType,ProcStats::MigrateFnType>
CollectionManager::releaseLBPhase(msg.get());
}

/*static*/ void ProcStats::createStatsFile() {
using ArgType = vt::arguments::ArgConfig;
auto const node = theContext()->getNode();
auto const base_file = std::string(ArgType::vt_lb_stats_file);
auto const dir = std::string(ArgType::vt_lb_stats_dir);
auto const file = fmt::format("{}.{}.out", base_file, node);
auto const file_name = fmt::format("{}/{}", dir, file);

debug_print(
vrt_coll, node,
"ProcStats: createStatsFile file={}\n", file_name
);

// Node 0 creates the directory
if (not created_dir_ and node == 0) {
mkdir(dir.c_str(), S_IRWXU);
created_dir_ = true;
}

// Barrier: wait for node 0 to create directory before trying to put a file in
// the stats destination directory
if (curRT) {
curRT->systemSync();
} else {
// Something is wrong
vtAssert(false, "Trying to dump stats when VT runtime is deallocated?");
}

stats_file_ = fopen(file_name.c_str(), "w+");
}

/*static*/ void ProcStats::closeStatsFile() {
if (stats_file_) {
fclose(stats_file_);
stats_file_ = nullptr;
}
}

/*static*/ void ProcStats::outputStatsFile() {
if (stats_file_ == nullptr) {
createStatsFile();
}

vtAssertExpr(stats_file_ != nullptr);

auto const num_iters = ProcStats::proc_data_.size();
for (auto i = 0; i < num_iters; i++) {
for (auto&& elm : ProcStats::proc_data_.at(i)) {
auto obj_str = fmt::format("{},{},{}\n", i, elm.first, elm.second);
fprintf(stats_file_, "%s", obj_str.c_str());
}
}
fflush(stats_file_);

closeStatsFile();
}

}}}} /* end namespace vt::vrt::collection::balance */
10 changes: 10 additions & 0 deletions src/vt/vrt/collection/balance/proc_stats.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
#include <unordered_map>
#include <tuple>
#include <functional>
#include <cstdio>
#include <cstdlib>

namespace vt { namespace vrt { namespace collection { namespace balance {
Expand All @@ -72,6 +73,12 @@ struct ProcStats {
static void clearStats();
static void releaseLB();

static void outputStatsFile();

private:
static void createStatsFile();
static void closeStatsFile();

private:
static ElementIDType getNextElm();

Expand All @@ -81,6 +88,9 @@ struct ProcStats {
public:
static std::vector<std::unordered_map<ElementIDType,TimeType>> proc_data_;
static std::unordered_map<ElementIDType,MigrateFnType> proc_migrate_;
private:
static FILE* stats_file_;
static bool created_dir_;
};

}}}} /* end namespace vt::vrt::collection::balance */
Expand Down
24 changes: 15 additions & 9 deletions src/vt/vrt/collection/balance/proc_stats.impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,12 @@ template <typename ColT>
VirtualElmProxyType<ColT> const& elm_proxy, ColT* col_elm,
PhaseType const& phase, TimeType const& time
) {
auto const& next_elm = ProcStats::getNextElm();
// Assign a new element ID if it's the first time this runs
if (col_elm->stats_elm_id_ == 0) {
col_elm->stats_elm_id_ = ProcStats::getNextElm();
}

auto const next_elm = col_elm->stats_elm_id_;

debug_print(
vrt_coll, node,
Expand All @@ -77,14 +82,15 @@ template <typename ColT>
std::forward_as_tuple(time)
);
auto migrate_iter = proc_migrate_.find(next_elm);
vtAssert(migrate_iter == proc_migrate_.end(), "Migrate func must not exist");
proc_migrate_.emplace(
std::piecewise_construct,
std::forward_as_tuple(next_elm),
std::forward_as_tuple([elm_proxy,col_elm](NodeType node){
col_elm->migrate(node);
})
);
if (migrate_iter == proc_migrate_.end()) {
proc_migrate_.emplace(
std::piecewise_construct,
std::forward_as_tuple(next_elm),
std::forward_as_tuple([elm_proxy,col_elm](NodeType node){
col_elm->migrate(node);
})
);
}
return next_elm;
}

Expand Down
4 changes: 4 additions & 0 deletions src/vt/vrt/collection/holders/holder.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,9 @@ struct Holder {
void setGroupReady(bool const ready) { group_ready_ = ready; }
NodeType groupRoot() const { return group_root_; }
void setGroupRoot(NodeType const root) { group_root_ = root; }
CountType numReady() const { return elements_ready_; }
void addReady(CountType num = 1) { elements_ready_ += 1; }
void clearReady() { elements_ready_ = 0; }

friend struct CollectionManager;

Expand All @@ -106,6 +109,7 @@ struct Holder {
bool group_ready_ = false;
NodeType group_root_ = 0;
CountType num_erased_not_removed_ = 0;
CountType elements_ready_ = 0;
};

}}} /* end namespace vt::vrt::collection */
Expand Down
35 changes: 31 additions & 4 deletions src/vt/vrt/collection/manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@
#include "vt/collective/collective_alg.h"
#include "vt/collective/reduce/reduce_msg.h"
#include "vt/collective/reduce/reduce_hash.h"
#include "vt/configs/arguments/args.h"
#include "vt/vrt/collection/balance/proc_stats.h"

#include <memory>
#include <vector>
Expand Down Expand Up @@ -113,6 +115,7 @@ struct CollectionManager {
using CleanupListFnType = std::list<CleanupFnType>;
using DispatchHandlerType = auto_registry::AutoHandlerType;
using ActionVecType = std::vector<ActionType>;
using ArgType = vt::arguments::ArgConfig;

template <typename ColT, typename IndexT = typename ColT::IndexType>
using DistribConstructFn = std::function<VirtualPtrType<ColT>(IndexT idx)>;
Expand All @@ -133,7 +136,19 @@ struct CollectionManager {

CollectionManager() = default;

virtual ~CollectionManager() { cleanupAll<>(); }
virtual ~CollectionManager() {
cleanupAll<>();

// Statistics output when LB is enabled and appropriate flag is enabled
backend_enable_if(
lblite, {
if (ArgType::vt_lb_stats) {
balance::ProcStats::outputStatsFile();
balance::ProcStats::clearStats();
}
}
);
}

template <typename=void>
void cleanupAll();
Expand Down Expand Up @@ -604,7 +619,15 @@ struct CollectionManager {
);

/*
* LB-related operations on the collection
* ======================================================================
* LB-related operations on the collection
* ======================================================================
*/

/*
* The `nextPhase` function is called by a single node on the whole collection
* to indicate that LB is ready. This includes all collections and thus may
* require extra sync to invoke safely
*/
template <typename ColT>
void nextPhase(
Expand Down Expand Up @@ -643,11 +666,15 @@ struct CollectionManager {
void checkReduceNoElements();

private:
template <typename ColT, typename IndexT>
template <typename ColT, typename IndexT = typename ColT::IndexType>
CollectionHolder<ColT, IndexT>* findColHolder(VirtualProxyType const& proxy);
template <typename ColT, typename IndexT>

template <typename ColT, typename IndexT = typename ColT::IndexType>
Holder<ColT, IndexT>* findElmHolder(VirtualProxyType const& proxy);

template <typename ColT, typename IndexT = typename ColT::IndexType>
Holder<ColT, IndexT>* findElmHolder(CollectionProxyWrapType<ColT> proxy);

public:
template <typename ColT, typename IndexT>
void destroy(CollectionProxyWrapType<ColT,IndexT> const& proxy);
Expand Down
7 changes: 7 additions & 0 deletions src/vt/vrt/collection/manager.impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -2679,6 +2679,13 @@ Holder<ColT, IndexT>* CollectionManager::findElmHolder(
}
}

template <typename ColT, typename IndexT>
Holder<ColT, IndexT>* CollectionManager::findElmHolder(
CollectionProxyWrapType<ColT> proxy
) {
return findElmHolder<ColT,IndexT>(proxy.getProxy());
}

template <typename>
std::size_t CollectionManager::numCollections() {
return UniversalIndexHolder<>::getNumCollections();
Expand Down
3 changes: 3 additions & 0 deletions src/vt/vrt/collection/types/migratable.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
#include "vt/vrt/collection/types/migrate_hooks.h"
#include "vt/vrt/collection/types/migratable.fwd.h"
#include "vt/vrt/collection/balance/elm_stats.h"
#include "vt/vrt/collection/balance/proc_stats.h"

namespace vt { namespace vrt { namespace collection {

Expand Down Expand Up @@ -99,8 +100,10 @@ struct Migratable : MigrateHookBase {

protected:
friend struct balance::ElementStats;
friend struct balance::ProcStats;
balance::ElementStats stats_;
balance::ElementStats& getStats() { return stats_; }
balance::ProcStats::ElementIDType stats_elm_id_ = 0;
};

}}} /* end namespace vt::vrt::collection */
Expand Down
Loading

0 comments on commit 82fb410

Please sign in to comment.