Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

2243 allow replay to repeat phases using modulus #2244

Merged
merged 1 commit into from
Jan 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 16 additions & 7 deletions src/vt/vrt/collection/balance/workload_replay.cc
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ namespace vt { namespace vrt { namespace collection {
namespace balance { namespace replay {

void replayWorkloads(
PhaseType initial_phase, PhaseType phases_to_run
PhaseType initial_phase, PhaseType phases_to_run, PhaseType phase_mod
) {
// read in object loads from json files
auto const filename = theConfig()->getLBDataFileIn();
Expand All @@ -67,11 +67,11 @@ void replayWorkloads(
&LBManager::statsHandler
>(theLBManager()->getProxy());

replayWorkloads(initial_phase, phases_to_run, workloads, stats_cb);
replayWorkloads(initial_phase, phases_to_run, phase_mod, workloads, stats_cb);
}

void replayWorkloads(
PhaseType initial_phase, PhaseType phases_to_run,
PhaseType initial_phase, PhaseType phases_to_run, PhaseType phase_mod,
std::shared_ptr<LBDataHolder> workloads,
Callback<std::vector<balance::LoadData>> stats_cb
) {
Expand Down Expand Up @@ -102,6 +102,8 @@ void replayWorkloads(
// simulate the given number of phases
auto stop_phase = initial_phase + phases_to_run;
for (PhaseType phase = initial_phase; phase < stop_phase; phase++) {
PhaseType input_phase = phase_mod == 0 ? phase : phase % phase_mod;

// reapply the base load model in case we overwrote it on a previous iter
theLBManager()->setLoadModel(base_load_model);

Expand All @@ -113,7 +115,7 @@ void replayWorkloads(

// point the load model at the workloads for the relevant phase
runInEpochCollective("WorkloadReplayDriver -> updateLoads", [=] {
base_load_model->updateLoads(phase);
base_load_model->updateLoads(input_phase);
});

if (theConfig()->vt_debug_replay) {
Expand All @@ -123,7 +125,7 @@ void replayWorkloads(
++count;
vt_debug_print(
normal, replay,
"workload for element {} is here on phase {}\n", workload_id, phase
"workload for element {} is here on input_phase {}\n", workload_id, input_phase
);
}
}
Expand Down Expand Up @@ -161,7 +163,7 @@ void replayWorkloads(
}

if (this_rank == 0) {
vt_print(replay, "Simulating phase {}...\n", phase);
vt_print(replay, "Simulating phase {} using inputs from phase {}...\n", phase, input_phase);
}

if (theConfig()->vt_debug_replay) {
Expand Down Expand Up @@ -227,12 +229,19 @@ void replayWorkloads(
auto cb = theCB()->makeFunc<ReassignmentMsg>(
vt::pipe::LifetimeEnum::Once, postLBWork
);
theLBManager()->selectStartLB(phase, cb);
auto lb = theLBManager()->decideLBToRun(phase, true);
auto const start_time = timing::getCurrentTime();
theLBManager()->startLB(input_phase, lb, cb);
auto const total_time = timing::getCurrentTime() - start_time;
if (lb != LBType::NoLB) {
vt_print(replay, "Time in load balancer: {}\n", total_time);
}
});
runInEpochCollective("WorkloadReplayDriver -> destroyLB", [&] {
theLBManager()->destroyLB();
});
auto last_phase_info = theLBManager()->getPhaseInfo();
last_phase_info->phase = phase;
thePhase()->printSummary(last_phase_info);
}
}
Expand Down
4 changes: 2 additions & 2 deletions src/vt/vrt/collection/balance/workload_replay.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ namespace balance { namespace replay {
* object exists during any given phase.
*/
void replayWorkloads(
PhaseType initial_phase, PhaseType phases_to_run
PhaseType initial_phase, PhaseType phases_to_run, PhaseType phase_mod
);

/**
Expand All @@ -92,7 +92,7 @@ void replayWorkloads(
* same rank as the object exists during any given phase.
*/
void replayWorkloads(
PhaseType initial_phase, PhaseType phases_to_run,
PhaseType initial_phase, PhaseType phases_to_run, PhaseType phase_mod,
std::shared_ptr<LBDataHolder> workloads,
Callback<std::vector<balance::LoadData>> stats_cb
);
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/collection/test_workload_data_migrator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -878,7 +878,7 @@ TEST_F(TestWorkloadReplay, test_run_replay_verify_some_stats) {

// then replay them but allow the lb to place objects differently
vt::vrt::collection::balance::replay::replayWorkloads(
initial_phase, num_phases, lbdh, stats_cb
initial_phase, num_phases, 0, lbdh, stats_cb
);
}

Expand Down
17 changes: 12 additions & 5 deletions tools/workload_replay/simulate_replay.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,21 +50,28 @@ int main(int argc, char** argv) {
vt::initialize(argc, argv);

vtAbortIf(
argc != 3,
"Must have two app-specific arguments: <initial phase> <phases to run>\n"
argc < 3 or argc > 4,
"Must have two or three app-specific arguments:\n"
" <initial phase> <phases to run> [phase modulus]\n"
"The json workload files needs to be specified using\n"
"--vt_lb_data_file_in and --vt_lb_data_dir_in"
" --vt_lb_data_in, --vt_lb_data_file_in, and --vt_lb_data_dir_in"
);

// initial phase to simulate
PhaseType initial_phase = atoi(argv[1]);
// number of phases to simulate
PhaseType phases_to_run = atoi(argv[2]);
// phase modulus to apply to input
PhaseType phase_mod = 0;

if (argc > 3) {
phase_mod = atoi(argv[3]);
}

// the workloads used will be those specified with the command-line arguments
// --vt_lb_data_file_in and --vt_lb_data_dir_in
// --vt_lb_data_in, --vt_lb_data_file_in, and --vt_lb_data_dir_in
vt::vrt::collection::balance::replay::replayWorkloads(
initial_phase, phases_to_run
initial_phase, phases_to_run, phase_mod
);

vt::finalize();
Expand Down
Loading