Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

1938 remove unneeded calls to mpi wtime in trace and lb #1998

Merged
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
9b0f56f
#1938: pass time as an argument to runnable context LBData
stmcgovern Jan 10, 2023
664304a
#1938: pass time as argument to start stop in ElementLBData
stmcgovern Jan 10, 2023
b90b118
#1938: fix serializer list - add special_progress_
stmcgovern Jan 10, 2023
bbe8629
#1938: pass time as argument in RunnableNew member functions
stmcgovern Jan 10, 2023
d55db73
#1938: add recent_time_ and get function to scheduler
stmcgovern Jan 10, 2023
09af68a
#1938: use a flag to decide when to update recent time
stmcgovern Jan 11, 2023
29db02e
#1938: use recent time in runProgress
stmcgovern Jan 11, 2023
87baa47
#1938: pass start_time down in RunnableNew::run and end_time
stmcgovern Jan 11, 2023
70fd5dc
#1938: set recent time to stale when work is done by a lambda
stmcgovern Jan 11, 2023
24d6ec1
#1938: adjust whitespace
stmcgovern Jan 11, 2023
a76fd5f
#1938: change trace's beginProcessing interface and uses
stmcgovern Nov 15, 2022
164f013
#1938: change interface and calls to endProcessing
stmcgovern Jan 31, 2023
1f75321
#1938: adjust runnable context for trace to use passed in time
stmcgovern Jan 31, 2023
1585b13
#1938: runnable: add interface for testing to get the underlying Runn…
lifflander Feb 9, 2023
9d78f6f
#1938: tests: write test for LB data timings
lifflander Feb 9, 2023
e343c02
#1938: remove whitespace
stmcgovern Feb 14, 2023
81249b2
#1938: runLambda uses time conservatively
stmcgovern Feb 14, 2023
44f3132
#1938: add another test for cumulative elapsed time - values will nee…
stmcgovern Feb 14, 2023
af8b217
#1938: fix license
stmcgovern Feb 14, 2023
a4ea3a8
#1938: tests: set comparison margin and clean up test
stmcgovern Feb 14, 2023
d46c6d1
#1938: add lb vt_check and loosen requirements for tests
stmcgovern Feb 15, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 11 additions & 9 deletions src/vt/context/runnable_context/lb_data.cc
Original file line number Diff line number Diff line change
Expand Up @@ -46,30 +46,32 @@

namespace vt { namespace ctx {

void LBData::start() {
void LBData::start(TimeType time) {
// record start time
if (should_instrument_) {
lb_data_->startTime();
lb_data_->start(time);
}
}

void LBData::finish() {
void LBData::finish(TimeType time) {
// record end time
if (should_instrument_) {
lb_data_->stopTime();
lb_data_->stop(time);
}
}

void LBData::send(elm::ElementIDStruct dest, MsgSizeType bytes) {
lb_data_->sendToEntity(dest, cur_elm_id_, bytes);
if (should_instrument_) {
lb_data_->sendToEntity(dest, cur_elm_id_, bytes);
}
}

void LBData::suspend() {
finish();
void LBData::suspend(TimeType time) {
finish(time);
}

void LBData::resume() {
start();
void LBData::resume(TimeType time) {
start(time);
}

typename LBData::ElementIDStruct const& LBData::getCurrentElementID() const {
Expand Down
13 changes: 9 additions & 4 deletions src/vt/context/runnable_context/lb_data.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,15 +81,20 @@ struct LBData {
should_instrument_(true)
{ }

/**
* \brief Return whether time is required
*/
bool needsTime() const { return should_instrument_; }

/**
* \brief Set the context and timing for a collection task
*/
void start();
void start(TimeType time);

/**
* \brief Remove the context and store timing for a collection task
*/
void finish();
void finish(TimeType time);

/**
* \brief Record LB data whenever a message is sent and a collection
Expand All @@ -100,8 +105,8 @@ struct LBData {
*/
void send(elm::ElementIDStruct dest, MsgSizeType bytes);

void suspend();
void resume();
void suspend(TimeType time);
void resume(TimeType time);

/**
* \brief Get the current element ID struct for the running context
Expand Down
18 changes: 9 additions & 9 deletions src/vt/context/runnable_context/trace.cc
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@

namespace vt { namespace ctx {

void Trace::start() {
void Trace::start(TimeType time) {
if (not is_traced_) {
return;
}
Expand All @@ -67,30 +67,30 @@ void Trace::start() {
from_node_ != uninitialized_destination ? from_node_ : cur_node;

processing_tag_ = theTrace()->beginProcessing(
trace_id, msg_size_, event_, from_node, idx1_, idx2_, idx3_, idx4_
trace_id, msg_size_, event_, from_node, idx1_, idx2_, idx3_, idx4_, time
);
} else {
processing_tag_ = theTrace()->beginProcessing(
trace_id, msg_size_, event_, from_node_
trace_id, msg_size_, event_, from_node_, time
);
}
}

void Trace::finish() {
void Trace::finish(TimeType time) {
if (not is_traced_) {
return;
}

theTrace()->endProcessing(processing_tag_);
theTrace()->endProcessing(processing_tag_, time);
}

void Trace::suspend() {
finish();
void Trace::suspend(TimeType time) {
finish(time);
}

void Trace::resume() {
void Trace::resume(TimeType time) {
// @todo: connect up the last event to this new one after suspension
start();
start(time);
}

}} /* end namespace vt::ctx */
Expand Down
8 changes: 4 additions & 4 deletions src/vt/context/runnable_context/trace.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,10 +101,10 @@ struct Trace {
*/
trace::TraceEventIDType getEvent() const { return event_; }

void start();
void finish();
void suspend();
void resume();
void start(TimeType time);
void finish(TimeType time);
void suspend(TimeType time);
void resume(TimeType time);

private:
/// Whether it's a collection or not
Expand Down
8 changes: 4 additions & 4 deletions src/vt/elm/elm_lb_data.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,8 @@

namespace vt { namespace elm {

void ElementLBData::startTime() {
TimeTypeWrapper const start_time = timing::getCurrentTime();
void ElementLBData::start(TimeType time) {
TimeTypeWrapper const start_time = time;
cur_time_ = start_time.seconds();
cur_time_started_ = true;

Expand All @@ -62,8 +62,8 @@ void ElementLBData::startTime() {
);
}

void ElementLBData::stopTime() {
TimeTypeWrapper const stop_time = timing::getCurrentTime();
void ElementLBData::stop(TimeType time) {
TimeTypeWrapper const stop_time = time;
TimeTypeWrapper const total_time = stop_time.seconds() - cur_time_;
//vtAssert(cur_time_started_, "Must have started time");
auto const started = cur_time_started_;
Expand Down
4 changes: 2 additions & 2 deletions src/vt/elm/elm_lb_data.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@ struct ElementLBData {
ElementLBData(ElementLBData const&) = default;
ElementLBData(ElementLBData&&) = default;

void startTime();
void stopTime();
void start(TimeType time);
void stop(TimeType time);
void addTime(TimeTypeWrapper const& time);

void sendToEntity(ElementIDStruct to, ElementIDStruct from, double bytes);
Expand Down
4 changes: 2 additions & 2 deletions src/vt/parameterization/parameterization.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ static void dataMessageHandler(DataMsg<Tuple>* msg) {
NodeType const& from_node = theContext()->getFromNodeCurrentTask();

processing_tag =
theTrace()->beginProcessing(ep, msg_size, event, from_node);
theTrace()->beginProcessing(ep, msg_size, event, from_node, timing::getCurrentTime());
PhilMiller marked this conversation as resolved.
Show resolved Hide resolved
}
#endif

Expand All @@ -121,7 +121,7 @@ static void dataMessageHandler(DataMsg<Tuple>* msg) {
}

#if vt_check_enabled(trace_enabled)
theTrace()->endProcessing(processing_tag);
theTrace()->endProcessing(processing_tag, timing::getCurrentTime());
#endif
}

Expand Down
8 changes: 4 additions & 4 deletions src/vt/rdma/state/rdma_state.cc
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ void State::getData(
size_t msg_size = info.num_bytes;

processing_tag =
theTrace()->beginProcessing(trace_id, msg_size, event, from_node);
theTrace()->beginProcessing(trace_id, msg_size, event, from_node, timing::getCurrentTime());
}
#endif

Expand All @@ -280,7 +280,7 @@ void State::getData(
}

#if vt_check_enabled(trace_enabled)
theTrace()->endProcessing(processing_tag);
theTrace()->endProcessing(processing_tag, timing::getCurrentTime());
#endif
} else {
pending_tag_gets[tag].push_back(info);
Expand Down Expand Up @@ -328,7 +328,7 @@ void State::putData(
size_t msg_size = info.num_bytes;

processing_tag =
theTrace()->beginProcessing(trace_id, msg_size, event, from_node);
theTrace()->beginProcessing(trace_id, msg_size, event, from_node, timing::getCurrentTime());
}
#endif

Expand All @@ -346,7 +346,7 @@ void State::putData(
}

#if vt_check_enabled(trace_enabled)
theTrace()->endProcessing(processing_tag);
theTrace()->endProcessing(processing_tag, timing::getCurrentTime());
#endif
} else {
pending_tag_puts[tag].push_back(info);
Expand Down
4 changes: 2 additions & 2 deletions src/vt/runnable/invoke.h
Original file line number Diff line number Diff line change
Expand Up @@ -140,12 +140,12 @@ static trace::TraceProcessingTag BeginProcessingInvokeEvent() {
const auto trace_event = theTrace()->messageCreation(trace_id, 0);
const auto from_node = theContext()->getNode();

return theTrace()->beginProcessing(trace_id, 0, trace_event, from_node);
return theTrace()->beginProcessing(trace_id, 0, trace_event, from_node, timing::getCurrentTime());
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This may actually illustrate a case for why I like the 'pass the time as an argument to contexts' approach better - if we want to add LB attribution for methods called through invoke, we would want to share this timer call for the start time, rather than have it reference the scheduler's recorded time, or need another new interface for the divergent case.

}

template <typename Callable, Callable f, typename... Args>
static void EndProcessingInvokeEvent(trace::TraceProcessingTag processing_tag) {
theTrace()->endProcessing(processing_tag);
theTrace()->endProcessing(processing_tag, timing::getCurrentTime());

const auto trace_id = CallableWrapper<Callable, f>::GetTraceID();
theTrace()->messageCreation(trace_id, 0);
Expand Down
9 changes: 9 additions & 0 deletions src/vt/runnable/make_runnable.h
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,15 @@ struct RunnableMaker {
*/
void enqueue();

/**
* \brief Return the underlying \c RunnableNew
*
* \warning This is for testing only
*
* \return the underlying runnable
*/
RunnableNew* getRunnableImpl() { return impl_; }

private:
/**
* \internal \brief Setup for running or enqueuing
Expand Down
56 changes: 35 additions & 21 deletions src/vt/runnable/runnable.cc
Original file line number Diff line number Diff line change
Expand Up @@ -115,15 +115,25 @@ void RunnableNew::run() {
);
#endif

bool needs_time = false;
#if vt_check_enabled(trace_enabled)
if (contexts_.has_trace) needs_time = true;
else
#endif
if (contexts_.has_lb)
{
needs_time = contexts_.lb.needsTime();
}
TimeType start_time = needs_time ? theSched()->getRecentTime() : NAN;

#if vt_check_enabled(fcontext)
if (suspended_) {
resume();
} else {
start();
}
#else
start();
resume(start_time);
} else
#endif
{
start(start_time);
}

#if vt_check_enabled(fcontext)
if (is_threaded_ and not theConfig()->vt_ult_disable) {
Expand Down Expand Up @@ -164,16 +174,20 @@ void RunnableNew::run() {
done_ = true;
#endif
}
theSched()->setRecentTimeToStale();
TimeType end_time = needs_time ? theSched()->getRecentTime() : NAN;



#if vt_check_enabled(fcontext)
if (done_) {
finish();
finish(end_time);
} else {
suspended_ = true;
suspend();
suspend(end_time);
}
#else
finish();
finish(end_time);
#endif

#if vt_check_enabled(fcontext)
Expand All @@ -185,49 +199,49 @@ void RunnableNew::run() {
#endif
}

void RunnableNew::start() {
void RunnableNew::start(TimeType time) {
contexts_.setcontext.start();
if (contexts_.has_td) contexts_.td.start();
if (contexts_.has_col) contexts_.col.start();
if (contexts_.has_lb) contexts_.lb.start();
if (contexts_.has_lb) contexts_.lb.start(time);
#if vt_check_enabled(trace_enabled)
if (contexts_.has_trace) contexts_.trace.start();
if (contexts_.has_trace) contexts_.trace.start(time);
#endif
}

void RunnableNew::finish() {
void RunnableNew::finish(TimeType time) {
contexts_.setcontext.finish();
if (contexts_.has_td) contexts_.td.finish();
if (contexts_.has_col) contexts_.col.finish();
if (contexts_.has_cont) contexts_.cont.finish();
if (contexts_.has_lb) contexts_.lb.finish();
if (contexts_.has_lb) contexts_.lb.finish(time);
#if vt_check_enabled(trace_enabled)
if (contexts_.has_trace) contexts_.trace.finish();
if (contexts_.has_trace) contexts_.trace.finish(time);
#endif
}

void RunnableNew::suspend() {
void RunnableNew::suspend(TimeType time) {
#if vt_check_enabled(fcontext)
contexts_.setcontext.suspend();
if (contexts_.has_td) contexts_.td.suspend();
if (contexts_.has_col) contexts_.col.suspend();
if (contexts_.has_lb) contexts_.lb.suspend();
if (contexts_.has_lb) contexts_.lb.suspend(time);

# if vt_check_enabled(trace_enabled)
if (contexts_.has_trace) contexts_.trace.suspend();
if (contexts_.has_trace) contexts_.trace.suspend(time);
# endif
#endif
}

void RunnableNew::resume() {
void RunnableNew::resume(TimeType time) {
#if vt_check_enabled(fcontext)
contexts_.setcontext.resume();
if (contexts_.has_td) contexts_.td.resume();
if (contexts_.has_col) contexts_.col.resume();
if (contexts_.has_lb) contexts_.lb.resume();
if (contexts_.has_lb) contexts_.lb.resume(time);

# if vt_check_enabled(trace_enabled)
if (contexts_.has_trace) contexts_.trace.resume();
if (contexts_.has_trace) contexts_.trace.resume(time);
# endif
#endif
}
Expand Down
Loading