[GraphRuntime] Layerwise timing in debug graph runtime #3232

Merged · 1 commit · May 23, 2019
13 changes: 5 additions & 8 deletions python/tvm/contrib/debugger/debug_result.py
@@ -207,10 +207,8 @@ def dump_graph_json(self, graph):
def display_debug_result(self):
"""Displays the debugger result"
"""
header = ["Node Name", "Ops", "Time(us)", "Time(%)", "Start Time", \
"End Time", "Shape", "Inputs", "Outputs"]
lines = ["---------", "---", "--------", "-------", "----------", \
"--------", "-----", "------", "-------"]
header = ["Node Name", "Ops", "Time(us)", "Time(%)", "Shape", "Inputs", "Outputs"]
lines = ["---------", "---", "--------", "-------", "-----", "------", "-------"]
eid = 0
data = []
total_time = sum(time[0] for time in self._time_list)
@@ -223,12 +221,11 @@ def display_debug_result(self):
continue
name = node['name']
shape = str(self._output_tensor_list[eid].shape)
time_us = round(time[0] * 1000000, 2)
time_percent = round(((time[0] / total_time) * 100), 2)
time_us = round(time[0] * 1000000, 3)
time_percent = round(((time[0] / total_time) * 100), 3)
inputs = str(node['attrs']['num_inputs'])
outputs = str(node['attrs']['num_outputs'])
node_data = [name, op, time_us, time_percent, str(time[1]), str(time[2]), \
shape, inputs, outputs]
node_data = [name, op, time_us, time_percent, shape, inputs, outputs]
data.append(node_data)
eid += 1
fmt = ""
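The trimmed table drops the per-node Start Time/End Time columns and widens the rounding from two to three decimal places. Below is a minimal sketch of the new row-building logic; the node names, shapes, and timings are hypothetical sample data, whereas the real method reads them from `self._time_list` and the graph nodes:

```python
# Sketch of the trimmed table logic; the sample data below is hypothetical.
header = ["Node Name", "Ops", "Time(us)", "Time(%)", "Shape", "Inputs", "Outputs"]

# Each entry is [seconds]; in the runtime these come from run_individual().
time_list = [[0.000312], [0.000048]]
total_time = sum(t[0] for t in time_list)

rows = []
for name, op, t, shape, n_in, n_out in [
    ("fused_conv2d", "conv2d", time_list[0], "(1, 16, 224, 224)", "2", "1"),
    ("fused_relu", "relu", time_list[1], "(1, 16, 224, 224)", "1", "1"),
]:
    time_us = round(t[0] * 1e6, 3)                    # seconds -> microseconds
    time_percent = round(t[0] / total_time * 100, 3)  # share of total runtime
    rows.append([name, op, time_us, time_percent, shape, n_in, n_out])

for row in [header] + rows:
    print("{:<16}{:<8}{:>10}{:>9}  {:<20}{:>7}{:>8}".format(*row))
```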
42 changes: 26 additions & 16 deletions python/tvm/contrib/debugger/debug_runtime.py
@@ -19,7 +19,6 @@
import os
import tempfile
import shutil
from datetime import datetime
from tvm._ffi.base import string_types
from tvm._ffi.function import get_global_func
from tvm.contrib import graph_runtime
@@ -30,6 +29,7 @@
_DUMP_ROOT_PREFIX = "tvmdbg_"
_DUMP_PATH_PREFIX = "_tvmdbg_"


def create(graph_json_str, libmod, ctx, dump_root=None):
"""Create a runtime executor module given a graph and module.
@@ -62,17 +62,23 @@ def create(graph_json_str, libmod, ctx, dump_root=None):
try:
fcreate = get_global_func("tvm.graph_runtime_debug.create")
except ValueError:
raise ValueError("Please set '(USE_GRAPH_RUNTIME_DEBUG ON)' in " \
"config.cmake and rebuild TVM to enable debug mode")
raise ValueError(
"Please set '(USE_GRAPH_RUNTIME_DEBUG ON)' in "
"config.cmake and rebuild TVM to enable debug mode"
)

ctx, num_rpc_ctx, device_type_id = graph_runtime.get_device_ctx(libmod, ctx)
if num_rpc_ctx == len(ctx):
libmod = rpc_base._ModuleHandle(libmod)
try:
fcreate = ctx[0]._rpc_sess.get_function("tvm.graph_runtime_debug.remote_create")
fcreate = ctx[0]._rpc_sess.get_function(
"tvm.graph_runtime_debug.remote_create"
)
except ValueError:
raise ValueError("Please set '(USE_GRAPH_RUNTIME_DEBUG ON)' in " \
"config.cmake and rebuild TVM to enable debug mode")
raise ValueError(
"Please set '(USE_GRAPH_RUNTIME_DEBUG ON)' in "
"config.cmake and rebuild TVM to enable debug mode"
)
func_obj = fcreate(graph_json_str, libmod, *device_type_id)
return GraphModuleDebug(func_obj, ctx, graph_json_str, dump_root)

@@ -100,10 +106,10 @@ class GraphModuleDebug(graph_runtime.GraphModule):
To select which folder the outputs should be kept.
None will make a temp folder in /tmp/tvmdbg<rand_string> and does the dumping
"""

def __init__(self, module, ctx, graph_json_str, dump_root):
self._dump_root = dump_root
self._dump_path = None
self._debug_run = module["debug_run"]
self._get_output_by_layer = module["get_output_by_layer"]
self._run_individual = module["run_individual"]
graph_runtime.GraphModule.__init__(self, module)
@@ -181,13 +187,10 @@ def _run_debug(self):
Time consumed for each execution will be set as debug output.
"""
self.debug_datum._time_list = []

self.debug_datum._time_list = [
[float(t) * 1e-6] for t in self.run_individual(10, 1, 1)
]
for i, node in enumerate(self.debug_datum.get_graph_nodes()):
start_time = datetime.now().time()
time_stamp = self._debug_run(i)
end_time = datetime.now().time()
self.debug_datum._time_list.append([time_stamp, start_time, end_time])
num_outputs = self.debug_datum.get_graph_node_output_num(node)
for j in range(num_outputs):
out_tensor = self._get_output_by_layer(i, j)
@@ -212,8 +215,13 @@ def debug_get_output(self, node, out):
ret = output_tensors[node]
except:
node_list = output_tensors.keys()
raise RuntimeError("Node " + node + " not found, available nodes are: "
+ str(node_list) + ".")
raise RuntimeError(
"Node "
+ node
+ " not found, available nodes are: "
+ str(node_list)
+ "."
)
elif isinstance(node, int):
output_tensors = self.debug_datum._output_tensor_list
ret = output_tensors[node]
@@ -242,7 +250,9 @@ def run(self, **input_dict):
self.debug_datum.display_debug_result()

def run_individual(self, number, repeat=1, min_repeat_ms=0):
self._run_individual(number, repeat, min_repeat_ms)
ret = self._run_individual(number, repeat, min_repeat_ms)
return ret.strip(",").split(",") if ret else []


def exit(self):
"""Exits the dump folder and all its contents"""
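With `run_individual` now returning the parsed per-op times instead of `None`, the debug runtime can be driven roughly as follows. This is a hedged sketch: `graph_json`, `lib`, `ctx`, `params`, and `input_data` are placeholders for a previously built graph, compiled module, context, and inputs, and the argument values mirror the ones `_run_debug` uses internally:

```python
# Sketch of driving the updated debug runtime; setup objects are placeholders.
from tvm.contrib.debugger import debug_runtime

m = debug_runtime.create(graph_json, lib, ctx, dump_root="/tmp/tvmdbg")
m.set_input(**params)
m.run(x=input_data)  # prints the per-layer timing table via debug_datum

# run_individual(number, repeat, min_repeat_ms) now returns one entry per op,
# as strings of microseconds from the last repeat iteration.
per_op_us = m.run_individual(10, repeat=1, min_repeat_ms=0)
per_op_s = [float(t) * 1e-6 for t in per_op_us]  # same conversion as _run_debug
```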
63 changes: 24 additions & 39 deletions src/runtime/graph/debug/graph_runtime_debug.cc
@@ -6,9 +6,9 @@
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
*
* http://www.apache.org/licenses/LICENSE-2.0
*
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -24,7 +24,9 @@
#include <tvm/runtime/packed_func.h>
#include <tvm/runtime/registry.h>
#include <tvm/runtime/ndarray.h>

#include <chrono>
#include <sstream>
#include "../graph_runtime.h"

namespace tvm {
@@ -39,40 +41,23 @@ namespace runtime {
class GraphRuntimeDebug : public GraphRuntime {
public:
/*!
* \brief Run each operation and get the output.
* \param index The index of op which needs to be run.
* \return the elapsed time.
*/
double DebugRun(size_t index) {
CHECK(index < op_execs_.size());
TVMContext ctx = data_entry_[entry_id(index, 0)]->ctx;
auto tbegin = std::chrono::high_resolution_clock::now();
if (op_execs_[index]) {
op_execs_[index]();
}
TVMSynchronize(ctx.device_type, ctx.device_id, nullptr);
auto tend = std::chrono::high_resolution_clock::now();
double time = std::chrono::duration_cast<std::chrono::duration<double> >(
tend - tbegin).count();
return time;
}

/*!
* \brief Run each operation in the graph and print out the runtime per op.
* \brief Run each operation in the graph and get the time per op for all ops.
* \param number The number of times to run this function for taking average.
* \param repeat The number of times to repeat the measurement.
In total, the function will be invoked (1 + number x repeat) times,
where the first one is warmed up and will be discarded in case
there is lazy initialization.
* In total, the function will be invoked (1 + number x repeat) times,
* where the first one is warmed up and will be discarded in case
* there is lazy initialization.
* \param min_repeat_ms The minimum duration of one `repeat` in milliseconds.
By default, one `repeat` contains `number` runs. If this parameter is set,
the parameters `number` will be dynamically adjusted to meet the
minimum duration requirement of one `repeat`.
* By default, one `repeat` contains `number` runs. If this parameter is set,
* the parameters `number` will be dynamically adjusted to meet the
* minimum duration requirement of one `repeat`.
* \return Comma separated string containing the elapsed time per op for the last
* iteration only, because returning a long string over rpc can be expensive.
*/
void RunIndividual(int number, int repeat, int min_repeat_ms) {
std::string RunIndividual(int number, int repeat, int min_repeat_ms) {
// warmup run
GraphRuntime::Run();

std::ostringstream os;
std::vector<double> time_per_op(op_execs_.size(), 0);
for (int i = 0; i < repeat; ++i) {
std::chrono::time_point<
@@ -96,7 +81,7 @@ class GraphRuntimeDebug : public GraphRuntime {
auto op_tend = std::chrono::high_resolution_clock::now();
double op_duration = std::chrono::duration_cast<
std::chrono::duration<double> >(op_tend - op_tbegin).count();
time_per_op[index] += op_duration * 1000; // ms
time_per_op[index] += op_duration * 1e6; // us
}
}
}
@@ -105,16 +90,20 @@
(tend - tbegin).count() * 1000;
} while (duration_ms < min_repeat_ms);

LOG(INFO) << "Repeat: " << i;
LOG(INFO) << "Iteration: " << i;
int op = 0;
for (size_t index = 0; index < time_per_op.size(); index++) {
if (op_execs_[index]) {
time_per_op[index] /= number;
LOG(INFO) << "Op #" << op++ << " " << GetNodeName(index) << ": "
<< time_per_op[index] << " ms/iter";
<< time_per_op[index] << " us/iter";
}
}
}
for (size_t index = 0; index < time_per_op.size(); index++) {
os << time_per_op[index] << ",";
}
return os.str();
}

/*!
@@ -182,11 +171,7 @@ PackedFunc GraphRuntimeDebug::GetFunction(
const std::string& name,
const std::shared_ptr<ModuleNode>& sptr_to_self) {
// return member functions during query.
if (name == "debug_run") {
return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
*rv = this->DebugRun(static_cast<size_t>(args[0].operator int64_t()));
});
} else if (name == "get_output_by_layer") {
if (name == "get_output_by_layer") {
return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
*rv = this->GetOutputByLayer(args[0], args[1]);
});
Expand All @@ -206,7 +191,7 @@ PackedFunc GraphRuntimeDebug::GetFunction(
CHECK_GT(number, 0);
CHECK_GT(repeat, 0);
CHECK_GE(min_repeat_ms, 0);
this->RunIndividual(number, repeat, min_repeat_ms);
*rv = this->RunIndividual(number, repeat, min_repeat_ms);
});
} else {
return GraphRuntime::GetFunction(name, sptr_to_self);
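The C++ loop above averages each op over `number` runs inside every `repeat` iteration, re-running with a larger `number` until one repeat lasts at least `min_repeat_ms`, and only the last iteration's averages are serialized so the string sent over RPC stays small. A Python rendering of the same accounting may help; the timer, the op list, and the doubling growth policy are stand-ins (TVM adjusts `number` dynamically, but the exact rule is not shown in this diff):

```python
import time

def run_individual(ops, number, repeat, min_repeat_ms):
    """Sketch of GraphRuntimeDebug::RunIndividual's accounting.

    `ops` is a list of zero-argument callables standing in for op_execs_.
    """
    for op in ops:  # warm-up pass, discarded (the "1 +" in 1 + number x repeat)
        op()
    time_per_op = [0.0] * len(ops)
    for _ in range(repeat):
        while True:
            time_per_op = [0.0] * len(ops)  # discard under-length attempts
            tbegin = time.perf_counter()
            for _ in range(number):
                for i, op in enumerate(ops):
                    op_begin = time.perf_counter()
                    op()
                    time_per_op[i] += (time.perf_counter() - op_begin) * 1e6  # us
            duration_ms = (time.perf_counter() - tbegin) * 1000
            if duration_ms >= min_repeat_ms:
                break
            number *= 2  # growth policy is an assumption, not TVM's exact rule
        time_per_op = [t / number for t in time_per_op]  # average per run
    # Only the last repeat's averages survive, comma-separated like the C++ os.
    return ",".join(str(t) for t in time_per_op) + ","
```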
4 changes: 0 additions & 4 deletions tests/python/unittest/test_runtime_graph_debug.py
@@ -100,9 +100,6 @@ def check_verify():
out = mod.get_output(0, tvm.nd.empty((n,)))
np.testing.assert_equal(out.asnumpy(), a + 1)

#test individual run
mod.run_individual(20, 2, 1)

mod.exit()
#verify dump root delete after cleanup
assert(not os.path.exists(directory))
@@ -129,7 +126,6 @@ def check_remote():
mod.run(x=tvm.nd.array(a, ctx))
out = tvm.nd.empty((n,), ctx=ctx)
out = mod.get_output(0, out)
mod.run_individual(20, 2, 1)
np.testing.assert_equal(out.asnumpy(), a + 1)

check_verify()
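The wall-clock `run_individual` assertions are simply dropped from both test paths. If a follow-up wanted to exercise the new return value instead, a hedged check might look like this, assuming `mod` is the debug module already built in `check_verify`:

```python
# Hypothetical follow-up check for run_individual's new return value.
per_op = mod.run_individual(20, 2, 1)
# One entry per graph node (non-op nodes report 0), each parseable as a float.
assert len(per_op) > 0
assert all(float(t) >= 0.0 for t in per_op)
```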