Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extend the profile plugin to take per host profile traces #1117

Merged
merged 6 commits into from
Apr 11, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -208,15 +208,6 @@
Polymer({
is: 'input-pipeline-analyzer',
properties: {
_requestManager: {
type: Object,
readOnly: true,
value: () => new tf_backend.RequestManager(),
},
run: {
type: String,
observer: '_reloadToolData',
},
_data: {
type: Object,
observer: '_updateView',
Expand Down Expand Up @@ -249,25 +240,13 @@
onClick: function(e) {
this.set('_show_host_side_table', !this._show_host_side_table);
},
_reloadToolData: function(run) {
if (!run) return;
this._requestManager.request(tf_backend.addParams(
tf_backend.getRouter().pluginRoute('profile', '/data'),
{tag: 'input_pipeline_analyzer', run})
).catch(error => {
console.error(error);
}).then((data) => {
if (data) {
this.set('_data', data);
}
});
},
_getToggleButtonText: function(show_host_side_table) {
return (show_host_side_table ? 'Hide' : 'Show') + ' Input Op Statistics';
},

/* Update view according to new data */
_updateView: function() {
if (this._data == null) return;
var deviceJson = this._data[0];
var hostJson = this._data[1];
var recommendationJson = this._data[2];
Expand Down
21 changes: 1 addition & 20 deletions tensorboard/plugins/profile/overview_page/overview-page.html
Original file line number Diff line number Diff line change
Expand Up @@ -162,15 +162,6 @@
Polymer({
is: 'overview-page',
properties: {
_requestManager: {
type: Object,
readOnly: true,
value: () => new tf_backend.RequestManager(),
},
run: {
type: String,
observer: '_reloadToolData',
},
_data: {
type: Object,
observer: '_updateView',
Expand Down Expand Up @@ -206,17 +197,6 @@
_build_target: String,
_statement: String,

_reloadToolData: function(run) {
this._requestManager.request(tf_backend.addParams(
tf_backend.getRouter().pluginRoute('profile', '/data'),
{tag: 'overview_page', run})
).then((data) => {
if (data) {
this.set('_data', data);
}
});
},

/* Toggles _show_top_ops_table */
onClickTopOps: function(e) {
this.set('_show_top_ops_table', !this._show_top_ops_table);
Expand All @@ -229,6 +209,7 @@

/* Updates view according to new data */
_updateView: function() {
if (this._data == null) return;
var generalAnalysisJson = this._data[0];
var inputAnalysisJson = this._data[1];
var runEnvironmentJson = this._data[2];
Expand Down
95 changes: 81 additions & 14 deletions tensorboard/plugins/profile/profile_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
LOGDIR_ROUTE = '/logdir'
DATA_ROUTE = '/data'
TOOLS_ROUTE = '/tools'
HOSTS_ROUTE = '/hosts'

# Available profiling tools -> file name of the tool data.
_FILE_NAME = 'TOOL_FILE_NAME'
Expand Down Expand Up @@ -92,13 +93,19 @@ def index_impl(self):
In the plugin log directory, each directory contains profile data for a
single run (identified by the directory name), and files in the run
directory contains data for different tools. The file that contains profile
for a specific tool "x" will have a fixed name TOOLS["x"].
for a specific tool "x" will have a suffix name TOOLS["x"].
Example:
log/
run1/
trace
plugins/
profile/
host1.trace
host2.trace
run2/
trace
plugins/
profile/
host1.trace
host2.trace

Returns:
A map from runs to tool names e.g.
Expand All @@ -110,11 +117,11 @@ def index_impl(self):
# run1/
# plugins/
# profile/
# trace
# host1.trace
# run2/
# plugins/
# profile/
# trace
# host2.trace
run_to_tools = {}
if not tf.gfile.IsDirectory(self.plugin_logdir):
return run_to_tools
Expand All @@ -124,31 +131,88 @@ def index_impl(self):
continue
run_to_tools[run] = []
for tool in TOOLS:
tool_filename = TOOLS[tool]
if tf.gfile.Exists(os.path.join(run_dir, tool_filename)):
run_to_tools[run].append(tool)
tool_pattern = '*' + TOOLS[tool]
path = os.path.join(run_dir, tool_pattern)
try:
files = tf.gfile.Glob(path)
if len(files) >= 1:
run_to_tools[run].append(tool)
except tf.errors.OpError as e:
logging.warning("Cannot read asset directory: %s, OpError %s",
run_dir, e)
return run_to_tools

@wrappers.Request.application
def tools_route(self, request):
run_to_tools = self.index_impl()
return http_util.Respond(request, run_to_tools, 'application/json')

def data_impl(self, run, tool):
"""Retrieves and processes the tool data for a run.
def host_impl(self, run, tool):
  """Returns available hosts for the run and tool in the log directory.

  In the plugin log directory, each directory contains profile data for a
  single run (identified by the directory name), and files in the run
  directory contain data for different tools and hosts. The file that
  contains the profile of host "h" for a specific tool "x" is named
  "h" + TOOLS["x"], i.e. TOOLS["x"] is the file name suffix.

  Example:
    log/
      run1/
        plugins/
          profile/
            host1.trace
            host2.trace
      run2/
        plugins/
          profile/
            host1.trace
            host2.trace

  Args:
    run: Name of the run.
    tool: Name of the tool; expected to be a key of TOOLS.

  Returns:
    A sorted list of host names, e.g. ["host1", "host2"] for either run of
    the example. The list is empty when the tool is unknown, the plugin log
    directory or the run directory is missing, or the directory cannot be
    read.
  """
  hosts = []
  # `tool` comes straight from the request query string; an unknown or missing
  # tool must not raise KeyError (data_impl guards the same way).
  if tool not in TOOLS:
    return hosts
  if not tf.gfile.IsDirectory(self.plugin_logdir):
    return hosts
  run_dir = self._run_dir(run)
  if not run_dir:
    # run_dir is falsy here, so log the run name that failed to resolve.
    logging.warning("Cannot find asset directory for run: %s", run)
    return hosts
  suffix = TOOLS[tool]
  try:
    files = tf.gfile.Glob(os.path.join(run_dir, '*' + suffix))
  except tf.errors.OpError as e:
    logging.warning("Cannot read asset directory: %s, OpError %s",
                    run_dir, e)
    return hosts
  for path in files:
    name = os.path.basename(path)
    # Strip only the trailing suffix: str.replace would also delete earlier
    # occurrences of the suffix inside the host name.
    if suffix and name.endswith(suffix):
      name = name[:len(name) - len(suffix)]
    hosts.append(name)
  # Glob order is not guaranteed; sort so the response is deterministic.
  return sorted(hosts)


@wrappers.Request.application
def hosts_route(self, request):
  """Responds with the hosts available for the requested run and tool.

  Reads the `run` and `tag` (tool name) query parameters, looks the hosts up
  through host_impl and sends them back with content type application/json.
  """
  query = request.args
  run_name = query.get('run')
  tool_name = query.get('tag')
  return http_util.Respond(
      request, self.host_impl(run_name, tool_name), 'application/json')

def data_impl(self, run, tool, host):
"""Retrieves and processes the tool data for a run and a host.

Args:
run: Name of the run.
tool: Name of the tool.
host: Name of the host.

Returns:
A string that can be served to the frontend tool or None if tool or
run is invalid.
A string that can be served to the frontend tool or None if tool,
run or host is invalid.
"""
# Path relative to the path of plugin directory.
if tool not in TOOLS:
return None
rel_data_path = os.path.join(run, TOOLS[tool])
tool_name = str(host) + TOOLS[tool]
rel_data_path = os.path.join(run, tool_name)
asset_path = os.path.join(self.plugin_logdir, rel_data_path)
raw_data = None
try:
Expand All @@ -173,9 +237,11 @@ def data_route(self, request):
# run: The run name.
# tag: The tool name e.g. trace_viewer. The plugin returns different UI
# data for different tools of the same run.
# host: The host name.
run = request.args.get('run')
tool = request.args.get('tag')
data = self.data_impl(run, tool)
host = request.args.get('host')
data = self.data_impl(run, tool, host)
if data is None:
return http_util.Respond(request, '404 Not Found', 'text/plain', code=404)
return http_util.Respond(request, data, 'text/plain')
Expand All @@ -184,6 +250,7 @@ def get_plugin_apps(self):
return {
LOGDIR_ROUTE: self.logdir_route,
TOOLS_ROUTE: self.tools_route,
HOSTS_ROUTE: self.hosts_route,
DATA_ROUTE: self.data_route,
}

Expand Down
39 changes: 26 additions & 13 deletions tensorboard/plugins/profile/profile_plugin_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,21 +43,29 @@ def setUp(self):
'baz': ['trace_viewer'],
'empty': [],
}
self.run_to_hosts = {
'foo': ['host0', 'host1'],
'bar': ['host1'],
'baz': ['host2'],
'empty': [],
}
for run in self.run_to_tools:
run_dir = os.path.join(plugin_logdir, run)
os.mkdir(run_dir)
for tool in self.run_to_tools[run]:
if tool not in profile_plugin.TOOLS:
continue
tool_file = os.path.join(run_dir, profile_plugin.TOOLS[tool])
if tool == 'trace_viewer':
trace = trace_events_pb2.Trace()
trace.devices[0].name = run
data = trace.SerializeToString()
else:
data = tool
with open(tool_file, 'wb') as f:
f.write(data)
for host in self.run_to_hosts[run]:
file_name = host + profile_plugin.TOOLS[tool]
tool_file = os.path.join(run_dir, file_name)
if tool == 'trace_viewer':
trace = trace_events_pb2.Trace()
trace.devices[0].name = run
data = trace.SerializeToString()
else:
data = tool
with open(tool_file, 'wb') as f:
f.write(data)
with open(os.path.join(plugin_logdir, 'noise'), 'w') as f:
f.write('Not a dir, not a run.')

Expand All @@ -76,8 +84,12 @@ def testRuns(self):
self.assertItemsEqual(runs['bar'], [])
self.assertItemsEqual(runs['empty'], [])

def testHosts(self):
  """host_impl reports both hosts that have trace_viewer data for run 'foo'."""
  found_hosts = self.plugin.host_impl('foo', 'trace_viewer')
  expected_hosts = ['host0', 'host1']
  self.assertItemsEqual(expected_hosts, sorted(found_hosts))

def testData(self):
trace = json.loads(self.plugin.data_impl('foo', 'trace_viewer'))
trace = json.loads(self.plugin.data_impl('foo', 'trace_viewer', 'host0'))
self.assertEqual(trace,
dict(
displayTimeUnit='ns',
Expand All @@ -96,9 +108,10 @@ def testData(self):
]))

# Invalid tool/run.
self.assertEqual(None, self.plugin.data_impl('foo', 'nonono'))
self.assertEqual(None, self.plugin.data_impl('bar', 'unsupported'))
self.assertEqual(None, self.plugin.data_impl('empty', 'trace_viewer'))
self.assertEqual(None, self.plugin.data_impl('foo', 'nonono', 'host0'))
self.assertEqual(None, self.plugin.data_impl('foo', 'trace_viewer', ''))
self.assertEqual(None, self.plugin.data_impl('bar', 'unsupported', 'host1'))
self.assertEqual(None, self.plugin.data_impl('empty', 'trace_viewer', ''))

def testActive(self):
self.assertTrue(self.plugin.is_active())
Expand Down
22 changes: 2 additions & 20 deletions tensorboard/plugins/profile/tf_op_profile/tf-op-profile.html
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
tf-op-details {
position: fixed;
/* don't set top, so it ends up next to tf-op-table */
padding-top: 6.5em;
margin-top: 10em;
left: 16px;
width: 330px;
}
Expand All @@ -56,7 +56,7 @@ <h4>Overall TPU FLOPS utilization is
<p>Modifying your model's architecture, data dimensions, and improving
the efficiency of CPU operations may help reach the TPU's FLOPS potential.
</p></div>
<tf-op-details hidden="[[!_active]]" node="[[_active]]"></tf-op-details>
<tf-op-details hidden="[[!_active]]" node=[[_active]]></tf-op-details>
<tf-op-table root-node="[[_root]]" active={{_active}}></tf-op-table>
</div>
</template>
Expand All @@ -65,15 +65,6 @@ <h4>Overall TPU FLOPS utilization is
Polymer({
is: 'tf-op-profile',
properties: {
_requestManager: {
type: Object,
readOnly: true,
value: () => new tf_backend.RequestManager(),
},
run: {
type: String,
observer: '_load'
},
_data: {
type: Object,
notify: true,
Expand All @@ -89,15 +80,6 @@ <h4>Overall TPU FLOPS utilization is
notify: true,
},
},
_load: function(run) {
if (!run) return;
this._requestManager.request(tf_backend.addParams(
tf_backend.getRouter().pluginRoute('profile', '/data'), {tag: 'op_profile', run})
).catch(error => {}
).then((data) => {
this._data = data;
});
},
_getRoot: function(data, breakdown) { return data[breakdown]; },
_utilizationPercent: function(node) { return tf_op_profile.percent(tf_op_profile.utilization(node)); },
_hasFlops: function(node) { return node.metrics.flops > 0; },
Expand Down
Loading