From 1e497747109d3bfaffa8500340a5af47f7c6a07f Mon Sep 17 00:00:00 2001 From: Justine Tunney Date: Mon, 9 Apr 2018 13:02:21 -0700 Subject: [PATCH] Remove expensive plugin initialization code This change does some code cleanup on Beholder to fix #1107, where invoking TensorFlow routines caused a nontrivial amount of GPU to be reserved. --- tensorboard/plugins/beholder/BUILD | 4 +- .../plugins/beholder/beholder_plugin.py | 45 ++++---- tensorboard/plugins/beholder/im_util.py | 108 +----------------- tensorboard/util.py | 4 +- 4 files changed, 35 insertions(+), 126 deletions(-) diff --git a/tensorboard/plugins/beholder/BUILD b/tensorboard/plugins/beholder/BUILD index f4bdb810bde..7930cf466b9 100644 --- a/tensorboard/plugins/beholder/BUILD +++ b/tensorboard/plugins/beholder/BUILD @@ -37,6 +37,7 @@ py_library( ":shared_config", "//tensorboard:expect_numpy_installed", "//tensorboard:expect_tensorflow_installed", + "//tensorboard:util", "//tensorboard/backend:http_util", "//tensorboard/backend/event_processing:plugin_asset_util", "//tensorboard/plugins:base_plugin", @@ -81,12 +82,13 @@ py_library( py_library( name = "im_util", srcs = ["im_util.py"], - data = ["resources"], + data = ["resources"], # TODO(jart): Don't reference directory. srcs_version = "PY2AND3", deps = [ ":colormaps", "//tensorboard:expect_numpy_installed", "//tensorboard:expect_tensorflow_installed", + "//tensorboard:util", ], ) diff --git a/tensorboard/plugins/beholder/beholder_plugin.py b/tensorboard/plugins/beholder/beholder_plugin.py index 75cb72a21c1..7e894a68266 100644 --- a/tensorboard/plugins/beholder/beholder_plugin.py +++ b/tensorboard/plugins/beholder/beholder_plugin.py @@ -24,6 +24,7 @@ from google.protobuf import message from werkzeug import wrappers +from tensorboard import util from tensorboard.backend import http_util from tensorboard.backend.event_processing import plugin_asset_util as pau from tensorboard.plugins import base_plugin @@ -31,6 +32,10 @@ from tensorboard.plugins.beholder import im_util from tensorboard.plugins.beholder import shared_config +DEFAULT_INFO = [{ + 'name': 'Waiting for data...', +}] + class BeholderPlugin(base_plugin.TBPlugin): """ @@ -40,15 +45,14 @@ class BeholderPlugin(base_plugin.TBPlugin): plugin_name = shared_config.PLUGIN_NAME def __init__(self, context): + self._lock = threading.Lock() self._MULTIPLEXER = context.multiplexer self.PLUGIN_LOGDIR = pau.PluginDirectory( context.logdir, shared_config.PLUGIN_NAME) self.FPS = 10 - self.most_recent_frame = im_util.get_image_relative_to_script('no-data.png') - self.most_recent_info = [{ - 'name': 'Waiting for data...', - }] self._config_file_lock = threading.Lock() + self.most_recent_frame = None + self.most_recent_info = DEFAULT_INFO def get_plugin_apps(self): return { @@ -96,18 +100,18 @@ def _serve_is_active(self, request): } return http_util.Respond(request, response, 'application/json') - def _fetch_current_frame(self): path = '{}/{}'.format(self.PLUGIN_LOGDIR, shared_config.SUMMARY_FILENAME) - - try: - frame = file_system_tools.read_tensor_summary(path).astype(np.uint8) - self.most_recent_frame = frame - return frame - - except (message.DecodeError, IOError, tf.errors.NotFoundError): - return self.most_recent_frame - + with self._lock: + try: + frame = file_system_tools.read_tensor_summary(path).astype(np.uint8) + self.most_recent_frame = frame + return frame + except (message.DecodeError, IOError, tf.errors.NotFoundError): + if self.most_recent_frame is None: + self.most_recent_frame = im_util.get_image_relative_to_script( + 'no-data.png') + return self.most_recent_frame @wrappers.Request.application def _serve_change_config(self, request): @@ -132,18 +136,19 @@ def _serve_change_config(self, request): '{}/{}'.format(self.PLUGIN_LOGDIR, shared_config.CONFIG_FILENAME)) return http_util.Respond(request, {'config': config}, 'application/json') - @wrappers.Request.application def _serve_section_info(self, request): path = '{}/{}'.format( self.PLUGIN_LOGDIR, shared_config.SECTION_INFO_FILENAME) - info = file_system_tools.read_pickle(path, default=self.most_recent_info) - self.most_recent_info = info + with self._lock: + default = self.most_recent_info + info = file_system_tools.read_pickle(path, default=default) + if info is not default: + with self._lock: + self.most_recent_info = info return http_util.Respond(request, info, 'application/json') - def _frame_generator(self): - while True: last_duration = 0 @@ -154,7 +159,7 @@ def _frame_generator(self): start_time = time.time() array = self._fetch_current_frame() - image_bytes = im_util.encode_png(array) + image_bytes = util.encode_png(array) frame_text = b'--frame\r\n' content_type = b'Content-Type: image/png\r\n\r\n' diff --git a/tensorboard/plugins/beholder/im_util.py b/tensorboard/plugins/beholder/im_util.py index 2050ff7c76f..714f7d23ceb 100644 --- a/tensorboard/plugins/beholder/im_util.py +++ b/tensorboard/plugins/beholder/im_util.py @@ -17,11 +17,11 @@ from __future__ import print_function import os -import threading import numpy as np import tensorflow as tf +from tensorboard import util from tensorboard.plugins.beholder import colormaps @@ -86,82 +86,7 @@ def apply_colormap(image, colormap='magma'): return image if cm is None else cm[image] -# Taken from https://github.com/tensorflow/tensorboard/blob/ -# /28f58888ebb22e2db0f4f1f60cd96138ef72b2ef/tensorboard/util.py - -# Modified by Chris Anderson to not use the GPU. -class PersistentOpEvaluator(object): - """Evaluate a fixed TensorFlow graph repeatedly, safely, efficiently. - Extend this class to create a particular kind of op evaluator, like an - image encoder. In `initialize_graph`, create an appropriate TensorFlow - graph with placeholder inputs. In `run`, evaluate this graph and - return its result. This class will manage a singleton graph and - session to preserve memory usage, and will ensure that this graph and - session do not interfere with other concurrent sessions. - A subclass of this class offers a threadsafe, highly parallel Python - entry point for evaluating a particular TensorFlow graph. - Example usage: - class FluxCapacitanceEvaluator(PersistentOpEvaluator): - \"\"\"Compute the flux capacitance required for a system. - Arguments: - x: Available power input, as a `float`, in jigawatts. - Returns: - A `float`, in nanofarads. - \"\"\" - def initialize_graph(self): - self._placeholder = tf.placeholder(some_dtype) - self._op = some_op(self._placeholder) - def run(self, x): - return self._op.eval(feed_dict: {self._placeholder: x}) - evaluate_flux_capacitance = FluxCapacitanceEvaluator() - for x in xs: - evaluate_flux_capacitance(x) - """ - - def __init__(self): - super(PersistentOpEvaluator, self).__init__() - self._session = None - self._initialization_lock = threading.Lock() - - - def _lazily_initialize(self): - """Initialize the graph and session, if this has not yet been done.""" - with self._initialization_lock: - if self._session: - return - graph = tf.Graph() - with graph.as_default(): - self.initialize_graph() - - config = tf.ConfigProto(device_count={'GPU': 0}) - self._session = tf.Session(graph=graph, config=config) - - - def initialize_graph(self): - """Create the TensorFlow graph needed to compute this operation. - This should write ops to the default graph and return `None`. - """ - raise NotImplementedError('Subclasses must implement "initialize_graph".') - - - def run(self, *args, **kwargs): - """Evaluate the ops with the given input. - When this function is called, the default session will have the - graph defined by a previous call to `initialize_graph`. This - function should evaluate any ops necessary to compute the result of - the query for the given *args and **kwargs, likely returning the - result of a call to `some_op.eval(...)`. - """ - raise NotImplementedError('Subclasses must implement "run".') - - - def __call__(self, *args, **kwargs): - self._lazily_initialize() - with self._session.as_default(): - return self.run(*args, **kwargs) - - -class PNGDecoder(PersistentOpEvaluator): +class PNGDecoder(util.PersistentOpEvaluator): def __init__(self): super(PNGDecoder, self).__init__() @@ -181,30 +106,7 @@ def run(self, image): }) -class PNGEncoder(PersistentOpEvaluator): - - def __init__(self): - super(PNGEncoder, self).__init__() - self._image_placeholder = None - self._encode_op = None - - - def initialize_graph(self): - self._image_placeholder = tf.placeholder(dtype=tf.uint8) - self._encode_op = tf.image.encode_png(self._image_placeholder) - - - # pylint: disable=arguments-differ - def run(self, image): - if len(image.shape) == 2: - image = image.reshape([image.shape[0], image.shape[1], 1]) - - return self._encode_op.eval(feed_dict={ - self._image_placeholder: image, - }) - - -class Resizer(PersistentOpEvaluator): +class Resizer(util.PersistentOpEvaluator): def __init__(self): super(Resizer, self).__init__() @@ -233,10 +135,8 @@ def run(self, image, height, width): decode_png = PNGDecoder() -encode_png = PNGEncoder() resize = Resizer() - def read_image(filename): with tf.gfile.Open(filename, 'rb') as image_file: return np.array(decode_png(image_file.read())) @@ -244,7 +144,7 @@ def read_image(filename): def write_image(array, filename): with tf.gfile.Open(filename, 'w') as image_file: - image_file.write(encode_png(array)) + image_file.write(util.encode_png(array)) def get_image_relative_to_script(filename): diff --git a/tensorboard/util.py b/tensorboard/util.py index baec54c9f92..316912cab5e 100644 --- a/tensorboard/util.py +++ b/tensorboard/util.py @@ -419,7 +419,9 @@ def _lazily_initialize(self): graph = tf.Graph() with graph.as_default(): self.initialize_graph() - self._session = tf.Session(graph=graph) + # Don't reserve GPU because libpng can't run on GPU. + config = tf.ConfigProto(device_count={'GPU': 0}) + self._session = tf.Session(graph=graph, config=config) def initialize_graph(self): """Create the TensorFlow graph needed to compute this operation.