Skip to content

Commit

Permalink
TensorFlow: Upstream changes to git.
Browse files Browse the repository at this point in the history
Change 109344341
	Teach ./configure about Python 3 (and other minor Python 3 issues)

	./configure now writes bazel.rc based on a bazel.rc.template, which gives us a
	place to tell bazel which version of Python we were using.

	Also fix a few tests whose Python 3 support had degraded.

	The only thing left before we have Python 3 support is

	  protocolbuffers/protobuf#1023
Change 109343002
	Update ops.pbtxt to reflect 109321497.
Change 109342838
	Do memory deallocation outside the critical section in gpu_event_mgr.cc.
Change 109334210
	PTB LSTM example: use slicing instead of splitting the inputs.
Change 109332238
	Cleanup TensorBoard local development environment
Change 109331051
	Use __all__ in __init__.py to restrict exported modules

	Specifically, __all__ is now anything that (1) doesn't begin with an underscore
	and (2) isn't a non-whitelisted module.

	This fixes one tiny piece of b/25561952.  Specifically, the following no longer
	exist: tf.np, tf.math_ops, and tf.variables.  tf.ops and tf.tensor_util still
	exist but shouldn't; that will have to wait for a later CL.
Change 109327154
	tf.tuple allows Tensors to be passed in as control_inputs like tf.control_dependencies.
Change 109324239
	Make tf.control_dependencies(None) clear the control dependencies.
	Use that to prevent ops created for Variables to inherit the current
	control dependencies.

	This fixes issues when using ExponentialMovingAverages with control
	dependencies.
Change 109323719
	Added support for boolean tf.scatter_update.

Base CL: 109348398
  • Loading branch information
Vijay Vasudevan committed Dec 3, 2015
1 parent a4806a3 commit eb5e56e
Show file tree
Hide file tree
Showing 31 changed files with 346 additions and 184 deletions.
1 change: 1 addition & 0 deletions tensorflow/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ filegroup(
# Public Python API target for TensorFlow: exposes __init__.py and pulls in
# the full //tensorflow/python implementation.
# srcs_version "PY2AND3" declares the sources compatible with both Python 2
# and Python 3 (part of the Python 3 enablement in this change).
py_library(
name = "tensorflow_py",
srcs = ["__init__.py"],
srcs_version = "PY2AND3",
visibility = ["//visibility:public"],
deps = ["//tensorflow/python"],
)
27 changes: 14 additions & 13 deletions tensorflow/core/common_runtime/gpu/gpu_event_mgr.cc
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,13 @@ EventMgr::~EventMgr() {
delete e;
}
while (!used_events_.empty()) {
delete used_events_[0].event;
delete used_events_[0].mem;
if (used_events_[0].bufrec.buf) {
used_events_[0].bufrec.alloc->DeallocateRaw(used_events_[0].bufrec.buf);
InUse* ue = &used_events_[0];
delete ue->event;
delete ue->mem;
if (ue->bufrec.buf) {
ue->bufrec.alloc->DeallocateRaw(ue->bufrec.buf);
}
if (used_events_[0].func != nullptr)
threadpool_.Schedule(used_events_[0].func);
if (ue->func != nullptr) threadpool_.Schedule(ue->func);
used_events_.pop_front();
}
}
Expand All @@ -60,10 +60,12 @@ EventMgr::~EventMgr() {
// Low-frequency polling loop that retires straggler events.
// Completed InUse records are collected under mu_ but released only
// after the lock is dropped: FreeMemory may delete tensors, return GPU
// buffers to their allocator, and schedule user callbacks, none of
// which should run inside the critical section.
void EventMgr::PollLoop() {
  while (!stop_polling_.HasBeenNotified()) {
    Env::Default()->SleepForMicroseconds(1 * 1000);
    ToFreeVector to_free;
    {
      mutex_lock l(mu_);
      PollEvents(true, &to_free);
    }
    // Deallocation happens outside the critical section.
    FreeMemory(to_free);
  }
  polling_stopped_.Notify();
}
Expand Down Expand Up @@ -103,7 +105,8 @@ void EventMgr::QueueInUse(gpu::Stream* stream, InUse iu) {
// GPU memory use to spike needlessly. An alternative strategy would
// be to throttle new Op execution until the pending event queue
// clears.
void EventMgr::PollEvents(bool is_dedicated_poller) {
void EventMgr::PollEvents(bool is_dedicated_poller,
gtl::InlinedVector<InUse, 4>* to_free) {
VLOG(2) << "PollEvents free_events_ " << free_events_.size()
<< " used_events_ " << used_events_.size();
// Sweep the remaining events in order. If this is the dedicated
Expand All @@ -123,11 +126,9 @@ void EventMgr::PollEvents(bool is_dedicated_poller) {
if (!is_dedicated_poller) return; // quit processing queue
break;
case gpu::Event::Status::kComplete:
delete iu.mem;
if (iu.bufrec.buf) iu.bufrec.alloc->DeallocateRaw(iu.bufrec.buf);
// The function must be called in another thread, outside of
// the mutex held here.
if (iu.func != nullptr) threadpool_.Schedule(iu.func);
// Make a copy of the InUse record so we can free it after releasing
// the lock
to_free->push_back(iu);
free_events_.push_back(iu.event);
// Mark this InUse record as completed.
iu.event = nullptr;
Expand Down
51 changes: 40 additions & 11 deletions tensorflow/core/common_runtime/gpu/gpu_event_mgr.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@ limitations under the License.

#include <deque>
#include <vector>
#include "tensorflow/stream_executor/stream.h"
#include "tensorflow/core/lib/core/notification.h"
#include "tensorflow/core/lib/core/threadpool.h"
#include "tensorflow/core/lib/gtl/inlined_vector.h"
#include "tensorflow/core/platform/port.h"
#include "tensorflow/core/platform/thread_annotations.h"
#include "tensorflow/core/public/tensor.h"
Expand Down Expand Up @@ -47,9 +49,13 @@ class EventMgr {
// Takes ownership of *tensors and deletes it once all work currently
// enqueued on *stream has completed.  Queues the tensors under mu_,
// sweeps already-completed events, and frees the resulting records
// only after releasing the lock.
inline void ThenDeleteTensors(perftools::gputools::Stream* stream,
                              std::vector<Tensor>* tensors) {
  ToFreeVector to_free;
  {
    mutex_lock l(mu_);
    QueueTensors(stream, tensors);
    PollEvents(false, &to_free);
  }
  // Deallocation (and any queued callbacks) happens outside the lock.
  FreeMemory(to_free);
}

struct BufRec {
Expand All @@ -61,16 +67,24 @@ class EventMgr {
// Takes ownership of bufrec.buf and calls bufrec.alloc->DeallocateRaw()
// on it as soon as all events currently enqueued on *stream have
// completed.  The actual deallocation is deferred until mu_ is released.
inline void ThenDeleteBuffer(perftools::gputools::Stream* stream,
                             BufRec bufrec) {
  ToFreeVector to_free;
  {
    mutex_lock l(mu_);
    QueueBuffer(stream, bufrec);
    PollEvents(false, &to_free);
  }
  // Deallocation (and any queued callbacks) happens outside the lock.
  FreeMemory(to_free);
}

// Arranges for "func" to be scheduled on the threadpool once all work
// currently enqueued on *stream has completed.
inline void ThenExecute(perftools::gputools::Stream* stream,
                        std::function<void()> func) {
  ToFreeVector to_free;
  {
    mutex_lock l(mu_);
    QueueFunc(stream, func);
    PollEvents(false, &to_free);
  }
  // Any already-completed records are freed outside the lock.
  FreeMemory(to_free);
}

private:
Expand All @@ -85,10 +99,22 @@ class EventMgr {
std::function<void()> func;
};

typedef gtl::InlinedVector<InUse, 4> ToFreeVector;

// Releases the resources held by each record in "to_free": deletes the
// tensor vector, returns any raw buffer to its allocator, and hands the
// completion callback (if any) to the threadpool.  Must be called
// without mu_ held.
void FreeMemory(const ToFreeVector& to_free) {
  for (const auto& record : to_free) {
    delete record.mem;
    if (record.bufrec.buf != nullptr) {
      record.bufrec.alloc->DeallocateRaw(record.bufrec.buf);
    }
    // The callback must run on another thread, never under mu_.
    if (record.func != nullptr) {
      threadpool_.Schedule(record.func);
    }
  }
}

// Stream-enqueue an unused Event and save with it a collection of
// Tensors and/or a BufRec to be deleted only after the Event
// records.
void QueueInUse(perftools::gputools::Stream* stream, InUse in_use)

EXCLUSIVE_LOCKS_REQUIRED(mu_);

void QueueTensors(perftools::gputools::Stream* stream,
Expand All @@ -109,8 +135,11 @@ class EventMgr {

// This function should be called at roughly the same tempo as
// QueueTensors() to check whether pending events have recorded,
// and then retire them.
void PollEvents(bool is_dedicated_poller) EXCLUSIVE_LOCKS_REQUIRED(mu_);
// and then retire them. It appends InUse elements that need cleanup
// to "*to_free". The caller should call FreeMemory(to_free)
// when this returns.
void PollEvents(bool is_dedicated_poller, ToFreeVector* to_free)
EXCLUSIVE_LOCKS_REQUIRED(mu_);

// An internal polling loop that runs at a low frequency to clear
// straggler Events.
Expand Down
8 changes: 6 additions & 2 deletions tensorflow/core/common_runtime/gpu/gpu_event_mgr_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,12 @@ class TEST_EventMgrHelper {
}

// Drives one sweep of the EventMgr's pending-event queue, mirroring the
// production pattern: collect completed records under the lock, then
// free them after the lock is released.
void PollEvents(bool is_dedicated_poller) {
  EventMgr::ToFreeVector to_free;
  {
    mutex_lock l(em_->mu_);
    em_->PollEvents(is_dedicated_poller, &to_free);
  }
  em_->FreeMemory(to_free);
}

private:
Expand Down
2 changes: 2 additions & 0 deletions tensorflow/core/kernels/scatter_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,8 @@ class ScatterUpdateOp : public OpKernel {

TF_CALL_NUMBER_TYPES(REGISTER_SCATTER_UPDATE_INT32);
TF_CALL_NUMBER_TYPES(REGISTER_SCATTER_UPDATE_INT64);
// Register scatter_update kernels for bool as well, so tf.scatter_update
// works on boolean variables (with int32 and int64 index types).
REGISTER_SCATTER_UPDATE_INT32(bool)
REGISTER_SCATTER_UPDATE_INT64(bool)

#undef REGISTER_SCATTER_UPDATE_INT64
#undef REGISTER_SCATTER_UPDATE_INT32
Expand Down
Loading

0 comments on commit eb5e56e

Please sign in to comment.