Minor refactor: prevent string copying, list -> vector, shared_ptr by…

… ref
apache · Nov 29, 2017 · f515943 · f515943
1 parent 2f8c1e8
commit f515943
Show file tree

Hide file tree

Showing 6 changed files with 20 additions and 19 deletions.
diff --git a/include/mxnet/base.h b/include/mxnet/base.h
@@ -243,7 +243,7 @@ struct Context {
    * \param str the string pattern
    * \return Context
    */
-  inline static Context FromString(std::string str);
+  inline static Context FromString(std::string const& str);
 };
 
 /*!
@@ -316,7 +316,7 @@ inline Context Context::GPU(int32_t dev_id) {
   return Create(kGPU, dev_id);
 }
 
-inline Context Context::FromString(std::string str) {
+inline Context Context::FromString(std::string const& str) {
   Context ret;
   try {
     std::string::size_type l = str.find('(');

diff --git a/src/engine/stream_manager.h b/src/engine/stream_manager.h
@@ -74,7 +74,7 @@ RunContext StreamManager<kNumGpus, kStreams>::GetRunContext(
       std::size_t use_counter;
       CUDA_CALL(cudaSetDevice(ctx.dev_id));
       {
-        std::lock_guard<std::mutex> lock{m_};
+        std::lock_guard<std::mutex> lock{mutex_};
         auto&& counter = gpu_cnt_.at(ctx.dev_id);
         if (counter == -1) {
           for (auto&& i : gpu_streams_.at(ctx.dev_id)) {
@@ -109,7 +109,7 @@ RunContext StreamManager<kNumGpus, kStreams>::GetIORunContext(
 #if MXNET_USE_CUDA
       CUDA_CALL(cudaSetDevice(ctx.dev_id));
       {
-        std::lock_guard<std::mutex> lock{m_};
+        std::lock_guard<std::mutex> lock{mutex_};
         if (gpu_io_streams_.at(ctx.dev_id) == nullptr) {
           gpu_io_streams_.at(ctx.dev_id) = mshadow::NewStream<gpu>(false, false, ctx.dev_id);
         }

diff --git a/src/engine/thread_pool.h b/src/engine/thread_pool.h
@@ -90,6 +90,7 @@ class ThreadPool {
                       std::function<void(std::shared_ptr<SimpleEvent> ready)> func,
                       const bool wait)
       : worker_threads_(size) {
+    ready_events_.reserve(size);
     for (auto& i : worker_threads_) {
       std::shared_ptr<SimpleEvent> ptr = std::make_shared<SimpleEvent>();
       ready_events_.emplace_back(ptr);
@@ -110,7 +111,7 @@ class ThreadPool {
    * \brief Wait for all started threads to signal that they're ready
    */
   void WaitForReady() {
-    for (std::shared_ptr<SimpleEvent> ptr : ready_events_) {
+    for (std::shared_ptr<SimpleEvent>& ptr : ready_events_) {
       ptr->wait();
     }
   }
@@ -122,7 +123,7 @@ class ThreadPool {
   /*!
    * \brief Startup synchronization objects
    */
-  std::list<std::shared_ptr<SimpleEvent>> ready_events_;
+  std::vector<std::shared_ptr<SimpleEvent> > ready_events_;
   /*!
    * \brief Disallow default construction.
    */

diff --git a/src/engine/threaded_engine.cc b/src/engine/threaded_engine.cc
@@ -49,7 +49,7 @@ ThreadedVar::ThreadedVar(VersionedVarBlock* head) : head_{head} {
 }
 
 inline void ThreadedVar::AppendReadDependency(OprBlock* opr_block) {
-  std::lock_guard<std::mutex> lock{m_};
+  std::lock_guard<std::mutex> lock{mutex_};
   if (pending_write_ == nullptr) {
     // invariant: is_ready_to_read()
     CHECK_GE(num_pending_reads_, 0);
@@ -71,7 +71,7 @@ inline void ThreadedVar::AppendReadDependency(OprBlock* opr_block) {
 
 inline void ThreadedVar::AppendWriteDependency(OprBlock* opr_block) {
   auto&& new_var_block = VersionedVarBlock::New();
-  std::lock_guard<std::mutex> lock{m_};
+  std::lock_guard<std::mutex> lock{mutex_};
   // invariant.
   assert(head_->next == nullptr);
   assert(head_->trigger == nullptr);
@@ -102,7 +102,7 @@ inline void ThreadedVar::CompleteReadDependency(Dispatcher dispatcher) {
   OprBlock *trigger = nullptr;
   {
     // this is lock scope
-    std::lock_guard<std::mutex> lock{m_};
+    std::lock_guard<std::mutex> lock{mutex_};
     CHECK_GT(num_pending_reads_, 0);
 
     if (--num_pending_reads_ == 0) {
@@ -124,7 +124,7 @@ inline bool ThreadedVar::CompleteWriteDependency(Dispatcher dispatcher) {
   VersionedVarBlock *old_pending_write, *end_of_read_chain;
   OprBlock* trigger_write = nullptr;
   {
-    std::lock_guard<std::mutex> lock{m_};
+    std::lock_guard<std::mutex> lock{mutex_};
     // invariants
     assert(head_->next == nullptr);
     assert(pending_write_ != nullptr);
@@ -187,12 +187,12 @@ inline bool ThreadedVar::CompleteWriteDependency(Dispatcher dispatcher) {
 }
 
 inline void ThreadedVar::SetToDelete() {
-  std::lock_guard<std::mutex> lock{m_};
+  std::lock_guard<std::mutex> lock{mutex_};
   to_delete_ = true;
 }
 
 inline bool ThreadedVar::ready_to_read() {
-  std::lock_guard<std::mutex> lock{m_};
+  std::lock_guard<std::mutex> lock{mutex_};
   return this->is_ready_to_read();
 }
 

diff --git a/src/engine/threaded_engine.h b/src/engine/threaded_engine.h
@@ -85,8 +85,8 @@ struct OprBlock : public common::ObjectPoolAllocatable<OprBlock> {
    * \return the wait counter after the decreasement.
    */
   inline int decr_wait() {
-    // chack invariant, avoid over trigger
-    int ret = --wait;
+    // check invariant, avoid over trigger
+    int const ret = --wait;
     CHECK_GE(ret, 0);
     return ret;
   }
@@ -112,8 +112,8 @@ struct VersionedVarBlock
  * \brief Variable implementation.
  *  Each ThreadedVar is a linked list(queue) of operations to be performed.
  */
-class ThreadedVar final : public Var,
-                          public common::ObjectPoolAllocatable<ThreadedVar> {
+class ThreadedVar final
+    : public Var, public common::ObjectPoolAllocatable<ThreadedVar> {
  public:
   /*!
    * \brief constructor
@@ -180,7 +180,7 @@ class ThreadedVar final : public Var,
   // TODO(hotpxl) change this to spinlock for faster runtime
   // TODO(hotpxl) consider rename head
   /*! \brief inetrnal mutex of the ThreadedVar */
-  std::mutex m_;
+  std::mutex mutex_;
   /*!
    * \brief number of pending reads operation in the variable.
    *  will be marked as -1 when there is a already triggered pending write.
@@ -446,7 +446,7 @@ class ThreadedEngine : public Engine {
     if (!bulk_status.count) return;
     bulk_status.count = 0;
     DeduplicateVarHandle(&bulk_status.const_vars, &bulk_status.mutable_vars);
-    auto fn = std::move(bulk_status.fn);
+    SyncFn fn = std::move(bulk_status.fn);
     this->PushAsync([fn](RunContext ctx, CallbackOnComplete on_complete) {
         fn(ctx);
         on_complete();

diff --git a/src/engine/threaded_engine_perdevice.cc b/src/engine/threaded_engine_perdevice.cc
@@ -226,7 +226,7 @@ class ThreadedEnginePerDevice : public ThreadedEngine {
   inline void GPUWorker(Context ctx,
                         bool is_copy_worker,
                         ThreadWorkerBlock<type> *block,
-                        std::shared_ptr<ThreadPool::SimpleEvent> ready_event) {
+                        std::shared_ptr<ThreadPool::SimpleEvent> const& ready_event) {
     this->is_worker_ = true;
 #if MXNET_USE_CUDA
     mshadow::Stream<gpu> *stream;