Skip to content

Commit

Permalink
add result check and add parameter hhm cost stat (PaddlePaddle#66)
Browse files Browse the repository at this point in the history
add result check and add parameter hhm cost stat
  • Loading branch information
chao9527 authored Jul 13, 2022
1 parent 988aa30 commit 22e1835
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 8 deletions.
6 changes: 3 additions & 3 deletions paddle/fluid/framework/fleet/heter_ps/hashtable_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -156,9 +156,9 @@ __global__ void dy_mf_update_kernel(Table* table,
// Constructs a hash table with `capacity` slots backed by a GPU-side
// concurrent container, and uploads the host optimizer configuration to a
// device-resident copy so kernels can read it.
//
// NOTE: the diff rendering had duplicated this allocation — one unchecked
// cudaMalloc/cudaMemcpy pair followed by the checked pair — which, taken
// literally, leaks the first allocation. Only the checked pair is kept.
template <typename KeyType, typename ValType>
HashTable<KeyType, ValType>::HashTable(size_t capacity) {
  container_ = new TableContainer<KeyType, ValType>(capacity);
  // Allocate the device copy of the optimizer config and push the host
  // values; both calls are wrapped so allocation/copy failures surface
  // immediately instead of as a sticky error at a later CUDA call.
  CUDA_RT_CALL(cudaMalloc((void**)&device_optimizer_config_, sizeof(OptimizerConfig)));
  CUDA_RT_CALL(cudaMemcpy((void*)device_optimizer_config_, &host_optimizer_config_,
                          sizeof(OptimizerConfig), cudaMemcpyHostToDevice));
  rwlock_.reset(new phi::RWLock);
}

Expand Down
11 changes: 6 additions & 5 deletions paddle/fluid/framework/fleet/heter_ps/mem_pool.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ limitations under the License. */
// "paddle/fluid/framework/fleet/heter_ps/cudf/concurrent_unordered_map.cuh.h"
#include <iostream>
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/framework/fleet/heter_ps/gpu_graph_utils.h"
#include "paddle/fluid/framework/fleet/heter_ps/cudf/managed.cuh"

namespace paddle {
Expand Down Expand Up @@ -60,9 +61,9 @@ class HBMMemoryPool : public managed {
block_size_ = mem_pool->block_size();
VLOG(3) << "hbm memory pool with capacity" << capacity_
<< " bs: " << block_size_;
cudaMalloc(&mem_, block_size_ * capacity_);
cudaMemcpy(mem_, mem_pool->mem(), mem_pool->byte_size(),
cudaMemcpyHostToDevice);
CUDA_CHECK(cudaMalloc(&mem_, block_size_ * capacity_));
CUDA_CHECK(cudaMemcpy(mem_, mem_pool->mem(), mem_pool->byte_size(),
cudaMemcpyHostToDevice));
}

~HBMMemoryPool() {
Expand All @@ -78,8 +79,8 @@ class HBMMemoryPool : public managed {
cudaFree(mem_);
mem_ = NULL;
capacity_ = capacity;
cudaMalloc(&mem_, (block_size_ * capacity / 8 + 1) * 8);
cudaMemset(mem_, 0, block_size_ * capacity);
CUDA_CHECK(cudaMalloc(&mem_, (block_size_ * capacity / 8 + 1) * 8));
CUDA_CHECK(cudaMemset(mem_, 0, block_size_ * capacity));
}

// Raw accessor for the pool's buffer (allocated with cudaMalloc, so this is
// a device pointer — do not dereference on the host).
char* mem() { return mem_; }
Expand Down
3 changes: 3 additions & 0 deletions paddle/fluid/framework/fleet/ps_gpu_wrapper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ limitations under the License. */
#ifdef PADDLE_WITH_HETERPS

#include "paddle/fluid/framework/fleet/ps_gpu_wrapper.h"
#include "paddle/fluid/framework/fleet/heter_ps/gpu_graph_utils.h"

#include <algorithm>
#include <deque>
Expand Down Expand Up @@ -821,7 +822,9 @@ void PSGPUWrapper::BeginPass() {
platform::errors::Fatal("[BeginPass] current task is not ended."));
}

debug_gpu_memory_info("befor build task");
build_task();
debug_gpu_memory_info("after build task");
timer.Pause();

if (current_task_ == nullptr) {
Expand Down

0 comments on commit 22e1835

Please sign in to comment.