Revert 'Optimize the error messages of paddle CUDA API (PaddlePaddle#…

…23816)',test=develop
zhwesky2010 · Apr 21, 2020 · 6f59331 · 6f59331
1 parent 89cf4f9
commit 6f59331
Show file tree

Hide file tree

Showing 30 changed files with 496 additions and 645 deletions.
diff --git a/cmake/inference_lib.cmake b/cmake/inference_lib.cmake
@@ -135,12 +135,6 @@ copy(inference_lib_dist
         SRCS ${THREADPOOL_INCLUDE_DIR}/ThreadPool.h
         DSTS ${dst_dir})
 
-set(dst_dir "${FLUID_INFERENCE_INSTALL_DIR}/third_party/cudaerror/data")
-copy(inference_lib_dist
-        SRCS ${cudaerror_INCLUDE_DIR}
-        DSTS ${dst_dir})
-
-# CMakeCache Info
 copy(inference_lib_dist
         SRCS ${CMAKE_CURRENT_BINARY_DIR}/CMakeCache.txt
         DSTS ${FLUID_INFERENCE_INSTALL_DIR})
@@ -190,7 +184,7 @@ copy(fluid_lib_dist
         )
 
 set(module "framework")
-set(framework_lib_deps framework_proto data_feed_proto trainer_desc_proto)
+set(framework_lib_deps framework_proto)
 add_dependencies(fluid_lib_dist ${framework_lib_deps})
 copy(fluid_lib_dist
         SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/details/*.h ${PADDLE_BINARY_DIR}/paddle/fluid/framework/trainer_desc.pb.h ${PADDLE_BINARY_DIR}/paddle/fluid/framework/framework.pb.h ${PADDLE_BINARY_DIR}/paddle/fluid/framework/data_feed.pb.h ${src_dir}/${module}/ir/memory_optimize_pass/*.h
@@ -210,11 +204,11 @@ copy(fluid_lib_dist
         )
 
 set(module "platform")
-set(platform_lib_deps profiler_proto error_codes_proto cuda_error_proto)
+set(platform_lib_deps profiler_proto)
 add_dependencies(fluid_lib_dist ${platform_lib_deps})
 copy(fluid_lib_dist
-        SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/dynload/*.h ${src_dir}/${module}/details/*.h ${PADDLE_BINARY_DIR}/paddle/fluid/platform/*.pb.h
-        DSTS ${dst_dir}/${module} ${dst_dir}/${module}/dynload ${dst_dir}/${module}/details ${dst_dir}/${module}
+        SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/dynload/*.h ${src_dir}/${module}/details/*.h ${PADDLE_BINARY_DIR}/paddle/fluid/platform/profiler.pb.h ${PADDLE_BINARY_DIR}/paddle/fluid/platform/error_codes.pb.h
+        DSTS ${dst_dir}/${module} ${dst_dir}/${module}/dynload ${dst_dir}/${module}/details ${dst_dir}/${module} ${dst_dir}/${module}
         )
 
 set(module "string")
@@ -255,7 +249,6 @@ copy(inference_lib_dist
         SRCS ${ZLIB_INCLUDE_DIR} ${ZLIB_LIBRARIES}
         DSTS ${dst_dir} ${dst_dir}/lib)
 
-
 # CMakeCache Info
 copy(fluid_lib_dist
         SRCS ${FLUID_INFERENCE_INSTALL_DIR}/third_party ${CMAKE_CURRENT_BINARY_DIR}/CMakeCache.txt

diff --git a/cmake/third_party.cmake b/cmake/third_party.cmake
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-include(ExternalProject)
 # Creat a target named "third_party", which can compile external dependencies on all platform(windows/linux/mac)
 
 set(THIRD_PARTY_PATH  "${CMAKE_BINARY_DIR}/third_party" CACHE STRING
@@ -22,7 +21,6 @@ set(THIRD_PARTY_CACHE_PATH     "${CMAKE_SOURCE_DIR}"    CACHE STRING
     "A path cache third party source code to avoid repeated download.")
 
 set(THIRD_PARTY_BUILD_TYPE Release)
-set(third_party_deps)
 
 # cache funciton to avoid repeat download code of third_party.
 # This function has 4 parameters, URL / REPOSITOR / TAG / DIR:
@@ -102,32 +100,6 @@ MACRO(UNSET_VAR VAR_NAME)
     UNSET(${VAR_NAME})
 ENDMACRO()
 
-# Funciton to Download the dependencies during compilation
-# This function has 2 parameters, URL / DIRNAME:
-# 1. URL:           The download url of 3rd dependencies
-# 2. NAME:          The name of file, that determin the dirname
-#
-MACRO(file_download_and_uncompress URL NAME)
-  MESSAGE(STATUS "Download dependence[${NAME}] from ${URL}")
-  SET(EXTERNAL_PROJECT_NAME "extern_download_${NAME}")
-  SET(${NAME}_INCLUDE_DIR ${THIRD_PARTY_PATH}/${NAME}/data)
-  ExternalProject_Add(
-      ${EXTERNAL_PROJECT_NAME}
-      ${EXTERNAL_PROJECT_LOG_ARGS}
-      PREFIX                ${THIRD_PARTY_PATH}/${NAME}
-      URL                   ${URL}
-      DOWNLOAD_DIR          ${THIRD_PARTY_PATH}/${NAME}/data/
-      SOURCE_DIR            ${THIRD_PARTY_PATH}/${NAME}/data/
-      DOWNLOAD_NO_PROGRESS  1
-      CONFIGURE_COMMAND     ""
-      BUILD_COMMAND         ""
-      UPDATE_COMMAND        ""
-      INSTALL_COMMAND       ""
-    )
-  list(APPEND third_party_deps ${EXTERNAL_PROJECT_NAME})
-ENDMACRO()
-
-
 # Correction of flags on different Platform(WIN/MAC) and Print Warning Message
 if (APPLE)
     if(WITH_MKL)
@@ -206,13 +178,10 @@ include(external/dlpack)    # download dlpack
 include(external/xxhash)    # download, build, install xxhash
 include(external/warpctc)   # download, build, install warpctc
 
+set(third_party_deps)
 list(APPEND third_party_deps extern_eigen3 extern_gflags extern_glog extern_boost extern_xxhash)
 list(APPEND third_party_deps extern_zlib extern_dlpack extern_warpctc extern_threadpool)
 
-# download file
-set(CUDAERROR_URL  "https://paddlepaddledeps.bj.bcebos.com/cudaErrorMessage.tar.gz" CACHE STRING "" FORCE)
-file_download_and_uncompress(${CUDAERROR_URL} "cudaerror")
-
 if(WITH_AMD_GPU)
     include(external/rocprim)   # download, build, install rocprim
     list(APPEND third_party_deps extern_rocprim)
@@ -305,4 +274,4 @@ if (WITH_LITE)
     include(external/lite)
 endif (WITH_LITE)
 
-add_custom_target(third_party ALL DEPENDS ${third_party_deps})
+add_custom_target(third_party DEPENDS ${third_party_deps})
diff --git a/paddle/fluid/framework/details/nan_inf_utils_detail.cu b/paddle/fluid/framework/details/nan_inf_utils_detail.cu
@@ -152,7 +152,9 @@ void TensorCheckerVisitor<platform::CUDADeviceContext>::apply(
 
       PADDLE_ENFORCE_CUDA_SUCCESS(
           cudaMemcpyAsync(gpu_str_ptr, iter->first.c_str(), op_var.length() + 1,
-                          cudaMemcpyHostToDevice, dev_ctx->stream()));
+                          cudaMemcpyHostToDevice, dev_ctx->stream()),
+          platform::errors::External(
+              "Async cudaMemcpy op_var info to gpu failed."));
     } else {  // get
       auto iter = op_var2gpu_str.find(op_var);
       PADDLE_ENFORCE_EQ(iter != op_var2gpu_str.end(), true,

diff --git a/paddle/fluid/inference/tensorrt/plugin/split_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/split_op_plugin.cu
@@ -124,9 +124,12 @@ int SplitPlugin::enqueue(int batchSize, const void* const* inputs,
   float const* input_ptr = reinterpret_cast<float const*>(inputs[0]);
   float* const* h_odatas = reinterpret_cast<float* const*>(outputs);
   float** output_ptrs = thrust::raw_pointer_cast(&d_output_ptrs_[0]);
-  PADDLE_ENFORCE_CUDA_SUCCESS(cudaMemcpyAsync(
-      output_ptrs, h_odatas, d_output_ptrs_.size() * sizeof(float*),
-      cudaMemcpyHostToDevice, stream));
+  PADDLE_ENFORCE_CUDA_SUCCESS(
+      cudaMemcpyAsync(output_ptrs, h_odatas,
+                      d_output_ptrs_.size() * sizeof(float*),
+                      cudaMemcpyHostToDevice, stream),
+      platform::errors::External(
+          "CUDA Memcpy failed during split plugin run."));
 
   int outer_rows = outer_rows_ * batchSize;
 
@@ -241,9 +244,12 @@ int SplitPluginDynamic::enqueue(const nvinfer1::PluginTensorDesc* input_desc,
     float* const* h_odatas = reinterpret_cast<float* const*>(outputs);
     float** output_ptrs = thrust::raw_pointer_cast(&d_output_ptrs[0]);
 
-    PADDLE_ENFORCE_CUDA_SUCCESS(cudaMemcpyAsync(
-        output_ptrs, h_odatas, d_output_ptrs.size() * sizeof(float*),
-        cudaMemcpyHostToDevice, stream));
+    PADDLE_ENFORCE_CUDA_SUCCESS(
+        cudaMemcpyAsync(output_ptrs, h_odatas,
+                        d_output_ptrs.size() * sizeof(float*),
+                        cudaMemcpyHostToDevice, stream),
+        platform::errors::External(
+            "CUDA Memcpy failed during split plugin run."));
 
     split_kernel<<<grid, block, 0, stream>>>(
         d_segment_offsets.size(), d_segment_offsets_ptr, input_ptr, output_ptrs,
@@ -257,9 +263,12 @@ int SplitPluginDynamic::enqueue(const nvinfer1::PluginTensorDesc* input_desc,
     half* const* h_odatas = reinterpret_cast<half* const*>(outputs);
     half** output_ptrs = thrust::raw_pointer_cast(&d_output_ptrs[0]);
 
-    PADDLE_ENFORCE_CUDA_SUCCESS(cudaMemcpyAsync(
-        output_ptrs, h_odatas, d_output_ptrs.size() * sizeof(half*),
-        cudaMemcpyHostToDevice, stream));
+    PADDLE_ENFORCE_CUDA_SUCCESS(
+        cudaMemcpyAsync(output_ptrs, h_odatas,
+                        d_output_ptrs.size() * sizeof(half*),
+                        cudaMemcpyHostToDevice, stream),
+        platform::errors::External(
+            "CUDA Memcpy failed during split plugin run."));
 
     split_kernel<<<grid, block, 0, stream>>>(
         d_segment_offsets.size(), d_segment_offsets_ptr, input_ptr, output_ptrs,

diff --git a/paddle/fluid/memory/allocation/cuda_device_context_allocator.h b/paddle/fluid/memory/allocation/cuda_device_context_allocator.h
@@ -80,13 +80,17 @@ class CUDADeviceContextAllocator : public Allocator {
       : place_(place), default_stream_(default_stream) {
     platform::CUDADeviceGuard guard(place_.device);
     PADDLE_ENFORCE_CUDA_SUCCESS(
-        cudaEventCreate(&event_, cudaEventDisableTiming));
+        cudaEventCreate(&event_, cudaEventDisableTiming),
+        platform::errors::External(
+            "Create event failed in CUDADeviceContextAllocator"));
   }
 
   ~CUDADeviceContextAllocator() {
     if (event_) {
       platform::CUDADeviceGuard guard(place_.device);
-      PADDLE_ENFORCE_CUDA_SUCCESS(cudaEventDestroy(event_));
+      PADDLE_ENFORCE_CUDA_SUCCESS(
+          cudaEventDestroy(event_),
+          "Destory event failed in CUDADeviceContextAllocator destroctor");
     }
   }
 
@@ -99,9 +103,12 @@ class CUDADeviceContextAllocator : public Allocator {
     auto allocation =
         new CUDADeviceContextAllocation(memory::Alloc(place_, size));
     // Wait for the event on stream
-    PADDLE_ENFORCE_CUDA_SUCCESS(cudaEventRecord(event_, default_stream_));
     PADDLE_ENFORCE_CUDA_SUCCESS(
-        cudaStreamWaitEvent(default_stream_, event_, 0));
+        cudaEventRecord(event_, default_stream_),
+        "Failed to record event in CUDADeviceContextAllocator");
+    PADDLE_ENFORCE_CUDA_SUCCESS(
+        cudaStreamWaitEvent(default_stream_, event_, 0),
+        "Failed to wait event in CUDADeviceContextAllocator");
     return allocation;
   }
 

diff --git a/paddle/fluid/operators/argsort_op.cu b/paddle/fluid/operators/argsort_op.cu
@@ -141,7 +141,12 @@ void ArgFullSort(const platform::CUDADeviceContext& ctx, const Tensor* input,
         num_rows, segment_offsets_t, segment_offsets_t + 1, 0, sizeof(T) * 8,
         cu_stream);
   }
-  PADDLE_ENFORCE_CUDA_SUCCESS(err);
+  PADDLE_ENFORCE_CUDA_SUCCESS(
+      err,
+      "ArgSortOP failed as could not launch "
+      "cub::DeviceSegmentedRadixSort::SortPairsDescending to calculate"
+      "temp_storage_bytes, status:%s.",
+      temp_storage_bytes, cudaGetErrorString(err));
 
   Tensor temp_storage;
   temp_storage.mutable_data<uint8_t>(ctx.GetPlace(), temp_storage_bytes);
@@ -160,7 +165,12 @@ void ArgFullSort(const platform::CUDADeviceContext& ctx, const Tensor* input,
         cu_stream);
   }
 
-  PADDLE_ENFORCE_CUDA_SUCCESS(err);
+  PADDLE_ENFORCE_CUDA_SUCCESS(
+      err,
+      "ArgSortOP failed as could not launch "
+      "cub::DeviceSegmentedRadixSort::SortPairsDescending to sort input, "
+      "temp_storage_bytes:%d status:%s.",
+      temp_storage_bytes, cudaGetErrorString(err));
 }
 
 template <typename T, typename IndType>