From a2450cd44ffa0c9b42a49ef0117eedb78defe8cd Mon Sep 17 00:00:00 2001 From: CaiZhi Date: Fri, 8 Sep 2023 09:54:52 +0800 Subject: [PATCH] [MT484] fix(phi): fix musa version check problem (#56) --- README.md | 5 +-- cmake/generic.cmake | 3 ++ paddle/fluid/pybind/CMakeLists.txt | 2 ++ paddle/phi/backends/gpu/gpu_resources.cc | 40 +++++++++++++++-------- paddle/phi/backends/gpu/musa/musa_info.cc | 8 +++-- python/CMakeLists.txt | 2 ++ test/CMakeLists.txt | 3 ++ 7 files changed, 44 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 764f2ba742660..22cca88438112 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ ```bash apt-get install ccache -pip install -r requirements.txt +pip install -r python/requirements.txt ``` #### Set Important Environment Variables @@ -76,7 +76,8 @@ docker run -it --privileged --name=paddle_musa_dev --env MTHREADS_VISIBLE_DEVICE | Docker Tag | Description | | ---- | --- | -| [**v0.1.4/latest**](https://sh-harbor.mthreads.com/harbor/projects/20/repositories/musa-paddle-dev/artifacts-tab) | musatoolkits-v1.4.2 (driver2.2.0 develop or newer)
mcc-20230823-daily
mudnn 20230823-daily
mccl_20230823-daily
muAlg_dev-20230823-daily
muRAND_dev1.0.0
muSPARSE_dev0.1.0
muThrust_dev-0.1.1 | +| [**v0.1.6/latest**](https://sh-harbor.mthreads.com/harbor/projects/20/repositories/musa-paddle-dev/artifacts-tab) | musatoolkits-20230903 (driver2.3.0 develop or newer)
mudnn 20230903-daily
mccl_20230903-daily
muAlg_dev-20230903-daily
muRAND_dev1.0.0
muSPARSE_dev0.1.0
muThrust_dev-0.1.1 | +| [**v0.1.4**](https://sh-harbor.mthreads.com/harbor/projects/20/repositories/musa-paddle-dev/artifacts-tab) | musatoolkits-v1.4.2 (driver2.2.0 develop or newer)
mcc-20230823-daily
mudnn 20230823-daily
mccl_20230823-daily
muAlg_dev-20230823-daily
muRAND_dev1.0.0
muSPARSE_dev0.1.0
muThrust_dev-0.1.1 | | [**v0.1.3**](https://sh-harbor.mthreads.com/harbor/projects/20/repositories/musa-paddle-dev/artifacts-tab) | musatoolkits-v1.4.0 (ddk_1.4.0 develop or newer)
mcc-20230814-daily
mudnn v1.4.0
mccl_rc1.1.0
muAlg_dev-20230814-daily
muRAND_dev1.0.0
muSPARSE_dev0.1.0
muThrust_dev-0.1.1 | | [**v0.1.2**](https://sh-harbor.mthreads.com/harbor/projects/20/repositories/musa-paddle-dev/artifacts-tab) | musatoolkits-v1.4.0 (ddk_1.4.0 develop or newer)
mcc-20230814-daily
mudnn v1.4.0
mccl_rc1.1.0
muAlg_dev-20230814-daily
muRAND_dev1.0.0
muSPARSE_dev0.1.0
muThrust_dev-0.1.1 | | [**v0.1.1**](https://sh-harbor.mthreads.com/harbor/projects/20/repositories/musa-paddle-dev/artifacts-tab) | musatoolkits-v1.4.0 (ddk_1.4.0 develop or newer)
mudnn v1.4.0
mccl_rc1.1.0
muAlg_dev-0.1.1
muRAND_dev1.0.0
muSPARSE_dev0.1.0
muThrust_dev-0.1.1 | diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 56c9c0de2f24b..af2c5b608635d 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -457,6 +457,9 @@ function(cc_test_build TARGET_NAME) endif() if(WITH_MUSA) target_link_libraries(${TARGET_NAME} ${MUSARTC_LIB}) + # libtinfo.so depended by libmusa.so is located in '/usr/lib/x86_64-linux-gnu/' + target_link_options(${TARGET_NAME} PRIVATE + -Wl,-rpath,/usr/lib/x86_64-linux-gnu/) endif() check_coverage_opt(${TARGET_NAME} ${cc_test_SRCS}) endif() diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index 52511cef4e36b..845f8930952a4 100755 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -296,6 +296,7 @@ if(WITH_PYTHON) add_executable(eager_legacy_op_function_generator eager_legacy_op_function_generator.cc) if(WITH_MUSA) + # libtinfo.so depended by libmusa.so is located in '/usr/lib/x86_64-linux-gnu/' target_link_options(eager_legacy_op_function_generator PRIVATE -Wl,-rpath,/usr/lib/x86_64-linux-gnu/) endif() @@ -304,6 +305,7 @@ if(WITH_PYTHON) if(NOT WIN32) add_executable(kernel_signature_generator kernel_signature_generator.cc) if(WITH_MUSA) + # libtinfo.so depended by libmusa.so is located in '/usr/lib/x86_64-linux-gnu/' target_link_options(kernel_signature_generator PRIVATE -Wl,-rpath,/usr/lib/x86_64-linux-gnu/) endif() diff --git a/paddle/phi/backends/gpu/gpu_resources.cc b/paddle/phi/backends/gpu/gpu_resources.cc index 9a59781c7b276..fc2711f98da3a 100644 --- a/paddle/phi/backends/gpu/gpu_resources.cc +++ b/paddle/phi/backends/gpu/gpu_resources.cc @@ -123,6 +123,17 @@ void InitGpuProperties(Place place, } #endif +#ifdef PADDLE_WITH_MUSA + LOG_FIRST_N(INFO, 1) << "Please NOTE: device: " + << static_cast(place.device) + << ", GPU Compute Capability: " + << *compute_capability / 10 << "." + << *compute_capability % 10 + << ", Driver API Version: " << *driver_version / 10000 + << "." 
<< (*driver_version % 10000) / 100 + << ", Runtime API Version: " << *runtime_version / 10000 + << "." << (*runtime_version % 10000) / 100; +#else // TODO(wilber): glog may be replaced in the future? LOG_FIRST_N(WARNING, 1) << "Please NOTE: device: " << static_cast(place.device) @@ -134,6 +145,7 @@ void InitGpuProperties(Place place, << ", Runtime API Version: " << *runtime_version / 1000 << "." << (*runtime_version % 100) / 10; +#endif #ifdef PADDLE_WITH_HIP size_t miopen_major, miopen_minor, miopen_patch; PADDLE_ENFORCE_GPU_SUCCESS( @@ -156,39 +168,39 @@ void InitGpuProperties(Place place, // TODO(@caizhi): mudnnGetVersion is not supported for MUSA now. // Requests have been submitted to Mudnn. // size_t mudnn_dso_ver = dynload::mudnnGetVersion(); - size_t mudnn_dso_ver = 1100; - LOG_FIRST_N(WARNING, 1) << "device: " << static_cast(place.device) - << ", muDNN Version: " << mudnn_dso_ver / 1000 << "." - << (mudnn_dso_ver % 1000) / 100 << "."; + size_t mudnn_dso_ver = 2300; + LOG_FIRST_N(INFO, 1) << "device: " << static_cast(place.device) + << ", muDNN Version: " << mudnn_dso_ver / 1000 << "." + << (mudnn_dso_ver % 1000) / 100 << "."; // Check MUSA/MUDNN version compatiblity - auto local_musa_version = - (*driver_version / 1000) * 10 + (*driver_version % 100) / 10; - auto compile_musa_version = - (MUSA_VERSION / 1000) * 10 + (MUSA_VERSION % 100) / 10; + auto local_musa_version = *driver_version; + int compile_musa_version = MUSA_VERSION; #if defined(__linux__) PADDLE_ENFORCE_EQ( - (local_musa_version / 10 < compile_musa_version / 10) && + (local_musa_version / 100 < compile_musa_version / 100) && (mudnn_dso_ver / 1000 < MUDNN_VERSION / 1000), false, phi::errors::InvalidArgument( - "The installed Paddle is compiled with MUDA%d/muDNN%d," + "The installed Paddle is compiled with MUSA%d/muDNN%d," "but MUSA/muDNN version in your machine is MUSA%d/muDNN%d. " "which will cause serious incompatible bug. 
" "Please recompile or reinstall Paddle with compatible MUSA/muDNN " "version.", - compile_musa_version / 10, + compile_musa_version / 10000, MUDNN_VERSION / 1000, - local_musa_version / 10, + local_musa_version / 10000, mudnn_dso_ver / 1000)); #endif if (local_musa_version < compile_musa_version) { LOG_FIRST_N(WARNING, 1) << "WARNING: device: " << static_cast(place.device) << ". The installed Paddle is compiled with MUSA " - << compile_musa_version / 10 << "." << compile_musa_version % 10 + << compile_musa_version / 10000 << "." + << (compile_musa_version % 1000) / 100 << ", but MUSA runtime version in your machine is " - << local_musa_version / 10 << "." << local_musa_version % 10 + << local_musa_version / 10000 << "." + << (local_musa_version % 1000) / 100 << ", which may cause serious incompatible bug. " << "Please recompile or reinstall Paddle with compatible MUSA " "version."; diff --git a/paddle/phi/backends/gpu/musa/musa_info.cc b/paddle/phi/backends/gpu/musa/musa_info.cc index f244601b9d9cc..41a32373d1aec 100644 --- a/paddle/phi/backends/gpu/musa/musa_info.cc +++ b/paddle/phi/backends/gpu/musa/musa_info.cc @@ -34,8 +34,8 @@ int DnnVersion() { if (!dynload::HasCUDNN()) return -1; // TODO(@caizhi): mudnnGetVersion is not supported now. // version info will be returned from mudnnGetVersion later. 
- const int version_major = 1; - const int version_minor = 1; + const int version_major = 2; + const int version_minor = 3; const int version_patch = 0; return version_major * 1000 + version_minor * 100 + version_patch; } @@ -99,7 +99,7 @@ int GetGPUComputeCapability(int id) { PADDLE_ENFORCE_GPU_SUCCESS(major_error_code); PADDLE_ENFORCE_GPU_SUCCESS(minor_error_code); - return major * 100 + minor; + return major * 10 + minor; } int GetGPURuntimeVersion(int id) { @@ -111,6 +111,7 @@ int GetGPURuntimeVersion(int id) { id, GetGPUDeviceCount())); int runtime_version = 0; + // Note: runtime_version = MAJOR * 10000 + MINOR * 100 + PATCH PADDLE_ENFORCE_GPU_SUCCESS(musaRuntimeGetVersion(&runtime_version)); return runtime_version; } @@ -124,6 +125,7 @@ int GetGPUDriverVersion(int id) { id, GetGPUDeviceCount())); int driver_version = 0; + // Note: driver_version = MAJOR * 10000 + MINOR * 100 + PATCH PADDLE_ENFORCE_GPU_SUCCESS(musaDriverGetVersion(&driver_version)); return driver_version; } diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index cd7dc7e12f2a3..c69c32dc1dca7 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -44,6 +44,8 @@ set(PY_FILES paddle/__init__.py ${UTILS_PY_FILES} ${FLUID_PY_FILES}) if(WITH_GPU) set(PACKAGE_NAME "paddlepaddle-gpu") +elseif(WITH_MUSA) + set(PACKAGE_NAME "paddlepaddle-musa") elseif(WITH_ROCM) set(PACKAGE_NAME "paddlepaddle-rocm") elseif(WITH_XPU) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 3856dd11d0cde..ebb6f69d97334 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -191,6 +191,9 @@ if(${len} GREATER_EQUAL 1) endif() if(WITH_MUSA) target_link_libraries(${test_name} ${MUSARTC_LIB}) + # libtinfo.so depended by libmusa.so is located in '/usr/lib/x86_64-linux-gnu/' + target_link_options(${test_name} PRIVATE + -Wl,-rpath,/usr/lib/x86_64-linux-gnu/) endif() if(APPLE) target_link_libraries(