diff --git a/common/install_cuda.sh b/common/install_cuda.sh index de64d43df..c9b9c9d1a 100644 --- a/common/install_cuda.sh +++ b/common/install_cuda.sh @@ -24,7 +24,7 @@ function install_116 { } function install_117 { - echo "Installing CUDA 11.7 and CuDNN 8.5" + echo "Installing CUDA 11.7 and CuDNN 8.5 and NCCL 2.14" rm -rf /usr/local/cuda-11.7 /usr/local/cuda # install CUDA 11.7.0 in the same container wget -q https://developer.download.nvidia.com/compute/cuda/11.7.0/local_installers/cuda_11.7.0_515.43.04_linux.run @@ -42,10 +42,20 @@ function install_117 { cd .. rm -rf tmp_cudnn ldconfig + + # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses + mkdir tmp_nccl && cd tmp_nccl + wget -q https://developer.download.nvidia.com/compute/redist/nccl/v2.14/nccl_2.14.3-1+cuda11.7_x86_64.txz + tar xf nccl_2.14.3-1+cuda11.7_x86_64.txz + cp -a nccl_2.14.3-1+cuda11.7_x86_64/include/* /usr/local/cuda/include/ + cp -a nccl_2.14.3-1+cuda11.7_x86_64/lib/* /usr/local/cuda/lib64/ + cd .. + rm -rf tmp_nccl + ldconfig } function install_118 { - echo "Installing CUDA 11.8 and cuDNN 8.5" + echo "Installing CUDA 11.8 and cuDNN 8.5 and NCCL 2.15" rm -rf /usr/local/cuda-11.8 /usr/local/cuda # install CUDA 11.8.0 in the same container wget -q https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run @@ -63,6 +73,16 @@ function install_118 { cd .. rm -rf tmp_cudnn ldconfig + + # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses + mkdir tmp_nccl && cd tmp_nccl + wget -q https://developer.download.nvidia.com/compute/redist/nccl/v2.15.5/nccl_2.15.5-1+cuda11.8_x86_64.txz + tar xf nccl_2.15.5-1+cuda11.8_x86_64.txz + cp -a nccl_2.15.5-1+cuda11.8_x86_64/include/* /usr/local/cuda/include/ + cp -a nccl_2.15.5-1+cuda11.8_x86_64/lib/* /usr/local/cuda/lib64/ + cd .. + rm -rf tmp_nccl + ldconfig } function prune_116 { diff --git a/manywheel/build_cuda.sh b/manywheel/build_cuda.sh index 2498f7a22..8166114b3 100644 --- a/manywheel/build_cuda.sh +++ b/manywheel/build_cuda.sh @@ -142,76 +142,14 @@ DEPS_SONAME=( "libcublasLt.so.11" "libgomp.so.1" ) -elif [[ $CUDA_VERSION == "11.7" ]]; then +elif [[ $CUDA_VERSION == "11.7" || $CUDA_VERSION == "11.8" ]]; then export USE_STATIC_CUDNN=0 # Try parallelizing nvcc as well export TORCH_NVCC_FLAGS="-Xfatbin -compress-all --threads 2" DEPS_LIST=( - "/usr/local/cuda/lib64/libcudart.so.11.0" - "/usr/local/cuda/lib64/libnvToolsExt.so.1" - "/usr/local/cuda/lib64/libnvrtc.so.11.2" # this is not a mistake for 11.7, it links to 11.7.50 - "/usr/local/cuda/lib64/libnvrtc-builtins.so.11.7" "$LIBGOMP_PATH" ) DEPS_SONAME=( - "libcudart.so.11.0" - "libnvToolsExt.so.1" - "libnvrtc.so.11.2" - "libnvrtc-builtins.so.11.7" - "libgomp.so.1" - ) - - if [[ -z "$PYTORCH_EXTRA_INSTALL_REQUIREMENTS" ]]; then - echo "Bundling with cudnn and cublas." - DEPS_LIST+=( - "/usr/local/cuda/lib64/libcudnn_adv_infer.so.8" - "/usr/local/cuda/lib64/libcudnn_adv_train.so.8" - "/usr/local/cuda/lib64/libcudnn_cnn_infer.so.8" - "/usr/local/cuda/lib64/libcudnn_cnn_train.so.8" - "/usr/local/cuda/lib64/libcudnn_ops_infer.so.8" - "/usr/local/cuda/lib64/libcudnn_ops_train.so.8" - "/usr/local/cuda/lib64/libcudnn.so.8" - "/usr/local/cuda/lib64/libcublas.so.11" - "/usr/local/cuda/lib64/libcublasLt.so.11" - ) - DEPS_SONAME+=( - "libcudnn_adv_infer.so.8" - "libcudnn_adv_train.so.8" - "libcudnn_cnn_infer.so.8" - "libcudnn_cnn_train.so.8" - "libcudnn_ops_infer.so.8" - "libcudnn_ops_train.so.8" - "libcudnn.so.8" - "libcublas.so.11" - "libcublasLt.so.11" - ) - else - echo "Using cudnn and cublas from pypi." - CUDA_RPATHS=( - '$ORIGIN/../../nvidia/cublas/lib' - '$ORIGIN/../../nvidia/cudnn/lib' - ) - CUDA_RPATHS=$(IFS=: ; echo "${CUDA_RPATHS[*]}") - export C_SO_RPATH=$CUDA_RPATHS':$ORIGIN:$ORIGIN/lib' - export LIB_SO_RPATH=$CUDA_RPATHS':$ORIGIN' - export FORCE_RPATH="--force-rpath" - fi -elif [[ $CUDA_VERSION == "11.8" ]]; then - export USE_STATIC_CUDNN=0 - # Try parallelizing nvcc as well - export TORCH_NVCC_FLAGS="-Xfatbin -compress-all --threads 2" - DEPS_LIST=( - "/usr/local/cuda/lib64/libcudart.so.11.0" - "/usr/local/cuda/lib64/libnvToolsExt.so.1" - "/usr/local/cuda/lib64/libnvrtc.so.11.2" # this is not a mistake for 11.8, it links to 11.8.89 - "/usr/local/cuda/lib64/libnvrtc-builtins.so.11.8" - "$LIBGOMP_PATH" - ) - DEPS_SONAME=( - "libcudart.so.11.0" - "libnvToolsExt.so.1" - "libnvrtc.so.11.2" - "libnvrtc-builtins.so.11.8" "libgomp.so.1" ) @@ -227,6 +165,9 @@ elif [[ $CUDA_VERSION == "11.8" ]]; then "/usr/local/cuda/lib64/libcudnn.so.8" "/usr/local/cuda/lib64/libcublas.so.11" "/usr/local/cuda/lib64/libcublasLt.so.11" + "/usr/local/cuda/lib64/libcudart.so.11.0" + "/usr/local/cuda/lib64/libnvToolsExt.so.1" + "/usr/local/cuda/lib64/libnvrtc.so.11.2" # this is not a mistake, it links to more specific cuda version ) DEPS_SONAME+=( "libcudnn_adv_infer.so.8" @@ -238,17 +179,52 @@ elif [[ $CUDA_VERSION == "11.8" ]]; then "libcudnn.so.8" "libcublas.so.11" "libcublasLt.so.11" + "libcudart.so.11.0" + "libnvToolsExt.so.1" + "libnvrtc.so.11.2" ) + if [[ $CUDA_VERSION == "11.7" ]]; then + DEPS_LIST+=( + "/usr/local/cuda/lib64/libnvrtc-builtins.so.11.7" + ) + DEPS_SONAME+=( + "libnvrtc-builtins.so.11.7" + ) + fi + if [[ $CUDA_VERSION == "11.8" ]]; then + DEPS_LIST+=( + "/usr/local/cuda/lib64/libnvrtc-builtins.so.11.8" + ) + DEPS_SONAME+=( + "libnvrtc-builtins.so.11.8" + ) + fi else - echo "Using cudnn and cublas from pypi." + echo "Using nvidia libs from pypi." CUDA_RPATHS=( '$ORIGIN/../../nvidia/cublas/lib' + '$ORIGIN/../../nvidia/cuda_cupti/lib' + '$ORIGIN/../../nvidia/cuda_nvrtc/lib' + '$ORIGIN/../../nvidia/cuda_runtime/lib' '$ORIGIN/../../nvidia/cudnn/lib' + '$ORIGIN/../../nvidia/cufft/lib' + '$ORIGIN/../../nvidia/curand/lib' + '$ORIGIN/../../nvidia/cusolver/lib' + '$ORIGIN/../../nvidia/cusparse/lib' + '$ORIGIN/../../nvidia/nccl/lib' + '$ORIGIN/../../nvidia/nvtx/lib' ) CUDA_RPATHS=$(IFS=: ; echo "${CUDA_RPATHS[*]}") export C_SO_RPATH=$CUDA_RPATHS':$ORIGIN:$ORIGIN/lib' export LIB_SO_RPATH=$CUDA_RPATHS':$ORIGIN' export FORCE_RPATH="--force-rpath" + export USE_STATIC_NCCL=0 + export USE_SYSTEM_NCCL=1 + export ATEN_STATIC_CUDA=0 + export USE_CUDA_STATIC_LINK=0 + export USE_CUPTI_SO=1 + export NCCL_INCLUDE_DIR="/usr/local/cuda/include/" + export NCCL_LIB_DIR="/usr/local/cuda/lib64/" fi else echo "Unknown cuda version $CUDA_VERSION" diff --git a/release/pypi/prep_binary_for_pypi.sh b/release/pypi/prep_binary_for_pypi.sh index e3b2b28c6..fdd9bf4a0 100755 --- a/release/pypi/prep_binary_for_pypi.sh +++ b/release/pypi/prep_binary_for_pypi.sh @@ -56,22 +56,8 @@ for whl_file in "$@"; do if [[ $whl_file == *"with.pypi.cudnn"* ]]; then rm -rf "${whl_dir}/caffe2" rm -rf "${whl_dir}"/torch/lib/libnvrtc* - sed -i -e "s/Requires-Dist: nvidia-cuda-runtime-cu11/Requires-Dist: nvidia-cuda-runtime-cu11 (==11.7.99)/" "${whl_dir}"/*/METADATA - sed -i -e "/^Requires-Dist: nvidia-cublas-cu11 (==11.10.3.66).*/a Requires-Dist: nvidia-cuda-nvrtc-cu11 (==11.7.99) ; platform_system == \"Linux\"" "${whl_dir}"/*/METADATA sed -i -e "s/-with-pypi-cudnn//g" "${whl_dir}/torch/version.py" - find "${whl_dir}/torch/" -maxdepth 1 -type f -name "*.so*" | while read sofile; do - patchelf --set-rpath '$ORIGIN/../../nvidia/cublas/lib:$ORIGIN/../../nvidia/cudnn/lib:$ORIGIN/../../nvidia/cuda_nvrtc/lib:$ORIGIN:$ORIGIN/lib' \ - --force-rpath $sofile - patchelf --print-rpath $sofile - done - - find "${whl_dir}/torch/lib" -maxdepth 1 -type f -name "*.so*" | while read sofile; do - patchelf --set-rpath '$ORIGIN/../../nvidia/cublas/lib:$ORIGIN/../../nvidia/cudnn/lib:$ORIGIN/../../nvidia/cuda_nvrtc/lib:$ORIGIN' \ - --force-rpath $sofile - patchelf --print-rpath $sofile - done - patchelf --replace-needed libnvrtc-d833c4f3.so.11.2 libnvrtc.so.11.2 "${whl_dir}/torch/lib/libcaffe2_nvrtc.so" fi find "${dist_info_folder}" -type f -exec sed -i "s!${version_with_suffix}!${version_no_suffix}!" {} \;