Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds infra to use nvidia dependencies from pypi and cleans up patches #1248

Merged
merged 8 commits into [target branch] from [source branch] (branch names missing from this capture)
Jan 3, 2023
24 changes: 22 additions & 2 deletions common/install_cuda.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ function install_116 {
}

function install_117 {
echo "Installing CUDA 11.7 and CuDNN 8.5"
echo "Installing CUDA 11.7 and CuDNN 8.5 and NCCL 2.14"
rm -rf /usr/local/cuda-11.7 /usr/local/cuda
# install CUDA 11.7.0 in the same container
wget -q https://developer.download.nvidia.com/compute/cuda/11.7.0/local_installers/cuda_11.7.0_515.43.04_linux.run
Expand All @@ -42,10 +42,20 @@ function install_117 {
cd ..
rm -rf tmp_cudnn
ldconfig

# NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
mkdir tmp_nccl && cd tmp_nccl
wget -q https://developer.download.nvidia.com/compute/redist/nccl/v2.14/nccl_2.14.3-1+cuda11.7_x86_64.txz
tar xf nccl_2.14.3-1+cuda11.7_x86_64.txz
cp -a nccl_2.14.3-1+cuda11.7_x86_64/include/* /usr/local/cuda/include/
cp -a nccl_2.14.3-1+cuda11.7_x86_64/lib/* /usr/local/cuda/lib64/
cd ..
rm -rf tmp_nccl
ldconfig
}

function install_118 {
echo "Installing CUDA 11.8 and cuDNN 8.5"
echo "Installing CUDA 11.8 and cuDNN 8.5 and NCCL 2.15"
rm -rf /usr/local/cuda-11.8 /usr/local/cuda
# install CUDA 11.8.0 in the same container
wget -q https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run
Expand All @@ -63,6 +73,16 @@ function install_118 {
cd ..
rm -rf tmp_cudnn
ldconfig

# NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
mkdir tmp_nccl && cd tmp_nccl
wget -q https://developer.download.nvidia.com/compute/redist/nccl/v2.15.5/nccl_2.15.5-1+cuda11.8_x86_64.txz
tar xf nccl_2.15.5-1+cuda11.8_x86_64.txz
cp -a nccl_2.15.5-1+cuda11.8_x86_64/include/* /usr/local/cuda/include/
cp -a nccl_2.15.5-1+cuda11.8_x86_64/lib/* /usr/local/cuda/lib64/
cd ..
rm -rf tmp_nccl
ldconfig
}

function prune_116 {
Expand Down
104 changes: 40 additions & 64 deletions manywheel/build_cuda.sh
Original file line number Diff line number Diff line change
Expand Up @@ -142,76 +142,14 @@ DEPS_SONAME=(
"libcublasLt.so.11"
"libgomp.so.1"
)
elif [[ $CUDA_VERSION == "11.7" ]]; then
elif [[ $CUDA_VERSION == "11.7" || $CUDA_VERSION == "11.8" ]]; then
export USE_STATIC_CUDNN=0
# Try parallelizing nvcc as well
export TORCH_NVCC_FLAGS="-Xfatbin -compress-all --threads 2"
DEPS_LIST=(
"/usr/local/cuda/lib64/libcudart.so.11.0"
"/usr/local/cuda/lib64/libnvToolsExt.so.1"
"/usr/local/cuda/lib64/libnvrtc.so.11.2" # this is not a mistake for 11.7, it links to 11.7.50
"/usr/local/cuda/lib64/libnvrtc-builtins.so.11.7"
"$LIBGOMP_PATH"
)
DEPS_SONAME=(
"libcudart.so.11.0"
"libnvToolsExt.so.1"
"libnvrtc.so.11.2"
"libnvrtc-builtins.so.11.7"
"libgomp.so.1"
)

if [[ -z "$PYTORCH_EXTRA_INSTALL_REQUIREMENTS" ]]; then
echo "Bundling with cudnn and cublas."
DEPS_LIST+=(
"/usr/local/cuda/lib64/libcudnn_adv_infer.so.8"
"/usr/local/cuda/lib64/libcudnn_adv_train.so.8"
"/usr/local/cuda/lib64/libcudnn_cnn_infer.so.8"
"/usr/local/cuda/lib64/libcudnn_cnn_train.so.8"
"/usr/local/cuda/lib64/libcudnn_ops_infer.so.8"
"/usr/local/cuda/lib64/libcudnn_ops_train.so.8"
"/usr/local/cuda/lib64/libcudnn.so.8"
"/usr/local/cuda/lib64/libcublas.so.11"
"/usr/local/cuda/lib64/libcublasLt.so.11"
)
DEPS_SONAME+=(
"libcudnn_adv_infer.so.8"
"libcudnn_adv_train.so.8"
"libcudnn_cnn_infer.so.8"
"libcudnn_cnn_train.so.8"
"libcudnn_ops_infer.so.8"
"libcudnn_ops_train.so.8"
"libcudnn.so.8"
"libcublas.so.11"
"libcublasLt.so.11"
)
else
echo "Using cudnn and cublas from pypi."
CUDA_RPATHS=(
'$ORIGIN/../../nvidia/cublas/lib'
'$ORIGIN/../../nvidia/cudnn/lib'
)
CUDA_RPATHS=$(IFS=: ; echo "${CUDA_RPATHS[*]}")
export C_SO_RPATH=$CUDA_RPATHS':$ORIGIN:$ORIGIN/lib'
export LIB_SO_RPATH=$CUDA_RPATHS':$ORIGIN'
export FORCE_RPATH="--force-rpath"
fi
elif [[ $CUDA_VERSION == "11.8" ]]; then
export USE_STATIC_CUDNN=0
# Try parallelizing nvcc as well
export TORCH_NVCC_FLAGS="-Xfatbin -compress-all --threads 2"
DEPS_LIST=(
"/usr/local/cuda/lib64/libcudart.so.11.0"
"/usr/local/cuda/lib64/libnvToolsExt.so.1"
"/usr/local/cuda/lib64/libnvrtc.so.11.2" # this is not a mistake for 11.8, it links to 11.8.89
"/usr/local/cuda/lib64/libnvrtc-builtins.so.11.8"
"$LIBGOMP_PATH"
)
DEPS_SONAME=(
"libcudart.so.11.0"
"libnvToolsExt.so.1"
"libnvrtc.so.11.2"
"libnvrtc-builtins.so.11.8"
"libgomp.so.1"
)

Expand All @@ -227,6 +165,9 @@ elif [[ $CUDA_VERSION == "11.8" ]]; then
"/usr/local/cuda/lib64/libcudnn.so.8"
"/usr/local/cuda/lib64/libcublas.so.11"
"/usr/local/cuda/lib64/libcublasLt.so.11"
"/usr/local/cuda/lib64/libcudart.so.11.0"
"/usr/local/cuda/lib64/libnvToolsExt.so.1"
"/usr/local/cuda/lib64/libnvrtc.so.11.2" # this is not a mistake, it links to more specific cuda version
)
DEPS_SONAME+=(
"libcudnn_adv_infer.so.8"
Expand All @@ -238,17 +179,52 @@ elif [[ $CUDA_VERSION == "11.8" ]]; then
"libcudnn.so.8"
"libcublas.so.11"
"libcublasLt.so.11"
"libcudart.so.11.0"
"libnvToolsExt.so.1"
"libnvrtc.so.11.2"
)
if [[ $CUDA_VERSION == "11.7" ]]; then
DEPS_LIST+=(
"/usr/local/cuda/lib64/libnvrtc-builtins.so.11.7"
)
DEPS_SONAME+=(
"libnvrtc-builtins.so.11.7"
)
fi
if [[ $CUDA_VERSION == "11.8" ]]; then
DEPS_LIST+=(
"/usr/local/cuda/lib64/libnvrtc-builtins.so.11.8"
)
DEPS_SONAME+=(
"libnvrtc-builtins.so.11.8"
)
fi
else
echo "Using cudnn and cublas from pypi."
echo "Using nvidia libs from pypi."
CUDA_RPATHS=(
'$ORIGIN/../../nvidia/cublas/lib'
'$ORIGIN/../../nvidia/cuda_cupti/lib'
'$ORIGIN/../../nvidia/cuda_nvrtc/lib'
'$ORIGIN/../../nvidia/cuda_runtime/lib'
'$ORIGIN/../../nvidia/cudnn/lib'
'$ORIGIN/../../nvidia/cufft/lib'
'$ORIGIN/../../nvidia/curand/lib'
'$ORIGIN/../../nvidia/cusolver/lib'
'$ORIGIN/../../nvidia/cusparse/lib'
'$ORIGIN/../../nvidia/nccl/lib'
'$ORIGIN/../../nvidia/nvtx/lib'
)
CUDA_RPATHS=$(IFS=: ; echo "${CUDA_RPATHS[*]}")
export C_SO_RPATH=$CUDA_RPATHS':$ORIGIN:$ORIGIN/lib'
export LIB_SO_RPATH=$CUDA_RPATHS':$ORIGIN'
export FORCE_RPATH="--force-rpath"
export USE_STATIC_NCCL=0
export USE_SYSTEM_NCCL=1
export ATEN_STATIC_CUDA=0
export USE_CUDA_STATIC_LINK=0
export USE_CUPTI_SO=1
export NCCL_INCLUDE_DIR="/usr/local/cuda/include/"
export NCCL_LIB_DIR="/usr/local/cuda/lib64/"
fi
else
echo "Unknown cuda version $CUDA_VERSION"
Expand Down
14 changes: 0 additions & 14 deletions release/pypi/prep_binary_for_pypi.sh
Original file line number Diff line number Diff line change
Expand Up @@ -56,22 +56,8 @@ for whl_file in "$@"; do
if [[ $whl_file == *"with.pypi.cudnn"* ]]; then
rm -rf "${whl_dir}/caffe2"
rm -rf "${whl_dir}"/torch/lib/libnvrtc*
sed -i -e "s/Requires-Dist: nvidia-cuda-runtime-cu11/Requires-Dist: nvidia-cuda-runtime-cu11 (==11.7.99)/" "${whl_dir}"/*/METADATA
sed -i -e "/^Requires-Dist: nvidia-cublas-cu11 (==11.10.3.66).*/a Requires-Dist: nvidia-cuda-nvrtc-cu11 (==11.7.99) ; platform_system == \"Linux\"" "${whl_dir}"/*/METADATA

sed -i -e "s/-with-pypi-cudnn//g" "${whl_dir}/torch/version.py"
find "${whl_dir}/torch/" -maxdepth 1 -type f -name "*.so*" | while read sofile; do
patchelf --set-rpath '$ORIGIN/../../nvidia/cublas/lib:$ORIGIN/../../nvidia/cudnn/lib:$ORIGIN/../../nvidia/cuda_nvrtc/lib:$ORIGIN:$ORIGIN/lib' \
--force-rpath $sofile
patchelf --print-rpath $sofile
done

find "${whl_dir}/torch/lib" -maxdepth 1 -type f -name "*.so*" | while read sofile; do
patchelf --set-rpath '$ORIGIN/../../nvidia/cublas/lib:$ORIGIN/../../nvidia/cudnn/lib:$ORIGIN/../../nvidia/cuda_nvrtc/lib:$ORIGIN' \
--force-rpath $sofile
patchelf --print-rpath $sofile
done
patchelf --replace-needed libnvrtc-d833c4f3.so.11.2 libnvrtc.so.11.2 "${whl_dir}/torch/lib/libcaffe2_nvrtc.so"
fi

find "${dist_info_folder}" -type f -exec sed -i "s!${version_with_suffix}!${version_no_suffix}!" {} \;
Expand Down