From e1453a4913027020519b2cca9da18476a6aded17 Mon Sep 17 00:00:00 2001 From: ptrblck Date: Fri, 20 Jan 2023 06:46:30 -0800 Subject: [PATCH] add sm_90 to CUDA11.8 builds (#1263) * add sm_90 to CUDA11.8 builds * Manually invoke bash for Miniconda --- common/install_conda.sh | 3 ++- common/install_cuda.sh | 4 ++-- conda/pytorch-nightly/bld.bat | 7 ++----- conda/pytorch-nightly/build.sh | 2 +- magma/Makefile | 6 ++---- manywheel/build_cuda.sh | 8 ++++---- windows/cuda118.bat | 4 ++-- 7 files changed, 15 insertions(+), 19 deletions(-) diff --git a/common/install_conda.sh b/common/install_conda.sh index 1027115c0..c7967a615 100644 --- a/common/install_conda.sh +++ b/common/install_conda.sh @@ -5,7 +5,8 @@ set -ex # Anaconda wget -q https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh chmod +x Miniconda3-latest-Linux-x86_64.sh -./Miniconda3-latest-Linux-x86_64.sh -b -p /opt/conda +# NB: Manually invoke bash per https://github.com/conda/conda/issues/10431 +bash ./Miniconda3-latest-Linux-x86_64.sh -b -p /opt/conda rm Miniconda3-latest-Linux-x86_64.sh export PATH=/opt/conda/bin:$PATH conda install -y conda-build anaconda-client git ninja diff --git a/common/install_cuda.sh b/common/install_cuda.sh index c9b9c9d1a..71cde2091 100644 --- a/common/install_cuda.sh +++ b/common/install_cuda.sh @@ -155,8 +155,8 @@ function prune_118 { export NVPRUNE="/usr/local/cuda-11.8/bin/nvprune" export CUDA_LIB_DIR="/usr/local/cuda-11.8/lib64" - export GENCODE="-gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86" - export GENCODE_CUDNN="-gencode arch=compute_35,code=sm_35 -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86" + export GENCODE="-gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90" + export GENCODE_CUDNN="-gencode arch=compute_35,code=sm_35 -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90" if [[ -n "$OVERRIDE_GENCODE" ]]; then export GENCODE=$OVERRIDE_GENCODE diff --git a/conda/pytorch-nightly/bld.bat b/conda/pytorch-nightly/bld.bat index e7c983400..18850f758 100644 --- a/conda/pytorch-nightly/bld.bat +++ b/conda/pytorch-nightly/bld.bat @@ -20,21 +20,18 @@ if "%build_with_cuda%" == "" goto cuda_flags_end set CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%desired_cuda% set CUDA_BIN_PATH=%CUDA_PATH%\bin set TORCH_NVCC_FLAGS=-Xfatbin -compress-all -set TORCH_CUDA_ARCH_LIST=3.7+PTX;5.0 +set TORCH_CUDA_ARCH_LIST=3.7+PTX;5.0;6.0;6.1;7.0;7.5;8.0;8.6 if "%desired_cuda%" == "11.5" ( - set TORCH_CUDA_ARCH_LIST=%TORCH_CUDA_ARCH_LIST%;6.0;6.1;7.0;7.5;8.0;8.6 set TORCH_NVCC_FLAGS=-Xfatbin -compress-all --threads 2 ) if "%desired_cuda%" == "11.6" ( - set TORCH_CUDA_ARCH_LIST=%TORCH_CUDA_ARCH_LIST%;6.0;6.1;7.0;7.5;8.0;8.6 set TORCH_NVCC_FLAGS=-Xfatbin -compress-all --threads 2 ) if "%desired_cuda%" == "11.7" ( - set TORCH_CUDA_ARCH_LIST=%TORCH_CUDA_ARCH_LIST%;6.0;6.1;7.0;7.5;8.0;8.6 set TORCH_NVCC_FLAGS=-Xfatbin -compress-all --threads 2 ) if "%desired_cuda%" == "11.8" ( - set TORCH_CUDA_ARCH_LIST=%TORCH_CUDA_ARCH_LIST%;6.0;6.1;7.0;7.5;8.0;8.6 + set TORCH_CUDA_ARCH_LIST=%TORCH_CUDA_ARCH_LIST%;9.0 set TORCH_NVCC_FLAGS=-Xfatbin -compress-all --threads 2 ) diff --git a/conda/pytorch-nightly/build.sh b/conda/pytorch-nightly/build.sh index f61f53f73..3a3139fd1 100755 --- a/conda/pytorch-nightly/build.sh +++ b/conda/pytorch-nightly/build.sh @@ -70,7 +70,7 @@ if [[ -n "$build_with_cuda" ]]; then #for cuda 11.7 include all dynamic loading libraries DEPS_LIST=(/usr/local/cuda/lib64/libcudnn*.so.8 /usr/local/cuda-11.7/extras/CUPTI/lib64/libcupti.so.11.7) elif [[ $CUDA_VERSION == 11.8* ]]; then - export TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST;6.0;6.1;7.0;7.5;8.0;8.6" + export TORCH_CUDA_ARCH_LIST="$TORCH_CUDA_ARCH_LIST;6.0;6.1;7.0;7.5;8.0;8.6;9.0" #for cuda 11.8 we use cudnn 8.5 #which does not have single static libcudnn_static.a deliverable to link with export USE_STATIC_CUDNN=0 diff --git a/magma/Makefile b/magma/Makefile index ec12e267b..4a90a43e2 100644 --- a/magma/Makefile +++ b/magma/Makefile @@ -2,7 +2,7 @@ SHELL=/usr/bin/env bash DESIRED_CUDA ?= 11.6 PACKAGE_NAME ?= magma-cuda116 -CUDA_ARCH_LIST ?= -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 +CUDA_ARCH_LIST ?= -gencode arch=compute_37,code=sm_37 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 DOCKER_RUN = set -eou pipefail; docker run --rm -i \ -v $(shell git rev-parse --show-toplevel):/builder \ @@ -26,20 +26,18 @@ clean: .PHONY: magma-cuda118 magma-cuda118: DESIRED_CUDA := 11.8 magma-cuda118: PACKAGE_NAME := magma-cuda118 -magma-cuda118: CUDA_ARCH_LIST += -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 +magma-cuda118: CUDA_ARCH_LIST += -gencode arch=compute_90,code=sm_90 magma-cuda118: $(DOCKER_RUN) .PHONY: magma-cuda117 magma-cuda117: DESIRED_CUDA := 11.7 magma-cuda117: PACKAGE_NAME := magma-cuda117 -magma-cuda117: CUDA_ARCH_LIST += -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 magma-cuda117: $(DOCKER_RUN) .PHONY: magma-cuda116 magma-cuda116: DESIRED_CUDA := 11.6 magma-cuda116: PACKAGE_NAME := magma-cuda116 -magma-cuda116: CUDA_ARCH_LIST += -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 magma-cuda116: $(DOCKER_RUN) diff --git a/manywheel/build_cuda.sh b/manywheel/build_cuda.sh index 3a69d9838..bd04cdd60 100644 --- a/manywheel/build_cuda.sh +++ b/manywheel/build_cuda.sh @@ -58,12 +58,12 @@ cuda_version_nodot=$(echo $CUDA_VERSION | tr -d '.') TORCH_CUDA_ARCH_LIST="3.7;5.0;6.0;7.0" case ${CUDA_VERSION} in - 11.[678]) - TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};7.5;8.0;8.6" + 11.8) + TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};7.5;8.0;8.6;9.0" EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON") ;; - 10.*) - TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST}" + 11.[67]) + TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};7.5;8.0;8.6" EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON") ;; *) diff --git a/windows/cuda118.bat b/windows/cuda118.bat index 697b511b9..02d91adc3 100644 --- a/windows/cuda118.bat +++ b/windows/cuda118.bat @@ -37,10 +37,10 @@ IF "%CUDA_PATH_V118%"=="" ( ) IF "%BUILD_VISION%" == "" ( - set TORCH_CUDA_ARCH_LIST=3.7+PTX;5.0;6.0;6.1;7.0;7.5;8.0;8.6 + set TORCH_CUDA_ARCH_LIST=3.7+PTX;5.0;6.0;6.1;7.0;7.5;8.0;8.6;9.0 set TORCH_NVCC_FLAGS=-Xfatbin -compress-all ) ELSE ( - set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_86,code=compute_86 + set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_90,code=compute_90 ) set "CUDA_PATH=%CUDA_PATH_V118%"