diff --git a/conda/Dockerfile b/conda/Dockerfile
index 30bd97f17..dcd785774 100644
--- a/conda/Dockerfile
+++ b/conda/Dockerfile
@@ -65,17 +65,29 @@ RUN wget -q https://developer.nvidia.com/compute/cuda/9.0/Prod/patches/2/cuda_9.
 
 # install CUDA 9.0 CuDNN
 # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
-RUN curl -fsSL http://developer.download.nvidia.com/compute/redist/cudnn/v7.1.2/cudnn-9.0-linux-x64-v7.1.tgz -O && \
-    tar --no-same-owner -xzf cudnn-9.0-linux-x64-v7.1.tgz -C /usr/local && \
-    rm cudnn-9.0-linux-x64-v7.1.tgz && \
+RUN mkdir tmp_cudnn && cd tmp_cudnn && \
+    wget -q https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64/libcudnn7-dev_7.4.1.5-1+cuda9.0_amd64.deb && \
+    wget -q https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64/libcudnn7_7.4.1.5-1+cuda9.0_amd64.deb && \
+    ar -x libcudnn7-dev_7.4.1.5-1+cuda9.0_amd64.deb && tar -xvf data.tar.xz && \
+    ar -x libcudnn7_7.4.1.5-1+cuda9.0_amd64.deb && tar -xvf data.tar.xz && \
+    mkdir -p cuda/include && mkdir -p cuda/lib64 && \
+    cp -a usr/include/x86_64-linux-gnu/cudnn_v7.h cuda/include/cudnn.h && \
+    cp -a usr/lib/x86_64-linux-gnu/libcudnn* cuda/lib64 && \
+    mv cuda/lib64/libcudnn_static_v7.a cuda/lib64/libcudnn_static.a && \
+    ln -s libcudnn.so.7 cuda/lib64/libcudnn.so && \
+    chmod +x cuda/lib64/*.so && \
+    cp -a cuda/include/* /usr/local/cuda/include/ && \
+    cp -a cuda/lib64/* /usr/local/cuda/lib64/ && \
+    cd .. && \
+    rm -rf tmp_cudnn && \
     ldconfig
 
 # NCCL2 license: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html
-RUN wget -q https://s3.amazonaws.com/pytorch/nccl_2.2.13-1%2Bcuda9.0_x86_64.txz && \
-    tar --no-same-owner -xvf nccl_2.2.13-1+cuda9.0_x86_64.txz && \
-    mv nccl_2.2.13-1+cuda9.0_x86_64/include/* /usr/local/cuda/include/ && \
-    cp -P nccl_2.2.13-1+cuda9.0_x86_64/lib/libnccl* /usr/local/cuda/lib64/ && \
-    rm -rf nccl_2.2.13-1+cuda9.0_x86_64* && \
+RUN wget -q https://s3.amazonaws.com/pytorch/nccl_2.3.7-1%2Bcuda9.0_x86_64.txz && \
+    tar --no-same-owner -xvf nccl_2.3.7-1+cuda9.0_x86_64.txz && \
+    mv nccl_2.3.7-1+cuda9.0_x86_64/include/* /usr/local/cuda/include/ && \
+    cp -P nccl_2.3.7-1+cuda9.0_x86_64/lib/libnccl* /usr/local/cuda/lib64/ && \
+    rm -rf nccl_2.3.7-1+cuda9.0_x86_64* && \
     ldconfig
 
 # install CUDA 9.2 in the same container
@@ -86,20 +98,31 @@ RUN wget -q https://developer.nvidia.com/compute/cuda/9.2/Prod2/local_installers
 
 # install CUDA 9.2 CuDNN
 # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
-RUN curl -fsSL http://developer.download.nvidia.com/compute/redist/cudnn/v7.1.4/cudnn-9.2-linux-x64-v7.1.tgz -O && \
-    tar --no-same-owner -xzf cudnn-9.2-linux-x64-v7.1.tgz -C /usr/local && \
-    rm cudnn-9.2-linux-x64-v7.1.tgz && \
+RUN mkdir tmp_cudnn && cd tmp_cudnn && \
+    wget -q https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64/libcudnn7-dev_7.4.1.5-1+cuda9.2_amd64.deb && \
+    wget -q https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64/libcudnn7_7.4.1.5-1+cuda9.2_amd64.deb && \
+    ar -x libcudnn7-dev_7.4.1.5-1+cuda9.2_amd64.deb && tar -xvf data.tar.xz && \
+    ar -x libcudnn7_7.4.1.5-1+cuda9.2_amd64.deb && tar -xvf data.tar.xz && \
+    mkdir -p cuda/include && mkdir -p cuda/lib64 && \
+    cp -a usr/include/x86_64-linux-gnu/cudnn_v7.h cuda/include/cudnn.h && \
+    cp -a usr/lib/x86_64-linux-gnu/libcudnn* cuda/lib64 && \
+    mv cuda/lib64/libcudnn_static_v7.a cuda/lib64/libcudnn_static.a && \
+    ln -s libcudnn.so.7 cuda/lib64/libcudnn.so && \
+    chmod +x cuda/lib64/*.so && \
+    cp -a cuda/include/* /usr/local/cuda/include/ && \
+    cp -a cuda/lib64/* /usr/local/cuda/lib64/ && \
+    cd .. && \
+    rm -rf tmp_cudnn && \
     ldconfig
 
 # NCCL2 license: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html
-RUN wget -q https://s3.amazonaws.com/pytorch/nccl_2.2.13-1%2Bcuda9.2_x86_64.txz && \
-    tar --no-same-owner -xvf nccl_2.2.13-1+cuda9.2_x86_64.txz && \
-    mv nccl_2.2.13-1+cuda9.2_x86_64/include/* /usr/local/cuda/include/ && \
-    cp -P nccl_2.2.13-1+cuda9.2_x86_64/lib/libnccl* /usr/local/cuda/lib64/ && \
-    rm -rf nccl_2.2.13-1+cuda9.2_x86_64* && \
+RUN wget -q https://s3.amazonaws.com/pytorch/nccl_2.3.7-1%2Bcuda9.2_x86_64.txz && \
+    tar --no-same-owner -xvf nccl_2.3.7-1+cuda9.2_x86_64.txz && \
+    mv nccl_2.3.7-1+cuda9.2_x86_64/include/* /usr/local/cuda/include/ && \
+    cp -P nccl_2.3.7-1+cuda9.2_x86_64/lib/libnccl* /usr/local/cuda/lib64/ && \
+    rm -rf nccl_2.3.7-1+cuda9.2_x86_64* && \
     ldconfig
 
-
 # prune static libs
 ARG GENCODE="-gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61"
 ARG CUDA_ROOT="/usr/local/cuda-8.0"
@@ -111,16 +134,14 @@ RUN ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep
 ARG GENCODE="-gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70"
 ARG CUDA_ROOT="/usr/local/cuda-9.0"
 ARG CUDA_LIB_DIR="/usr/local/cuda-9.0/lib64"
-RUN ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | \
-    xargs -I {} bash -c "echo {} && $CUDA_ROOT/bin/nvprune $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}" && \
-    $CUDA_ROOT/bin/nvprune $GENCODE -gencode code=compute_30 $CUDA_LIB_DIR/libcudnn_static.a -o $CUDA_LIB_DIR/libcudnn_static.a
+RUN ls $CUDA_LIB_DIR/ | grep '\.a$' | grep -v "culibos" | grep -v "cudart" | \
+    xargs -I {} bash -c "echo {} && $CUDA_ROOT/bin/nvprune $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}"
 
 ARG GENCODE="-gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70"
 ARG CUDA_ROOT="/usr/local/cuda-9.2"
 ARG CUDA_LIB_DIR="/usr/local/cuda-9.2/lib64"
-RUN ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "cusolver" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | \
-    xargs -I {} bash -c "echo {} && $CUDA_ROOT/bin/nvprune $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}" && \
-    $CUDA_ROOT/bin/nvprune $GENCODE -gencode code=compute_30 $CUDA_LIB_DIR/libcudnn_static.a -o $CUDA_LIB_DIR/libcudnn_static.a
+RUN ls $CUDA_LIB_DIR/ | grep '\.a$' | grep -v "cusolver" | grep -v "culibos" | grep -v "cudart" | \
+    xargs -I {} bash -c "echo {} && $CUDA_ROOT/bin/nvprune $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}"
 
 
 # Anaconda
diff --git a/conda/magma-cuda90-2.3.0/build.sh b/conda/magma-cuda90-2.4.0/build.sh
similarity index 100%
rename from conda/magma-cuda90-2.3.0/build.sh
rename to conda/magma-cuda90-2.4.0/build.sh
diff --git a/conda/magma-cuda90-2.4.0/cmakelists.patch b/conda/magma-cuda90-2.4.0/cmakelists.patch
new file mode 100644
index 000000000..290403353
--- /dev/null
+++ b/conda/magma-cuda90-2.4.0/cmakelists.patch
@@ -0,0 +1,140 @@
+--- CMakeLists.txt    2016-11-20 20:20:02.000000000 -0500
++++ CMakeLists.txt    2017-07-17 01:10:32.161739159 -040062a63
+62a63
+> set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -static-libstdc++ -fno-exceptions")
+82,89c83,90
+< find_package( OpenMP )
+< if ( OPENMP_FOUND )
+< 	message( STATUS "Found OpenMP" )
+< 	message( STATUS "    OpenMP_C_FLAGS   ${OpenMP_C_FLAGS}" )
+< 	message( STATUS "    OpenMP_CXX_FLAGS ${OpenMP_CXX_FLAGS}" )
+< 	set( CMAKE_C_FLAGS   "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}" )
+< 	set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}" )
+< endif()
+---
+> # find_package( OpenMP )
+> # if ( OPENMP_FOUND )
+> # 	message( STATUS "Found OpenMP" )
+> # 	message( STATUS "    OpenMP_C_FLAGS   ${OpenMP_C_FLAGS}" )
+> # 	message( STATUS "    OpenMP_CXX_FLAGS ${OpenMP_CXX_FLAGS}" )
+> # 	set( CMAKE_C_FLAGS   "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}" )
+> # 	set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}" )
+> # endif()
+238a240,246
+> 
+> 	if ( ${GPU_TARGET} MATCHES "All")
+> 	  set( MIN_ARCH 350)
+> 	  SET( NV_SM "-gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 ")
+> 	  SET( NV_COMP "")
+> 	endif()
+>       
+244c252,253
+< 	set( CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -DHAVE_CUBLAS ${NV_SM} ${NV_COMP} ${FORTRAN_CONVENTION}" )
+---
+> 	set( CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -DHAVE_CUBLAS -Xfatbin -compress-all -Xcompiler -fPIC ${NV_SM} ${NV_COMP} ${FORTRAN_CONVENTION}" )
+> 	MESSAGE(STATUS "CUDA_NVCC_FLAGS: ${CUDA_NVCC_FLAGS}")
+255c264
+< 	find_package( LAPACK )
+---
+> 	# find_package( LAPACK )
+302c311,312
+< set( LIBS tester lapacktest magma )
+---
+> # set( LIBS tester lapacktest magma )
+> set( LIBS magma)
+410a421
+> set_target_properties(magma PROPERTIES POSITION_INDEPENDENT_CODE ON)
+414c425
+< add_library( tester ${libtest_all} )
+---
+> # add_library( tester ${libtest_all} )
+421,436c432,447
+< if ( USE_FORTRAN )
+< 	foreach( f ${liblapacktest_all} )
+< 		if ( ${f} MATCHES "\\.(f|f90)$" )
+< 			list( APPEND liblapacktest_all_f ${f} )
+< 		endif()
+< 	endforeach()
+< 	add_library( lapacktest ${liblapacktest_all_f} )
+< else()
+< 	# alternatively, use only C/C++/CUDA files, including magma_[sdcz]_no_fortran.cpp
+< 	foreach( f ${liblapacktest_all} )
+< 		if ( ${f} MATCHES "\\.(c|cu|cpp)$" )
+< 			list( APPEND liblapacktest_all_cpp ${f} )
+< 		endif()
+< 	endforeach()
+< 	add_library( lapacktest ${liblapacktest_all_cpp} )
+< endif()
+---
+> # if ( USE_FORTRAN )
+> # 	foreach( f ${liblapacktest_all} )
+> # 		if ( ${f} MATCHES "\\.(f|f90)$" )
+> # 			list( APPEND liblapacktest_all_f ${f} )
+> # 		endif()
+> # 	endforeach()
+> # 	add_library( lapacktest ${liblapacktest_all_f} )
+> # else()
+> # 	# alternatively, use only C/C++/CUDA files, including magma_[sdcz]_no_fortran.cpp
+> # 	foreach( f ${liblapacktest_all} )
+> # 		if ( ${f} MATCHES "\\.(c|cu|cpp)$" )
+> # 			list( APPEND liblapacktest_all_cpp ${f} )
+> # 		endif()
+> # 	endforeach()
+> # 	add_library( lapacktest ${liblapacktest_all_cpp} )
+> # endif()
+456c467
+< 
+---
+> set_target_properties(magma_sparse PROPERTIES POSITION_INDEPENDENT_CODE ON)
+462,464c473,475
+< set( CMAKE_RUNTIME_OUTPUT_DIRECTORY testing )
+< set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY testing_lib )
+< set( CMAKE_LIBRARY_OUTPUT_DIRECTORY testing_lib )
+---
+> # set( CMAKE_RUNTIME_OUTPUT_DIRECTORY testing )
+> # set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY testing_lib )
+> # set( CMAKE_LIBRARY_OUTPUT_DIRECTORY testing_lib )
+467,478c478,489
+< foreach( f ${testing_all} )
+< 	if ( ${f} MATCHES "\\.(c|cu|cpp)$" )
+< 		list( APPEND testing_all_cpp ${f} )
+< 	endif()
+< endforeach()
+< foreach( TEST ${testing_all_cpp} )
+< 	string( REGEX REPLACE "\\.(cpp|f90|F90)" "" EXE ${TEST} )
+< 	string( REGEX REPLACE "testing/" "" EXE ${EXE} )
+< 	#message( "${TEST} --> ${EXE}" )
+< 	add_executable( ${EXE} ${TEST} )
+< 	target_link_libraries( ${EXE} ${LIBS} )
+< endforeach()
+---
+> # foreach( f ${testing_all} )
+> # 	if ( ${f} MATCHES "\\.(c|cu|cpp)$" )
+> # 		list( APPEND testing_all_cpp ${f} )
+> # 	endif()
+> # endforeach()
+> # foreach( TEST ${testing_all_cpp} )
+> # 	string( REGEX REPLACE "\\.(cpp|f90|F90)" "" EXE ${TEST} )
+> # 	string( REGEX REPLACE "testing/" "" EXE ${EXE} )
+> # 	#message( "${TEST} --> ${EXE}" )
+> # 	add_executable( ${EXE} ${TEST} )
+> # 	target_link_libraries( ${EXE} ${LIBS} )
+> # endforeach()
+483,490c494,501
+< set( CMAKE_RUNTIME_OUTPUT_DIRECTORY sparse/testing )
+< foreach( TEST ${sparse_testing_all} )
+< 	string( REGEX REPLACE "\\.(cpp|f90|F90)"     "" EXE ${TEST} )
+< 	string( REGEX REPLACE "sparse/testing/" "" EXE ${EXE} )
+< 	#message( "${TEST} --> ${EXE}" )
+< 	add_executable( ${EXE} ${TEST} )
+< 	target_link_libraries( ${EXE} ${LIBS_SPARSE} )
+< endforeach()
+---
+> # set( CMAKE_RUNTIME_OUTPUT_DIRECTORY sparse/testing )
+> # foreach( TEST ${sparse_testing_all} )
+> # 	string( REGEX REPLACE "\\.(cpp|f90|F90)"     "" EXE ${TEST} )
+> # 	string( REGEX REPLACE "sparse/testing/" "" EXE ${EXE} )
+> # 	#message( "${TEST} --> ${EXE}" )
+> # 	add_executable( ${EXE} ${TEST} )
+> # 	target_link_libraries( ${EXE} ${LIBS_SPARSE} )
+> # endforeach()
diff --git a/conda/magma-cuda90-2.4.0/meta.yaml b/conda/magma-cuda90-2.4.0/meta.yaml
new file mode 100644
index 000000000..3b271c53e
--- /dev/null
+++ b/conda/magma-cuda90-2.4.0/meta.yaml
@@ -0,0 +1,17 @@
+package:
+  name: magma-cuda90
+  version: 2.4.0
+
+source:
+   url: http://icl.cs.utk.edu/projectsfiles/magma/downloads/magma-2.4.0.tar.gz
+   patches:
+     - cmakelists.patch
+     - thread_queue.patch
+
+build:
+  number: 1
+
+about:
+  home: http://icl.cs.utk.edu/magma/software/index.html
+  license: BSD
+  license_file: COPYRIGHT
diff --git a/conda/magma-cuda80-2.3.0/run_test.sh b/conda/magma-cuda90-2.4.0/run_test.sh
similarity index 100%
rename from conda/magma-cuda80-2.3.0/run_test.sh
rename to conda/magma-cuda90-2.4.0/run_test.sh
diff --git a/conda/magma-cuda80-2.3.0/thread_queue.patch b/conda/magma-cuda90-2.4.0/thread_queue.patch
similarity index 100%
rename from conda/magma-cuda80-2.3.0/thread_queue.patch
rename to conda/magma-cuda90-2.4.0/thread_queue.patch
diff --git a/conda/magma-cuda92-2.3.0/build.sh b/conda/magma-cuda92-2.4.0/build.sh
similarity index 100%
rename from conda/magma-cuda92-2.3.0/build.sh
rename to conda/magma-cuda92-2.4.0/build.sh
diff --git a/conda/magma-cuda92-2.4.0/cmakelists.patch b/conda/magma-cuda92-2.4.0/cmakelists.patch
new file mode 100644
index 000000000..290403353
--- /dev/null
+++ b/conda/magma-cuda92-2.4.0/cmakelists.patch
@@ -0,0 +1,140 @@
+--- CMakeLists.txt    2016-11-20 20:20:02.000000000 -0500
++++ CMakeLists.txt    2017-07-17 01:10:32.161739159 -040062a63
+62a63
+> set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -static-libstdc++ -fno-exceptions")
+82,89c83,90
+< find_package( OpenMP )
+< if ( OPENMP_FOUND )
+< 	message( STATUS "Found OpenMP" )
+< 	message( STATUS "    OpenMP_C_FLAGS   ${OpenMP_C_FLAGS}" )
+< 	message( STATUS "    OpenMP_CXX_FLAGS ${OpenMP_CXX_FLAGS}" )
+< 	set( CMAKE_C_FLAGS   "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}" )
+< 	set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}" )
+< endif()
+---
+> # find_package( OpenMP )
+> # if ( OPENMP_FOUND )
+> # 	message( STATUS "Found OpenMP" )
+> # 	message( STATUS "    OpenMP_C_FLAGS   ${OpenMP_C_FLAGS}" )
+> # 	message( STATUS "    OpenMP_CXX_FLAGS ${OpenMP_CXX_FLAGS}" )
+> # 	set( CMAKE_C_FLAGS   "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}" )
+> # 	set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}" )
+> # endif()
+238a240,246
+> 
+> 	if ( ${GPU_TARGET} MATCHES "All")
+> 	  set( MIN_ARCH 350)
+> 	  SET( NV_SM "-gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 ")
+> 	  SET( NV_COMP "")
+> 	endif()
+>       
+244c252,253
+< 	set( CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -DHAVE_CUBLAS ${NV_SM} ${NV_COMP} ${FORTRAN_CONVENTION}" )
+---
+> 	set( CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -DHAVE_CUBLAS -Xfatbin -compress-all -Xcompiler -fPIC ${NV_SM} ${NV_COMP} ${FORTRAN_CONVENTION}" )
+> 	MESSAGE(STATUS "CUDA_NVCC_FLAGS: ${CUDA_NVCC_FLAGS}")
+255c264
+< 	find_package( LAPACK )
+---
+> 	# find_package( LAPACK )
+302c311,312
+< set( LIBS tester lapacktest magma )
+---
+> # set( LIBS tester lapacktest magma )
+> set( LIBS magma)
+410a421
+> set_target_properties(magma PROPERTIES POSITION_INDEPENDENT_CODE ON)
+414c425
+< add_library( tester ${libtest_all} )
+---
+> # add_library( tester ${libtest_all} )
+421,436c432,447
+< if ( USE_FORTRAN )
+< 	foreach( f ${liblapacktest_all} )
+< 		if ( ${f} MATCHES "\\.(f|f90)$" )
+< 			list( APPEND liblapacktest_all_f ${f} )
+< 		endif()
+< 	endforeach()
+< 	add_library( lapacktest ${liblapacktest_all_f} )
+< else()
+< 	# alternatively, use only C/C++/CUDA files, including magma_[sdcz]_no_fortran.cpp
+< 	foreach( f ${liblapacktest_all} )
+< 		if ( ${f} MATCHES "\\.(c|cu|cpp)$" )
+< 			list( APPEND liblapacktest_all_cpp ${f} )
+< 		endif()
+< 	endforeach()
+< 	add_library( lapacktest ${liblapacktest_all_cpp} )
+< endif()
+---
+> # if ( USE_FORTRAN )
+> # 	foreach( f ${liblapacktest_all} )
+> # 		if ( ${f} MATCHES "\\.(f|f90)$" )
+> # 			list( APPEND liblapacktest_all_f ${f} )
+> # 		endif()
+> # 	endforeach()
+> # 	add_library( lapacktest ${liblapacktest_all_f} )
+> # else()
+> # 	# alternatively, use only C/C++/CUDA files, including magma_[sdcz]_no_fortran.cpp
+> # 	foreach( f ${liblapacktest_all} )
+> # 		if ( ${f} MATCHES "\\.(c|cu|cpp)$" )
+> # 			list( APPEND liblapacktest_all_cpp ${f} )
+> # 		endif()
+> # 	endforeach()
+> # 	add_library( lapacktest ${liblapacktest_all_cpp} )
+> # endif()
+456c467
+< 
+---
+> set_target_properties(magma_sparse PROPERTIES POSITION_INDEPENDENT_CODE ON)
+462,464c473,475
+< set( CMAKE_RUNTIME_OUTPUT_DIRECTORY testing )
+< set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY testing_lib )
+< set( CMAKE_LIBRARY_OUTPUT_DIRECTORY testing_lib )
+---
+> # set( CMAKE_RUNTIME_OUTPUT_DIRECTORY testing )
+> # set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY testing_lib )
+> # set( CMAKE_LIBRARY_OUTPUT_DIRECTORY testing_lib )
+467,478c478,489
+< foreach( f ${testing_all} )
+< 	if ( ${f} MATCHES "\\.(c|cu|cpp)$" )
+< 		list( APPEND testing_all_cpp ${f} )
+< 	endif()
+< endforeach()
+< foreach( TEST ${testing_all_cpp} )
+< 	string( REGEX REPLACE "\\.(cpp|f90|F90)" "" EXE ${TEST} )
+< 	string( REGEX REPLACE "testing/" "" EXE ${EXE} )
+< 	#message( "${TEST} --> ${EXE}" )
+< 	add_executable( ${EXE} ${TEST} )
+< 	target_link_libraries( ${EXE} ${LIBS} )
+< endforeach()
+---
+> # foreach( f ${testing_all} )
+> # 	if ( ${f} MATCHES "\\.(c|cu|cpp)$" )
+> # 		list( APPEND testing_all_cpp ${f} )
+> # 	endif()
+> # endforeach()
+> # foreach( TEST ${testing_all_cpp} )
+> # 	string( REGEX REPLACE "\\.(cpp|f90|F90)" "" EXE ${TEST} )
+> # 	string( REGEX REPLACE "testing/" "" EXE ${EXE} )
+> # 	#message( "${TEST} --> ${EXE}" )
+> # 	add_executable( ${EXE} ${TEST} )
+> # 	target_link_libraries( ${EXE} ${LIBS} )
+> # endforeach()
+483,490c494,501
+< set( CMAKE_RUNTIME_OUTPUT_DIRECTORY sparse/testing )
+< foreach( TEST ${sparse_testing_all} )
+< 	string( REGEX REPLACE "\\.(cpp|f90|F90)"     "" EXE ${TEST} )
+< 	string( REGEX REPLACE "sparse/testing/" "" EXE ${EXE} )
+< 	#message( "${TEST} --> ${EXE}" )
+< 	add_executable( ${EXE} ${TEST} )
+< 	target_link_libraries( ${EXE} ${LIBS_SPARSE} )
+< endforeach()
+---
+> # set( CMAKE_RUNTIME_OUTPUT_DIRECTORY sparse/testing )
+> # foreach( TEST ${sparse_testing_all} )
+> # 	string( REGEX REPLACE "\\.(cpp|f90|F90)"     "" EXE ${TEST} )
+> # 	string( REGEX REPLACE "sparse/testing/" "" EXE ${EXE} )
+> # 	#message( "${TEST} --> ${EXE}" )
+> # 	add_executable( ${EXE} ${TEST} )
+> # 	target_link_libraries( ${EXE} ${LIBS_SPARSE} )
+> # endforeach()
diff --git a/conda/magma-cuda92-2.4.0/meta.yaml b/conda/magma-cuda92-2.4.0/meta.yaml
new file mode 100644
index 000000000..4d9a26383
--- /dev/null
+++ b/conda/magma-cuda92-2.4.0/meta.yaml
@@ -0,0 +1,17 @@
+package:
+  name: magma-cuda92
+  version: 2.4.0
+
+source:
+   url: http://icl.cs.utk.edu/projectsfiles/magma/downloads/magma-2.4.0.tar.gz
+   patches:
+     - cmakelists.patch
+     - thread_queue.patch
+
+build:
+  number: 1
+
+about:
+  home: http://icl.cs.utk.edu/magma/software/index.html
+  license: BSD
+  license_file: COPYRIGHT
diff --git a/conda/magma-cuda90-2.3.0/run_test.sh b/conda/magma-cuda92-2.4.0/run_test.sh
similarity index 100%
rename from conda/magma-cuda90-2.3.0/run_test.sh
rename to conda/magma-cuda92-2.4.0/run_test.sh
diff --git a/conda/magma-cuda90-2.3.0/thread_queue.patch b/conda/magma-cuda92-2.4.0/thread_queue.patch
similarity index 100%
rename from conda/magma-cuda90-2.3.0/thread_queue.patch
rename to conda/magma-cuda92-2.4.0/thread_queue.patch
diff --git a/conda/magma-cuda80-2.3.0/build.sh b/conda/old/magma-cuda80-2.3.0/build.sh
similarity index 100%
rename from conda/magma-cuda80-2.3.0/build.sh
rename to conda/old/magma-cuda80-2.3.0/build.sh
diff --git a/conda/magma-cuda80-2.3.0/cmakelists.patch b/conda/old/magma-cuda80-2.3.0/cmakelists.patch
similarity index 100%
rename from conda/magma-cuda80-2.3.0/cmakelists.patch
rename to conda/old/magma-cuda80-2.3.0/cmakelists.patch
diff --git a/conda/magma-cuda80-2.3.0/magma_cparict_tools.patch b/conda/old/magma-cuda80-2.3.0/magma_cparict_tools.patch
similarity index 100%
rename from conda/magma-cuda80-2.3.0/magma_cparict_tools.patch
rename to conda/old/magma-cuda80-2.3.0/magma_cparict_tools.patch
diff --git a/conda/magma-cuda80-2.3.0/magma_dparict_tools.patch b/conda/old/magma-cuda80-2.3.0/magma_dparict_tools.patch
similarity index 100%
rename from conda/magma-cuda80-2.3.0/magma_dparict_tools.patch
rename to conda/old/magma-cuda80-2.3.0/magma_dparict_tools.patch
diff --git a/conda/magma-cuda80-2.3.0/magma_sparict_tools.patch b/conda/old/magma-cuda80-2.3.0/magma_sparict_tools.patch
similarity index 100%
rename from conda/magma-cuda80-2.3.0/magma_sparict_tools.patch
rename to conda/old/magma-cuda80-2.3.0/magma_sparict_tools.patch
diff --git a/conda/magma-cuda80-2.3.0/magma_zparict_tools.patch b/conda/old/magma-cuda80-2.3.0/magma_zparict_tools.patch
similarity index 100%
rename from conda/magma-cuda80-2.3.0/magma_zparict_tools.patch
rename to conda/old/magma-cuda80-2.3.0/magma_zparict_tools.patch
diff --git a/conda/magma-cuda80-2.3.0/meta.yaml b/conda/old/magma-cuda80-2.3.0/meta.yaml
similarity index 100%
rename from conda/magma-cuda80-2.3.0/meta.yaml
rename to conda/old/magma-cuda80-2.3.0/meta.yaml
diff --git a/conda/magma-cuda91-2.3.0/run_test.sh b/conda/old/magma-cuda80-2.3.0/run_test.sh
similarity index 100%
rename from conda/magma-cuda91-2.3.0/run_test.sh
rename to conda/old/magma-cuda80-2.3.0/run_test.sh
diff --git a/conda/magma-cuda91-2.3.0/thread_queue.patch b/conda/old/magma-cuda80-2.3.0/thread_queue.patch
similarity index 100%
rename from conda/magma-cuda91-2.3.0/thread_queue.patch
rename to conda/old/magma-cuda80-2.3.0/thread_queue.patch
diff --git a/conda/old/magma-cuda90-2.3.0/build.sh b/conda/old/magma-cuda90-2.3.0/build.sh
new file mode 100644
index 000000000..b23fe4e66
--- /dev/null
+++ b/conda/old/magma-cuda90-2.3.0/build.sh
@@ -0,0 +1,16 @@
+export CMAKE_LIBRARY_PATH="$PREFIX/lib:$PREFIX/include:$CMAKE_LIBRARY_PATH"
+export CMAKE_PREFIX_PATH="$PREFIX"
+export PATH="$PREFIX/bin:$PATH"
+# Guard: only build when the nvcc found on PATH reports CUDA 9.0.
+CUDA__VERSION=$(nvcc --version|tail -n1|cut -f5 -d" "|cut -f1 -d",")
+if [ "$CUDA__VERSION" != "9.0" ]; then
+    echo "CUDA Version is not 9.0. CUDA Version found: $CUDA__VERSION"
+    exit 1
+fi
+# Out-of-tree build; cd/cmake/make exit with their own status on failure so a broken build is never installed.
+mkdir build
+cd build || exit
+cmake .. -DUSE_FORTRAN=OFF -DGPU_TARGET="All" -DCMAKE_INSTALL_PREFIX="$PREFIX" || exit
+make -j"$(getconf _NPROCESSORS_CONF)" || exit
+make install || exit
+cd ..
diff --git a/conda/magma-cuda90-2.3.0/cmakelists.patch b/conda/old/magma-cuda90-2.3.0/cmakelists.patch
similarity index 100%
rename from conda/magma-cuda90-2.3.0/cmakelists.patch
rename to conda/old/magma-cuda90-2.3.0/cmakelists.patch
diff --git a/conda/magma-cuda90-2.3.0/magma_cparict_tools.patch b/conda/old/magma-cuda90-2.3.0/magma_cparict_tools.patch
similarity index 100%
rename from conda/magma-cuda90-2.3.0/magma_cparict_tools.patch
rename to conda/old/magma-cuda90-2.3.0/magma_cparict_tools.patch
diff --git a/conda/magma-cuda90-2.3.0/magma_dparict_tools.patch b/conda/old/magma-cuda90-2.3.0/magma_dparict_tools.patch
similarity index 100%
rename from conda/magma-cuda90-2.3.0/magma_dparict_tools.patch
rename to conda/old/magma-cuda90-2.3.0/magma_dparict_tools.patch
diff --git a/conda/magma-cuda90-2.3.0/magma_sparict_tools.patch b/conda/old/magma-cuda90-2.3.0/magma_sparict_tools.patch
similarity index 100%
rename from conda/magma-cuda90-2.3.0/magma_sparict_tools.patch
rename to conda/old/magma-cuda90-2.3.0/magma_sparict_tools.patch
diff --git a/conda/magma-cuda90-2.3.0/magma_zparict_tools.patch b/conda/old/magma-cuda90-2.3.0/magma_zparict_tools.patch
similarity index 100%
rename from conda/magma-cuda90-2.3.0/magma_zparict_tools.patch
rename to conda/old/magma-cuda90-2.3.0/magma_zparict_tools.patch
diff --git a/conda/magma-cuda90-2.3.0/meta.yaml b/conda/old/magma-cuda90-2.3.0/meta.yaml
similarity index 100%
rename from conda/magma-cuda90-2.3.0/meta.yaml
rename to conda/old/magma-cuda90-2.3.0/meta.yaml
diff --git a/conda/magma-cuda92-2.3.0/run_test.sh b/conda/old/magma-cuda90-2.3.0/run_test.sh
similarity index 100%
rename from conda/magma-cuda92-2.3.0/run_test.sh
rename to conda/old/magma-cuda90-2.3.0/run_test.sh
diff --git a/conda/magma-cuda92-2.3.0/thread_queue.patch b/conda/old/magma-cuda90-2.3.0/thread_queue.patch
similarity index 100%
rename from conda/magma-cuda92-2.3.0/thread_queue.patch
rename to conda/old/magma-cuda90-2.3.0/thread_queue.patch
diff --git a/conda/magma-cuda91-2.3.0/build.sh b/conda/old/magma-cuda91-2.3.0/build.sh
similarity index 100%
rename from conda/magma-cuda91-2.3.0/build.sh
rename to conda/old/magma-cuda91-2.3.0/build.sh
diff --git a/conda/magma-cuda91-2.3.0/cmakelists.patch b/conda/old/magma-cuda91-2.3.0/cmakelists.patch
similarity index 100%
rename from conda/magma-cuda91-2.3.0/cmakelists.patch
rename to conda/old/magma-cuda91-2.3.0/cmakelists.patch
diff --git a/conda/magma-cuda91-2.3.0/magma_cparict_tools.patch b/conda/old/magma-cuda91-2.3.0/magma_cparict_tools.patch
similarity index 100%
rename from conda/magma-cuda91-2.3.0/magma_cparict_tools.patch
rename to conda/old/magma-cuda91-2.3.0/magma_cparict_tools.patch
diff --git a/conda/magma-cuda91-2.3.0/magma_dparict_tools.patch b/conda/old/magma-cuda91-2.3.0/magma_dparict_tools.patch
similarity index 100%
rename from conda/magma-cuda91-2.3.0/magma_dparict_tools.patch
rename to conda/old/magma-cuda91-2.3.0/magma_dparict_tools.patch
diff --git a/conda/magma-cuda91-2.3.0/magma_sparict_tools.patch b/conda/old/magma-cuda91-2.3.0/magma_sparict_tools.patch
similarity index 100%
rename from conda/magma-cuda91-2.3.0/magma_sparict_tools.patch
rename to conda/old/magma-cuda91-2.3.0/magma_sparict_tools.patch
diff --git a/conda/magma-cuda91-2.3.0/magma_zparict_tools.patch b/conda/old/magma-cuda91-2.3.0/magma_zparict_tools.patch
similarity index 100%
rename from conda/magma-cuda91-2.3.0/magma_zparict_tools.patch
rename to conda/old/magma-cuda91-2.3.0/magma_zparict_tools.patch
diff --git a/conda/magma-cuda91-2.3.0/meta.yaml b/conda/old/magma-cuda91-2.3.0/meta.yaml
similarity index 100%
rename from conda/magma-cuda91-2.3.0/meta.yaml
rename to conda/old/magma-cuda91-2.3.0/meta.yaml
diff --git a/conda/old/magma-cuda91-2.3.0/run_test.sh b/conda/old/magma-cuda91-2.3.0/run_test.sh
new file mode 100644
index 000000000..a1870840c
--- /dev/null
+++ b/conda/old/magma-cuda91-2.3.0/run_test.sh
@@ -0,0 +1,136 @@
+#build/testing/testing_cgegqr_gpu
+#build/testing/testing_cgelqf_gpu
+#build/testing/testing_cgels3_gpu
+#build/testing/testing_cgels_gpu
+#build/testing/testing_cgeqp3_gpu
+#build/testing/testing_cgeqr2_gpu
+#build/testing/testing_cgeqr2x_gpu
+#build/testing/testing_cgeqrf_gpu
+#build/testing/testing_cgeqrf_mgpu
+#build/testing/testing_cgesv_gpu
+#build/testing/testing_cgetf2_gpu
+#build/testing/testing_cgetrf_gpu
+#build/testing/testing_cgetrf_mgpu
+#build/testing/testing_cgetri_gpu
+#build/testing/testing_cheevd_gpu
+#build/testing/testing_chegst_gpu
+#build/testing/testing_chemm_mgpu
+#build/testing/testing_chemv_mgpu
+#build/testing/testing_cher2k_mgpu
+#build/testing/testing_chesv_nopiv_gpu
+#build/testing/testing_chetrd_gpu
+#build/testing/testing_chetrd_mgpu
+#build/testing/testing_clarfb_gpu
+#build/testing/testing_cposv_gpu
+#build/testing/testing_cpotf2_gpu
+#build/testing/testing_cpotrf_gpu
+#build/testing/testing_cpotrf_mgpu
+#build/testing/testing_cpotri_gpu
+#build/testing/testing_csysv_nopiv_gpu
+#build/testing/testing_ctrtri_gpu
+#build/testing/testing_cungqr_gpu
+#build/testing/testing_cunmql_gpu
+#build/testing/testing_cunmqr_gpu
+#build/testing/testing_dgegqr_gpu
+#build/testing/testing_dgelqf_gpu
+#build/testing/testing_dgels3_gpu
+#build/testing/testing_dgels_gpu
+#build/testing/testing_dgeqp3_gpu
+#build/testing/testing_dgeqr2_gpu
+#build/testing/testing_dgeqr2x_gpu
+#build/testing/testing_dgeqrf_gpu
+#build/testing/testing_dgeqrf_mgpu
+#build/testing/testing_dgesv_gpu
+#build/testing/testing_dgetf2_gpu
+#build/testing/testing_dgetrf_gpu
+#build/testing/testing_dgetrf_mgpu
+#build/testing/testing_dgetri_gpu
+#build/testing/testing_dlarfb_gpu
+#build/testing/testing_dorgqr_gpu
+#build/testing/testing_dormql_gpu
+#build/testing/testing_dormqr_gpu
+#build/testing/testing_dposv_gpu
+#build/testing/testing_dpotf2_gpu
+#build/testing/testing_dpotrf_gpu
+#build/testing/testing_dpotrf_mgpu
+#build/testing/testing_dpotri_gpu
+#build/testing/testing_dsgeqrsv_gpu
+#build/testing/testing_dsgesv_gpu
+#build/testing/testing_dsposv_gpu
+#build/testing/testing_dsyevd_gpu
+#build/testing/testing_dsygst_gpu
+#build/testing/testing_dsymm_mgpu
+#build/testing/testing_dsymv_mgpu
+#build/testing/testing_dsyr2k_mgpu
+#build/testing/testing_dsysv_nopiv_gpu
+#build/testing/testing_dsytrd_gpu
+#build/testing/testing_dsytrd_mgpu
+#build/testing/testing_dtrtri_gpu
+#build/testing/testing_sgegqr_gpu
+#build/testing/testing_sgelqf_gpu
+#build/testing/testing_sgels3_gpu
+#build/testing/testing_sgels_gpu
+#build/testing/testing_sgeqp3_gpu
+#build/testing/testing_sgeqr2_gpu
+#build/testing/testing_sgeqr2x_gpu
+#build/testing/testing_sgeqrf_gpu
+#build/testing/testing_sgeqrf_mgpu
+#build/testing/testing_sgesv_gpu
+#build/testing/testing_sgetf2_gpu
+#build/testing/testing_sgetrf_gpu
+#build/testing/testing_sgetrf_mgpu
+#build/testing/testing_sgetri_gpu
+#build/testing/testing_slarfb_gpu
+#build/testing/testing_sorgqr_gpu
+#build/testing/testing_sormql_gpu
+#build/testing/testing_sormqr_gpu
+#build/testing/testing_sposv_gpu
+#build/testing/testing_spotf2_gpu
+#build/testing/testing_spotrf_gpu
+#build/testing/testing_spotrf_mgpu
+#build/testing/testing_spotri_gpu
+#build/testing/testing_ssyevd_gpu
+#build/testing/testing_ssygst_gpu
+#build/testing/testing_ssymm_mgpu
+#build/testing/testing_ssymv_mgpu
+#build/testing/testing_ssyr2k_mgpu
+#build/testing/testing_ssysv_nopiv_gpu
+#build/testing/testing_ssytrd_gpu
+#build/testing/testing_ssytrd_mgpu
+#build/testing/testing_strtri_gpu
+#build/testing/testing_zcgeqrsv_gpu
+#build/testing/testing_zcgesv_gpu
+#build/testing/testing_zcposv_gpu
+#build/testing/testing_zgegqr_gpu
+#build/testing/testing_zgelqf_gpu
+#build/testing/testing_zgels3_gpu
+#build/testing/testing_zgels_gpu
+#build/testing/testing_zgeqp3_gpu
+#build/testing/testing_zgeqr2_gpu
+#build/testing/testing_zgeqr2x_gpu
+#build/testing/testing_zgeqrf_gpu
+#build/testing/testing_zgeqrf_mgpu
+#build/testing/testing_zgesv_gpu
+#build/testing/testing_zgetf2_gpu
+#build/testing/testing_zgetrf_gpu
+#build/testing/testing_zgetrf_mgpu
+#build/testing/testing_zgetri_gpu
+#build/testing/testing_zheevd_gpu
+#build/testing/testing_zhegst_gpu
+#build/testing/testing_zhemm_mgpu
+#build/testing/testing_zhemv_mgpu
+#build/testing/testing_zher2k_mgpu
+#build/testing/testing_zhesv_nopiv_gpu
+#build/testing/testing_zhetrd_gpu
+#build/testing/testing_zhetrd_mgpu
+#build/testing/testing_zlarfb_gpu
+#build/testing/testing_zposv_gpu
+#build/testing/testing_zpotf2_gpu
+#build/testing/testing_zpotrf_gpu
+#build/testing/testing_zpotrf_mgpu
+#build/testing/testing_zpotri_gpu
+#build/testing/testing_zsysv_nopiv_gpu
+#build/testing/testing_ztrtri_gpu
+#build/testing/testing_zungqr_gpu
+#build/testing/testing_zunmql_gpu
+#build/testing/testing_zunmqr_gpu
diff --git a/conda/old/magma-cuda91-2.3.0/thread_queue.patch b/conda/old/magma-cuda91-2.3.0/thread_queue.patch
new file mode 100644
index 000000000..1c2fa400f
--- /dev/null
+++ b/conda/old/magma-cuda91-2.3.0/thread_queue.patch
@@ -0,0 +1,20 @@
+--- control/thread_queue.cpp	2016-08-30 06:37:49.000000000 -0700
++++ control/thread_queue.cpp	2016-10-10 19:47:28.911580965 -0700
+@@ -15,7 +15,7 @@
+ {
+     if ( err != 0 ) {
+         fprintf( stderr, "Error: %s (%d)\n", strerror(err), err );
+-        throw std::exception();
++        // throw std::exception();
+     }
+ }
+ 
+@@ -172,7 +172,7 @@
+     check( pthread_mutex_lock( &mutex ));
+     if ( quit_flag ) {
+         fprintf( stderr, "Error: push_task() called after quit()\n" );
+-        throw std::exception();
++        // throw std::exception();
+     }
+     q.push( task );
+     ntask += 1;
diff --git a/conda/old/magma-cuda92-2.3.0/build.sh b/conda/old/magma-cuda92-2.3.0/build.sh
new file mode 100644
index 000000000..59ed0d864
--- /dev/null
+++ b/conda/old/magma-cuda92-2.3.0/build.sh
@@ -0,0 +1,23 @@
+# Configures, builds and installs the source tree in the current directory
+# into $PREFIX, refusing to run unless nvcc reports CUDA 9.2. PREFIX is read
+# from the environment (presumably set by conda-build -- confirm).
+export CMAKE_LIBRARY_PATH="$PREFIX/lib:$PREFIX/include:$CMAKE_LIBRARY_PATH"
+export CMAKE_PREFIX_PATH="$PREFIX"
+export PATH="$PREFIX/bin:$PATH"
+
+# The version is the 5th space-separated field of the last line of
+# `nvcc --version`, with the trailing comma removed
+CUDA__VERSION=$(nvcc --version|tail -n1|cut -f5 -d" "|cut -f1 -d",")
+if [ "$CUDA__VERSION" != "9.2" ]; then
+    echo "CUDA Version is not 9.2. CUDA Version found: $CUDA__VERSION"
+    exit 1
+fi
+
+# Every step exits on failure; otherwise the trailing `cd ..` would make the
+# script report success even when cmake or make failed
+mkdir -p build || exit 1
+cd build || exit 1
+cmake .. -DUSE_FORTRAN=OFF -DGPU_TARGET="All" -DCMAKE_INSTALL_PREFIX="$PREFIX" || exit 1
+make -j"$(getconf _NPROCESSORS_CONF)" || exit 1
+make install || exit 1
+cd ..
diff --git a/conda/magma-cuda92-2.3.0/cmakelists.patch b/conda/old/magma-cuda92-2.3.0/cmakelists.patch
similarity index 100%
rename from conda/magma-cuda92-2.3.0/cmakelists.patch
rename to conda/old/magma-cuda92-2.3.0/cmakelists.patch
diff --git a/conda/magma-cuda92-2.3.0/magma_cparict_tools.patch b/conda/old/magma-cuda92-2.3.0/magma_cparict_tools.patch
similarity index 100%
rename from conda/magma-cuda92-2.3.0/magma_cparict_tools.patch
rename to conda/old/magma-cuda92-2.3.0/magma_cparict_tools.patch
diff --git a/conda/magma-cuda92-2.3.0/magma_dparict_tools.patch b/conda/old/magma-cuda92-2.3.0/magma_dparict_tools.patch
similarity index 100%
rename from conda/magma-cuda92-2.3.0/magma_dparict_tools.patch
rename to conda/old/magma-cuda92-2.3.0/magma_dparict_tools.patch
diff --git a/conda/magma-cuda92-2.3.0/magma_sparict_tools.patch b/conda/old/magma-cuda92-2.3.0/magma_sparict_tools.patch
similarity index 100%
rename from conda/magma-cuda92-2.3.0/magma_sparict_tools.patch
rename to conda/old/magma-cuda92-2.3.0/magma_sparict_tools.patch
diff --git a/conda/magma-cuda92-2.3.0/magma_zparict_tools.patch b/conda/old/magma-cuda92-2.3.0/magma_zparict_tools.patch
similarity index 100%
rename from conda/magma-cuda92-2.3.0/magma_zparict_tools.patch
rename to conda/old/magma-cuda92-2.3.0/magma_zparict_tools.patch
diff --git a/conda/magma-cuda92-2.3.0/meta.yaml b/conda/old/magma-cuda92-2.3.0/meta.yaml
similarity index 100%
rename from conda/magma-cuda92-2.3.0/meta.yaml
rename to conda/old/magma-cuda92-2.3.0/meta.yaml
diff --git a/conda/old/magma-cuda92-2.3.0/run_test.sh b/conda/old/magma-cuda92-2.3.0/run_test.sh
new file mode 100644
index 000000000..a1870840c
--- /dev/null
+++ b/conda/old/magma-cuda92-2.3.0/run_test.sh
@@ -0,0 +1,136 @@
+#build/testing/testing_cgegqr_gpu
+#build/testing/testing_cgelqf_gpu
+#build/testing/testing_cgels3_gpu
+#build/testing/testing_cgels_gpu
+#build/testing/testing_cgeqp3_gpu
+#build/testing/testing_cgeqr2_gpu
+#build/testing/testing_cgeqr2x_gpu
+#build/testing/testing_cgeqrf_gpu
+#build/testing/testing_cgeqrf_mgpu
+#build/testing/testing_cgesv_gpu
+#build/testing/testing_cgetf2_gpu
+#build/testing/testing_cgetrf_gpu
+#build/testing/testing_cgetrf_mgpu
+#build/testing/testing_cgetri_gpu
+#build/testing/testing_cheevd_gpu
+#build/testing/testing_chegst_gpu
+#build/testing/testing_chemm_mgpu
+#build/testing/testing_chemv_mgpu
+#build/testing/testing_cher2k_mgpu
+#build/testing/testing_chesv_nopiv_gpu
+#build/testing/testing_chetrd_gpu
+#build/testing/testing_chetrd_mgpu
+#build/testing/testing_clarfb_gpu
+#build/testing/testing_cposv_gpu
+#build/testing/testing_cpotf2_gpu
+#build/testing/testing_cpotrf_gpu
+#build/testing/testing_cpotrf_mgpu
+#build/testing/testing_cpotri_gpu
+#build/testing/testing_csysv_nopiv_gpu
+#build/testing/testing_ctrtri_gpu
+#build/testing/testing_cungqr_gpu
+#build/testing/testing_cunmql_gpu
+#build/testing/testing_cunmqr_gpu
+#build/testing/testing_dgegqr_gpu
+#build/testing/testing_dgelqf_gpu
+#build/testing/testing_dgels3_gpu
+#build/testing/testing_dgels_gpu
+#build/testing/testing_dgeqp3_gpu
+#build/testing/testing_dgeqr2_gpu
+#build/testing/testing_dgeqr2x_gpu
+#build/testing/testing_dgeqrf_gpu
+#build/testing/testing_dgeqrf_mgpu
+#build/testing/testing_dgesv_gpu
+#build/testing/testing_dgetf2_gpu
+#build/testing/testing_dgetrf_gpu
+#build/testing/testing_dgetrf_mgpu
+#build/testing/testing_dgetri_gpu
+#build/testing/testing_dlarfb_gpu
+#build/testing/testing_dorgqr_gpu
+#build/testing/testing_dormql_gpu
+#build/testing/testing_dormqr_gpu
+#build/testing/testing_dposv_gpu
+#build/testing/testing_dpotf2_gpu
+#build/testing/testing_dpotrf_gpu
+#build/testing/testing_dpotrf_mgpu
+#build/testing/testing_dpotri_gpu
+#build/testing/testing_dsgeqrsv_gpu
+#build/testing/testing_dsgesv_gpu
+#build/testing/testing_dsposv_gpu
+#build/testing/testing_dsyevd_gpu
+#build/testing/testing_dsygst_gpu
+#build/testing/testing_dsymm_mgpu
+#build/testing/testing_dsymv_mgpu
+#build/testing/testing_dsyr2k_mgpu
+#build/testing/testing_dsysv_nopiv_gpu
+#build/testing/testing_dsytrd_gpu
+#build/testing/testing_dsytrd_mgpu
+#build/testing/testing_dtrtri_gpu
+#build/testing/testing_sgegqr_gpu
+#build/testing/testing_sgelqf_gpu
+#build/testing/testing_sgels3_gpu
+#build/testing/testing_sgels_gpu
+#build/testing/testing_sgeqp3_gpu
+#build/testing/testing_sgeqr2_gpu
+#build/testing/testing_sgeqr2x_gpu
+#build/testing/testing_sgeqrf_gpu
+#build/testing/testing_sgeqrf_mgpu
+#build/testing/testing_sgesv_gpu
+#build/testing/testing_sgetf2_gpu
+#build/testing/testing_sgetrf_gpu
+#build/testing/testing_sgetrf_mgpu
+#build/testing/testing_sgetri_gpu
+#build/testing/testing_slarfb_gpu
+#build/testing/testing_sorgqr_gpu
+#build/testing/testing_sormql_gpu
+#build/testing/testing_sormqr_gpu
+#build/testing/testing_sposv_gpu
+#build/testing/testing_spotf2_gpu
+#build/testing/testing_spotrf_gpu
+#build/testing/testing_spotrf_mgpu
+#build/testing/testing_spotri_gpu
+#build/testing/testing_ssyevd_gpu
+#build/testing/testing_ssygst_gpu
+#build/testing/testing_ssymm_mgpu
+#build/testing/testing_ssymv_mgpu
+#build/testing/testing_ssyr2k_mgpu
+#build/testing/testing_ssysv_nopiv_gpu
+#build/testing/testing_ssytrd_gpu
+#build/testing/testing_ssytrd_mgpu
+#build/testing/testing_strtri_gpu
+#build/testing/testing_zcgeqrsv_gpu
+#build/testing/testing_zcgesv_gpu
+#build/testing/testing_zcposv_gpu
+#build/testing/testing_zgegqr_gpu
+#build/testing/testing_zgelqf_gpu
+#build/testing/testing_zgels3_gpu
+#build/testing/testing_zgels_gpu
+#build/testing/testing_zgeqp3_gpu
+#build/testing/testing_zgeqr2_gpu
+#build/testing/testing_zgeqr2x_gpu
+#build/testing/testing_zgeqrf_gpu
+#build/testing/testing_zgeqrf_mgpu
+#build/testing/testing_zgesv_gpu
+#build/testing/testing_zgetf2_gpu
+#build/testing/testing_zgetrf_gpu
+#build/testing/testing_zgetrf_mgpu
+#build/testing/testing_zgetri_gpu
+#build/testing/testing_zheevd_gpu
+#build/testing/testing_zhegst_gpu
+#build/testing/testing_zhemm_mgpu
+#build/testing/testing_zhemv_mgpu
+#build/testing/testing_zher2k_mgpu
+#build/testing/testing_zhesv_nopiv_gpu
+#build/testing/testing_zhetrd_gpu
+#build/testing/testing_zhetrd_mgpu
+#build/testing/testing_zlarfb_gpu
+#build/testing/testing_zposv_gpu
+#build/testing/testing_zpotf2_gpu
+#build/testing/testing_zpotrf_gpu
+#build/testing/testing_zpotrf_mgpu
+#build/testing/testing_zpotri_gpu
+#build/testing/testing_zsysv_nopiv_gpu
+#build/testing/testing_ztrtri_gpu
+#build/testing/testing_zungqr_gpu
+#build/testing/testing_zunmql_gpu
+#build/testing/testing_zunmqr_gpu
diff --git a/conda/old/magma-cuda92-2.3.0/thread_queue.patch b/conda/old/magma-cuda92-2.3.0/thread_queue.patch
new file mode 100644
index 000000000..1c2fa400f
--- /dev/null
+++ b/conda/old/magma-cuda92-2.3.0/thread_queue.patch
@@ -0,0 +1,20 @@
+--- control/thread_queue.cpp	2016-08-30 06:37:49.000000000 -0700
++++ control/thread_queue.cpp	2016-10-10 19:47:28.911580965 -0700
+@@ -15,7 +15,7 @@
+ {
+     if ( err != 0 ) {
+         fprintf( stderr, "Error: %s (%d)\n", strerror(err), err );
+-        throw std::exception();
++        // throw std::exception();
+     }
+ }
+ 
+@@ -172,7 +172,7 @@
+     check( pthread_mutex_lock( &mutex ));
+     if ( quit_flag ) {
+         fprintf( stderr, "Error: push_task() called after quit()\n" );
+-        throw std::exception();
++        // throw std::exception();
+     }
+     q.push( task );
+     ntask += 1;
diff --git a/cron/backfill_binary_sizes.sh b/cron/backfill_binary_sizes.sh
new file mode 100755
index 000000000..f4a5418fe
--- /dev/null
+++ b/cron/backfill_binary_sizes.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+#
+# Backfill binary sizes: runs upload_binary_sizes.sh once per day over a range
+# of dates.
+#
+# Usage: backfill_binary_sizes.sh start_date [end_date=today]
+#   Dates are in the format YYYY_mm_dd. The loop below stops as soon as it
+#   reaches end_date, so end_date itself is not uploaded.
+
+set -ex
+SOURCE_DIR=$(cd "$(dirname "$0")" && pwd)
+
+# Parameters
+##############################################################################
+# N.B. (( )) compares numerically; < inside [[ ]] compares strings, so
+# "10" < "2" would have been true there
+if (( $# < 1 )); then
+    echo "Usage: backfill_binary_sizes start_date [end_date=today]"
+    echo "Dates in format YYYY_mm_dd"
+    exit 1
+else
+    start_date=$1
+    if (( $# < 2 )); then
+        stop_date="$(date +%Y_%m_%d)"
+    else
+        stop_date=$2
+    fi
+fi
+start_dash="$(echo "$start_date" | tr _ -)"
+
+
+days_passed=0
+next_date="$start_date"
+loop_limit=100
+
+while [[ "$next_date" != "$stop_date" ]]; do
+
+    # Upload binary sizes!
+    "${SOURCE_DIR}/upload_binary_sizes.sh" "$next_date"
+
+    # No infinite loop if dates are badly formatted
+    if (( "$days_passed" > "$loop_limit" )); then
+        break
+    fi
+
+    # Move on to next day, the date arithmetic requires - instead of _
+    days_passed=$(($days_passed + 1))
+    next_date=$(date +%Y-%m-%d -d "$start_dash + $days_passed day" | tr - _)
+done
diff --git a/cron/build_docker.sh b/cron/build_docker.sh
index e580824d2..db1dcf31c 100755
--- a/cron/build_docker.sh
+++ b/cron/build_docker.sh
@@ -89,6 +89,9 @@ if [[ -n "$ON_SUCCESS_WRITE_ME" ]]; then
     success_basename="$(basename $ON_SUCCESS_WRITE_ME)"
 fi
 
+# Pull a fresh copy of the docker image
+docker pull "$docker_image"
+
 # Build up Docker Arguments
 ##############################################################################
 docker_args=""
@@ -161,10 +164,6 @@ nvidia-docker cp "$NIGHTLIES_PYTORCH_ROOT" "$id:/pytorch"
 
     echo "cd /"
 
-    # Instal mkldnn
-    # TODO this is expensive and should be moved into the Docker images themselves
-    # echo '/remote/install_mkldnn.sh'
-
     # Run the build script
     echo "$build_script"
 
diff --git a/cron/build_multiple.sh b/cron/build_multiple.sh
index 464c566cd..85cb5181d 100755
--- a/cron/build_multiple.sh
+++ b/cron/build_multiple.sh
@@ -111,6 +111,13 @@ for config in "${all_configs[@]}"; do
       build_script="${NIGHTLIES_BUILDER_ROOT}/cron/build_docker.sh"
   fi
 
+  # Swap timeout out for libtorch
+  if [[ "$package_type" == libtorch ]]; then
+      _timeout="$PYTORCH_NIGHTLIES_LIBTORCH_TIMEOUT"
+  else
+      _timeout="$PYTORCH_NIGHTLIES_TIMEOUT"
+  fi
+
   set +x
   echo
   echo "##############################"
@@ -124,7 +131,7 @@ for config in "${all_configs[@]}"; do
       DESIRED_PYTHON="$py_ver" \
       DESIRED_CUDA="$cuda_ver" \
       ON_SUCCESS_WRITE_ME="$succeeded_log_loc" \
-      $PORTABLE_TIMEOUT "$PYTORCH_NIGHTLIES_TIMEOUT" \
+      $PORTABLE_TIMEOUT "$_timeout" \
           "$build_script" > "$log_name" 2>&1
   ret="$?"
   duration="$SECONDS"
diff --git a/cron/nightly_defaults.sh b/cron/nightly_defaults.sh
index 0a1c6c29a..f5a6c8aeb 100755
--- a/cron/nightly_defaults.sh
+++ b/cron/nightly_defaults.sh
@@ -61,7 +61,9 @@ mkdir -p "$today" || true
 
 # List of people to email when things go wrong. This is passed directly to
 # `mail -t`
-export NIGHTLIES_EMAIL_LIST='hellemn@fb.com'
+if [[ -z "$NIGHTLIES_EMAIL_LIST" ]]; then
+    export NIGHTLIES_EMAIL_LIST='hellemn@fb.com'
+fi
 
 # PYTORCH_CREDENTIALS_FILE
 #   A bash file that exports credentials needed to upload to aws and anaconda.
@@ -79,7 +81,9 @@ fi
 # Location of the temporary miniconda that is downloaded to install conda-build
 # and aws to upload finished packages TODO this is messy to install this in
 # upload.sh and later use it in upload_logs.sh
-CONDA_UPLOADER_INSTALLATION="${today}/miniconda"
+if [[ -z "$CONDA_UPLOADER_INSTALLATION" ]]; then
+    export CONDA_UPLOADER_INSTALLATION="${today}/miniconda"
+fi
 
 # N.B. BUILDER_REPO and BUILDER_BRANCH are both set in cron_start.sh, as that
 # is the script that actually clones the builder repo that /this/ script is
@@ -87,7 +91,9 @@ CONDA_UPLOADER_INSTALLATION="${today}/miniconda"
 export NIGHTLIES_BUILDER_ROOT="$(cd $(dirname $0)/.. && pwd)"
 
 # The shared pytorch repo to be used by all builds
-export NIGHTLIES_PYTORCH_ROOT="${today}/pytorch"
+if [[ -z "$NIGHTLIES_PYTORCH_ROOT" ]]; then
+    export NIGHTLIES_PYTORCH_ROOT="${today}/pytorch"
+fi
 
 # PYTORCH_REPO
 #   The Github org/user whose fork of Pytorch to check out (git clone
@@ -190,17 +196,26 @@ nightlies_package_folder () {
 #   should be empty. Logs are written out to RUNNING_LOG_DIR. When a build
 #   fails, it's log should be moved to FAILED_LOG_DIR, and similarily for
 #   succeeded builds.
-export RUNNING_LOG_DIR="${today}/logs"
-export FAILED_LOG_DIR="${today}/logs/failed"
-export SUCCEEDED_LOG_DIR="${today}/logs/succeeded"
+if [[ -z "$RUNNING_LOG_DIR" ]]; then
+    export RUNNING_LOG_DIR="${today}/logs"
+fi
+if [[ -z "$FAILED_LOG_DIR" ]]; then
+    export FAILED_LOG_DIR="${today}/logs/failed"
+fi
+if [[ -z "$SUCCEEDED_LOG_DIR" ]]; then
+    export SUCCEEDED_LOG_DIR="${today}/logs/succeeded"
+fi
 
 # Log s3 directory, must not end in a /
-if [[ "$(uname)" == 'Darwin' ]]; then
-    export LOGS_S3_DIR="nightly_logs/macos/$NIGHTLIES_DATE"
-else
-    export LOGS_S3_DIR="nightly_logs/linux/$NIGHTLIES_DATE"
+if [[ -z "$LOGS_S3_DIR" ]]; then
+    if [[ "$(uname)" == 'Darwin' ]]; then
+        export LOGS_S3_DIR="nightly_logs/macos/$NIGHTLIES_DATE"
+    else
+        export LOGS_S3_DIR="nightly_logs/linux/$NIGHTLIES_DATE"
+    fi
 fi
-export BINARY_SIZES_S3_DIR="nightly_logs/binary_sizes"
+# The location of the binary_sizes dir in s3 is hardcoded into
+# upload_binary_sizes.sh
 
 # DAYS_TO_KEEP
 #   How many days to keep around for clean.sh. Build folders older than this
@@ -235,6 +250,11 @@ if [[ -z "$PYTORCH_NIGHTLIES_TIMEOUT" ]]; then
         export PYTORCH_NIGHTLIES_TIMEOUT=4800
     fi
 fi
+if [[ -z "$PYTORCH_NIGHTLIES_LIBTORCH_TIMEOUT" ]]; then
+    # The libtorch job actually runs for several cpu/cuda versions in sequence
+    # and so takes a long time
+    export PYTORCH_NIGHTLIES_LIBTORCH_TIMEOUT=10800
+fi
 
 # PORTABLE_TIMEOUT
 #   Command/executable of some timeout command. Defined here because the path
diff --git a/cron/parse_conda_json.py b/cron/parse_conda_json.py
index ac4208648..65bb77898 100644
--- a/cron/parse_conda_json.py
+++ b/cron/parse_conda_json.py
@@ -16,16 +16,19 @@
 
     # conda search returns format {'pytorch-nightly': [{key:val}...]}
     pkg_name = list(rawdata.keys())[0]
+    print('parse_conda_json.py:: Parsing package {}'.format(pkg_name))
 
     # Loop through versions found, keeping only 'build', and size
     # size is in bytes
     for result in rawdata[pkg_name]:
         size = result['size']
-
         # 'build' is of the form 'py2.7_cuda8.0.61_cudnn7.1.2_0'
+        build = result['build']
+
+        print('parse_conda_json.py:: Size of {} is {}'.format(build, size))
+
         # Since all Python versions are always 3 digits, it is safe to extract
         # the CUDA version based on index alone.
-        build = result['build']
         py_ver = build[2:5]
         cu_ver = ('cu' + build[10] + build[12]) if 'cuda' in build else 'cpu'
 
diff --git a/cron/upload_binary_sizes.sh b/cron/upload_binary_sizes.sh
index 744fe6ec1..4633d2d7c 100755
--- a/cron/upload_binary_sizes.sh
+++ b/cron/upload_binary_sizes.sh
@@ -3,15 +3,26 @@
 set -ex
 echo "collect_binary_sizes.sh at $(pwd) starting at $(date) on $(uname -a) with pid $$"
 SOURCE_DIR=$(cd $(dirname $0) && pwd)
-source "${SOURCE_DIR}/nightly_defaults.sh"
+# N.B. we do not source nightly_defaults.sh to avoid cloning repos every date
+# of a backfill
 
 # Usage:
-#   collect_binary_sizes.sh
+#   collect_binary_sizes.sh [date]
 # Queries s3 and conda to get the binary sizes (as they're stored in the cloud)
 # for a day
 
-binary_sizes_log="$today/binary_sizes.log"
-binary_sizes_json="$today/$NIGHTLIES_DATE.json"
+# Optionally accept a date to upload for; (( )) compares the count numerically
+if (( $# > 0 )); then
+    target_date=$1
+    target_version="1.0.0.dev$(echo "$target_date" | tr -d _)"
+else
+    source "${SOURCE_DIR}/nightly_defaults.sh"
+    target_date="$NIGHTLIES_DATE"
+    target_version="$PYTORCH_BUILD_VERSION"
+fi
+
+binary_sizes_log="$SOURCE_DIR/binary_sizes.log"
+binary_sizes_json="$SOURCE_DIR/$target_date.json"
 rm -f "$binary_sizes_log"
 rm -f "$binary_sizes_json"
 touch "$binary_sizes_log"
@@ -22,14 +33,15 @@ touch "$binary_sizes_log"
 # This is read from `conda search`. 
 conda_platforms=('linux-64' 'osx-64')
 conda_pkg_names=('pytorch-nightly' 'pytorch-nightly-cpu')
-tmp_json="$today/conda_search.json"
+tmp_json="_conda_search.json"
 for pkg_name in "${conda_pkg_names[@]}"; do
     for platform in "${conda_platforms[@]}"; do
 
         # Read the info from conda-search
+        touch "$tmp_json"
         set +e
         conda search -c pytorch --json --platform "$platform" \
-                "$pkg_name==$PYTORCH_BUILD_VERSION" > "$tmp_json"
+                "$pkg_name==$target_version" > "$tmp_json"
         if [[ "$?" != 0 ]]; then
             set -e
             echo "ERROR: Could not query conda for $platform"
@@ -38,7 +50,7 @@ for pkg_name in "${conda_pkg_names[@]}"; do
         set -e
 
         # Call Python to parse the json into 'log_name_form size_in_bytes'
-        python "$NIGHTLIES_BUILDER_ROOT/cron/parse_conda_json.py" "$tmp_json" "$binary_sizes_log"
+        python "$SOURCE_DIR/parse_conda_json.py" "$tmp_json" "$binary_sizes_log"
     done
 done
 rm -f "$tmp_json"
@@ -51,7 +63,14 @@ for cu_ver in "${cuda_versions[@]}"; do
 
     # Read the info from s3
     s3_dir="s3://pytorch/whl/nightly/${cu_ver}/"
-    outputs=($(aws s3 ls "$s3_dir" | grep --only-matching "\S* \S*$PYTORCH_BUILD_VERSION\S*\.whl"))
+    set +e
+    outputs=($(aws s3 ls "$s3_dir" | grep --only-matching "\S* \S*$target_version\S*\.whl"))
+    if [[ "$?" != 0 ]]; then
+        set -e
+        echo "ERROR: Could find no [many]wheels for $cu_ver"
+        continue
+    fi
+    set -e
 
     # outputs is now a list of [size whl size whl...] as different elements
     for i in $(seq 0 2 $(( ${#outputs[@]} - 1 )) ); do
@@ -82,7 +101,7 @@ done
 
 # Convert the file of '<platform> <log_name> <size>' into a json for easy
 # ingestion in the react HUD
-python "$NIGHTLIES_BUILDER_ROOT/cron/write_json.py" "$binary_sizes_log" "$binary_sizes_json"
+python "$SOURCE_DIR/write_json.py" "$binary_sizes_log" "$binary_sizes_json"
 
 # Upload the log to s3
-aws s3 cp "$binary_sizes_json" "s3://pytorch/$BINARY_SIZES_S3_DIR/" --acl public-read --quiet
+aws s3 cp "$binary_sizes_json" "s3://pytorch/nightly_logs/binary_sizes/" --acl public-read --quiet
diff --git a/manywheel/Dockerfile_90 b/manywheel/Dockerfile_90
index e97bd0271..eb5f42464 100644
--- a/manywheel/Dockerfile_90
+++ b/manywheel/Dockerfile_90
@@ -57,17 +57,29 @@ RUN wget -q https://developer.nvidia.com/compute/cuda/9.0/Prod/patches/2/cuda_9.
 
 # install CUDA 9.0 CuDNN
 # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
-RUN curl -fsSL http://developer.download.nvidia.com/compute/redist/cudnn/v7.1.2/cudnn-9.0-linux-x64-v7.1.tgz -O && \
-    tar --no-same-owner -xzf cudnn-9.0-linux-x64-v7.1.tgz -C /usr/local && \
-    rm cudnn-9.0-linux-x64-v7.1.tgz && \
+RUN mkdir tmp_cudnn && cd tmp_cudnn && \
+    wget -q http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64/libcudnn7-dev_7.4.1.5-1+cuda9.0_amd64.deb && \
+    wget -q http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64/libcudnn7_7.4.1.5-1+cuda9.0_amd64.deb && \
+    ar -x libcudnn7-dev_7.4.1.5-1+cuda9.0_amd64.deb && tar -xvf data.tar.xz && \
+    ar -x libcudnn7_7.4.1.5-1+cuda9.0_amd64.deb && tar -xvf data.tar.xz && \
+    mkdir -p cuda/include && mkdir -p cuda/lib64 && \
+    cp -a usr/include/x86_64-linux-gnu/cudnn_v7.h cuda/include/cudnn.h && \
+    cp -a usr/lib/x86_64-linux-gnu/libcudnn* cuda/lib64 && \
+    mv cuda/lib64/libcudnn_static_v7.a cuda/lib64/libcudnn_static.a && \
+    ln -s libcudnn.so.7 cuda/lib64/libcudnn.so && \
+    chmod +x cuda/lib64/*.so && \
+    cp -a cuda/include/* /usr/local/cuda/include/ && \
+    cp -a cuda/lib64/* /usr/local/cuda/lib64/ && \
+    cd .. && \
+    rm -rf tmp_cudnn && \
     ldconfig
 
 # NCCL2 license: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html
-RUN wget -q https://s3.amazonaws.com/pytorch/nccl_2.2.13-1%2Bcuda9.0_x86_64.txz && \
-    tar --no-same-owner -xvf nccl_2.2.13-1+cuda9.0_x86_64.txz && \
-    mv nccl_2.2.13-1+cuda9.0_x86_64/include/* /usr/local/cuda/include/ && \
-    cp -P nccl_2.2.13-1+cuda9.0_x86_64/lib/libnccl* /usr/local/cuda/lib64/ && \
-    rm -rf nccl_2.2.13-1+cuda9.0_x86_64* && \
+RUN wget -q https://s3.amazonaws.com/pytorch/nccl_2.3.7-1%2Bcuda9.0_x86_64.txz && \
+    tar --no-same-owner -xvf nccl_2.3.7-1+cuda9.0_x86_64.txz && \
+    mv nccl_2.3.7-1+cuda9.0_x86_64/include/* /usr/local/cuda/include/ && \
+    cp -P nccl_2.3.7-1+cuda9.0_x86_64/lib/libnccl* /usr/local/cuda/lib64/ && \
+    rm -rf nccl_2.3.7-1+cuda9.0_x86_64* && \
     ldconfig
 
 # magma
@@ -107,6 +119,5 @@ RUN git clone https://github.com/NixOS/patchelf && \
 ARG GENCODE="-gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70"
 ARG CUDA_ROOT="/usr/local/cuda-9.0"
 ARG CUDA_LIB_DIR="/usr/local/cuda-9.0/lib64"
-RUN ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | \
-    xargs -I {} bash -c "echo {} && $CUDA_ROOT/bin/nvprune $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}" && \
-    $CUDA_ROOT/bin/nvprune $GENCODE -gencode code=compute_30 $CUDA_LIB_DIR/libcudnn_static.a -o $CUDA_LIB_DIR/libcudnn_static.a
+RUN ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | \
+    xargs -I {} bash -c "echo {} && $CUDA_ROOT/bin/nvprune $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}"
\ No newline at end of file
diff --git a/manywheel/Dockerfile_92 b/manywheel/Dockerfile_92
index 92692c909..d1f1534e4 100644
--- a/manywheel/Dockerfile_92
+++ b/manywheel/Dockerfile_92
@@ -47,17 +47,29 @@ RUN wget -q https://developer.nvidia.com/compute/cuda/9.2/Prod2/local_installers
 
 # install CUDA 9.2 CuDNN
 # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
-RUN curl -fsSL http://developer.download.nvidia.com/compute/redist/cudnn/v7.1.4/cudnn-9.2-linux-x64-v7.1.tgz -O && \
-    tar --no-same-owner -xzf cudnn-9.2-linux-x64-v7.1.tgz -C /usr/local && \
-    rm cudnn-9.2-linux-x64-v7.1.tgz && \
+RUN mkdir tmp_cudnn && cd tmp_cudnn && \
+    wget -q http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64/libcudnn7-dev_7.4.1.5-1+cuda9.2_amd64.deb && \
+    wget -q http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64/libcudnn7_7.4.1.5-1+cuda9.2_amd64.deb && \
+    ar -x libcudnn7-dev_7.4.1.5-1+cuda9.2_amd64.deb && tar -xvf data.tar.xz && \
+    ar -x libcudnn7_7.4.1.5-1+cuda9.2_amd64.deb && tar -xvf data.tar.xz && \
+    mkdir -p cuda/include && mkdir -p cuda/lib64 && \
+    cp -a usr/include/x86_64-linux-gnu/cudnn_v7.h cuda/include/cudnn.h && \
+    cp -a usr/lib/x86_64-linux-gnu/libcudnn* cuda/lib64 && \
+    mv cuda/lib64/libcudnn_static_v7.a cuda/lib64/libcudnn_static.a && \
+    ln -s libcudnn.so.7 cuda/lib64/libcudnn.so && \
+    chmod +x cuda/lib64/*.so && \
+    cp -a cuda/include/* /usr/local/cuda/include/ && \
+    cp -a cuda/lib64/* /usr/local/cuda/lib64/ && \
+    cd .. && \
+    rm -rf tmp_cudnn && \
     ldconfig
 
 # NCCL2 license: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html
-RUN wget -q https://s3.amazonaws.com/pytorch/nccl_2.2.13-1%2Bcuda9.2_x86_64.txz && \
-    tar --no-same-owner -xvf nccl_2.2.13-1+cuda9.2_x86_64.txz && \
-    mv nccl_2.2.13-1+cuda9.2_x86_64/include/* /usr/local/cuda/include/ && \
-    cp -P nccl_2.2.13-1+cuda9.2_x86_64/lib/libnccl* /usr/local/cuda/lib64/ && \
-    rm -rf nccl_2.2.13-1+cuda9.2_x86_64* && \
+RUN wget -q https://s3.amazonaws.com/pytorch/nccl_2.3.7-1%2Bcuda9.2_x86_64.txz && \
+    tar --no-same-owner -xvf nccl_2.3.7-1+cuda9.2_x86_64.txz && \
+    mv nccl_2.3.7-1+cuda9.2_x86_64/include/* /usr/local/cuda/include/ && \
+    cp -P nccl_2.3.7-1+cuda9.2_x86_64/lib/libnccl* /usr/local/cuda/lib64/ && \
+    rm -rf nccl_2.3.7-1+cuda9.2_x86_64* && \
     ldconfig
 
 # magma
@@ -97,6 +109,5 @@ RUN git clone https://github.com/NixOS/patchelf && \
 ARG GENCODE="-gencode arch=compute_35,code=sm_35 -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70"
 ARG CUDA_ROOT="/usr/local/cuda-9.2"
 ARG CUDA_LIB_DIR="/usr/local/cuda-9.2/lib64"
-RUN ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "cusolver" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | \
-    xargs -I {} bash -c "echo {} && $CUDA_ROOT/bin/nvprune $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}" && \
-    $CUDA_ROOT/bin/nvprune $GENCODE -gencode code=compute_30 $CUDA_LIB_DIR/libcudnn_static.a -o $CUDA_LIB_DIR/libcudnn_static.a
+RUN ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "cusolver" | grep -v "culibos" | grep -v "cudart" | \
+    xargs -I {} bash -c "echo {} && $CUDA_ROOT/bin/nvprune $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}"
diff --git a/windows/azure-pipelines.yml b/windows/azure-pipelines.yml
index 3ddd06e84..0f7666f39 100644
--- a/windows/azure-pipelines.yml
+++ b/windows/azure-pipelines.yml
@@ -32,6 +32,9 @@ jobs:
         DESIRED_PYTHON: 3.6
       PY3.7:
         DESIRED_PYTHON: 3.7
+      LIBTORCH:
+        DESIRED_PYTHON: 3
+        BUILD_PYTHONLESS: 1
 
   pool:
     vmImage: 'vs2017-win2016'
@@ -192,9 +195,12 @@ jobs:
         DESIRED_PYTHON: 3.6
       PY3.7:
         DESIRED_PYTHON: 3.7
+      LIBTORCH:
+        DESIRED_PYTHON: 3
+        BUILD_PYTHONLESS: 1
 
   pool:
-    name: 'Default'
+    name: 'pytorch'
 
   steps:
   - task: BatchScript@1
@@ -267,7 +273,7 @@ jobs:
         DESIRED_PYTHON: 3.7
 
   pool:
-    name: 'Default'
+    name: 'pytorch'
 
   steps:
   - task: BatchScript@1
diff --git a/windows/build_pytorch.bat b/windows/build_pytorch.bat
index 8429176a5..0282fc108 100644
--- a/windows/build_pytorch.bat
+++ b/windows/build_pytorch.bat
@@ -105,7 +105,7 @@ for %%v in (%DESIRED_PYTHON_PREFIX%) do (
     )
     call %CUDA_PREFIX%.bat
     IF ERRORLEVEL 1 exit /b 1
-    call internal\test.bat
+    IF "%BUILD_PYTHONLESS%" == "" call internal\test.bat
     IF ERRORLEVEL 1 exit /b 1
     @endlocal
 )
diff --git a/windows/internal/publish.bat b/windows/internal/publish.bat
index 1271b9f1d..a93d43ee0 100644
--- a/windows/internal/publish.bat
+++ b/windows/internal/publish.bat
@@ -21,7 +21,7 @@ IF ERRORLEVEL 1 (
 
 IF ERRORLEVEL 1 (
     echo Clone failed
-    exit /b 1
+    goto err
 )
 
 cd pytorch_builder
@@ -49,11 +49,32 @@ git checkout --orphan %PUBLISH_BRANCH%
 git remote add origin %ARTIFACT_REPO_URL%
 git add .
 git commit -m "Update artifacts"
-git push origin %PUBLISH_BRANCH%% -f > nul 2>&1
 
-popd
+:push
+
+if "%RETRY_TIMES%" == "" (
+    set /a RETRY_TIMES=3
+) else (
+    set /a RETRY_TIMES=%RETRY_TIMES%-1
+)
+
+git push origin %PUBLISH_BRANCH%% -f > nul 2>&1
 
 IF ERRORLEVEL 1 (
-    echo Push failed
-    exit /b 1
+    echo Git push retry times remaining: %RETRY_TIMES%
+    IF %RETRY_TIMES% EQU 0 (
+        echo Push failed
+        goto err
+    )
+    goto push
 )
+
+popd
+
+exit /b 0
+
+:err
+
+popd
+
+exit /b 1
diff --git a/windows/internal/setup.bat b/windows/internal/setup.bat
index 021b3e410..e530ee179 100755
--- a/windows/internal/setup.bat
+++ b/windows/internal/setup.bat
@@ -25,8 +25,38 @@ if "%CXX%"=="sccache cl" (
     sccache --zero-stats
 )
 
+
+if "%BUILD_PYTHONLESS%" == "" goto pytorch else goto libtorch
+
+:libtorch
+set VARIANT=shared-with-deps
+
+mkdir libtorch
+set "INSTALL_DIR=%CD%\libtorch"
+mkdir libtorch\lib
+copy /Y torch\lib\*.dll libtorch\lib\
+
+mkdir build
+pushd build
+python ../tools/build_libtorch.py
+popd
+
+IF ERRORLEVEL 1 exit /b 1
+IF NOT ERRORLEVEL 0 exit /b 1
+
+move /Y libtorch\bin\*.dll libtorch\lib\
+
+7z a -tzip libtorch-%VARIANT%-%PYTORCH_BUILD_VERSION%.zip libtorch\*
+
+mkdir ..\output\%CUDA_PREFIX%
+copy /Y libtorch-%VARIANT%-%PYTORCH_BUILD_VERSION%.zip ..\output\%CUDA_PREFIX%\
+
+goto build_end
+
+:pytorch
 pip wheel -e . --wheel-dir ../output/%CUDA_PREFIX%
 
+:build_end
 IF ERRORLEVEL 1 exit /b 1
 IF NOT ERRORLEVEL 0 exit /b 1