This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Commit

fixing lapack issues and mkl blas issues
Rohit Kumar Srivastava committed Jul 3, 2020
1 parent b7e2292 commit 3e6b50f
Showing 4 changed files with 17 additions and 16 deletions.
25 changes: 13 additions & 12 deletions CMakeLists.txt
@@ -36,18 +36,6 @@ endif()

 include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/Utils.cmake)
 
-if(DEFINED USE_INT64_TENSOR_SIZE AND NOT USE_INT64_TENSOR_SIZE OR CMAKE_SIZEOF_VOID_P EQUAL 4)
-  message(STATUS "Large Tensor disabled !!")
-  set(USE_INT64_TENSOR_SIZE OFF CACHE BOOL "Use int64_t to represent the total number of elements in a tensor")
-else()
-  message(STATUS "Large Tensor enabled !!")
-  set(USE_INT64_TENSOR_SIZE ON CACHE BOOL "Use int64_t to represent the total number of elements in a tensor")
-  if(USE_BLAS STREQUAL "MKL")
-    # Enable MKL ILP64 support when Large Tensor enabled
-    set(MKL_USE_ILP64 ON CACHE BOOL "enable MKL ILP64 interface.")
-  endif()
-endif()
-
 include(CMakeDependentOption)
 #Some things have order. This must be put in front alone
 option(MXNET_BUILD_SHARED_LIBS "Build shared libraries instead of static libraries" ON)
@@ -924,3 +912,16 @@ if(BUILD_CYTHON_MODULES)
     message(FATAL_ERROR "No python interpreter found to build cython modules")
   endif()
 endif()
+
+if(DEFINED USE_INT64_TENSOR_SIZE AND NOT USE_INT64_TENSOR_SIZE OR CMAKE_SIZEOF_VOID_P EQUAL 4)
+  message(STATUS "Large Tensor disabled !!")
+  set(USE_INT64_TENSOR_SIZE OFF CACHE BOOL "Use int64_t to represent the total number of elements in a tensor")
+else()
+  message(STATUS "Large Tensor enabled !!")
+  set(USE_INT64_TENSOR_SIZE ON CACHE BOOL "Use int64_t to represent the total number of elements in a tensor")
+  if(USE_BLAS STREQUAL "MKL")
+    # Enable MKL ILP64 support when Large Tensor enabled
+    set(MKL_USE_ILP64 ON CACHE BOOL "enable MKL ILP64 interface.")
+  endif()
+endif()
+
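The relocated block couples large-tensor support to MKL's ILP64 interface. Below is a rough, illustrative sketch of the integer-width agreement it maintains; the MKL_ILP64 macro and the 64-bit index_t stand-in are assumptions about the MKL and MXNet build configuration, not part of this diff.

// Illustrative sketch only, not code from this commit.
#include <cstdint>

#ifdef MKL_ILP64
using lapack_int_t = std::int64_t;   // ILP64 interface: 64-bit LAPACK integer arguments
#else
using lapack_int_t = std::int32_t;   // LP64 interface: 32-bit LAPACK integer arguments
#endif

// Stand-in for MXNet's index_t in a large-tensor (USE_INT64_TENSOR_SIZE=ON) build.
using mx_index_t = std::int64_t;

int main() {
  // Enabling MKL_USE_ILP64 whenever large tensors are enabled keeps these two
  // widths equal, so pivot/rank buffers can be handed to LAPACK unchanged.
  return sizeof(lapack_int_t) == sizeof(mx_index_t) ? 0 : 1;
}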
4 changes: 2 additions & 2 deletions src/operator/c_lapack_api.h
@@ -377,7 +377,7 @@ inline void flip(int m, int n, DType *b, int ldb, DType *a, int lda) {
 
 #define MXNET_LAPACK_CWRAP_GETRI(prefix, dtype) \
 inline int MXNET_LAPACK_##prefix##getri(int matrix_layout, int n, dtype *a, int lda, \
-                                        int *ipiv, dtype *work, int lwork) { \
+                                        index_t *ipiv, dtype *work, int lwork) { \
   if (lwork != -1) { \
     return LAPACKE_##prefix##getri(matrix_layout, n, a, lda, ipiv); \
   } \
@@ -406,7 +406,7 @@ inline void flip(int m, int n, DType *b, int ldb, DType *a, int lda) {
 #define MXNET_LAPACK_CWRAP_GELSD(prefix, dtype) \
 inline int MXNET_LAPACK_##prefix##gelsd(int matrix_layout, int m, int n, int nrhs, \
                                         dtype *a, int lda, dtype *b, int ldb, \
-                                        dtype *s, dtype rcond, int *rank, \
+                                        dtype *s, dtype rcond, index_t *rank, \
                                         dtype *work, int lwork, int *iwork) { \
   if (lwork != -1) { \
     return LAPACKE_##prefix##gelsd(matrix_layout, m, n, nrhs, a, lda, b, ldb, \
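For orientation, here is a standalone sketch of roughly what the getri wrapper above expands to for dtype=double once ipiv takes index_t; the LAPACKE stub and helper names are placeholders for illustration, not the real MKL/OpenBLAS symbols.

// Hypothetical, simplified expansion of MXNET_LAPACK_CWRAP_GETRI for double.
#include <cstdint>
#include <vector>

using index_t = std::int64_t;  // mirrors MXNet's index type in large-tensor builds

// Stand-in for the LAPACKE routine; the real one lives in MKL/OpenBLAS.
inline int LAPACKE_dgetri_stub(int /*layout*/, int n, double* /*a*/, int /*lda*/,
                               const index_t* /*ipiv*/) {
  return n >= 0 ? 0 : -1;  // 0 means success in LAPACK convention
}

// The wrapper keeps an MXNet-side signature: pivots arrive as index_t so the
// same buffer works for 32-bit and ILP64/large-tensor builds alike.
inline int mxnet_lapack_dgetri(int matrix_layout, int n, double* a, int lda,
                               index_t* ipiv, double* /*work*/, int lwork) {
  if (lwork != -1) {
    return LAPACKE_dgetri_stub(matrix_layout, n, a, lda, ipiv);
  }
  return 0;  // workspace query: nothing to report for the LAPACKE interface
}

int main() {
  int n = 2;
  std::vector<double> a = {1.0, 0.0, 0.0, 1.0};  // identity, already LU-factorized
  std::vector<index_t> ipiv = {1, 2};            // trivial pivots
  return mxnet_lapack_dgetri(101 /*row-major*/, n, a.data(), n, ipiv.data(),
                             nullptr, 0);
}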
2 changes: 1 addition & 1 deletion src/operator/linalg_impl.h
@@ -1412,7 +1412,7 @@ LINALG_GPU_GESVD_WORKSPACE_QUERY(DnDgesvd, double)
 #define LINALG_CPU_GETRF(fname, DType) \
 template<> inline \
 void linalg_getrf<cpu, DType>(const Tensor<cpu, 2, DType>& A, \
-                              const Tensor<cpu, 1, int>& pivot, \
+                              const Tensor<cpu, 1, index_t>& pivot, \
                               bool check_singular, Stream<cpu> *s) { \
   int ret(MXNET_LAPACK_##fname(MXNET_LAPACK_COL_MAJOR, A.size(1), A.size(0), \
                                A.dptr_, A.stride_, pivot.dptr_)); \
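As a toy illustration (not MXNet code) of the failure mode the wider pivot type guards against: if the linked LAPACK uses 64-bit integers (ILP64) but the caller supplies a 32-bit pivot buffer, the values written by ?getrf come back truncated or shifted.

// Hypothetical demonstration of a 64-bit-to-32-bit pivot width mismatch.
#include <cstdint>
#include <cstring>
#include <iostream>
#include <vector>

int main() {
  // Pretend an ILP64 LAPACK wrote two 64-bit pivots {1, 2} into caller memory.
  std::vector<std::int64_t> written = {1, 2};

  // A 32-bit pivot buffer of "length 2" holds only half the bytes LAPACK wrote.
  std::vector<std::int32_t> pivot32(2);
  std::memcpy(pivot32.data(), written.data(),
              pivot32.size() * sizeof(std::int32_t));  // truncated copy

  // On a little-endian machine this prints "1 0" instead of "1 2".
  std::cout << pivot32[0] << " " << pivot32[1] << "\n";
  return 0;
}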
2 changes: 1 addition & 1 deletion src/operator/numpy/linalg/np_solve-inl.h
@@ -86,7 +86,7 @@ inline void check_solve(const Tensor<xpu, 2, DType>& A,
 template<> inline \
 void linalg_solve<cpu, DType>(const Tensor<cpu, 2, DType>& A, \
                               const Tensor<cpu, 2, DType>& X, \
-                              const Tensor<cpu, 1, int>& ipiv, \
+                              const Tensor<cpu, 1, index_t>& ipiv, \
                               Stream<cpu> *s) { \
   check_solve(A, X); \
   const int N = X.size(1), nrhs = X.size(0); \
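Finally, a caller-side sketch with hypothetical names (not MXNet symbols) showing that under the new signature the pivot workspace for a CPU solve is allocated as index_t rather than int.

// Hypothetical caller of a linalg_solve-style routine after the type change.
#include <cstdint>
#include <vector>

using index_t = std::int64_t;  // mirrors MXNet's index type in large-tensor builds

// Stand-in for the linalg_solve<cpu, DType> specialization generated above;
// a real implementation would forward to the LAPACK gesv wrapper.
void linalg_solve_stub(std::vector<double>& A, std::vector<double>& X,
                       std::vector<index_t>& ipiv) {
  (void)A; (void)X; (void)ipiv;  // no-op placeholder
}

int main() {
  const int n = 3;
  std::vector<double> A(n * n, 0.0), X(n, 0.0);
  std::vector<index_t> ipiv(n);  // pivot workspace sized N, typed index_t
  linalg_solve_stub(A, X, ipiv);
  return 0;
}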
