Merge pull request #2 from apache/master

Syncing forks
jinboci · Jul 8, 2020 · cf87b37 · cf87b37
2 parents 75f975b + 348ab4d
commit cf87b37
Show file tree

Hide file tree

Showing 2,333 changed files with 38,027 additions and 287,687 deletions.
diff --git a/.codecov.yml b/.codecov.yml
@@ -4,6 +4,9 @@ codecov:
     require_ci_to_pass: yes
 
 coverage:
+  status:
+    project: off
+    patch: off
   precision: 2
   round: down
   range: "70...100"

diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1,8 @@
+blank_issues_enabled: false
+contact_links:
+  - name: Ask a question
+    url: https://stackoverflow.com/questions/tagged/mxnet
+    about: Use Stack Overflow to ask and answer questions
+  - name: Discuss
+    url: https://discuss.mxnet.io/
+    about: Use Discuss forums for discussions [Stackoverflow alternative]
diff --git a/.github/workflows/os_x_staticbuild.yml b/.github/workflows/os_x_staticbuild.yml
@@ -8,18 +8,42 @@ jobs:
     steps:
       - name: Checkout repository
         uses: actions/checkout@v2
+
+      - name: Compilation cache
+        uses: actions/cache@v2
+        with:
+          path: ~/.ccache
+          # We include the commit sha in the cache key, as new cache entries are
+          # only created if there is no existing entry for the key yet.
+          key: ${{ runner.os }}-ccache-${{ github.sha }}
+          # Restore any ccache cache entry, if none for
+          # ${{ runner.os }}-ccache-${{ github.sha }} exists
+          restore-keys: |
+            ${{ runner.os }}-ccache
+
+      - name: Setup python
+        uses: actions/setup-python@v2
+        with:
+          python-version: '3.6'
+          architecture: x64
+
       - name: Install Dependencies
         run: |
-          brew install nasm automake ninja libtool cmake pkgconfig protobuf hdf5 zlib
-          python3 -m pip install --user -r ci/docker/install/requirements
+          brew install nasm automake ninja libtool cmake pkgconfig protobuf hdf5 zlib ccache
+          ccache -M 500M  # Limit the ccache size; Github's overall cache limit is 5GB
+          python -m pip install -r ci/docker/install/requirements
+        shell: bash
+
       - name: Build project
         run: |
-          git --version
-          clang --version
           CMAKE_STATICBUILD=1 ./tools/staticbuild/build.sh cpu
+
       - name: Setup Python
         run: |
-          python3 -m pip install --user -e python
+          python -m pip install --user -e python
+
       - name: Test project
         run: |
-          python3 -m pytest --durations=50 --verbose tests/python/unittest/ -k 'not (test_subgraph or test_custom_op or test_recordimage_dataset_with_data_loader_multiworker or test_multi_worker or test_multi_worker_shape or test_multi_worker_forked_data_loader or test_multi_worker_dataloader_release_pool)'
+          python3 -m pytest -n 4 --durations=50 --verbose tests/python/unittest/ -k 'not test_operator and not (test_subgraph or test_custom_op or test_recordimage_dataset_with_data_loader_multiworker or test_multi_worker or test_multi_worker_shape or test_multi_worker_forked_data_loader or test_multi_worker_dataloader_release_pool)' -m 'not serial'
+          MXNET_ENGINE_TYPE=NaiveEngine python3 -m pytest -n 4 --durations=50 --verbose tests/python/unittest/ -k 'test_operator and not (test_subgraph or test_custom_op or test_recordimage_dataset_with_data_loader_multiworker or test_multi_worker or test_multi_worker_shape or test_multi_worker_forked_data_loader or test_multi_worker_dataloader_release_pool)' -m 'not serial'
+          python3 -m pytest --durations=50 --verbose tests/python/unittest/ -k 'not (test_subgraph or test_custom_op or test_recordimage_dataset_with_data_loader_multiworker or test_multi_worker or test_multi_worker_shape or test_multi_worker_forked_data_loader or test_multi_worker_dataloader_release_pool)' -m 'serial'
diff --git a/.gitignore b/.gitignore
@@ -121,6 +121,10 @@ cmake_install.cmake
 # Mac OS X
 .DS_Store
 
+# Windows
+windows_package.7z
+windows_package
+
 #Notebook Automated Test
 !tests/nightly/test_tutorial_config.txt
 !tests/nightly/TestNotebook

diff --git a/3rdparty/mkldnn b/3rdparty/mkldnn
diff --git a/3rdparty/mshadow/CMakeLists.txt b/3rdparty/mshadow/CMakeLists.txt
@@ -4,8 +4,10 @@ project(mshadow C CXX)
 include(CMakeDependentOption)
 option(USE_CUDA "Build with CUDA support" ON)
 option(USE_CUDNN ON)
-cmake_dependent_option(USE_SSE "Build with x86 SSE instruction support" ON "NOT ARM" OFF)
-option(USE_F16C "Build with x86 F16C instruction support" ON) # autodetects support if ON
+# SSE and F16C are x86-only instruction sets, so both options are forced OFF
+# unless CMAKE_SYSTEM_PROCESSOR names a 64-bit x86 target. STREQUAL is
+# case-sensitive, so the upper-case "AMD64" spelling reported by Windows hosts
+# is listed next to "x86_64" and "amd64".
+cmake_dependent_option(USE_SSE "Build with x86 SSE instruction support" ON
+  "CMAKE_SYSTEM_PROCESSOR STREQUAL x86_64 OR CMAKE_SYSTEM_PROCESSOR STREQUAL amd64 OR CMAKE_SYSTEM_PROCESSOR STREQUAL AMD64" OFF)
+cmake_dependent_option(USE_F16C "Build with x86 F16C instruction support" ON
+  "CMAKE_SYSTEM_PROCESSOR STREQUAL x86_64 OR CMAKE_SYSTEM_PROCESSOR STREQUAL amd64 OR CMAKE_SYSTEM_PROCESSOR STREQUAL AMD64" OFF)   # autodetects support if ON
 option(USE_INT64_TENSOR_SIZE "Use int64_t to represent the total number of elements in a tensor" OFF)
 option(MSHADOW_IN_CXX11 ON)
 

diff --git a/3rdparty/mshadow/cmake/AutoDetectF16C.cmake b/3rdparty/mshadow/cmake/AutoDetectF16C.cmake
@@ -26,10 +26,6 @@ if(AUTO_DETECT_F16_CMAKE_INCLUDED)
 endif()
 set(AUTO_DETECT_F16_CMAKE_INCLUDED True)
 set(SUPPORT_F16C False)
-if(ANDROID)
-    message("F16C instruction set is not yet supported for Andriod")
-    return()
-endif()
 if(MSVC)
     message("F16C instruction set is not yet supported for MSVC")
     return()

diff --git a/3rdparty/mshadow/mshadow/base.h b/3rdparty/mshadow/mshadow/base.h
@@ -251,7 +251,7 @@ extern "C" {
     if (e == cudaErrorCudartUnloading) {                           \
       throw dmlc::Error(cudaGetErrorString(e));                    \
     }                                                              \
-    CHECK(e == cudaSuccess)                                        \
+    CHECK_EQ(e, cudaSuccess)                                       \
         << "CUDA: " << cudaGetErrorString(e);                      \
   }
 
@@ -272,6 +272,7 @@ extern "C" {
   }
 
 #include "./half.h"
+#include "./half2.h"
 #include "./bfloat.h"
 #define MSHADOW_HALF_BF_OPERATOR(RTYPE, OP)                                               \
   MSHADOW_XINLINE RTYPE operator OP(mshadow::half::half_t a, mshadow::bfloat::bf16_t b) { \
@@ -386,6 +387,11 @@ struct DataType<half::half_t> {
 #endif
 };
 template<>
+struct DataType<half::half2_t> {
+  static const int kFlag = kFloat16;
+  static const int kLanes = 2;
+};
+template<>
 struct DataType<bfloat::bf16_t> {
   static const int kFlag = kBfloat16;
   static const int kLanes = 1;
@@ -1138,6 +1144,48 @@ struct minimum {
   }
 #endif
 
+/*!
+ * \brief Switch on a runtime type flag and expand the given statements with
+ *        DType typedef'd to the matching C++ type.
+ *
+ * Mapping used by the cases below: kFloat32 -> float, kFloat64 -> double,
+ * kFloat16 -> mshadow::half::half2_t, kUint8 -> uint8_t, kInt32 -> int32_t,
+ * kInt64 -> int64_t. Any other value reaches the default case and triggers
+ * LOG(FATAL).
+ *
+ * \param type  runtime type flag to switch on
+ * \param DType name of the typedef made visible to the statements
+ * \param ...   statements expanded inside each case
+ */
+#define MSHADOW_TYPE_SWITCH_WITH_HALF2(type, DType, ...)  \
+  switch (type) {                                         \
+  case mshadow::kFloat32:                                 \
+    {                                                     \
+      typedef float DType;                                \
+      {__VA_ARGS__}                                       \
+    }                                                     \
+    break;                                                \
+  case mshadow::kFloat64:                                 \
+    {                                                     \
+      typedef double DType;                               \
+      {__VA_ARGS__}                                       \
+    }                                                     \
+    break;                                                \
+  case mshadow::kFloat16:                                 \
+    {                                                     \
+      typedef mshadow::half::half2_t DType;               \
+      {__VA_ARGS__}                                       \
+    }                                                     \
+    break;                                                \
+  case mshadow::kUint8:                                   \
+    {                                                     \
+      typedef uint8_t DType;                              \
+      {__VA_ARGS__}                                       \
+    }                                                     \
+    break;                                                \
+  case mshadow::kInt32:                                   \
+    {                                                     \
+      typedef int32_t DType;                              \
+      {__VA_ARGS__}                                       \
+    }                                                     \
+    break;                                                \
+  case mshadow::kInt64:                                   \
+    {                                                     \
+      typedef int64_t DType;                              \
+      {__VA_ARGS__}                                       \
+    }                                                     \
+    break;                                                \
+  default:                                                \
+    LOG(FATAL) << "Unknown type enum " << type;           \
+  }
+
 #define MSHADOW_SGL_DBL_TYPE_SWITCH(type, DType, ...)  \
   switch (type) {                                      \
   case mshadow::kFloat32:                              \

diff --git a/3rdparty/mshadow/mshadow/cuda/tensor_gpu-inl.cuh b/3rdparty/mshadow/mshadow/cuda/tensor_gpu-inl.cuh
@@ -35,7 +35,7 @@
 #define MSHADOW_CUDA_POST_KERNEL_CHECK(x) \
   /* Code block avoids redefinition of cudaError_t err */ \
   do { \
-    cudaError err = cudaPeekAtLastError(); \
+    cudaError err = cudaGetLastError(); \
     CHECK_EQ(err, cudaSuccess) << "Name: " << #x << " ErrStr:" << cudaGetErrorString(err); \
   } while (0)
 namespace mshadow {

diff --git a/3rdparty/mshadow/mshadow/half2.h b/3rdparty/mshadow/mshadow/half2.h
@@ -0,0 +1,162 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *  Copyright (c) 2017 by Contributors
+ * \file half2.h
+ * \brief definition of vector float16, half2 type.
+ *
+ * \author Antti-Pekka Hynninen
+ */
+#ifndef MSHADOW_HALF2_H_
+#define MSHADOW_HALF2_H_
+
+#if (defined(__CUDACC__) && __CUDA_ARCH__ >= 530 && MSHADOW_USE_CUDA && CUDA_VERSION >= 7050)
+  #define MSHADOW_CUDA_HALF2 1
+  #include <cuda_fp16.h>
+#else
+  #define MSHADOW_CUDA_HALF2 0
+#endif
+
+#include<math.h>
+
+/*! \brief namespace for mshadow */
+namespace mshadow {
+/*! \brief namespace for host/device portable half-precision floats */
+namespace half {
+
+/*!
+ * \brief Define a templated compound-assignment member operator of half2_t.
+ *
+ * The generated operator evaluates `*this OP a`, wraps the result in a
+ * half2_t, assigns it to `*this` and returns the assigned value by value.
+ *
+ * \param AOP compound-assignment operator token to define (e.g. +=)
+ * \param OP  binary operator token it is implemented with (e.g. +)
+ */
+#define MSHADOW_HALF2_ASSIGNOP(AOP, OP)                                   \
+  template<typename T>                                                    \
+  MSHADOW_XINLINE half2_t operator AOP (const T& a) {                     \
+    return *this = half2_t(*this OP a);  /* NOLINT(*)*/                   \
+  }                                                                       \
+
+/*!
+ * \brief Two half-precision values stored and operated on as one element.
+ *
+ * With MSHADOW_CUDA_HALF2 set to 1 the pair lives in the CUDA `half2` member
+ * `half2_`; otherwise it lives in the two-element `half_t` array `half_t2`.
+ */
+class MSHADOW_ALIGNED(4) half2_t {
+ public:
+#if MSHADOW_CUDA_HALF2
+  half2 half2_;
+#else
+  half_t half_t2[2];
+#endif
+
+  /*! \brief default constructor; performs no explicit initialization of the pair */
+  MSHADOW_XINLINE half2_t() {}
+
+#if MSHADOW_CUDA_HALF2
+  /*! \brief wrap an existing CUDA half2 value */
+  MSHADOW_XINLINE explicit half2_t(half2 a) : half2_(a) {}
+#else
+  /*! \brief build the pair from its first (a) and second (b) element */
+  MSHADOW_XINLINE explicit half2_t(half_t a, half_t b) {
+    half_t2[0] = a;
+    half_t2[1] = b;
+  }
+#endif
+
+  /*! \brief set both elements to the half-precision conversion of a */
+  MSHADOW_XINLINE explicit half2_t(int a) {
+#if MSHADOW_CUDA_HALF2
+    half2_ = __half2half2(__int2half_rz(a));
+#else
+    half_t2[0] = (half_t)a;
+    half_t2[1] = (half_t)a;
+#endif
+  }
+
+  /*! \brief unary plus; returns a copy of this value (usable on const objects) */
+  MSHADOW_XINLINE half2_t operator+() const {
+    return *this;
+  }
+
+  /*! \brief unary minus; negates both elements (usable on const objects) */
+  MSHADOW_XINLINE half2_t operator-() const {
+#if MSHADOW_CUDA_HALF2
+    return half2_t(__hneg2(half2_));
+#else
+    // Negate local copies so this compiles whether or not half_t's own unary
+    // minus is const-qualified.
+    half_t first = half_t2[0];
+    half_t second = half_t2[1];
+    return half2_t(-first, -second);
+#endif
+  }
+
+  /*!
+   * \brief copy both elements from a
+   * \return reference to this object, so chained assignments refer to the
+   *         assigned-to object instead of a temporary copy of a
+   */
+  MSHADOW_XINLINE half2_t& operator=(const half2_t& a) {
+#if MSHADOW_CUDA_HALF2
+    half2_ = a.half2_;
+#else
+    half_t2[0] = a.half_t2[0];
+    half_t2[1] = a.half_t2[1];
+#endif
+    return *this;
+  }
+
+  MSHADOW_HALF2_ASSIGNOP(+=, +)
+  MSHADOW_HALF2_ASSIGNOP(-=, -)
+  MSHADOW_HALF2_ASSIGNOP(*=, *)
+  MSHADOW_HALF2_ASSIGNOP(/=, /)
+};
+
+/*! \brief element-wise sum of two half2_t values */
+MSHADOW_XINLINE half2_t operator+(half2_t a, half2_t b) {
+#if MSHADOW_CUDA_HALF2
+  // Each element is added in float, then both sums are packed back into a half2.
+  const float lo = __low2float(a.half2_) + __low2float(b.half2_);
+  const float hi = __high2float(a.half2_) + __high2float(b.half2_);
+  return half2_t(__floats2half2_rn(lo, hi));
+#else
+  const half_t lo = a.half_t2[0] + b.half_t2[0];
+  const half_t hi = a.half_t2[1] + b.half_t2[1];
+  return half2_t(lo, hi);
+#endif
+}
+/*! \brief element-wise difference (a - b) of two half2_t values */
+MSHADOW_XINLINE half2_t operator-(half2_t a, half2_t b) {
+#if MSHADOW_CUDA_HALF2
+  // Each element is subtracted in float, then both results are packed into a half2.
+  const float lo = __low2float(a.half2_) - __low2float(b.half2_);
+  const float hi = __high2float(a.half2_) - __high2float(b.half2_);
+  return half2_t(__floats2half2_rn(lo, hi));
+#else
+  const half_t lo = a.half_t2[0] - b.half_t2[0];
+  const half_t hi = a.half_t2[1] - b.half_t2[1];
+  return half2_t(lo, hi);
+#endif
+}
+/*! \brief element-wise product of two half2_t values */
+MSHADOW_XINLINE half2_t operator*(half2_t a, half2_t b) {
+#if MSHADOW_CUDA_HALF2
+  // Each element is multiplied in float, then both products are packed into a half2.
+  const float lo = __low2float(a.half2_) * __low2float(b.half2_);
+  const float hi = __high2float(a.half2_) * __high2float(b.half2_);
+  return half2_t(__floats2half2_rn(lo, hi));
+#else
+  const half_t lo = a.half_t2[0] * b.half_t2[0];
+  const half_t hi = a.half_t2[1] * b.half_t2[1];
+  return half2_t(lo, hi);
+#endif
+}
+/*! \brief element-wise quotient (a / b) of two half2_t values */
+MSHADOW_XINLINE half2_t operator/(half2_t a, half2_t b) {
+#if MSHADOW_CUDA_HALF2
+  // Each element is divided in float, then both quotients are packed into a half2.
+  const float lo = __low2float(a.half2_) / __low2float(b.half2_);
+  const float hi = __high2float(a.half2_) / __high2float(b.half2_);
+  return half2_t(__floats2half2_rn(lo, hi));
+#else
+  const half_t lo = a.half_t2[0] / b.half_t2[0];
+  const half_t hi = a.half_t2[1] / b.half_t2[1];
+  return half2_t(lo, hi);
+#endif
+}
+/*! \brief element-wise ::fmod(a, b) of two half2_t values */
+MSHADOW_XINLINE half2_t operator%(half2_t a, half2_t b) {
+#if MSHADOW_CUDA_HALF2
+  // The remainder of each element is taken in float, then both are packed into a half2.
+  const float lo = ::fmod(__low2float(a.half2_), __low2float(b.half2_));
+  const float hi = ::fmod(__high2float(a.half2_), __high2float(b.half2_));
+  return half2_t(__floats2half2_rn(lo, hi));
+#else
+  const half_t lo = ::fmod(a.half_t2[0], b.half_t2[0]);
+  const half_t hi = ::fmod(a.half_t2[1], b.half_t2[1]);
+  return half2_t(lo, hi);
+#endif
+}
+/*! \brief equality of two half2_t values; true only when both elements compare equal */
+MSHADOW_XINLINE bool operator==(half2_t a, half2_t b) {
+#if MSHADOW_CUDA_HALF2
+  return __hbeq2(a.half2_, b.half2_);
+#else
+  // Compare the first element, and only then the second.
+  if (!(a.half_t2[0] == b.half_t2[0])) {
+    return false;
+  }
+  return a.half_t2[1] == b.half_t2[1];
+#endif
+}
+
+}  // namespace half
+}  // namespace mshadow
+#endif  // MSHADOW_HALF2_H_
diff --git a/3rdparty/mshadow/mshadow/tensor.h b/3rdparty/mshadow/mshadow/tensor.h
diff --git a/3rdparty/mshadow/mshadow/tensor_cpu-inl.h b/3rdparty/mshadow/mshadow/tensor_cpu-inl.h
diff --git a/3rdparty/mshadow/mshadow/tensor_gpu-inl.h b/3rdparty/mshadow/mshadow/tensor_gpu-inl.h