
Commit

Merge branch 'master' of github.com:dmlc/tvm into dev
ZihengJiang committed Aug 15, 2019
2 parents 8bcfc40 + 60fc9f7 commit d6b9381
Showing 48 changed files with 1,060 additions and 181 deletions.
7 changes: 5 additions & 2 deletions Jenkinsfile
@@ -48,7 +48,10 @@ tvm_runtime = "build/libtvm_runtime.so, build/config.cmake"
 tvm_lib = "build/libtvm.so, " + tvm_runtime
 // LLVM upstream lib
 tvm_multilib = "build/libtvm.so, " +
-               "build/libvta.so, build/libtvm_topi.so, build/libnnvm_compiler.so, " + tvm_runtime
+               "build/libvta_tsim.so, " +
+               "build/libvta_fsim.so, " +
+               "build/libtvm_topi.so, " +
+               "build/libnnvm_compiler.so, " + tvm_runtime

 // command to start a docker container
 docker_run = 'docker/bash.sh'
@@ -190,11 +193,11 @@ stage('Build') {
       make(ci_cpu, 'build', '-j2')
       pack_lib('cpu', tvm_lib)
       timeout(time: max_time, unit: 'MINUTES') {
-        sh "${docker_run} ${ci_cpu} ./tests/scripts/task_python_vta.sh"
         sh "${docker_run} ${ci_cpu} ./tests/scripts/task_rust.sh"
         sh "${docker_run} ${ci_cpu} ./tests/scripts/task_golang.sh"
         sh "${docker_run} ${ci_cpu} ./tests/scripts/task_python_unittest.sh"
         sh "${docker_run} ${ci_cpu} ./tests/scripts/task_python_integration.sh"
+        sh "${docker_run} ${ci_cpu} ./tests/scripts/task_python_vta.sh"
       }
     }
   }
9 changes: 9 additions & 0 deletions cmake/config.cmake
@@ -137,3 +137,12 @@ set(USE_ANTLR OFF)

 # Whether use Relay debug mode
 set(USE_RELAY_DEBUG OFF)
+
+# Whether to build fast VTA simulator driver
+set(USE_VTA_FSIM ON)
+
+# Whether to build cycle-accurate VTA simulator driver
+set(USE_VTA_TSIM ON)
+
+# Whether to build VTA FPGA driver (device side only)
+set(USE_VTA_FPGA OFF)
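With both simulator flags ON, a default build now produces two separate driver libraries, libvta_fsim.so and libvta_tsim.so (the names the Jenkinsfile change above packs). A minimal sketch of sanity-checking the artifacts from Python, assuming a completed build under build/ — the ctypes probe is illustrative and not part of this commit:

import ctypes

# Assumes the repo root as working directory and a build with
# USE_VTA_FSIM=ON and USE_VTA_TSIM=ON.
for lib in ("build/libvta_fsim.so", "build/libvta_tsim.so"):
    ctypes.CDLL(lib, ctypes.RTLD_GLOBAL)  # raises OSError if the driver was not built
    print("loaded", lib)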
83 changes: 50 additions & 33 deletions cmake/modules/VTA.cmake
@@ -37,44 +37,61 @@ elseif(PYTHON)

 string(REGEX MATCHALL "(^| )-D[A-Za-z0-9_=.]*" VTA_DEFINITIONS "${__vta_defs}")

-file(GLOB VTA_RUNTIME_SRCS vta/src/*.cc)
-# Add sim driver sources
-if(${VTA_TARGET} STREQUAL "sim")
-  file(GLOB __vta_target_srcs vta/src/sim/*.cc)
-endif()
-# Add tsim driver sources
-if(${VTA_TARGET} STREQUAL "tsim")
-  file(GLOB __vta_target_srcs vta/src/tsim/*.cc)
-  file(GLOB RUNTIME_DPI_SRCS vta/src/dpi/module.cc)
-  list(APPEND RUNTIME_SRCS ${RUNTIME_DPI_SRCS})
-endif()
-# Add pynq driver sources
-if(${VTA_TARGET} STREQUAL "pynq" OR ${VTA_TARGET} STREQUAL "ultra96")
-  file(GLOB __vta_target_srcs vta/src/pynq/*.cc)
+# Fast simulator driver build
+if(USE_VTA_FSIM)
+  # Add fsim driver sources
+  file(GLOB FSIM_RUNTIME_SRCS vta/src/*.cc)
+  list(APPEND FSIM_RUNTIME_SRCS vta/src/sim/sim_driver.cc)
+  # Target lib: vta_fsim
+  add_library(vta_fsim SHARED ${FSIM_RUNTIME_SRCS})
+  target_include_directories(vta_fsim PUBLIC vta/include)
+  foreach(__def ${VTA_DEFINITIONS})
+    string(SUBSTRING ${__def} 3 -1 __strip_def)
+    target_compile_definitions(vta_fsim PUBLIC ${__strip_def})
+  endforeach()
+  include_directories("vta/include")
+  if(APPLE)
+    set_target_properties(vta_fsim PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
+  endif(APPLE)
 endif()
-list(APPEND VTA_RUNTIME_SRCS ${__vta_target_srcs})
-
-add_library(vta SHARED ${VTA_RUNTIME_SRCS})
-
-target_include_directories(vta PUBLIC vta/include)
-
-foreach(__def ${VTA_DEFINITIONS})
-  string(SUBSTRING ${__def} 3 -1 __strip_def)
-  target_compile_definitions(vta PUBLIC ${__strip_def})
-endforeach()
-
-# Enable tsim macro
-if(${VTA_TARGET} STREQUAL "tsim")
+# Cycle accurate simulator driver build
+if(USE_VTA_TSIM)
+  # Add tsim driver sources
+  file(GLOB TSIM_RUNTIME_SRCS vta/src/*.cc)
+  list(APPEND TSIM_RUNTIME_SRCS vta/src/tsim/tsim_driver.cc)
+  list(APPEND TSIM_RUNTIME_SRCS vta/src/dpi/module.cc)
+  # Target lib: vta_tsim
+  add_library(vta_tsim SHARED ${TSIM_RUNTIME_SRCS})
+  target_include_directories(vta_tsim PUBLIC vta/include)
+  foreach(__def ${VTA_DEFINITIONS})
+    string(SUBSTRING ${__def} 3 -1 __strip_def)
+    target_compile_definitions(vta_tsim PUBLIC ${__strip_def})
+  endforeach()
   include_directories("vta/include")
-  target_compile_definitions(vta PUBLIC USE_TSIM)
+  # Set USE_TSIM macro
+  target_compile_definitions(vta_tsim PUBLIC USE_TSIM)
+  if(APPLE)
+    set_target_properties(vta_tsim PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
+  endif(APPLE)
 endif()

-if(APPLE)
-  set_target_properties(vta PROPERTIES LINK_FLAGS "-undefined dynamic_lookup")
-endif(APPLE)

-# PYNQ rules for Pynq v2.4
-if(${VTA_TARGET} STREQUAL "pynq" OR ${VTA_TARGET} STREQUAL "ultra96")
+# VTA FPGA driver sources
+if(USE_VTA_FPGA)
+  file(GLOB FPGA_RUNTIME_SRCS vta/src/*.cc)
+  # Rules for Zynq-class FPGAs with pynq OS support (see pynq.io)
+  if(${VTA_TARGET} STREQUAL "pynq" OR
+     ${VTA_TARGET} STREQUAL "ultra96")
+    file(GLOB FPGA_RUNTIME_SRCS vta/src/pynq/pynq_driver.cc)
+  endif()
+  # Target lib: vta
+  add_library(vta SHARED ${FPGA_RUNTIME_SRCS})
+  target_include_directories(vta PUBLIC vta/include)
+  foreach(__def ${VTA_DEFINITIONS})
+    string(SUBSTRING ${__def} 3 -1 __strip_def)
+    target_compile_definitions(vta PUBLIC ${__strip_def})
+  endforeach()
+  # Rules for Pynq v2.4
   find_library(__cma_lib NAMES cma PATH /usr/lib)
   target_link_libraries(vta ${__cma_lib})
 endif()
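The per-driver libraries built above are selected on the Python side through the VTA environment config. A small sketch, assuming a source build with the vta Python package on PYTHONPATH (vta.get_env reads vta/config/vta_config.json; the target-to-library mapping stated in the comments reflects the tree at this commit):

import vta

env = vta.get_env()   # parses vta/config/vta_config.json
print(env.TARGET)     # "sim" exercises the fast simulator (libvta_fsim),
                      # "tsim" the cycle-accurate one (libvta_tsim)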
21 changes: 0 additions & 21 deletions docs/api/python/relay/ir_pass.rst

This file was deleted.

44 changes: 44 additions & 0 deletions docs/api/python/relay/transform.rst
@@ -26,6 +26,50 @@ tvm.relay.transform

 .. autofunction:: tvm.relay.transform.function_pass

+.. autofunction:: tvm.relay.transform.InferType
+
+.. autofunction:: tvm.relay.transform.FoldScaleAxis
+
+.. autofunction:: tvm.relay.transform.BackwardFoldScaleAxis
+
+.. autofunction:: tvm.relay.transform.ForwardFoldScaleAxis
+
+.. autofunction:: tvm.relay.transform.SimplifyInference
+
+.. autofunction:: tvm.relay.transform.CanonicalizeOps
+
+.. autofunction:: tvm.relay.transform.DeadCodeElimination
+
+.. autofunction:: tvm.relay.transform.FoldConstant
+
+.. autofunction:: tvm.relay.transform.FuseOps
+
+.. autofunction:: tvm.relay.transform.CombineParallelConv2D
+
+.. autofunction:: tvm.relay.transform.AlterOpLayout
+
+.. autofunction:: tvm.relay.transform.Legalize
+
+.. autofunction:: tvm.relay.transform.RewriteAnnotatedOps
+
+.. autofunction:: tvm.relay.transform.ToANormalForm
+
+.. autofunction:: tvm.relay.transform.ToCPS
+
+.. autofunction:: tvm.relay.transform.EtaExpand
+
+.. autofunction:: tvm.relay.transform.ToGraphNormalForm
+
+.. autofunction:: tvm.relay.transform.EliminateCommonSubexpr
+
+.. autofunction:: tvm.relay.transform.PartialEvaluate
+
+.. autofunction:: tvm.relay.transform.CanonicalizeCast
+
+.. autofunction:: tvm.relay.transform.LambdaLift
+
+.. autofunction:: tvm.relay.transform.PrintIR
+
 .. autoclass:: tvm.relay.transform.Pass
    :members:
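The entries above document Relay's pass-as-callable style: each transform returns a Pass object that is applied directly to a module. A minimal sketch using two of the documented passes (API names match the relay.Module era of this commit):

import tvm
from tvm import relay

x = relay.var("x", shape=(2, 2))
f = relay.Function([x], x + (relay.const(1.0) + relay.const(2.0)))
mod = relay.Module.from_expr(f)

mod = relay.transform.InferType()(mod)     # annotate types
mod = relay.transform.FoldConstant()(mod)  # folds 1.0 + 2.0 into 3.0
print(mod)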
6 changes: 4 additions & 2 deletions docs/dev/index.rst
@@ -26,11 +26,13 @@ In this part of documentation, we share the rationale for the specific choices made

    runtime
    debugger
-   nnvm_json_spec
-   nnvm_overview
    hybrid_script
    relay_intro
    relay_add_op
+   relay_pass_infra
    relay_add_pass
+   virtual_machine
    codebase_walkthrough
    inferbound
+   nnvm_json_spec
+   nnvm_overview
3 changes: 3 additions & 0 deletions docs/vta/install.md
@@ -117,6 +117,9 @@ ssh xilinx@192.168.2.99
 cd /home/xilinx/tvm
 mkdir build
 cp cmake/config.cmake build/.
+echo 'set(USE_VTA_FSIM OFF)' >> build/config.cmake
+echo 'set(USE_VTA_TSIM OFF)' >> build/config.cmake
+echo 'set(USE_VTA_FPGA ON)' >> build/config.cmake
 # Copy pynq specific configuration
 cp vta/config/pynq_sample.json vta/config/vta_config.json
 cd build
13 changes: 13 additions & 0 deletions include/tvm/relay/attrs/nn.h
@@ -411,6 +411,19 @@ struct PadAttrs : public tvm::AttrsNode<PadAttrs> {
   }
 };

+/*! \brief Attributes used for the MirrorPadding operator */
+struct MirrorPadAttrs : public tvm::AttrsNode<MirrorPadAttrs> {
+  std::string mode;
+  Array<Array<IndexExpr> > pad_width;
+
+  TVM_DECLARE_ATTRS(MirrorPadAttrs, "relay.attrs.MirrorPadAttrs") {
+    TVM_ATTR_FIELD(mode).set_default("SYMMETRIC")
+        .describe("Specifies how mirroring should be performed.");
+    TVM_ATTR_FIELD(pad_width)
+        .describe("Number of values padded to the edges of each axis, "
+                  "in the format of ((before_1, after_1), ..., (before_N, after_N))");
+  }
+};

 /*! \brief Attributes for leaky relu operator */
 struct LeakyReluAttrs : public tvm::AttrsNode<LeakyReluAttrs> {
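These attributes back the new mirror_pad operator. A hedged usage sketch from the Python side, assuming the relay.nn.mirror_pad binding registered alongside this struct:

from tvm import relay

x = relay.var("x", shape=(1, 3, 32, 32))
# pad_width follows ((before_1, after_1), ..., (before_N, after_N))
y = relay.nn.mirror_pad(x, pad_width=((0, 0), (0, 0), (2, 2), (2, 2)),
                        mode="SYMMETRIC")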
2 changes: 1 addition & 1 deletion python/tvm/relay/expr_functor.py
@@ -249,7 +249,7 @@ def visit_constructor(self, con):
         return con

     def visit_match(self, m):
-        return Match(self.visit(m.data), [Clause(c.lhs, self.visit(c.rhs)) for c in m.pattern])
+        return Match(self.visit(m.data), [Clause(c.lhs, self.visit(c.rhs)) for c in m.clauses])

     def visit_ref_create(self, r):
         return RefCreate(self.visit(r.value))
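For reference, a Match node stores its arms on the clauses field, which is what the corrected line iterates; there is no pattern field on Match. A small sketch using the ADT constructors from tvm.relay.adt:

from tvm import relay
from tvm.relay.adt import Clause, Match, PatternWildcard

data = relay.var("data")
# Each Clause pairs a pattern (lhs) with a body (rhs).
m = Match(data, [Clause(PatternWildcard(), relay.const(0))])
assert len(m.clauses) == 1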
77 changes: 75 additions & 2 deletions python/tvm/relay/frontend/tensorflow.py
@@ -448,11 +448,31 @@ def _impl(inputs, attr, params):

 def _batch_matmul():
     def _impl(inputs, attr, params):
+        input_x = inputs[0]
+        input_y = inputs[1]
+        orig_shape_x = attr['_input_shapes'][input_x]
+        orig_shape_y = attr['_input_shapes'][input_y]
+
+        # reshape n-dimensional batch matmul into 3d
+        if len(orig_shape_x) > 3:
+            outer_dims = [orig_shape_x[i] for i in range(0, len(orig_shape_x) - 2)]
+            num_outer_elts = np.prod(outer_dims)
+            new_shape_x = (num_outer_elts, orig_shape_x[-2], orig_shape_x[-1])
+            new_shape_y = (num_outer_elts, orig_shape_y[-2], orig_shape_y[-1])
+            input_x = _op.reshape(input_x, newshape=new_shape_x)
+            input_y = _op.reshape(input_y, newshape=new_shape_y)
+
         adj_x = attr['adj_x']
         adj_y = attr['adj_y']
-        input_x = _op.transpose(inputs[0], axes=[0, 2, 1]) if adj_x else inputs[0]
-        input_y = _op.transpose(inputs[1], axes=[0, 2, 1]) if not adj_y else inputs[1]
+        input_x = _op.transpose(input_x, axes=[0, 2, 1]) if adj_x else input_x
+        input_y = _op.transpose(input_y, axes=[0, 2, 1]) if not adj_y else input_y
         ret = get_relay_op('batch_matmul')(input_x, input_y)
+
+        # reshape result back to n-dimensional
+        if len(orig_shape_x) > 3:
+            final_shape = attr['_output_shapes'][0]
+            ret = _op.reshape(ret, newshape=final_shape)
+
         return ret
     return _impl
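The rank-flattening trick above is easiest to see in plain NumPy: merge all outer dimensions into one batch axis, run a rank-3 matmul, then restore the outer shape. An illustrative sketch (NumPy only, not the converter code):

import numpy as np

x = np.random.rand(2, 3, 4, 5)   # rank-4 batch matmul inputs
y = np.random.rand(2, 3, 5, 6)
x3 = x.reshape(2 * 3, 4, 5)      # num_outer_elts = prod of the outer dims
y3 = y.reshape(2 * 3, 5, 6)
out = np.matmul(x3, y3).reshape(2, 3, 4, 6)  # reshape back to final_shape
assert np.allclose(out, np.matmul(x, y))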

@@ -569,6 +589,44 @@ def _impl(inputs, attr, params):
     return _impl


+def _space_to_depth():
+    def _impl(inputs, attr, params):
+        # Need to handle data layouts differently.
+        input_shape = attr['_input_shapes'][inputs[0]]
+        block_size = int(attr['block_size'])
+        if attr['data_format'].decode("utf-8") == 'NHWC':
+            in_n, in_h, in_w, in_c = input_shape
+            new_h = int(in_h / block_size)
+            new_w = int(in_w / block_size)
+
+            # First expand input to larger dimension.
+            expanded = _op.reshape(
+                inputs[0], newshape=(in_n, new_h, block_size, new_w, block_size, in_c))
+            # Now reorder to expand spatial blocks.
+            transposed = _op.transpose(expanded, axes=(0, 1, 3, 2, 4, 5))
+            # Finally reshape to proper output.
+            new_c = in_c * block_size * block_size
+            newshape = (in_n, new_h, new_w, new_c)
+
+        else:  # Handle NCHW layout
+            in_n, in_c, in_h, in_w = input_shape
+            new_h = int(in_h / block_size)
+            new_w = int(in_w / block_size)
+
+            expanded = _op.reshape(
+                inputs[0], newshape=(in_n, in_c, new_h, block_size, new_w, block_size))
+            transposed = _op.transpose(expanded, axes=(0, 3, 5, 1, 2, 4))
+            new_c = int(in_c * block_size * block_size)
+            newshape = (in_n, new_c, new_h, new_w)
+
+        return AttrCvt(
+            op_name="reshape",
+            extras={'newshape': newshape},
+            ignores=['data_format', 'block_size'])([transposed], attr)
+
+    return _impl
+
+
 def _bias_add():
     def _impl(inputs, attr, params):
         # Must expand for proper broadcasting in NCHW.
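The reshape/transpose/reshape pipeline above is the standard space_to_depth decomposition; a NumPy sketch of the NHWC branch with block_size=2 (illustrative only):

import numpy as np

n, h, w, c, b = 1, 4, 4, 3, 2
x = np.arange(n * h * w * c).reshape(n, h, w, c)
expanded = x.reshape(n, h // b, b, w // b, b, c)
transposed = expanded.transpose(0, 1, 3, 2, 4, 5)
out = transposed.reshape(n, h // b, w // b, c * b * b)
assert out.shape == (1, 2, 2, 12)  # each 2x2 spatial block moves into channels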
@@ -851,6 +909,19 @@ def _impl(inputs, attr, params):
             ignores=['Tpaddings'],)(new_inputs, attr)
     return _impl

+def _mirror_pad():
+    def _impl(inputs, attr, params):
+        padlist = _get_param(params, inputs[1])
+        paddings = tuple(tuple(l) for l in padlist)
+        attr['pad_width'] = paddings
+        mode = attr['mode'].decode('utf-8')
+        attr['mode'] = mode
+        new_inputs = [inputs[0]]
+        return AttrCvt(
+            op_name='mirror_pad',
+            ignores=['Tpaddings'],)(new_inputs, attr)
+    return _impl
+
 def _transpose():
     def _impl(inputs, attr, params):
         # If perm is not specified, axes is left empty,
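TF's two MirrorPad modes correspond to NumPy's reflect and symmetric pad modes: REFLECT mirrors about the edge without repeating the edge value, SYMMETRIC repeats it. A one-dimensional sketch:

import numpy as np

a = np.array([1, 2, 3])
np.pad(a, (2, 2), mode="reflect")    # array([3, 2, 1, 2, 3, 2, 1])
np.pad(a, (2, 2), mode="symmetric")  # array([2, 1, 1, 2, 3, 3, 2])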
@@ -1208,6 +1279,7 @@ def _impl(inputs, attr, params):
     'Mean'                              : _mean(),
     'Min'                               : _reduce('min'),
     'Minimum'                           : _elemwise('minimum'),
+    'MirrorPad'                         : _mirror_pad(),
     'Mod'                               : _elemwise('mod'),
     'Mul'                               : _elemwise('multiply'),
     'Neg'                               : AttrCvt('negative'),
@@ -1240,6 +1312,7 @@ def _impl(inputs, attr, params):
     'Softmax'                           : _softmax(),
     'Softplus'                          : _softplus(),
     'SpaceToBatchND'                    : _space_to_batch_nd(),
+    'SpaceToDepth'                      : _space_to_depth(),
     'Split'                             : _split(False),
     'SplitV'                            : _split(True),
     'Sqrt'                              : AttrCvt('sqrt'),