[VTA][Relay] Relay Compilation + AutoTVM compatible operator libraries for VTA (#3135)
tmoreau89 authored and tqchen committed Jun 28, 2019
1 parent 813a3d5 commit 3818b2a
Showing 56 changed files with 3,815 additions and 1,186 deletions.
24 changes: 5 additions & 19 deletions vta/python/vta/top/arm_conv2d.py → apps/pynq_rpc/start_rpc_server_to_tracker.sh
100644 → 100755
@@ -1,3 +1,4 @@
+#!/bin/bash
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements. See the NOTICE file
 # distributed with this work for additional information
@@ -14,24 +15,9 @@
 # KIND, either express or implied. See the License for the
 # specific language governing permissions and limitations
 # under the License.
-"""Reuse conv2d schedule from ARM CPU"""
-
-import tvm
-
-from topi.nn import conv2d, conv2d_alter_layout
-from topi import generic
-
-@conv2d.register(["vtacpu", "vta"])
-def compute(*args, **kwargs):
-    with tvm.target.arm_cpu("vtacpu"):
-        return conv2d(*args, **kwargs)
-
-@generic.schedule_conv2d_nchw.register(["vtacpu", "vta"])
-def schedule(*args, **kwargs):
-    with tvm.target.arm_cpu("vtacpu"):
-        return generic.schedule_conv2d_nchw(*args, **kwargs)
-
-@conv2d_alter_layout.register(["vtacpu", "vta"])
-def alter(*args, **kwargs):
-    with tvm.target.arm_cpu("vtacpu"):
-        return conv2d_alter_layout(*args, **kwargs)
+PROJROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../../" && pwd )"
+
+export PYTHONPATH=${PYTHONPATH}:${PROJROOT}/python:${PROJROOT}/vta/python
+export PYTHONPATH=${PYTHONPATH}:/home/xilinx/pynq
+python3 -m vta.exec.rpc_server --tracker fleet:9190 --key pynq
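For reference, a host-side session against the tracker this script registers with could look like the following minimal sketch; the tracker address fleet:9190 and device key pynq come from the script above, everything else is illustrative.

# Minimal host-side sketch: request the Pynq board registered by this script.
from tvm import rpc

tracker = rpc.connect_tracker("fleet", 9190)
remote = tracker.request("pynq")   # blocks until a board with key "pynq" is free
print(remote.cpu(0))               # sanity-check the remote session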
5 changes: 4 additions & 1 deletion docs/conf.py
@@ -215,7 +215,10 @@ def run_doxygen(folder):
                         '../tutorials/autotvm',
                         '../tutorials/dev',
                         '../tutorials/topi',
-                        '../tutorials/deployment'])
+                        '../tutorials/deployment',
+                        '../vta/tutorials/frontend',
+                        '../vta/tutorials/optimize',
+                        '../vta/tutorials/autotvm'])

 def generate_doxygen_xml(app):
     """Run the doxygen make commands if we're on the ReadTheDocs server"""
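These entries extend the ordered list of tutorial subsections in docs/conf.py; a hedged sketch of the construct they live in, assuming the surrounding conf.py uses sphinx-gallery's ExplicitOrder (the variable name is an assumption):

# Sketch (assumed context): ordering tutorial subsections for sphinx-gallery.
from sphinx_gallery.sorting import ExplicitOrder

subsection_order = ExplicitOrder(['../tutorials/autotvm',
                                  '../tutorials/dev',
                                  '../tutorials/topi',
                                  '../tutorials/deployment',
                                  '../vta/tutorials/frontend',
                                  '../vta/tutorials/optimize',
                                  '../vta/tutorials/autotvm'])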
12 changes: 6 additions & 6 deletions nnvm/python/nnvm/top/nn.py
@@ -78,7 +78,7 @@ def schedule_log_softmax(_, outs, target):
 def compute_dense(attrs, inputs, _):
     """Compute definition of dense"""
     if attrs.get_bool("use_bias"):
-        return topi.nn.dense(inputs[0], inputs[1], bias=inputs[2])
+        return topi.nn.dense(inputs[0], inputs[1], inputs[2])
     return topi.nn.dense(inputs[0], inputs[1])

 @reg.register_schedule("dense")
@@ -114,25 +114,25 @@ def compute_conv2d(attrs, inputs, _):
     if groups == 1 and layout == 'NCHW4c' and inputs[0].dtype == 'int8':
         # pylint: disable=assignment-from-no-return
         out = topi.nn.conv2d(inputs[0], inputs[1], strides, padding,
-                             dilation, layout, out_dtype=out_dtype)
+                             dilation, layout, out_dtype)
         # pylint: enable=assignment-from-no-return
     elif groups == 1:
         out = topi.nn.conv2d(
-            inputs[0], inputs[1], strides, padding, dilation, layout, out_dtype=out_dtype)
+            inputs[0], inputs[1], strides, padding, dilation, layout, out_dtype)
     elif layout == "NCHW" and \
         groups == get_const_int(inputs[0].shape[1]) and \
         groups == channels:
         out = topi.nn.depthwise_conv2d_nchw(
-            inputs[0], inputs[1], strides, padding, dilation, out_dtype=out_dtype)
+            inputs[0], inputs[1], strides, padding, dilation, out_dtype)
     elif layout in ["NCHW", "NCHW4c"]:
         out = topi.nn.group_conv2d_nchw(inputs[0], inputs[1], strides, padding, dilation, groups,
-                                        out_dtype=out_dtype)
+                                        out_dtype)
     elif layout == "NHWC" and \
         kernel_layout == "HWOI" and \
         groups == get_const_int(inputs[0].shape[3]) and \
         groups == channels:
         out = topi.nn.depthwise_conv2d_nhwc(
-            inputs[0], inputs[1], strides, padding, dilation, out_dtype=out_dtype)
+            inputs[0], inputs[1], strides, padding, dilation, out_dtype)
     else:
         raise ValueError("not support arbitrary group number for now")
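The out_dtype argument switches from keyword to positional in all of these calls because AutoTVM's TOPI template wrappers reject keyword arguments when recording workloads during task extraction, so keywords would break tracing. An illustrative all-positional call (shapes are assumptions, not from this diff):

# Illustrative conv2d call with all-positional arguments.
import tvm
import topi

data = tvm.placeholder((1, 64, 56, 56), name="data", dtype="float32")
kernel = tvm.placeholder((64, 64, 3, 3), name="kernel", dtype="float32")
# strides, padding, dilation, layout, out_dtype -- all passed positionally:
out = topi.nn.conv2d(data, kernel, (1, 1), (1, 1), (1, 1), "NCHW", "float32")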
25 changes: 13 additions & 12 deletions python/tvm/autotvm/graph_tuner/utils/traverse_graph.py
@@ -65,18 +65,19 @@ def expr2graph(expr, target_ops, node_dict, node_list):
                                % op_name)
         topi_funcs += OP2COMPUTE[op_name]
     env.reset(topi_funcs)
-    _expr2graph_impl(expr, target_ops, node_dict, node_list)
-    task_pos = 0
-    for node_entry in node_list:
-        if node_entry["op"] in target_ops:
-            task_name, args = env.task_collection[task_pos]
-            task = autotvm.task.create(task_name, args,
-                                       target="llvm",
-                                       target_host=None,
-                                       template_key='direct')
-            node_entry["workloads"] = [task.workload]
-            node_entry["topi_op"] = [task_name]
-            task_pos += 1
+    with env:
+        _expr2graph_impl(expr, target_ops, node_dict, node_list)
+        task_pos = 0
+        for node_entry in node_list:
+            if node_entry["op"] in target_ops:
+                task_name, args = env.task_collection[task_pos]
+                task = autotvm.task.create(task_name, args,
+                                           target="llvm",
+                                           target_host=None,
+                                           template_key='direct')
+                node_entry["workloads"] = [task.workload]
+                node_entry["topi_op"] = [task_name]
+                task_pos += 1


 def _expr2graph_impl(expr, target_ops, node_dict, node_list):
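The extraction body now runs inside "with env:" so TOPI-call tracing is enabled only for the duration of the traversal and is reliably switched off afterwards. A self-contained sketch of that enable/disable pattern, using a hypothetical stand-in for TaskExtractEnv (illustration only, not the real class):

# Hypothetical minimal re-implementation of the context-manager behavior.
class TracingEnv:
    def __init__(self):
        self.tracing = False

    def __enter__(self):
        self.tracing = True    # start recording topi calls
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.tracing = False   # always stop tracing, even on error

env = TracingEnv()
with env:
    assert env.tracing         # extraction would run here
assert not env.tracing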
17 changes: 14 additions & 3 deletions python/tvm/autotvm/measure/measure_methods.py
@@ -86,7 +86,6 @@ def __init__(self, timeout=10, n_parallel=None, build_func='default'):
             build_func = ndk.create_shared
         else:
             raise ValueError("Invalid build_func" + build_func)
-
         self.build_func = _wrap_build_func(build_func)
         self.executor = LocalExecutor(timeout=timeout)
         self.tmp_dir = tempfile.mkdtemp()
@@ -360,8 +359,14 @@ def _build_func_common(measure_input, check_gpu=None, cuda_arch=None, build_opti
     if cuda_arch:
         set_cuda_target_arch(cuda_arch)

-    with build_config(**opts):
-        func = build(s, args, target_host=task.target_host)
+    # if target is vta, we need to use vta build
+    if hasattr(measure_input.target, 'device_name') and \
+            measure_input.target.device_name == 'vta':
+        import vta
+        func = vta.build(s, args, target_host=task.target_host)
+    else:
+        with build_config(**opts):
+            func = build(s, args, target_host=task.target_host)
     return func, tuple((get_const_tuple(x.shape), x.dtype) for x in args)

@@ -452,6 +457,12 @@ def run_through_rpc(measure_input, build_result,
     try:
         # upload built module
         remote = request_remote(*remote_args)
+        # Program the FPGA every single time when targeting VTA
+        if hasattr(measure_input.target, 'device_name') and \
+                measure_input.target.device_name == 'vta':
+            from vta import program_fpga, reconfig_runtime
+            program_fpga(remote, None)
+            reconfig_runtime(remote)
         remote.upload(build_result.filename)
         func = remote.load_module(os.path.split(build_result.filename)[1])
         ctx = remote.context(str(measure_input.target), 0)
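The same two VTA calls can be issued by hand when preparing a board outside of a tuning run; a hedged sketch mirroring the hook added above (tracker host, port, and key are assumptions):

# Sketch: program the FPGA and reset the runtime over RPC manually.
from tvm import rpc
from vta import program_fpga, reconfig_runtime

remote = rpc.connect_tracker("fleet", 9190).request("pynq")
program_fpga(remote, None)   # None picks the default bitstream for the active config
reconfig_runtime(remote)     # sync the device runtime with the current VTA config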
98 changes: 61 additions & 37 deletions python/tvm/autotvm/task/nnvm_integration.py
@@ -19,23 +19,22 @@
 Decorator and utilities for the integration with TOPI and NNVM
 """
-import warnings
+import threading
 import logging


 from ... import target as _target

 from .task import create
 from .topi_integration import TaskExtractEnv

 logger = logging.getLogger('autotvm')


-def extract_from_graph(graph, shape, dtype, target, symbols, target_host=None):
+def extract_from_graph(graph, shape, dtype, target, symbols, params=None, target_host=None):
     """ Extract tuning tasks from a nnvm graph.

     This function collects tuning tasks by building the graph
-    with a "tracing" target and tracing all the calls to topi.
+    and trace all the calls to topi.

     Parameters
     ----------
@@ -49,6 +48,8 @@ def extract_from_graph(graph, shape, dtype, target, symbols, target_host=None):
         The compilation target

     symbols : Array of nnvm.symbol
         Array of nnvm symbols want to be tuned
+
+    params : dict of str to NDArray
+        The parameter dictionary.

     target_host: tvm.target.Target
         The host compilation target
@@ -63,8 +64,8 @@ def extract_from_graph(graph, shape, dtype, target, symbols, target_host=None):

     env = TaskExtractEnv.get()

-    #NOTE: To add more symbols, you only need to change the following lists
-    #nnvm symbol -> topi compute
+    # NOTE: To add more symbols, you only need to change the following lists
+    # nnvm symbol -> topi compute
     SYMBOL2TOPI = {
         nnvm.sym.conv2d: [topi.nn.conv2d, topi.nn.depthwise_conv2d_nchw,
                           topi.nn.group_conv2d_nchw],
@@ -81,29 +82,40 @@ def extract_from_graph(graph, shape, dtype, target, symbols, target_host=None):

     # run compiler to collect all TOPI calls during compilation
     env.reset(topi_funcs)
-
-    # disable logger temporarily
-    old_state = logger.disabled
-    logger.disabled = True
-
-    # use a "tracing" target to do a fake compile for collecting topi calls
-    tracing_target = _target.create("llvm -device=tracing")
-    nnvm.compiler.engine.clear_cache()
-    nnvm.compiler.build(graph, target=tracing_target, shape=shape, dtype=dtype)
-
-    logger.disabled = old_state
+    with env:
+        # disable logger temporarily
+        old_state = logger.disabled
+        logger.disabled = True
+
+        nnvm.compiler.engine.clear_cache()
+        # wrap build call in thread to avoid multiprocessing problems
+        build_thread = threading.Thread(target=nnvm.compiler.build,
+                                        args=(graph,
+                                              target,
+                                              shape,
+                                              dtype,
+                                              params,
+                                              target_host))
+        build_thread.start()
+        build_thread.join()
+
+        logger.disabled = old_state

     # create tasks for target
     tasks = []
     for task_name, args in env.get_tasks():
-        tasks.append(create(task_name, args,
-                            target=target, target_host=target_host,
-                            template_key='direct'))
+        try:
+            tsk = create(task_name, args,
+                         target=target, target_host=target_host,
+                         template_key='direct')
+            tasks.append(tsk)
+        except topi.InvalidShapeError:
+            print("[Warning] Invalid shape during AutoTVM task creation")

     return tasks
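With the new params argument, a call site looks like the following hedged sketch; graph and params are assumed to come from an nnvm frontend, and the shapes and symbols here are illustrative:

# Hedged usage sketch for the updated signature.
import nnvm
from tvm import autotvm

# `graph` and `params` as returned by an nnvm frontend,
# e.g. nnvm.frontend.from_mxnet(...)
tasks = autotvm.task.extract_from_graph(
    graph, shape={"data": (1, 3, 224, 224)}, dtype="float32",
    target="llvm", symbols=(nnvm.sym.conv2d, nnvm.sym.dense),
    params=params)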


-def extract_from_multiple_graph(graphs, shapes, dtypes, target, symbols, target_host=None):
+def extract_from_multiple_graph(graphs, shapes, dtypes, target, symbols, params, target_host=None):
     """ Extract tuning tasks from multiple nnvm graphs.

     This function is the multiple graph version of extract_from_graph

@@ -120,6 +132,8 @@ def extract_from_multiple_graph(graphs, shapes, dtypes, target, symbols, target_
         The compilation target

     symbols : Array of nnvm.symbol
         Array of nnvm symbols want to be tuned
+
+    params : dict of str to NDArray
+        The parameter dictionary.

     target_host: tvm.target.Target
         The host compilation target

@@ -152,25 +166,35 @@ def extract_from_multiple_graph(graphs, shapes, dtypes, target, symbols, target_

     # run compiler to collect all TOPI calls during compilation
     env.reset(topi_funcs)
-
-    # disable logger temporarily
-    old_state = logger.disabled
-    logger.disabled = True
-
-    # use a "tracing" target to do a fake compile for collecting topi calls
-    tracing_target = _target.create("llvm -device=tracing")
-
-    nnvm.compiler.engine.clear_cache()
-    for graph, shape, dtype in zip(graphs, shapes, dtypes):
-        nnvm.compiler.build(graph, target=tracing_target, shape=shape, dtype=dtype)
-
-    logger.disabled = old_state
+    with env:
+        # disable logger temporarily
+        old_state = logger.disabled
+        logger.disabled = True
+
+        for graph, shape, dtype in zip(graphs, shapes, dtypes):
+            nnvm.compiler.engine.clear_cache()
+            # wrap build call in thread to avoid multiprocessing problems
+            build_thread = threading.Thread(target=nnvm.compiler.build,
+                                            args=(graph,
+                                                  target,
+                                                  shape,
+                                                  dtype,
+                                                  params,
+                                                  target_host))
+            build_thread.start()
+            build_thread.join()
+
+        logger.disabled = old_state

     # create tasks for target
     tasks = []
     for task_name, args in env.get_tasks():
-        tasks.append(create(task_name, args,
-                            target=target, target_host=target_host,
-                            template_key='direct'))
+        try:
+            tsk = create(task_name, args,
+                         target=target, target_host=target_host,
+                         template_key='direct')
+            tasks.append(tsk)
+        except topi.InvalidShapeError:
+            print("[Warning] Invalid shape during AutoTVM task creation")

     return tasks
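Per the in-diff comment, both functions now wrap the build in a thread because invoking nnvm.compiler.build directly in a process that later forks for parallel measurement can cause multiprocessing problems; a fresh thread isolates that state. The pattern in isolation, as a runnable sketch with a stand-in for the build function:

# Self-contained sketch of the wrap-build-in-a-thread pattern used above.
import threading

def fake_build(graph, target, shape, dtype, params, target_host):
    # stand-in for nnvm.compiler.build, which takes the same positional args
    print("building", graph, "for", target)

build_thread = threading.Thread(target=fake_build,
                                args=("graph", "llvm",
                                      {"data": (1, 3, 224, 224)}, "float32",
                                      {}, None))
build_thread.start()
build_thread.join()   # caller blocks; the build simply runs on a fresh thread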