Commit

deploy_resnet_on_vta.py
tmoreau89 committed Aug 15, 2019
1 parent c7442a5 commit ae7fbd8
Showing 2 changed files with 297 additions and 6 deletions.
12 changes: 6 additions & 6 deletions vta/tutorials/frontend/deploy_resnet_on_vta.py
@@ -15,12 +15,12 @@
 # specific language governing permissions and limitations
 # under the License.
 """
-Deploy Pretrained ResNet Model from MxNet on VTA
+Deploy Pretrained Vision Model from MxNet on VTA
 ================================================
 **Author**: `Thierry Moreau <https://homes.cs.washington.edu/~moreau/>`_
 
-This tutorial provides an end-to-end demo, on how to run ResNet-18 inference
-onto the VTA accelerator design to perform ImageNet classification tasks.
+This tutorial provides an end-to-end demo on how to run ImageNet classification
+inference on the VTA accelerator design.
 It showcases Relay as a front end compiler that can perform quantization (VTA
 only supports int8/32 inference) as well as graph packing (in order to enable
 tensorization in the core) to massage the compute graph for the hardware target.
@@ -141,7 +141,7 @@
 ######################################################################
 # Build the inference graph runtime
 # ---------------------------------
-# Grab ResNet-18 model from Gluon model zoo and compile with Relay.
+# Grab vision model from Gluon model zoo and compile with Relay.
 # The compilation steps are:
 # 1) Front end translation from MxNet into Relay module.
 # 2) Apply 8-bit quantization: here we skip the first conv layer,
@@ -156,7 +156,7 @@
 # Load pre-configured AutoTVM schedules
 with autotvm.tophub.context(target):
 
-    # Populate the shape and data type dictionary for ResNet input
+    # Populate the shape and data type dictionary for ImageNet input
     dtype_dict = {"data": 'float32'}
     shape_dict = {"data": (env.BATCH, 3, 224, 224)}
 
@@ -215,7 +215,7 @@
     m = graph_runtime.create(graph, lib, ctx)
 
 ######################################################################
-# Perform ResNet-18 inference
+# Perform image classification
 # ---------------------------
 # We run classification on an image sample from ImageNet
 # We just need to download the categories files, `synset.txt`
291 changes: 291 additions & 0 deletions vta/tutorials/frontend/deploy_vision_on_vta.py
@@ -0,0 +1,291 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""
Deploy Pretrained Vision Model from MxNet on VTA
================================================
**Author**: `Thierry Moreau <https://homes.cs.washington.edu/~moreau/>`_

This tutorial provides an end-to-end demo on how to run ImageNet classification
inference on the VTA accelerator design.
It showcases Relay as a front end compiler that can perform quantization (VTA
only supports int8/32 inference) as well as graph packing (in order to enable
tensorization in the core) to massage the compute graph for the hardware target.
"""

######################################################################
# Install dependencies
# --------------------
# To use the autotvm package in tvm, we need to install some extra dependencies.
# (change "3" to "2" if you use python2):
#
# .. code-block:: bash
#
# pip3 install --user mxnet requests pillow
#
# Now return to the python code. Import packages.

from __future__ import absolute_import, print_function

import argparse, ast, json, os, requests, sys, time
from io import BytesIO
from os.path import join, isfile
from PIL import Image

from mxnet.gluon.model_zoo import vision
import numpy as np
from matplotlib import pyplot as plt

import tvm
from tvm import rpc, autotvm, relay
from tvm.contrib import graph_runtime, util, download
from tvm.contrib.debugger import debug_runtime
from tvm.relay import transform

import vta
from vta.testing import simulator
from vta.top import graph_pack

# Make sure that TVM was compiled with RPC=1
assert tvm.module.enabled("rpc")

######################################################################
# Define the platform and model targets
# -------------------------------------
# Execute on CPU vs. VTA, and define the model.

# Load VTA parameters from the vta/config/vta_config.json file
env = vta.get_env()

# Set ``device=arm_cpu`` to run inference on the CPU
# or ``device=vta`` to run inference on the FPGA.
device = "vta"
target = env.target if device == "vta" else env.target_vta_cpu

# Dictionary lookup for when to start/end bit packing
# TODO(zihengjiang, tmoreau89) some quantization will break until #3543 is merged
pack_dict = {
"alexnet": ["nn.max_pool2d", "nn.batch_flatten"],
"resnet18_v1": ["nn.max_pool2d", "nn.global_avg_pool2d"],
"resnet34_v1": ["nn.max_pool2d", "nn.global_avg_pool2d"],
"resnet18_v2": ["nn.max_pool2d", "nn.global_avg_pool2d"],
"resnet34_v2": ["nn.max_pool2d", "nn.global_avg_pool2d"],
"resnet50_v2": ["nn.max_pool2d", "nn.global_avg_pool2d"],
"resnet101_v2": ["nn.max_pool2d", "nn.global_avg_pool2d"],
"resnet152_v2": ["nn.max_pool2d", "nn.global_avg_pool2d"],
"vgg11": ["nn.max_pool2d", "nn.batch_flatten"],
"vgg13": ["nn.max_pool2d", "nn.batch_flatten"],
"vgg16": ["nn.max_pool2d", "nn.batch_flatten"],
"vgg19": ["nn.max_pool2d", "nn.batch_flatten"],
}

# Name of Gluon model to compile
# The ``start_pack`` and ``stop_pack`` labels indicate where
# to start and end the graph packing relay pass: in other words
# where to start and finish offloading to VTA.
model = "resnet18_v1"
assert model in pack_dict
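
# For example, compiling a different network only requires choosing
# another key from ``pack_dict`` (a sketch; any listed model is
# expected to work the same way):
#
#   model = "resnet50_v2"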

######################################################################
# Obtain an execution remote
# ---------------------------------
# When the target is 'pynq', reconfigure the FPGA and runtime.
# Otherwise, if the target is 'sim' or 'tsim', execute locally.

if env.TARGET not in ["sim", "tsim"]:

# Get remote from tracker node if environment variable is set.
# To set up the tracker, you'll need to follow the "Auto-tuning
# a convolutional network for VTA" tutorial.
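    # As a reminder, a tracker is typically launched on the host with a
    # command along these lines (the host and port are illustrative):
    #
    #   python3 -m tvm.exec.rpc_tracker --host=0.0.0.0 --port=9190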
tracker_host = os.environ.get("TVM_TRACKER_HOST", None)
    tracker_port = os.environ.get("TVM_TRACKER_PORT", None)
# Otherwise if you have a device you want to program directly from
# the host, make sure you've set the variables below to the IP of
# your board.
device_host = os.environ.get("VTA_PYNQ_RPC_HOST", "192.168.2.99")
device_port = int(os.environ.get("VTA_PYNQ_RPC_PORT", "9091"))
if not tracker_host or not tracker_port:
remote = rpc.connect(device_host, device_port)
else:
        remote = autotvm.measure.request_remote(env.TARGET, tracker_host, int(tracker_port), timeout=10000)

# Reconfigure the JIT runtime and FPGA.
# You can program the FPGA with your own custom bitstream
# by passing the path to the bitstream file instead of None.
reconfig_start = time.time()
vta.reconfig_runtime(remote)
vta.program_fpga(remote, bitstream=None)
reconfig_time = time.time() - reconfig_start
print("Reconfigured FPGA and RPC runtime in {0:.2f}s!".format(reconfig_time))

# In simulation mode, host the RPC server locally.
else:
remote = rpc.LocalSession()

# Get execution context from remote
ctx = remote.ext_dev(0) if device == "vta" else remote.cpu(0)

######################################################################
# Build the inference graph runtime
# ---------------------------------
# Grab vision model from Gluon model zoo and compile with Relay.
# The compilation steps are:
# 1) Front end translation from MxNet into Relay module.
# 2) Apply 8-bit quantization: here we skip the first conv layer,
# and dense layer which will both be executed in fp32 on the CPU.
# 3) Perform graph packing to alter the data layout for tensorization.
# 4) Perform constant folding to reduce number of operators (e.g. eliminate
# batch norm multiply).
# 5) Perform relay build to object file.
# 6) Load the object file onto remote (FPGA device).
# 7) Generate graph runtime, `m`.

# Load pre-configured AutoTVM schedules
with autotvm.tophub.context(target):

    # Populate the shape and data type dictionary for ImageNet input
dtype_dict = {"data": 'float32'}
shape_dict = {"data": (env.BATCH, 3, 224, 224)}

# Get off the shelf gluon model, and convert to relay
gluon_model = vision.get_model(model, pretrained=True)

# Measure build start time
build_start = time.time()

# Start front end compilation
mod, params = relay.frontend.from_mxnet(gluon_model, shape_dict)

# Update shape and type dictionary
shape_dict.update({k: v.shape for k, v in params.items()})
dtype_dict.update({k: str(v.dtype) for k, v in params.items()})

# Perform quantization in Relay
with relay.quantize.qconfig(global_scale=8.0,
skip_conv_layers=[0]):
relay_prog = relay.quantize.quantize(mod["main"], params=params)
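
    # (Optional) A quick way to sanity-check the quantization step is to
    # inspect the quantized Relay program at this point, e.g.:
    #
    #   print(relay_prog)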

# Perform graph packing and constant folding for VTA target
if target.device_name == "vta":
assert env.BLOCK_IN == env.BLOCK_OUT
relay_prog = graph_pack(
relay_prog,
env.BATCH,
env.BLOCK_OUT,
env.WGT_WIDTH,
start_name=pack_dict[model][0],
stop_name=pack_dict[model][1])

# Compile Relay program with AlterOpLayout disabled
with relay.build_config(opt_level=3, disabled_pass={"AlterOpLayout"}):
if target.device_name != "vta":
graph, lib, params = relay.build(
relay_prog, target=target,
params=params, target_host=env.target_host)
else:
with vta.build_config():
graph, lib, params = relay.build(
relay_prog, target=target,
params=params, target_host=env.target_host)

# Measure Relay build time
build_time = time.time() - build_start
print(model + " inference graph built in {0:.2f}s!".format(build_time))

# Send the inference library over to the remote RPC server
temp = util.tempdir()
lib.save(temp.relpath("graphlib.o"))
remote.upload(temp.relpath("graphlib.o"))
lib = remote.load_module("graphlib.o")

# Graph runtime
m = graph_runtime.create(graph, lib, ctx)
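
    # Alternatively, the debug runtime imported above can be swapped in
    # for per-operator profiling (a sketch; it takes the same arguments):
    #
    #   m = debug_runtime.create(graph, lib, ctx)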

######################################################################
# Perform image classification
# ----------------------------
# We run classification on an image sample from ImageNet.
# We just need to download the category file, `synset.txt`,
# and an input test image.

# Download ImageNet categories
categ_url = "https://github.com/uwsaml/web-data/raw/master/vta/models/"
categ_fn = "synset.txt"
download.download(join(categ_url, categ_fn), categ_fn)
# Parse the downloaded category file (a dict literal mapping class
# index to label); literal_eval is a safer choice than eval here
synset = ast.literal_eval(open(categ_fn).read())

# Download test image
image_url = 'https://homes.cs.washington.edu/~moreau/media/vta/cat.jpg'
response = requests.get(image_url)

# Prepare test image for inference
image = Image.open(BytesIO(response.content)).resize((224, 224))
plt.imshow(image)
plt.show()
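# Normalize with the per-channel ImageNet mean and std (RGB order),
# move channels first (HWC -> CHW), add a batch axis, and replicate
# the sample to fill the VTA batch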
image = np.array(image) - np.array([123., 117., 104.])
image /= np.array([58.395, 57.12, 57.375])
image = image.transpose((2, 0, 1))
image = image[np.newaxis, :]
image = np.repeat(image, env.BATCH, axis=0)

# Set the network parameters and inputs
m.set_input(**params)
m.set_input('data', image)

# Perform inference and gather execution statistics
# More on: https://docs.tvm.ai/api/python/module.html#tvm.module.Module.time_evaluator
num = 4 # number of times we run module for a single measurement
rep = 3 # number of measurements (we derive std dev from this)
timer = m.module.time_evaluator("run", ctx, number=num, repeat=rep)

if env.TARGET in ["sim", "tsim"]:
simulator.clear_stats()
timer()
sim_stats = simulator.stats()
print("\nExecution statistics:")
for k, v in sim_stats.items():
# Since we execute the workload many times, we need to normalize stats
# Note that there is always one warm up run
# Therefore we divide the overall stats by (num * rep + 1)
print("\t{:<16}: {:>16}".format(k, v // (num * rep + 1)))
else:
tcost = timer()
std = np.std(tcost.results) * 1000 / env.BATCH
mean = tcost.mean * 1000 / env.BATCH
print("\nPerformed inference in %.2fms/sample (std = %.2f)" % (mean, std))

# Get classification results
tvm_output = m.get_output(0, tvm.nd.empty((env.BATCH, 1000), "float32", remote.cpu(0)))
top_categories = np.argsort(tvm_output.asnumpy()[0])
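# np.argsort returns indices in ascending score order, so the last
# entries of ``top_categories`` are the highest-scoring classes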

# Report top-5 classification results
print("\n%s prediction" % model)
print("\t#1:", synset[top_categories[-1]])
print("\t#2:", synset[top_categories[-2]])
print("\t#3:", synset[top_categories[-3]])
print("\t#4:", synset[top_categories[-4]])
print("\t#5:", synset[top_categories[-5]])

# This just checks that one of the 5 top categories
# is one variety of cat; this is by no means an accurate
# assessment of how quantization affects classification
# accuracy but is meant to catch changes to the
# quantization pass that would degrade accuracy in the CI.
cat_detected = False
for k in top_categories[-5:]:
if "cat" in synset[k]:
cat_detected = True
assert cat_detected
