diff --git a/vta/tutorials/frontend/deploy_resnet_on_vta.py b/vta/tutorials/frontend/deploy_resnet_on_vta.py
deleted file mode 100644
index f14a0e36b0397..0000000000000
--- a/vta/tutorials/frontend/deploy_resnet_on_vta.py
+++ /dev/null
@@ -1,293 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-Deploy Pretrained Vision Model from MxNet on VTA
-================================================
-**Author**: `Thierry Moreau <https://homes.cs.washington.edu/~moreau/>`_
-
-This tutorial provides an end-to-end demo of how to run ImageNet
-classification inference on the VTA accelerator design.
-It showcases Relay as a front end compiler that can perform quantization (VTA
-only supports int8/32 inference) as well as graph packing (in order to enable
-tensorization in the core) to massage the compute graph for the hardware target.
-"""
-
-######################################################################
-# Install dependencies
-# --------------------
-# To run this tutorial, we need to install some extra dependencies
-# (change "3" to "2" if you use python2):
-#
-# .. code-block:: bash
-#
-# pip3 install --user mxnet requests pillow
-#
-# Now return to the python code. Import packages.
-
-from __future__ import absolute_import, print_function
-
-import argparse, json, os, requests, sys, time
-from io import BytesIO
-from os.path import join, isfile
-from PIL import Image
-
-from mxnet.gluon.model_zoo import vision
-import numpy as np
-from matplotlib import pyplot as plt
-
-import tvm
-from tvm import rpc, autotvm, relay
-from tvm.contrib import graph_runtime, util, download
-from tvm.contrib.debugger import debug_runtime
-from tvm.relay import transform
-
-import vta
-from vta.testing import simulator
-from vta.top import graph_pack
-
-# Make sure that TVM was compiled with RPC=1
-assert tvm.module.enabled("rpc")
-
-######################################################################
-# Define the platform and model targets
-# -------------------------------------
-# Execute on CPU vs. VTA, and define the model.
-
-# Load VTA parameters from the vta/config/vta_config.json file
-env = vta.get_env()
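-
-# As a quick sanity check, we can print a few of the hardware parameters
-# that ``env`` exposes (values depend on the vta_config.json VTA was
-# built with):
-print("VTA config: target={}, batch={}, block_in={}, block_out={}".format(
-    env.TARGET, env.BATCH, env.BLOCK_IN, env.BLOCK_OUT))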
-
-# Set ``device=arm_cpu`` to run inference on the CPU
-# or ``device=vta`` to run inference on the FPGA.
-device = "vta"
-target = env.target if device == "vta" else env.target_vta_cpu
-
-# Dictionary lookup for when to start/end bit packing
-# TODO(zihengjiang, tmoreau89) quantization of some models will break until #3543 is merged
-pack_dict = {
- "alexnet": ["nn.max_pool2d", "nn.batch_flatten"],
- "resnet18_v1": ["nn.max_pool2d", "nn.global_avg_pool2d"],
- "resnet34_v1": ["nn.max_pool2d", "nn.global_avg_pool2d"],
- "resnet18_v2": ["nn.max_pool2d", "nn.global_avg_pool2d"],
- "resnet34_v2": ["nn.max_pool2d", "nn.global_avg_pool2d"],
- "resnet50_v2": ["nn.max_pool2d", "nn.global_avg_pool2d"],
- "resnet101_v2": ["nn.max_pool2d", "nn.global_avg_pool2d"],
- "resnet152_v2": ["nn.max_pool2d", "nn.global_avg_pool2d"],
- "vgg11": ["nn.max_pool2d", "nn.batch_flatten"],
- "vgg13": ["nn.max_pool2d", "nn.batch_flatten"],
- "vgg16": ["nn.max_pool2d", "nn.batch_flatten"],
- "vgg19": ["nn.max_pool2d", "nn.batch_flatten"],
-}
-
-# Name of Gluon model to compile
-# The ``start_pack`` and ``stop_pack`` labels indicate where
-# to start and end the graph packing relay pass: in other words
-# where to start and finish offloading to VTA.
-model = "resnet18_v1"
-assert model in pack_dict
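-# For example, ``pack_dict["resnet18_v1"]`` is
-# ["nn.max_pool2d", "nn.global_avg_pool2d"]: packing begins at the
-# first max pool and ends at the global average pool.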
-
-######################################################################
-# Obtain an execution remote
-# ---------------------------------
-# When the target is 'pynq', reconfigure the FPGA and runtime.
-# Otherwise, if the target is 'sim' or 'tsim', execute locally.
-
-if env.TARGET not in ["sim", "tsim"]:
-
- # Get remote from tracker node if environment variable is set.
- # To set up the tracker, you'll need to follow the "Auto-tuning
- # a convolutional network for VTA" tutorial.
- tracker_host = os.environ.get("TVM_TRACKER_HOST", None)
- tracker_port = os.environ.get("TVM_TRACKER_PORT", None)
- # Otherwise if you have a device you want to program directly from
- # the host, make sure you've set the variables below to the IP of
- # your board.
- device_host = os.environ.get("VTA_PYNQ_RPC_HOST", "192.168.2.99")
- device_port = os.environ.get("VTA_PYNQ_RPC_PORT", "9091")
- if not tracker_host or not tracker_port:
- remote = rpc.connect(device_host, int(device_port))
- else:
- remote = autotvm.measure.request_remote(env.TARGET, tracker_host, int(tracker_port), timeout=10000)
-
- # Reconfigure the JIT runtime and FPGA.
- # You can program the FPGA with your own custom bitstream
- # by passing the path to the bitstream file instead of None.
- reconfig_start = time.time()
- vta.reconfig_runtime(remote)
- vta.program_fpga(remote, bitstream=None)
- reconfig_time = time.time() - reconfig_start
- print("Reconfigured FPGA and RPC runtime in {0:.2f}s!".format(reconfig_time))
-
-# In simulation mode, host the RPC server locally.
-else:
- remote = rpc.LocalSession()
-
-# Get execution context from remote
-ctx = remote.ext_dev(0) if device == "vta" else remote.cpu(0)
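-# ``ext_dev`` is TVM's generic extension-device type, under which the
-# VTA runtime registers itself.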
-
-######################################################################
-# Build the inference graph runtime
-# ---------------------------------
-# Grab vision model from Gluon model zoo and compile with Relay.
-# The compilation steps are:
-# 1) Front end translation from MxNet into a Relay module.
-# 2) Apply 8-bit quantization: here we skip the first conv layer and the
-#    dense layer, which will both be executed in fp32 on the CPU.
-# 3) Perform graph packing to alter the data layout for tensorization.
-# 4) Perform constant folding to reduce the number of operators (e.g.
-#    eliminate the batch norm multiply).
-# 5) Perform a Relay build into an object file.
-# 6) Load the object file onto the remote (FPGA device).
-# 7) Generate the graph runtime, `m`.
-
-# Load pre-configured AutoTVM schedules
-with autotvm.tophub.context(target):
-
- # Populate the shape and data type dictionary for ImageNet input
- dtype_dict = {"data": 'float32'}
- shape_dict = {"data": (env.BATCH, 3, 224, 224)}
-
- # Get off the shelf gluon model, and convert to relay
- gluon_model = vision.get_model(model, pretrained=True)
-
- # Measure build start time
- build_start = time.time()
-
- # Start front end compilation
- mod, params = relay.frontend.from_mxnet(gluon_model, shape_dict)
-
- # Update shape and type dictionary
- shape_dict.update({k: v.shape for k, v in params.items()})
- dtype_dict.update({k: str(v.dtype) for k, v in params.items()})
-
- # Perform quantization in Relay
- with relay.quantize.qconfig(global_scale=8.0,
- skip_conv_layers=[0]):
- relay_prog = relay.quantize.quantize(mod["main"], params=params)
-
- # Perform graph packing and constant folding for VTA target
- if target.device_name == "vta":
- assert env.BLOCK_IN == env.BLOCK_OUT
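-        # Graph packing blocks the NCHW tensors into the packed layout
-        # VTA expects, so conv2d can be tensorized onto the GEMM core.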
- relay_prog = graph_pack(
- relay_prog,
- env.BATCH,
- env.BLOCK_OUT,
- env.WGT_WIDTH,
- start_name=pack_dict[model][0],
- stop_name=pack_dict[model][1])
-
- # Compile Relay program with AlterOpLayout disabled
- with relay.build_config(opt_level=3, disabled_pass={"AlterOpLayout"}):
- if target.device_name != "vta":
- graph, lib, params = relay.build(
- relay_prog, target=target,
- params=params, target_host=env.target_host)
- else:
- with vta.build_config():
- graph, lib, params = relay.build(
- relay_prog, target=target,
- params=params, target_host=env.target_host)
-
- # Measure Relay build time
- build_time = time.time() - build_start
- print(model + " inference graph built in {0:.2f}s!".format(build_time))
-
- # Send the inference library over to the remote RPC server
- temp = util.tempdir()
- lib.save(temp.relpath("graphlib.o"))
- remote.upload(temp.relpath("graphlib.o"))
- lib = remote.load_module("graphlib.o")
-
- # Graph runtime
- m = graph_runtime.create(graph, lib, ctx)
-
-######################################################################
-# Perform image classification
-# ----------------------------
-# We run classification on an image sample from ImageNet.
-# We just need to download the categories file, `synset.txt`,
-# and an input test image.
-
-# Download ImageNet categories
-categ_url = "https://github.com/uwsaml/web-data/raw/master/vta/models/"
-categ_fn = "synset.txt"
-download.download(join(categ_url, categ_fn), categ_fn)
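-# synset.txt holds a Python dict literal mapping an ImageNet class index
-# to its human-readable label, hence the ``eval`` below.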
-synset = eval(open(categ_fn).read())
-
-# Download test image
-image_url = 'https://homes.cs.washington.edu/~moreau/media/vta/cat.jpg'
-response = requests.get(image_url)
-
-# Prepare test image for inference
-image = Image.open(BytesIO(response.content)).resize((224, 224))
-plt.imshow(image)
-plt.show()
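-# Normalize with the usual ImageNet per-channel mean and std (RGB
-# order), reorder HWC -> CHW, add a batch axis, and tile the sample
-# to fill VTA's batch size.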
-image = np.array(image) - np.array([123., 117., 104.])
-image /= np.array([58.395, 57.12, 57.375])
-image = image.transpose((2, 0, 1))
-image = image[np.newaxis, :]
-image = np.repeat(image, env.BATCH, axis=0)
-
-# Set the network parameters and inputs
-m.set_input(**params)
-m.set_input('data', image)
-
-# Perform inference and gather execution statistics
-# More on: https://docs.tvm.ai/api/python/module.html#tvm.module.Module.time_evaluator
-num = 4 # number of times we run module for a single measurement
-rep = 3 # number of measurements (we derive std dev from this)
-timer = m.module.time_evaluator("run", ctx, number=num, repeat=rep)
-
-if env.TARGET in ["sim", "tsim"]:
- simulator.clear_stats()
- timer()
- sim_stats = simulator.stats()
- print("\nExecution statistics:")
- for k, v in sim_stats.items():
- # Since we execute the workload many times, we need to normalize stats
- # Note that there is always one warm up run
- # Therefore we divide the overall stats by (num * rep + 1)
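-        # (e.g. with num=4 and rep=3 that is 4 * 3 + 1 = 13 runs)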
- print("\t{:<16}: {:>16}".format(k, v // (num * rep + 1)))
-else:
- tcost = timer()
- std = np.std(tcost.results) * 1000
- mean = tcost.mean * 1000
- print("\nPerformed inference in %.2fms (std = %.2f) for %d samples" % (mean, std, env.BATCH))
- print("Average per sample inference time: %.2fms" % (mean/env.BATCH))
-
-# Get classification results
-tvm_output = m.get_output(0, tvm.nd.empty((env.BATCH, 1000), "float32", remote.cpu(0)))
-for b in range(env.BATCH):
- top_categories = np.argsort(tvm_output.asnumpy()[b])
-
- # Report top-5 classification results
- print("\n{} prediction for sample {}".format(model, b))
- print("\t#1:", synset[top_categories[-1]])
- print("\t#2:", synset[top_categories[-2]])
- print("\t#3:", synset[top_categories[-3]])
- print("\t#4:", synset[top_categories[-4]])
- print("\t#5:", synset[top_categories[-5]])
-
-    # This just checks that one of the 5 top categories
-    # is one variety of cat; this is by no means an accurate
-    # assessment of how quantization affects classification
-    # accuracy, but is meant to catch changes to the
-    # quantization pass that would degrade accuracy in the CI.
- cat_detected = False
- for k in top_categories[-5:]:
- if "cat" in synset[k]:
- cat_detected = True
-    assert cat_detected
diff --git a/vta/tutorials/frontend/deploy_vision_on_vta.py b/vta/tutorials/frontend/deploy_vision_on_vta.py
index 8229de1d11403..431a8f1f88577 100644
--- a/vta/tutorials/frontend/deploy_vision_on_vta.py
+++ b/vta/tutorials/frontend/deploy_vision_on_vta.py
@@ -15,12 +15,12 @@
# specific language governing permissions and limitations
# under the License.
"""
-Deploy Pretrained ResNet Model from MxNet on VTA
+Deploy Pretrained Vision Model from MxNet on VTA
================================================
**Author**: `Thierry Moreau <https://homes.cs.washington.edu/~moreau/>`_
-This tutorial provides an end-to-end demo, on how to run ResNet-18 inference
-onto the VTA accelerator design to perform ImageNet classification tasks.
+This tutorial provides an end-to-end demo of how to run ImageNet
+classification inference on the VTA accelerator design.
It showcases Relay as a front end compiler that can perform quantization (VTA
only supports int8/32 inference) as well as graph packing (in order to enable
tensorization in the core) to massage the compute graph for the hardware target.
@@ -141,7 +141,7 @@
######################################################################
# Build the inference graph runtime
# ---------------------------------
-# Grab ResNet-18 model from Gluon model zoo and compile with Relay.
+# Grab vision model from Gluon model zoo and compile with Relay.
# The compilation steps are:
# 1) Front end translation from MxNet into Relay module.
# 2) Apply 8-bit quantization: here we skip the first conv layer,
@@ -156,7 +156,7 @@
# Load pre-configured AutoTVM schedules
with autotvm.tophub.context(target):
- # Populate the shape and data type dictionary for ResNet input
+ # Populate the shape and data type dictionary for ImageNet classifier input
dtype_dict = {"data": 'float32'}
shape_dict = {"data": (env.BATCH, 3, 224, 224)}
@@ -215,8 +215,8 @@
m = graph_runtime.create(graph, lib, ctx)
######################################################################
-# Perform ResNet-18 inference
-# ---------------------------
+# Perform image classification inference
+# --------------------------------------
# We run classification on an image sample from ImageNet.
# We just need to download the categories file, `synset.txt`,
# and an input test image.