[BITSTREAM SERVER] Bitstream server integration (apache#38)

tqchen · Jul 12, 2018 · aa40347 · aa40347
1 parent 2494787
commit aa40347
Show file tree

Hide file tree

Showing 16 changed files with 178 additions and 81 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -1,12 +1,13 @@
-TVM Change Log
+VTA Change Log
 ==============
 
 This file records the changes in VTA stack in reverse chronological order.
 
 
 ## Initial version
 
-- Vivado based hardware
-- Driver for PYNQ
+- Vivado based hardware.
+- Driver for PYNQ board.
 - Runtime library.
 - TVM compiler stack.
+- Resnet-18 example.
diff --git a/apps/pynq_rpc/README.md b/apps/pynq_rpc/README.md
@@ -31,21 +31,10 @@ From there, clone the VTA repository:
 git clone git@github.com:uwsaml/vta.git --recursive
 ```
 
-Next, clone the TVM repository:
-```bash
-git clone git@github.com:dmlc/tvm.git --recursive
-```
-
-TVM is rapidly changing, and to ensure stability, we keep track of working TVM checkpoints.
-As of now, the TVM checkpoint `e4c2af9abdcb3c7aabafba8084414d7739c17c4c` is known to work with VTA.
-```bash
-git checkout e4c2af9abdcb3c7aabafba8084414d7739c17c4c
-```
-
 Now, ssh into your **Pynq board** to build the TVM runtime with the following commands:
 ```bash
 ssh xilinx@<pynq board ip> # ssh if you haven't done so
-cd ~/tvm
+cd ~/vta/nnvm/tvm
 cp make/config.mk .
 echo USE_RPC=1 >> config.mk
 make runtime -j2
@@ -57,7 +46,6 @@ We're now ready to build the Pynq RPC server on the Pynq board.
 ```bash
 ssh xilinx@<pynq board ip> # ssh if you haven't done so
 cd ~/vta
-export TVM_PATH = /home/xilinx/tvm
 make
 ```
 

diff --git a/examples/resnet18/pynq/README.md b/examples/resnet18/pynq/README.md
@@ -4,6 +4,7 @@ In order to run this example you'll need to have:
 * VTA installed
 * TVM installed
 * NNVM installed
+* MxNet installed
 * A Pynq-based RPC server running
 
 ## VTA installation
@@ -26,9 +27,9 @@ git clone git@github.com:dmlc/tvm.git --recursive
 ```
 
 TVM is rapidly changing, and to ensure stability, we keep track of working TVM checkpoints.
-As of now, the TVM checkpoint `e4c2af9abdcb3c7aabafba8084414d7739c17c4c` is known to work with VTA.
+As of now, the TVM checkpoint `168f099155106d1188dbc54ac00acc02900a3c6f` is known to work with VTA.
 ```bash
-git checkout e4c2af9abdcb3c7aabafba8084414d7739c17c4c
+git checkout 168f099155106d1188dbc54ac00acc02900a3c6f
 ```
 
 Before building TVM, copy the `make/config.mk` file into the root TVM directory:
@@ -59,7 +60,7 @@ Clone the NNVM repository from `tqchen` in the directory of your choosing:
 git clone git@github.com:tqchen/nnvm.git --recursive
 ```
 
-To run this example, we rely on a special branch of NNVM: `qt`:
+To run this example, we rely on a special branch of NNVM, `qt`, until these changes get merged back into the main repo:
 ```bash
 cd <nnvm root>
 git checkout qt
@@ -76,6 +77,10 @@ Finally update your `~/.bashrc` file to include the NNVM python libraries in you
 export PYTHONPATH=<nnvm root>/python:${PYTHONPATH}
 ```
 
+## MxNet Installation
+
+Follow the [MxNet Installation Instructions](https://mxnet.incubator.apache.org)
+
 ## Pynq RPC Server Setup
                                                        
 Follow the [Pynq RPC Server Guide](https://github.com/uwsaml/vta/tree/master/apps/pynq_rpc/README.md)

diff --git a/examples/resnet18/pynq/imagenet_predict.py b/examples/resnet18/pynq/imagenet_predict.py
@@ -15,8 +15,6 @@
 
 bfactor = 1
 cfactor = 16
-host = "pynq"
-port = 9091
 verbose = False
 # only run fpga component, mark non-conv ops as nop
 debug_fpga_only = False
@@ -27,8 +25,7 @@
 CATEG_FILE = 'synset.txt'
 RESNET_GRAPH_FILE = 'quantize_graph.json'
 RESNET_PARAMS_FILE = 'quantize_params.pkl'
-BITSTREAM_FILE = 'vta.bit'
-for file in [TEST_FILE, CATEG_FILE, RESNET_GRAPH_FILE, RESNET_PARAMS_FILE, BITSTREAM_FILE]:
+for file in [TEST_FILE, CATEG_FILE, RESNET_GRAPH_FILE, RESNET_PARAMS_FILE]:
     if not os.path.isfile(file):
         print ("Downloading {}".format(file))
         wget.download(url+file)
@@ -43,7 +40,6 @@
 if vta.get_env().TARGET == "sim":
     target_host = "llvm"
 
-
 synset = eval(open(os.path.join(CATEG_FILE)).read())
 image = Image.open(os.path.join(TEST_FILE)).resize((224, 224))
 
@@ -138,8 +134,17 @@ def mark_nop(graph, conv_layer=-1, skip_conv_layer=()):
     remote = rpc.LocalSession()
     print("local session")
 else:
+    host = os.environ.get("VTA_PYNQ_RPC_HOST", None)
+    assert host
+    port = os.environ.get("VTA_PYNQ_RPC_PORT", "9091")
+    port = int(port)
     remote = rpc.connect(host, port)
 
+# Program FPGA, and build runtime if necessary
+# Overwrite bitstream with a path to your own if you built it yourself
+vta.reconfig_runtime(remote)
+vta.program_fpga(remote, bitstream=None)
+
 remote.upload(temp.relpath("graphlib.o"))
 lib = remote.load_module("graphlib.o")
 ctx = remote.ext_dev(0) if target.device_name == "vta" else remote.cpu(0)

diff --git a/hardware/xilinx/Makefile b/hardware/xilinx/Makefile
@@ -45,26 +45,21 @@ VTA_WGT_BUFF_SIZE := $(shell ${VTA_CONFIG} --get-wgtbuffsize)
 VTA_ACC_BUFF_SIZE := $(shell ${VTA_CONFIG} --get-accbuffsize)
 VTA_OUT_BUFF_SIZE := $(shell ${VTA_CONFIG} --get-outbuffsize)
 
+#---------------------
+# FPGA Parameters
+#--------------------
+VTA_CLOCK_FREQ = $(shell ${VTA_CONFIG} --get-fpgafreq)
+VTA_TARGET_PER = $(shell ${VTA_CONFIG} --get-fpgaper)
+
 #---------------------
 # Compilation parameters
 #--------------------
 
 #  Number of threads during compilation
 VTA_HW_COMP_THREADS = 8
 
-#  Target Frequency
-VTA_HW_COMP_CLOCK_FREQ = 100
-
-#  Timing closure compensation (0 for none, 3 for highest)
-VTA_HW_COMP_TIMING_COMP = 0
-
-# Derive clock target period
-TARGET_PER = \
-$(shell echo "$$(( (1000 + $(VTA_HW_COMP_CLOCK_FREQ) - 1) / $(VTA_HW_COMP_CLOCK_FREQ) - $(VTA_HW_COMP_TIMING_COMP)))" )
-
 # Derive config name
-CONF_ROOT = $(shell ${VTA_CONFIG} --cfg-str)
-CONF = $(CONF_ROOT)_$(VTA_HW_COMP_CLOCK_FREQ)MHz_$(TARGET_PER)ns
+CONF = $(shell ${VTA_CONFIG} --cfg-str)
 IP_BUILD_PATH = $(BUILD_DIR)/hls/$(CONF)
 HW_BUILD_PATH = $(BUILD_DIR)/vivado/$(CONF)
 
@@ -90,7 +85,7 @@ $(IP_PATH): $(SRC_DIR)/*
 	cd $(IP_BUILD_PATH) && \
 		$(VIVADO_HLS) -f $(SCRIPT_DIR)/hls.tcl \
 		-tclargs $(SRC_DIR) $(SIM_DIR) $(TEST_DIR) $(INCLUDE_DIR) \
-		$(MODE) $(DEBUG) $(NO_DSP) $(NO_ALU) $(TARGET_PER) \
+		$(MODE) $(DEBUG) $(NO_DSP) $(NO_ALU) $(VTA_TARGET_PER) \
 		$(VTA_INP_WIDTH) $(VTA_WGT_WIDTH) $(VTA_ACC_WIDTH) $(VTA_OUT_WIDTH) \
 		$(VTA_BATCH) $(VTA_IN_BLOCK) $(VTA_OUT_BLOCK) \
 		$(VTA_UOP_BUFF_SIZE) $(VTA_INP_BUFF_SIZE) $(VTA_WGT_BUFF_SIZE) \
@@ -104,7 +99,7 @@ $(BIT_PATH): $(IP_PATH)
 	mkdir -p $(HW_BUILD_PATH)
 	cd $(HW_BUILD_PATH) && \
 		$(VIVADO) -mode tcl -source $(SCRIPT_DIR)/vivado.tcl \
-		-tclargs $(BUILD_DIR)/hls/$(CONF) $(VTA_HW_COMP_THREADS) $(VTA_HW_COMP_CLOCK_FREQ) \
+		-tclargs $(BUILD_DIR)/hls/$(CONF) $(VTA_HW_COMP_THREADS) $(VTA_CLOCK_FREQ) \
 		$(VTA_INP_WIDTH) $(VTA_WGT_WIDTH) $(VTA_OUT_WIDTH) \
 		$(VTA_BATCH) $(VTA_IN_BLOCK) $(VTA_OUT_BLOCK) \
 		$(VTA_INP_BUFF_SIZE) $(VTA_WGT_BUFF_SIZE) $(VTA_OUT_BUFF_SIZE)

diff --git a/hardware/xilinx/README.md b/hardware/xilinx/README.md
@@ -67,7 +67,5 @@ make
 
 The local `Makefile` contains several variables that can be tweaked by the user:
 * `VTA_HW_COMP_THREADS`: determines the number of threads used for the Vivado compilation job (default 8 threads).
-* `VTA_HW_COMP_CLOCK_FREQ`: determines the target frequency of the VTA design (default 100MHz). It can only be set to 100, 142, 167 or 200MHz.
-* `VTA_HW_COMP_TIMING_COMP`: determines how much additional slack must be provided to close timing (default 0ns). Generally when utilization is high for an FPGA design, setting this paramter to 1, 2 or 3 can help close timing.
 
 Once the compilation completes, the generated bitstream can be found under `<vta root>/build/hardware/xilinx/vivado/<design name>/export/vta.bit`. 
diff --git a/make/config.json b/make/config.json
@@ -1,13 +1,16 @@
 {
   "TARGET" : "pynq",
+  "HW_FREQ" : 100,
+  "HW_CLK_TARGET" : 8,
+  "HW_VER" : "0.0.0",
   "LOG_INP_WIDTH" : 3,
   "LOG_WGT_WIDTH" : 3,
   "LOG_ACC_WIDTH" : 5,
   "LOG_OUT_WIDTH" : 3,
   "LOG_BATCH" : 0,
   "LOG_BLOCK_IN" : 4,
   "LOG_BLOCK_OUT" : 4,
-  "LOG_UOP_BUFF_SIZE" : 14,
+  "LOG_UOP_BUFF_SIZE" : 15,
   "LOG_INP_BUFF_SIZE" : 15,
   "LOG_WGT_BUFF_SIZE" : 18,
   "LOG_ACC_BUFF_SIZE" : 17

diff --git a/make/sim_sample.json b/make/sim_sample.json
@@ -1,13 +1,16 @@
 {
   "TARGET" : "sim",
+  "HW_FREQ" : 100,
+  "HW_CLK_TARGET" : 8,
+  "HW_VER" : "0.0.0",
   "LOG_INP_WIDTH" : 3,
   "LOG_WGT_WIDTH" : 3,
   "LOG_ACC_WIDTH" : 5,
   "LOG_OUT_WIDTH" : 3,
   "LOG_BATCH" : 0,
   "LOG_BLOCK_IN" : 4,
   "LOG_BLOCK_OUT" : 4,
-  "LOG_UOP_BUFF_SIZE" : 14,
+  "LOG_UOP_BUFF_SIZE" : 15,
   "LOG_INP_BUFF_SIZE" : 15,
   "LOG_WGT_BUFF_SIZE" : 18,
   "LOG_ACC_BUFF_SIZE" : 17

diff --git a/make/vta_config.py b/make/vta_config.py
@@ -54,6 +54,10 @@ def main():
                         help="returns log of accum buffer size in B")
     parser.add_argument("--get-outbuffsize", action="store_true",
                         help="returns log of output buffer size in B")
+    parser.add_argument("--get-fpgafreq", action="store_true",
+                        help="returns FPGA frequency")
+    parser.add_argument("--get-fpgaper", action="store_true",
+                        help="returns HLS target clock period")
     args = parser.parse_args()
 
     if len(sys.argv) == 1:
@@ -91,7 +95,8 @@ def main():
         print(pkg.cfg_json)
 
     if args.cfg_str:
-        cfg_str = "{}x{}x{}_{}bx{}b_{}_{}_{}_{}".format(
+        # Needs to match the BITSTREAM string in python/vta/environment.py
+        cfg_str = "{}x{}x{}_{}bx{}b_{}_{}_{}_{}_{}MHz_{}ns_v{}".format(
             (1 << cfg["LOG_BATCH"]),
             (1 << cfg["LOG_BLOCK_IN"]),
             (1 << cfg["LOG_BLOCK_OUT"]),
@@ -100,8 +105,11 @@ def main():
             cfg["LOG_UOP_BUFF_SIZE"],
             cfg["LOG_INP_BUFF_SIZE"],
             cfg["LOG_WGT_BUFF_SIZE"],
-            cfg["LOG_ACC_BUFF_SIZE"])
-        print cfg_str
+            cfg["LOG_ACC_BUFF_SIZE"],
+            cfg["HW_FREQ"],
+            cfg["HW_CLK_TARGET"],
+            cfg["HW_VER"].replace('.', '_'))
+        print(cfg_str)
 
     if args.get_inpwidth:
         print(cfg["LOG_INP_WIDTH"])
@@ -139,5 +147,11 @@ def main():
     if args.get_accbuffsize:
         print(cfg["LOG_ACC_BUFF_SIZE"])
 
+    if args.get_fpgafreq:
+        print(cfg["HW_FREQ"])
+
+    if args.get_fpgaper:
+        print(cfg["HW_CLK_TARGET"])
+
 if __name__ == "__main__":
     main()
diff --git a/python/vta/__init__.py b/python/vta/__init__.py
@@ -8,11 +8,10 @@
 
 __version__ = "0.1.0"
 
-
+from .bitstream import get_bitstream_path, download_bitstream
 from .environment import get_env, Environment
 from .rpc_client import reconfig_runtime, program_fpga
 
-
 try:
     from . import top
     from .build_module import build_config, lower, build

diff --git a/python/vta/bitstream.py b/python/vta/bitstream.py
@@ -0,0 +1,55 @@
+"""VTA specific bitstream management library."""
+from __future__ import absolute_import as _abs
+
+import os
+import urllib
+from .environment import get_env
+
+# bitstream repo
+BITSTREAM_URL = "https://github.com/uwsaml/vta-distro/raw/master/bitstreams/"
+
+def get_bitstream_path():
+    """Returns the path to the cached bitstream corresponding to the current config
+
+    Returns
+    -------
+    bit_path: str
+        Filepath of the bitstream inside the per-target cache directory
+    """
+
+    env = get_env()
+
+    # Derive destination path (NOTE(review): $HOME is read even when VTA_CACHE_PATH is set)
+    cache_dir = os.getenv("VTA_CACHE_PATH", os.path.join(os.getenv("HOME"), ".vta_cache/"))
+    cache_dir = os.path.join(cache_dir, env.TARGET)
+    # Create the cache directory if it doesn't exist yet (side effect of calling this function)
+    if not os.path.exists(cache_dir):
+        os.makedirs(cache_dir)
+    bit_path = os.path.join(cache_dir, env.BITSTREAM)
+
+    return bit_path
+
+
+def download_bitstream():
+    """Downloads the bitstream for the current config into the cache; raises RuntimeError on 404
+    """
+
+    env = get_env()
+
+    success = False
+    bit = get_bitstream_path()
+    url = os.path.join(BITSTREAM_URL, env.TARGET)
+    url = os.path.join(url, env.HW_VER)
+    url = os.path.join(url, env.BITSTREAM)
+    # Check that the bitstream is accessible from the server (only HTTP 404 counts as missing)
+    if urllib.urlopen(url).getcode() == 404:
+        # Raise error - the solution when this happens is to build your own bitstream and add it
+        # to your VTA_CACHE_PATH
+        raise RuntimeError(
+            "Error: {} is not available. It appears that this configuration has not been built."
+            .format(url))
+    else:
+        urllib.urlretrieve(url, bit)
+        success = True
+
+    return success