From 978c739370437b60307ac010acd0629b099ced94 Mon Sep 17 00:00:00 2001
From: Thierry Moreau <moreau@cs.washington.edu>
Date: Sun, 22 Jul 2018 21:54:45 -0700
Subject: [PATCH] [DOC, HARDWARE] Hardware developer guide, migrating to use
 Vivado 2018.2 (#1473)

---
 hardware/xilinx/scripts/vivado.tcl | 33 ++++++++++++++++--------------
 tutorials/convolution_opt.py       |  4 ++--
 tutorials/matrix_multiply.py       | 10 ++++-----
 tutorials/matrix_multiply_opt.py   |  4 ++--
 tutorials/resnet.py                |  4 ++--
 tutorials/vta_get_started.py       | 10 ++++-----
 6 files changed, 34 insertions(+), 31 deletions(-)
diff --git a/hardware/xilinx/scripts/vivado.tcl b/hardware/xilinx/scripts/vivado.tcl
index 58a1f5008e6d..b519d540bd4e 100644
--- a/hardware/xilinx/scripts/vivado.tcl
+++ b/hardware/xilinx/scripts/vivado.tcl
@@ -6,7 +6,7 @@
 #
 
 # Check if script is running in correct Vivado version.
-set scripts_vivado_version 2017.1
+set scripts_vivado_version 2018.2
 set current_vivado_version [version -short]
 
 if { [string first $scripts_vivado_version $current_vivado_version] == -1 } {
@@ -53,7 +53,8 @@ if { [llength $argv] eq 12 } {
   }
 } else {
   puts "Arg list incomplete: <path to ip dir> <num threads> <clock freq> \
-    <inp width> <wgt_width> <out_width> <batch> <in_block / 1024> <out_block>"
+    <inp width> <wgt_width> <out_width> <batch> <out_block> <in_block> \
+    <inp_mem_size> <wgt_mem_size> <out_mem_size>"
   return 1
 }
 
@@ -66,6 +67,7 @@ if {[expr $inp_part == 0]} {
   set inp_bus_width $inp_mem_width
 }
 set inp_mem_depth [expr $inp_mem_size * 8 / ($inp_mem_width * $inp_part)]
+
 # Derive weight mem parameters
 set wgt_mem_width [expr $wgt_width * $out_block * $in_block]
 set wgt_bus_width 1024
@@ -75,6 +77,7 @@ if {[expr $wgt_part == 0]} {
   set wgt_bus_width $wgt_mem_width
 }
 set wgt_mem_depth [expr $wgt_mem_size * 8 / ($wgt_mem_width * $wgt_part)]
+
 # Derive output mem parameters
 set out_mem_width [expr $out_width * $batch * $out_block]
 set out_bus_width 1024
@@ -252,7 +255,7 @@ proc create_root_design { parentCell clk inp_part wgt_part out_part inp_bus_widt
   ] $fetch_0
 
   # Create instance: g2l_queue, and set properties
-  set g2l_queue [ create_bd_cell -type ip -vlnv xilinx.com:ip:fifo_generator:13.1 g2l_queue ]
+  set g2l_queue [ create_bd_cell -type ip -vlnv xilinx.com:ip:fifo_generator:13.2 g2l_queue ]
   set_property -dict [ list \
     CONFIG.Empty_Threshold_Assert_Value_axis {1022} \
     CONFIG.Empty_Threshold_Assert_Value_rach {14} \
@@ -273,7 +276,7 @@ proc create_root_design { parentCell clk inp_part wgt_part out_part inp_bus_widt
   ] $g2l_queue
 
   # Create instance: g2s_queue, and set properties
-  set g2s_queue [ create_bd_cell -type ip -vlnv xilinx.com:ip:fifo_generator:13.1 g2s_queue ]
+  set g2s_queue [ create_bd_cell -type ip -vlnv xilinx.com:ip:fifo_generator:13.2 g2s_queue ]
   set_property -dict [ list \
     CONFIG.Empty_Threshold_Assert_Value_axis {1022} \
     CONFIG.Empty_Threshold_Assert_Value_rach {14} \
@@ -294,7 +297,7 @@ proc create_root_design { parentCell clk inp_part wgt_part out_part inp_bus_widt
   ] $g2s_queue
 
   # Create instance: gemm_queue, and set properties
-  set gemm_queue [ create_bd_cell -type ip -vlnv xilinx.com:ip:fifo_generator:13.1 gemm_queue ]
+  set gemm_queue [ create_bd_cell -type ip -vlnv xilinx.com:ip:fifo_generator:13.2 gemm_queue ]
   set_property -dict [ list \
     CONFIG.Empty_Threshold_Assert_Value_axis {510} \
     CONFIG.Empty_Threshold_Assert_Value_rach {14} \
@@ -318,7 +321,7 @@ proc create_root_design { parentCell clk inp_part wgt_part out_part inp_bus_widt
   ] $gemm_queue
 
   # Create instance: l2g_queue, and set properties
-  set l2g_queue [ create_bd_cell -type ip -vlnv xilinx.com:ip:fifo_generator:13.1 l2g_queue ]
+  set l2g_queue [ create_bd_cell -type ip -vlnv xilinx.com:ip:fifo_generator:13.2 l2g_queue ]
   set_property -dict [ list \
     CONFIG.Empty_Threshold_Assert_Value_axis {1022} \
     CONFIG.Empty_Threshold_Assert_Value_rach {14} \
@@ -345,7 +348,7 @@ proc create_root_design { parentCell clk inp_part wgt_part out_part inp_bus_widt
   ] $load_0
 
   # Create instance: load_queue, and set properties
-  set load_queue [ create_bd_cell -type ip -vlnv xilinx.com:ip:fifo_generator:13.1 load_queue ]
+  set load_queue [ create_bd_cell -type ip -vlnv xilinx.com:ip:fifo_generator:13.2 load_queue ]
   set_property -dict [ list \
     CONFIG.Empty_Threshold_Assert_Value_axis {510} \
     CONFIG.Empty_Threshold_Assert_Value_rach {14} \
@@ -406,7 +409,7 @@ proc create_root_design { parentCell clk inp_part wgt_part out_part inp_bus_widt
   ] $processing_system7_1
 
   # Create instance: s2g_queue, and set properties
-  set s2g_queue [ create_bd_cell -type ip -vlnv xilinx.com:ip:fifo_generator:13.1 s2g_queue ]
+  set s2g_queue [ create_bd_cell -type ip -vlnv xilinx.com:ip:fifo_generator:13.2 s2g_queue ]
   set_property -dict [ list \
     CONFIG.Empty_Threshold_Assert_Value_axis {1022} \
     CONFIG.Empty_Threshold_Assert_Value_rach {14} \
@@ -433,7 +436,7 @@ CONFIG.C_M_AXI_DATA_PORT_CACHE_VALUE {"1111"} \
   ] $store_0
 
   # Create instance: store_queue, and set properties
-  set store_queue [ create_bd_cell -type ip -vlnv xilinx.com:ip:fifo_generator:13.1 store_queue ]
+  set store_queue [ create_bd_cell -type ip -vlnv xilinx.com:ip:fifo_generator:13.2 store_queue ]
   set_property -dict [ list \
     CONFIG.Empty_Threshold_Assert_Value_axis {510} \
     CONFIG.Empty_Threshold_Assert_Value_rach {14} \
@@ -466,7 +469,7 @@ CONFIG.NUM_PORTS {5} \
   if {${inp_part} > 1} {
     for {set i 0} {$i < ${inp_part}} {incr i} {
       # Create instance: inp_mem, and set properties
-      set inp_mem [ create_bd_cell -type ip -vlnv xilinx.com:ip:blk_mem_gen:8.3 inp_mem_${i} ]
+      set inp_mem [ create_bd_cell -type ip -vlnv xilinx.com:ip:blk_mem_gen:8.4 inp_mem_${i} ]
       set_property -dict [ list \
         CONFIG.Byte_Size {8} \
         CONFIG.Enable_32bit_Address {true} \
@@ -494,7 +497,7 @@ CONFIG.NUM_PORTS {5} \
     }
   } else {
       # Create instance: inp_mem, and set properties
-      set inp_mem [ create_bd_cell -type ip -vlnv xilinx.com:ip:blk_mem_gen:8.3 inp_mem ]
+      set inp_mem [ create_bd_cell -type ip -vlnv xilinx.com:ip:blk_mem_gen:8.4 inp_mem ]
       set_property -dict [ list \
         CONFIG.Byte_Size {8} \
         CONFIG.Enable_32bit_Address {true} \
@@ -525,7 +528,7 @@ CONFIG.NUM_PORTS {5} \
   if {${wgt_part} > 1} {
     for {set i 0} {$i < ${wgt_part}} {incr i} {
       # Create instance: wgt_mem, and set properties
-      set wgt_mem [ create_bd_cell -type ip -vlnv xilinx.com:ip:blk_mem_gen:8.3 wgt_mem_${i} ]
+      set wgt_mem [ create_bd_cell -type ip -vlnv xilinx.com:ip:blk_mem_gen:8.4 wgt_mem_${i} ]
       set_property -dict [ list \
         CONFIG.Assume_Synchronous_Clk {true} \
         CONFIG.Byte_Size {8} \
@@ -553,7 +556,7 @@ CONFIG.NUM_PORTS {5} \
     }
   } else {
       # Create instance: wgt_mem, and set properties
-      set wgt_mem [ create_bd_cell -type ip -vlnv xilinx.com:ip:blk_mem_gen:8.3 wgt_mem ]
+      set wgt_mem [ create_bd_cell -type ip -vlnv xilinx.com:ip:blk_mem_gen:8.4 wgt_mem ]
       set_property -dict [ list \
         CONFIG.Assume_Synchronous_Clk {true} \
         CONFIG.Byte_Size {8} \
@@ -584,7 +587,7 @@ CONFIG.NUM_PORTS {5} \
   if {${out_part} > 1} {
     for {set i 0} {$i < ${out_part}} {incr i} {
       # Create instance: out_mem, and set properties
-      set out_mem [ create_bd_cell -type ip -vlnv xilinx.com:ip:blk_mem_gen:8.3 out_mem_${i} ]
+      set out_mem [ create_bd_cell -type ip -vlnv xilinx.com:ip:blk_mem_gen:8.4 out_mem_${i} ]
       set_property -dict [ list \
         CONFIG.Byte_Size {8} \
         CONFIG.Enable_32bit_Address {true} \
@@ -612,7 +615,7 @@ CONFIG.NUM_PORTS {5} \
     }
   } else {
       # Create instance: out_mem, and set properties
-      set out_mem [ create_bd_cell -type ip -vlnv xilinx.com:ip:blk_mem_gen:8.3 out_mem ]
+      set out_mem [ create_bd_cell -type ip -vlnv xilinx.com:ip:blk_mem_gen:8.4 out_mem ]
       set_property -dict [ list \
         CONFIG.Byte_Size {8} \
         CONFIG.Enable_32bit_Address {true} \
diff --git a/tutorials/convolution_opt.py b/tutorials/convolution_opt.py
index e581b0c4082f..8e4b77d8b491 100644
--- a/tutorials/convolution_opt.py
+++ b/tutorials/convolution_opt.py
@@ -30,7 +30,7 @@
 from tvm.contrib import util
 from vta.testing import simulator
 
-# Load VTA parameters from the config.json file
+# Load VTA parameters from the vta/config/vta_config.json file
 env = vta.get_env()
 
 # We read the Pynq RPC host IP address and port number from the OS environment
@@ -38,7 +38,7 @@
 port = int(os.environ.get("VTA_PYNQ_RPC_PORT", "9091"))
 
 # We configure both the bitstream and the runtime system on the Pynq
-# to match the VTA configuration specified by the config.json file.
+# to match the VTA configuration specified by the vta_config.json file.
 if env.TARGET == "pynq":
 
     # Make sure that TVM was compiled with RPC=1
diff --git a/tutorials/matrix_multiply.py b/tutorials/matrix_multiply.py
index f7e9e9f66f2d..4c8e716ff665 100644
--- a/tutorials/matrix_multiply.py
+++ b/tutorials/matrix_multiply.py
@@ -26,7 +26,7 @@
 from tvm.contrib import util
 from vta.testing import simulator
 
-# Load VTA parameters from the config.json file
+# Load VTA parameters from the vta/config/vta_config.json file
 env = vta.get_env()
 
 # We read the Pynq RPC host IP address and port number from the OS environment
@@ -34,7 +34,7 @@
 port = int(os.environ.get("VTA_PYNQ_RPC_PORT", "9091"))
 
 # We configure both the bitstream and the runtime system on the Pynq
-# to match the VTA configuration specified by the config.json file.
+# to match the VTA configuration specified by the vta_config.json file.
 if env.TARGET == "pynq":
 
     # Make sure that TVM was compiled with RPC=1
@@ -95,7 +95,7 @@
 #        :width: 480px
 #
 #   The dimensions of that matrix-matrix multiplication are specified in
-#   the :code:`config.json` configuration file.
+#   the :code:`vta_config.json` configuration file.
 #   The activation matrix has a :code:`(BATCH, BLOCK_IN)` shape
 #   and the transposed weight matrix has a :code:`(BLOCK_OUT, BLOCK_IN)` shape,
 #   thus inferring that the resulting output matrix has a
@@ -131,7 +131,7 @@
 #   dimension of VTA's tensor core, but also to match the specific data types
 #   expected by VTA.
 #   VTA for now only supports fixed point data types, which integer width is
-#   specified in the :code:`config.json` file by :code:`INP_WIDTH` and
+#   specified in the :code:`vta_config.json` file by :code:`INP_WIDTH` and
 #   :code:`WGT_WIDTH` for the activations and weights data types respectively.
 #   In addition, the accumulator data type integer width is specified by
 #   :code:`ACC_WIDTH`.
@@ -284,7 +284,7 @@
 #      that stores input matrices of shape :code:`(env.BATCH, env.BLOCK_IN)`
 #      of type :code:`env.inp_dtype`. The input buffer contains
 #      `2 ^ LOG_INP_BUFF_SIZE` matrix elements (as specified in the
-#      :code:`config.json` file).
+#      :code:`vta_config.json` file).
 #    - :code:`env.wgt_scope`: Weight buffer, which is a read-only SRAM buffer
 #      that stores weight matrices of shape :code:`(env.BLOCK_OUT, env.BLOCK_IN)`
 #      of type :code:`env.wgt_dtype`. The weight buffer contains
diff --git a/tutorials/matrix_multiply_opt.py b/tutorials/matrix_multiply_opt.py
index 0150f1ed460b..4e1f4167f84b 100644
--- a/tutorials/matrix_multiply_opt.py
+++ b/tutorials/matrix_multiply_opt.py
@@ -29,7 +29,7 @@
 from tvm.contrib import util
 from vta.testing import simulator
 
-# Load VTA parameters from the config.json file
+# Load VTA parameters from the vta/config/vta_config.json file
 env = vta.get_env()
 
 # We read the Pynq RPC host IP address and port number from the OS environment
@@ -37,7 +37,7 @@
 port = int(os.environ.get("VTA_PYNQ_RPC_PORT", "9091"))
 
 # We configure both the bitstream and the runtime system on the Pynq
-# to match the VTA configuration specified by the config.json file.
+# to match the VTA configuration specified by the vta_config.json file.
 if env.TARGET == "pynq":
 
     # Make sure that TVM was compiled with RPC=1
diff --git a/tutorials/resnet.py b/tutorials/resnet.py
index d33085c8dc73..7a2b0ab50925 100644
--- a/tutorials/resnet.py
+++ b/tutorials/resnet.py
@@ -38,7 +38,7 @@
 from matplotlib import pyplot as plt
 from PIL import Image
 
-# Load VTA parameters from the config.json file
+# Load VTA parameters from the vta/config/vta_config.json file
 env = vta.get_env()
 
 # Helper to crop an image to a square (224, 224)
@@ -180,7 +180,7 @@ def generate_graph(graph_fn, params_fn, device="vta"):
 port = int(os.environ.get("VTA_PYNQ_RPC_PORT", "9091"))
 
 # We configure both the bitstream and the runtime system on the Pynq
-# to match the VTA configuration specified by the config.json file.
+# to match the VTA configuration specified by the vta_config.json file.
 if env.TARGET == "pynq":
 
     # Make sure that TVM was compiled with RPC=1
diff --git a/tutorials/vta_get_started.py b/tutorials/vta_get_started.py
index 67018372fba6..73f6e2bd5472 100644
--- a/tutorials/vta_get_started.py
+++ b/tutorials/vta_get_started.py
@@ -29,12 +29,12 @@
 # VTA is a modular and customizable design. Consequently, the user
 # is free to modify high-level hardware parameters that affect
 # the hardware design layout.
-# These parameters are specified in the :code:`config.json` file by their
+# These parameters are specified in the :code:`vta_config.json` file by their
 # :code:`log2` values.
 # These VTA parameters can be loaded with the :code:`vta.get_env`
 # function.
 #
-# Finally, the TVM target is specified in the :code:`config.json` file.
+# Finally, the TVM target is also specified in the :code:`vta_config.json` file.
 # When set to *sim*, execution will take place inside of a behavioral
 # VTA simulator.
 # If you want to run this tutorial on the Pynq FPGA development platform,
@@ -58,7 +58,7 @@
 port = int(os.environ.get("VTA_PYNQ_RPC_PORT", "9091"))
 
 # We configure both the bitstream and the runtime system on the Pynq
-# to match the VTA configuration specified by the config.json file.
+# to match the VTA configuration specified by the vta_config.json file.
 if env.TARGET == "pynq":
 
     # Make sure that TVM was compiled with RPC=1
@@ -110,11 +110,11 @@
 # For VTA's general purpose operations such as vector adds, the tile size is
 # :code:`(env.BATCH, env.BLOCK_OUT)`.
 # The dimensions are specified in
-# the :code:`config.json` configuration file and are set by default to
+# the :code:`vta_config.json` configuration file and are set by default to
 # a (1, 16) vector.
 #
 # In addition, A and B's data types also needs to match the :code:`env.acc_dtype`
-# which is set by the :code:`config.json` file to be a 32-bit integer.
+# which is set by the :code:`vta_config.json` file to be a 32-bit integer.
 
 # Output channel factor m - total 64 x 16 = 1024 output channels
 m = 64