Skip to content

Commit

Permalink
[VTA] Refactor to increase platform coverage (Ultra96 etc.) (apache#3496
Browse files Browse the repository at this point in the history
)

* hardware refactor for increased FPGA coverage, small optimizations

* fix header

* cleaning up parameters that won't be needed for now

* streamlining makefile, and simplifying tcl scripts

* moving parameter derivation into pkg_config.py, keeping tcl scripts lightweight

* refactoring tcl script to avoid global variables

* deriving AXI signals in pkg_config.py

* unifying address map definition for hardware and software drivers

* single channel design for ultra96 to simplify build

* enable alu by default, no mul opcode for now

* hardware fix

* new bitstream; vta version

* avoid error when env variable is not set

* ultra96 cleanup

* further cleaning up tcl script for bitstream generation

* preliminary rpc server support on ultra96

* rpc server tracker scripts

* ultra96 ldflag

* ultra96 support

* ultra96 support

* cleanup line

* cmake support for ultra96

* simplify memory instantiation

* cleaning up IP parameter initialization

* fix queue instantiation

* 2019.1 transition

* fix macro def

* removing bus width from config

* cleanup

* fix

* turning off testing for now

* cleanup ultra96 ps instantiation

* minor refactor

* adding comments

* upgrading to tophub v0.6

* model used in TVM target now refers to a specific version of VTA for better autoTVM scheduling

* revert change due to bug

* rename driver files to be for zynq-type devices

* streamlining address mapping

* unifying register map offset values between driver and hardware generator

* rely on cma library for cache flush/invalidation

* coherence management

* not make buffer packing depend on data types that can be wider than 64bits

* refactor config derivation to minimize free parameters

* fix environment/pkg config interaction

* adding cfg dump property to pkgconfig:

* fix rpc reconfig

* fix spacing

* cleanup

* fix spacing

* long line fix

* fix spacing and lint

* fix line length

* cmake fix

* environment fix

* renaming after pynq since the driver stack relies on the pynq library - see pynq.io

* update doc

* adding parameterization to  name

* space

* removing reg width

* vta RPC

* update doc on how to edit vta_config.json

* fix path

* fix path
  • Loading branch information
tmoreau89 authored and wweic committed Sep 6, 2019
1 parent d0ec91d commit 427afd4
Show file tree
Hide file tree
Showing 33 changed files with 1,610 additions and 2,329 deletions.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@
# under the License.
PROJROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../../" && pwd )"

# Derive target specified by vta_config.json
VTA_CONFIG=${PROJROOT}/vta/config/vta_config.py
TARGET=$(python ${VTA_CONFIG} --target)

export PYTHONPATH=${PYTHONPATH}:${PROJROOT}/python:${PROJROOT}/vta/python
export PYTHONPATH=${PYTHONPATH}:/home/xilinx/pynq
python3 -m vta.exec.rpc_server --tracker fleet:9190 --key pynq
python3 -m vta.exec.rpc_server --tracker fleet:9190 --key $TARGET
17 changes: 12 additions & 5 deletions cmake/modules/VTA.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -38,18 +38,25 @@ elseif(PYTHON)
string(REGEX MATCHALL "(^| )-D[A-Za-z0-9_=.]*" VTA_DEFINITIONS "${__vta_defs}")

file(GLOB VTA_RUNTIME_SRCS vta/src/*.cc)
file(GLOB __vta_target_srcs vta/src/${VTA_TARGET}/*.cc)
# Add sim driver sources
if(${VTA_TARGET} STREQUAL "sim")
file(GLOB __vta_target_srcs vta/src/sim/*.cc)
endif()
# Add pynq driver sources
if(${VTA_TARGET} STREQUAL "pynq" OR ${VTA_TARGET} STREQUAL "ultra96")
file(GLOB __vta_target_srcs vta/src/pynq/*.cc)
endif()
list(APPEND VTA_RUNTIME_SRCS ${__vta_target_srcs})

add_library(vta SHARED ${VTA_RUNTIME_SRCS})

# Add tsim driver sources
if(${VTA_TARGET} STREQUAL "tsim")
target_compile_definitions(vta PUBLIC USE_TSIM)
include_directories("vta/include")
file(GLOB RUNTIME_DPI_SRCS vta/src/dpi/module.cc)
list(APPEND RUNTIME_SRCS ${RUNTIME_DPI_SRCS})
endif()

add_library(vta SHARED ${VTA_RUNTIME_SRCS})

target_include_directories(vta PUBLIC vta/include)

foreach(__def ${VTA_DEFINITIONS})
Expand All @@ -62,7 +69,7 @@ elseif(PYTHON)
endif(APPLE)

# PYNQ rules for Pynq v2.4
if(${VTA_TARGET} STREQUAL "pynq")
if(${VTA_TARGET} STREQUAL "pynq" OR ${VTA_TARGET} STREQUAL "ultra96")
find_library(__cma_lib NAMES cma PATH /usr/lib)
target_link_libraries(vta ${__cma_lib})
endif()
Expand Down
23 changes: 5 additions & 18 deletions docs/vta/dev/config.rst
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,6 @@ below.
+=======================+============+========================================================+
| ``TARGET`` | String | The TVM device target. |
+-----------------------+------------+--------------------------------------------------------+
| ``HW_TARGET`` | Int | FPGA frequency in MHz. |
+-----------------------+------------+--------------------------------------------------------+
| ``HW_CLK_TARGET`` | Int | FPGA clock period in ns target for HLS tool. |
+-----------------------+------------+--------------------------------------------------------+
| ``HW_VER`` | String | VTA hardware version number. |
+-----------------------+------------+--------------------------------------------------------+
| ``LOG_INP_WIDTH`` | Int (log2) | Input data type signed integer width. |
Expand All @@ -48,13 +44,9 @@ below.
+-----------------------+------------+--------------------------------------------------------+
| ``LOG_ACC_WIDTH`` | Int (log2) | Accumulator data type signed integer width. |
+-----------------------+------------+--------------------------------------------------------+
| ``LOG_OUT_WIDTH`` | Int (log2) | Output data type signed integer width. |
+-----------------------+------------+--------------------------------------------------------+
| ``LOG_BATCH`` | Int (log2) | VTA matrix multiply intrinsic output dimension 0. |
+-----------------------+------------+--------------------------------------------------------+
| ``LOG_BLOCK_IN`` | Int (log2) | VTA matrix multiply reduction dimension. |
| ``LOG_BATCH`` | Int (log2) | VTA matrix multiply intrinsic input/output dimension 0.|
+-----------------------+------------+--------------------------------------------------------+
| ``LOG_BLOCK_OUT`` | Int (log2) | VTA matrix multiply intrinsic output dimension 1. |
| ``LOG_BLOCK`` | Int (log2) | VTA matrix multiply inner dimensions. |
+-----------------------+------------+--------------------------------------------------------+
| ``LOG_UOP_BUFF_SIZE`` | Int (log2) | Micro-op on-chip buffer in Bytes. |
+-----------------------+------------+--------------------------------------------------------+
Expand All @@ -75,13 +67,8 @@ below.

We provide additional detail below regarding each parameter:

- ``TARGET``: Can be set to ``"pynq"`` or ``"sim"``.
- ``HW_TARGET``: In pynq mode, can be set to ``100``, ``142``, ``167``, or ``200`` MHz.
- ``HW_CLK_TARGET``: The lower the target, the more pipeline stages HLS will insert to achieve timing closure during place and route (this can also slightly decrease performance).
- ``TARGET``: Can be set to ``"pynq"``, ``"ultra96"``, ``"sim"`` (fast simulator), or ``"tsim"`` (cycle accurate sim with verilator).
- ``HW_VER``: Hardware version which increments every time the VTA hardware design changes. This parameter is used to uniquely identify hardware bitstreams.
- ``LOG_OUT_WIDTH``: We recommend matching ``LOG_OUT_WIDTH`` to ``LOG_INP_WIDTH``.
- ``LOG_BATCH``: Equivalent to A in multiplication of shape (A, B) x (B, C), or typically, the batch dimension.
- ``LOG_BATCH``: Equivalent to A in multiplication of shape (A, B) x (B, C), or typically, the batch dimension.
- ``LOG_BLOCK_IN``: Equivalent to B in multiplication of shape (A, B) x (B, C), or typically, the input channel dimension.
- ``LOG_BLOCK_OUT``: Equivalent to C in multiplication of shape (A, B) x (B, C), or typically, the output channel dimension.
- ``LOG_BATCH``: Equivalent to A in multiplication of shape (A, B) x (B, C), or typically, the batch dimension of inner tensor computation.
- ``LOG_BLOCK``: Equivalent to B and C in multiplication of shape (A, B) x (B, C), or typically, the input/output channel dimensions of the inner tensor computation.

25 changes: 12 additions & 13 deletions docs/vta/install.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ To do so,

```bash
cd <tvm root>
cp vta/config/vta_config.json vta_config.json
vim vta/config/vta_config.json
# edit vta_config.json
make vta
```
Expand Down Expand Up @@ -118,7 +118,7 @@ cd /home/xilinx/tvm
mkdir build
cp cmake/config.cmake build/.
# Copy pynq specific configuration
cp vta/config/pynq_sample.json build/vta_config.json
cp vta/config/pynq_sample.json vta/config/vta_config.json
cd build
cmake ..
make runtime vta -j2
Expand Down Expand Up @@ -147,13 +147,12 @@ export VTA_PYNQ_RPC_PORT=9091
```

In addition, you'll need to edit the `vta_config.json` file on the host to indicate that we are targeting the Pynq platform, by setting the `TARGET` field to `"pynq"`.
Alternatively, you can copy the default `vta/config/pynq_sample.json` into the TVM root as `vta_config.json`.
> Note: in contrast to our simulation setup, there are no libraries to compile on the host side since the host offloads all of the computation to the Pynq board.
```bash
# On the Host-side
cd <tvm root>
cp vta/config/pynq_sample.json vta_config.json
cp vta/config/pynq_sample.json vta/config/vta_config.json
```

This time again, we will run the 2D convolution testbench.
Expand Down Expand Up @@ -187,28 +186,28 @@ This third and last guide allows users to generate custom VTA bitstreams using f

### Xilinx Toolchain Installation

We recommend using `Vivado 2018.2` since our scripts have been tested to work on this version of the Xilinx toolchains.
We recommend using `Vivado 2019.1` since our scripts have been tested to work on this version of the Xilinx toolchains.
Our guide is written for Linux (Ubuntu) installation.

You’ll need to install Xilinx’ FPGA compilation toolchain, [Vivado HL WebPACK 2018.2](https://www.xilinx.com/products/design-tools/vivado.html), which is a license-free version of the Vivado HLx toolchain.
You’ll need to install Xilinx’ FPGA compilation toolchain, [Vivado HL WebPACK 2019.1](https://www.xilinx.com/products/design-tools/vivado.html), which is a license-free version of the Vivado HLx toolchain.

#### Obtaining and Launching the Vivado GUI Installer

1. Go to the [download webpage](https://www.xilinx.com/support/download/index.html/content/xilinx/en/downloadNav/vivado-design-tools/2018-2.html), and download the Linux Self Extracting Web Installer for Vivado HLx 2018.2: WebPACK and Editions.
1. Go to the [download webpage](https://www.xilinx.com/support/download/index.html/content/xilinx/en/downloadNav/vivado-design-tools/2019-1.html), and download the Linux Self Extracting Web Installer for Vivado HLx 2019.1: WebPACK and Editions.
2. You’ll have to sign in with a Xilinx account. This requires a Xilinx account creation that will take 2 minutes.
3. Complete the Name and Address Verification by clicking “Next”, and you will get the opportunity to download a binary file, called `Xilinx_Vivado_SDK_Web_2018.2_0614_1954_Lin64.bin`.
3. Complete the Name and Address Verification by clicking “Next”, and you will get the opportunity to download a binary file, called `Xilinx_Vivado_SDK_Web_2019.1_0524_1430_Lin64.bin`.
4. Now that the file is downloaded, go to your `Downloads` directory, and change the file permissions so it can be executed:
```bash
chmod u+x Xilinx_Vivado_SDK_Web_2018.2_0614_1954_Lin64.bin
chmod u+x Xilinx_Vivado_SDK_Web_2019.1_0524_1430_Lin64.bin
```
5. Now you can execute the binary:
```bash
./Xilinx_Vivado_SDK_Web_2018.2_0614_1954_Lin64.bin
./Xilinx_Vivado_SDK_Web_2019.1_0524_1430_Lin64.bin
```

#### Xilinx Vivado GUI Installer Steps

At this point you've launched the Vivado 2018.2 Installer GUI program.
At this point you've launched the Vivado 2019.1 Installer GUI program.

1. Click “Next” on the *Welcome* screen.
2. On the *Select Install Type* screen, enter your Xilinx user credentials under the “User Authentication” box and select the “Download and Install Now” option before clicking “Next” .
Expand All @@ -230,8 +229,8 @@ At this point you've launched the Vivado 2018.2 Installer GUI program.

The last step is to update your `~/.bashrc` with the following lines. This will include all of the Xilinx binary paths so you can launch compilation scripts from the command line.
```bash
# Xilinx Vivado 2018.2 environment
export XILINX_VIVADO=${XILINX_PATH}/Vivado/2018.2
# Xilinx Vivado 2019.1 environment
export XILINX_VIVADO=${XILINX_PATH}/Vivado/2019.1
export PATH=${XILINX_VIVADO}/bin:${PATH}
```

Expand Down
2 changes: 1 addition & 1 deletion python/tvm/autotvm/tophub.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
'opencl': "v0.02",
'mali': "v0.05",

'vta': "v0.05",
'vta': "v0.06",
}

logger = logging.getLogger('autotvm')
Expand Down
10 changes: 3 additions & 7 deletions vta/config/pynq_sample.json
Original file line number Diff line number Diff line change
@@ -1,17 +1,13 @@
{
"TARGET" : "pynq",
"HW_FREQ" : 100,
"HW_CLK_TARGET" : 8,
"HW_VER" : "0.0.0",
"HW_VER" : "0.0.1",
"LOG_INP_WIDTH" : 3,
"LOG_WGT_WIDTH" : 3,
"LOG_ACC_WIDTH" : 5,
"LOG_OUT_WIDTH" : 3,
"LOG_BATCH" : 0,
"LOG_BLOCK_IN" : 4,
"LOG_BLOCK_OUT" : 4,
"LOG_BLOCK" : 4,
"LOG_UOP_BUFF_SIZE" : 15,
"LOG_INP_BUFF_SIZE" : 15,
"LOG_INP_BUFF_SIZE" :15,
"LOG_WGT_BUFF_SIZE" : 18,
"LOG_ACC_BUFF_SIZE" : 17
}
13 changes: 13 additions & 0 deletions vta/config/ultra96_sample.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"TARGET" : "ultra96",
"HW_VER" : "0.0.1",
"LOG_INP_WIDTH" : 3,
"LOG_WGT_WIDTH" : 3,
"LOG_ACC_WIDTH" : 5,
"LOG_BATCH" : 0,
"LOG_BLOCK" : 4,
"LOG_UOP_BUFF_SIZE" : 15,
"LOG_INP_BUFF_SIZE" :15,
"LOG_WGT_BUFF_SIZE" : 18,
"LOG_ACC_BUFF_SIZE" : 17
}
8 changes: 2 additions & 6 deletions vta/config/vta_config.json
Original file line number Diff line number Diff line change
@@ -1,15 +1,11 @@
{
"TARGET" : "sim",
"HW_FREQ" : 100,
"HW_CLK_TARGET" : 7,
"HW_VER" : "0.0.0",
"HW_VER" : "0.0.1",
"LOG_INP_WIDTH" : 3,
"LOG_WGT_WIDTH" : 3,
"LOG_ACC_WIDTH" : 5,
"LOG_OUT_WIDTH" : 3,
"LOG_BATCH" : 0,
"LOG_BLOCK_IN" : 4,
"LOG_BLOCK_OUT" : 4,
"LOG_BLOCK" : 4,
"LOG_UOP_BUFF_SIZE" : 15,
"LOG_INP_BUFF_SIZE" : 15,
"LOG_WGT_BUFF_SIZE" : 18,
Expand Down
Loading

0 comments on commit 427afd4

Please sign in to comment.