Skip to content

Commit

Permalink
unifying register map offset values between driver and hardware generator
Browse files Browse the repository at this point in the history
  • Loading branch information
tmoreau89 committed Jul 23, 2019
1 parent 5220153 commit cdaa87f
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 35 deletions.
4 changes: 2 additions & 2 deletions vta/hardware/xilinx/src/vta.cc
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ void fetch(
hls::stream<insn_T> &load_queue,
hls::stream<insn_T> &gemm_queue,
hls::stream<insn_T> &store_queue) {
#pragma HLS INTERFACE s_axilite port = insn_count bundle = CONTROL_BUS
PRAGMA_HLS(HLS INTERFACE s_axilite port = insn_count bundle = CONTROL_BUS offset = VTA_FETCH_INSN_COUNT_OFFSET)
#pragma HLS INTERFACE m_axi port = insns offset = slave bundle = ins_port
#pragma HLS INTERFACE axis port = load_queue
#pragma HLS INTERFACE axis port = gemm_queue
Expand Down Expand Up @@ -424,7 +424,7 @@ void compute(
bus_T inp_mem[VTA_INP_BUFF_DEPTH][INP_MAT_AXI_RATIO],
bus_T wgt_mem[VTA_WGT_BUFF_DEPTH][WGT_MAT_AXI_RATIO],
bus_T out_mem[VTA_ACC_BUFF_DEPTH][OUT_MAT_AXI_RATIO]) {
#pragma HLS INTERFACE s_axilite port = done bundle = CONTROL_BUS
PRAGMA_HLS(HLS INTERFACE s_axilite port = done bundle = CONTROL_BUS offset = VTA_COMPUTE_DONE_WR_OFFSET)
#pragma HLS INTERFACE m_axi port = uops offset = slave bundle = uop_port
#pragma HLS INTERFACE m_axi port = biases offset = slave bundle = data_port
#pragma HLS INTERFACE axis port = gemm_queue
Expand Down
26 changes: 25 additions & 1 deletion vta/python/vta/pkg_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,14 +131,28 @@ def __init__(self, cfg, proj_root):
self.fpga_per = 7
self.fpga_log_axi_bus_width = 6
self.axi_cache_bits = '1111'
self.axi_prot_bits = '010'
self.axi_prot_bits = '000'
# IP register address map
self.ip_reg_map_range = "0x1000"
self.fetch_base_addr = "0x43C00000"
self.load_base_addr = "0x43C01000"
self.compute_base_addr = "0x43C02000"
self.store_base_addr = "0x43C03000"

# Define the offsets of the IP memory-mapped registers.
# In HLS, offsets 0x00-0x0C are reserved for the block-level I/O protocol.
# Make sure to leave 8 bytes between register offsets to maintain
# compatibility with 64-bit systems.
self.fetch_insn_count_offset = 0x10
self.fetch_insn_addr_offset = self.fetch_insn_count_offset + 0x08
self.load_inp_addr_offset = 0x10
self.load_wgt_addr_offset = self.load_inp_addr_offset + 0x08
self.compute_done_wr_offet = 0x10
self.compute_done_rd_offet = self.compute_done_wr_offet + 0x08
self.compute_uop_addr_offset = self.compute_done_rd_offet + 0x08
self.compute_bias_addr_offset = self.compute_uop_addr_offset + 0x08
self.store_out_addr_offset = 0x10

# Derive SRAM parameters
# The goal here is to determine how many memory banks are needed,
# how deep and wide each bank needs to be. This is derived from
Expand Down Expand Up @@ -199,6 +213,16 @@ def __init__(self, cfg, proj_root):
self.macro_defs.append("-DVTA_LOAD_ADDR=%s" % (self.load_base_addr))
self.macro_defs.append("-DVTA_COMPUTE_ADDR=%s" % (self.compute_base_addr))
self.macro_defs.append("-DVTA_STORE_ADDR=%s" % (self.store_base_addr))
# IP register offsets
self.macro_defs.append("-DVTA_FETCH_INSN_COUNT_OFFSET=%s" % (self.fetch_insn_count_offset))
self.macro_defs.append("-DVTA_FETCH_INSN_ADDR_OFFSET=%s" % (self.fetch_insn_addr_offset))
self.macro_defs.append("-DVTA_LOAD_INP_ADDR_OFFSET=%s" % (self.load_inp_addr_offset))
self.macro_defs.append("-DVTA_LOAD_WGT_ADDR_OFFSET=%s" % (self.load_wgt_addr_offset))
self.macro_defs.append("-DVTA_COMPUTE_DONE_WR_OFFSET=%s" % (self.compute_done_wr_offet))
self.macro_defs.append("-DVTA_COMPUTE_DONE_RD_OFFSET=%s" % (self.compute_done_rd_offet))
self.macro_defs.append("-DVTA_COMPUTE_UOP_ADDR_OFFSET=%s" % (self.compute_uop_addr_offset))
self.macro_defs.append("-DVTA_COMPUTE_BIAS_ADDR_OFFSET=%s" % (self.compute_bias_addr_offset))
self.macro_defs.append("-DVTA_STORE_OUT_ADDR_OFFSET=%s" % (self.store_out_addr_offset))


@property
Expand Down
25 changes: 8 additions & 17 deletions vta/src/zynq/zynq_driver.cc
Original file line number Diff line number Diff line change
Expand Up @@ -112,22 +112,13 @@ class VTADevice {
int Run(vta_phy_addr_t insn_phy_addr,
uint32_t insn_count,
uint32_t wait_cycles) {
// NOTE: Register address map is derived from the auto-generated
// driver files available under hardware/build/vivado/<design>/export/driver
// FETCH @ 0x10 : Data signal of insn_count_V
VTAWriteMappedReg(vta_fetch_handle_, 0x10, insn_count);
// FETCH @ 0x18 : Data signal of insns_V
VTAWriteMappedReg(vta_fetch_handle_, 0x18, insn_phy_addr);
// LOAD @ 0x10 : Data signal of inputs_V
VTAWriteMappedReg(vta_load_handle_, 0x10, 0);
// LOAD @ 0x18 : Data signal of weight_V
VTAWriteMappedReg(vta_load_handle_, 0x18, 0);
// COMPUTE @ 0x20 : Data signal of uops_V
VTAWriteMappedReg(vta_compute_handle_, 0x20, 0);
// COMPUTE @ 0x28 : Data signal of biases_V
VTAWriteMappedReg(vta_compute_handle_, 0x28, 0);
// STORE @ 0x10 : Data signal of outputs_V
VTAWriteMappedReg(vta_store_handle_, 0x10, 0);
VTAWriteMappedReg(vta_fetch_handle_, VTA_FETCH_INSN_COUNT_OFFSET, insn_count);
VTAWriteMappedReg(vta_fetch_handle_, VTA_FETCH_INSN_ADDR_OFFSET, insn_phy_addr);
VTAWriteMappedReg(vta_load_handle_, VTA_LOAD_INP_ADDR_OFFSET, 0);
VTAWriteMappedReg(vta_load_handle_, VTA_LOAD_WGT_ADDR_OFFSET, 0);
VTAWriteMappedReg(vta_compute_handle_, VTA_COMPUTE_UOP_ADDR_OFFSET, 0);
VTAWriteMappedReg(vta_compute_handle_, VTA_COMPUTE_BIAS_ADDR_OFFSET, 0);
VTAWriteMappedReg(vta_store_handle_, VTA_STORE_OUT_ADDR_OFFSET, 0);

// VTA start
VTAWriteMappedReg(vta_fetch_handle_, 0x0, VTA_START);
Expand All @@ -138,7 +129,7 @@ class VTADevice {
// Loop until the VTA is done
unsigned t, flag = 0;
for (t = 0; t < wait_cycles; ++t) {
flag = VTAReadMappedReg(vta_compute_handle_, 0x18);
flag = VTAReadMappedReg(vta_compute_handle_, VTA_COMPUTE_DONE_RD_OFFSET);
if (flag == VTA_DONE) break;
std::this_thread::yield();
}
Expand Down
23 changes: 8 additions & 15 deletions vta/tests/hardware/common/test_lib.cc
Original file line number Diff line number Diff line change
Expand Up @@ -71,20 +71,13 @@ uint64_t vta(

clock_gettime(CLOCK_REALTIME, &start);

// FETCH @ 0x10 : Data signal of insn_count_V
VTAWriteMappedReg(vta_fetch_handle, 0x10, insn_count);
// FETCH @ 0x18 : Data signal of insns_V
if (insns) VTAWriteMappedReg(vta_fetch_handle, 0x18, insn_phy);
// LOAD @ 0x10 : Data signal of inputs_V
if (inputs) VTAWriteMappedReg(vta_load_handle, 0x10, input_phy);
// LOAD @ 0x18 : Data signal of weight_V
if (weights) VTAWriteMappedReg(vta_load_handle, 0x18, weight_phy);
// COMPUTE @ 0x20 : Data signal of uops_V
if (uops) VTAWriteMappedReg(vta_compute_handle, 0x20, uop_phy);
// COMPUTE @ 0x28 : Data signal of biases_V
if (biases) VTAWriteMappedReg(vta_compute_handle, 0x28, bias_phy);
// STORE @ 0x10 : Data signal of outputs_V
if (outputs) VTAWriteMappedReg(vta_store_handle, 0x10, output_phy);
VTAWriteMappedReg(vta_fetch_handle, VTA_FETCH_INSN_COUNT_OFFSET, insn_count);
if (insns) VTAWriteMappedReg(vta_fetch_handle, VTA_FETCH_INSN_ADDR_OFFSET, insn_phy);
if (inputs) VTAWriteMappedReg(vta_load_handle, VTA_LOAD_INP_ADDR_OFFSET, input_phy);
if (weights) VTAWriteMappedReg(vta_load_handle, VTA_LOAD_WGT_ADDR_OFFSET, weight_phy);
if (uops) VTAWriteMappedReg(vta_compute_handle, VTA_COMPUTE_UOP_ADDR_OFFSET, uop_phy);
if (biases) VTAWriteMappedReg(vta_compute_handle, VTA_COMPUTE_BIAS_ADDR_OFFSET, bias_phy);
if (outputs) VTAWriteMappedReg(vta_store_handle, VTA_STORE_OUT_ADDR_OFFSET, output_phy);

// VTA start
VTAWriteMappedReg(vta_fetch_handle, 0x0, 0x1);
Expand All @@ -94,7 +87,7 @@ uint64_t vta(

int flag = 0, t = 0;
for (t = 0; t < 10000000; ++t) {
flag = VTAReadMappedReg(vta_compute_handle, 0x18);
flag = VTAReadMappedReg(vta_compute_handle, VTA_COMPUTE_DONE_RD_OFFSET);
if (flag & VTA_DONE) break;
}

Expand Down

0 comments on commit cdaa87f

Please sign in to comment.