From 3fdf90de5cb6e5b0c3017d1310fd6bb37d5ac8cd Mon Sep 17 00:00:00 2001 From: Thierry Moreau Date: Mon, 22 Jul 2019 10:47:44 -0700 Subject: [PATCH] unifying register map offset values between driver and hardware generator --- vta/hardware/xilinx/src/vta.cc | 4 ++-- vta/python/vta/pkg_config.py | 26 +++++++++++++++++++++++++- vta/src/zynq/zynq_driver.cc | 25 ++++++++----------------- vta/tests/hardware/common/test_lib.cc | 23 ++++++++--------------- 4 files changed, 43 insertions(+), 35 deletions(-) diff --git a/vta/hardware/xilinx/src/vta.cc b/vta/hardware/xilinx/src/vta.cc index c7aa905b5d6f0..626026b84a5ee 100644 --- a/vta/hardware/xilinx/src/vta.cc +++ b/vta/hardware/xilinx/src/vta.cc @@ -135,7 +135,7 @@ void fetch( hls::stream &load_queue, hls::stream &gemm_queue, hls::stream &store_queue) { -#pragma HLS INTERFACE s_axilite port = insn_count bundle = CONTROL_BUS +PRAGMA_HLS(HLS INTERFACE s_axilite port = insn_count bundle = CONTROL_BUS offset = VTA_FETCH_INSN_COUNT_OFFSET) #pragma HLS INTERFACE m_axi port = insns offset = slave bundle = ins_port #pragma HLS INTERFACE axis port = load_queue #pragma HLS INTERFACE axis port = gemm_queue @@ -424,7 +424,7 @@ void compute( bus_T inp_mem[VTA_INP_BUFF_DEPTH][INP_MAT_AXI_RATIO], bus_T wgt_mem[VTA_WGT_BUFF_DEPTH][WGT_MAT_AXI_RATIO], bus_T out_mem[VTA_ACC_BUFF_DEPTH][OUT_MAT_AXI_RATIO]) { -#pragma HLS INTERFACE s_axilite port = done bundle = CONTROL_BUS +PRAGMA_HLS(HLS INTERFACE s_axilite port = done bundle = CONTROL_BUS offset = VTA_COMPUTE_DONE_WR_OFFSET) #pragma HLS INTERFACE m_axi port = uops offset = slave bundle = uop_port #pragma HLS INTERFACE m_axi port = biases offset = slave bundle = data_port #pragma HLS INTERFACE axis port = gemm_queue diff --git a/vta/python/vta/pkg_config.py b/vta/python/vta/pkg_config.py index 5b120f994f3f4..95582a54da767 100644 --- a/vta/python/vta/pkg_config.py +++ b/vta/python/vta/pkg_config.py @@ -131,7 +131,7 @@ def __init__(self, cfg, proj_root): self.fpga_per = 7 
self.fpga_log_axi_bus_width = 6 self.axi_cache_bits = '1111' - self.axi_prot_bits = '010' + self.axi_prot_bits = '000' # IP register address map self.ip_reg_map_range = "0x1000" self.fetch_base_addr = "0x43C00000" @@ -139,6 +139,20 @@ def __init__(self, cfg, proj_root): self.compute_base_addr = "0x43C02000" self.store_base_addr = "0x43C03000" + # Define IP memory-mapped register offsets. + # In HLS, 0x00-0x0C is reserved for the block-level I/O protocol. + # Make sure to leave 8B between register offsets to maintain + # compatibility with 64-bit systems. + self.fetch_insn_count_offset = 0x10 + self.fetch_insn_addr_offset = self.fetch_insn_count_offset + 0x08 + self.load_inp_addr_offset = 0x10 + self.load_wgt_addr_offset = self.load_inp_addr_offset + 0x08 + self.compute_done_wr_offet = 0x10 + self.compute_done_rd_offet = self.compute_done_wr_offet + 0x08 + self.compute_uop_addr_offset = self.compute_done_rd_offet + 0x08 + self.compute_bias_addr_offset = self.compute_uop_addr_offset + 0x08 + self.store_out_addr_offset = 0x10 + # Derive SRAM parameters # The goal here is to determine how many memory banks are needed, # how deep and wide each bank needs to be. 
This is derived from @@ -199,6 +213,16 @@ def __init__(self, cfg, proj_root): self.macro_defs.append("-DVTA_LOAD_ADDR=%s" % (self.load_base_addr)) self.macro_defs.append("-DVTA_COMPUTE_ADDR=%s" % (self.compute_base_addr)) self.macro_defs.append("-DVTA_STORE_ADDR=%s" % (self.store_base_addr)) + # IP register offsets + self.macro_defs.append("-DVTA_FETCH_INSN_COUNT_OFFSET=%s" % (self.fetch_insn_count_offset)) + self.macro_defs.append("-DVTA_FETCH_INSN_ADDR_OFFSET=%s" % (self.fetch_insn_addr_offset)) + self.macro_defs.append("-DVTA_LOAD_INP_ADDR_OFFSET=%s" % (self.load_inp_addr_offset)) + self.macro_defs.append("-DVTA_LOAD_WGT_ADDR_OFFSET=%s" % (self.load_wgt_addr_offset)) + self.macro_defs.append("-DVTA_COMPUTE_DONE_WR_OFFSET=%s" % (self.compute_done_wr_offet)) + self.macro_defs.append("-DVTA_COMPUTE_DONE_RD_OFFSET=%s" % (self.compute_done_rd_offet)) + self.macro_defs.append("-DVTA_COMPUTE_UOP_ADDR_OFFSET=%s" % (self.compute_uop_addr_offset)) + self.macro_defs.append("-DVTA_COMPUTE_BIAS_ADDR_OFFSET=%s" % (self.compute_bias_addr_offset)) + self.macro_defs.append("-DVTA_STORE_OUT_ADDR_OFFSET=%s" % (self.store_out_addr_offset)) @property diff --git a/vta/src/zynq/zynq_driver.cc b/vta/src/zynq/zynq_driver.cc index d76f77ef6fc77..3edd8a7ba973f 100644 --- a/vta/src/zynq/zynq_driver.cc +++ b/vta/src/zynq/zynq_driver.cc @@ -112,22 +112,13 @@ class VTADevice { int Run(vta_phy_addr_t insn_phy_addr, uint32_t insn_count, uint32_t wait_cycles) { - // NOTE: Register address map is derived from the auto-generated - // driver files available under hardware/build/vivado//export/driver - // FETCH @ 0x10 : Data signal of insn_count_V - VTAWriteMappedReg(vta_fetch_handle_, 0x10, insn_count); - // FETCH @ 0x18 : Data signal of insns_V - VTAWriteMappedReg(vta_fetch_handle_, 0x18, insn_phy_addr); - // LOAD @ 0x10 : Data signal of inputs_V - VTAWriteMappedReg(vta_load_handle_, 0x10, 0); - // LOAD @ 0x18 : Data signal of weight_V - VTAWriteMappedReg(vta_load_handle_, 0x18, 0); - // COMPUTE @ 
0x20 : Data signal of uops_V - VTAWriteMappedReg(vta_compute_handle_, 0x20, 0); - // COMPUTE @ 0x28 : Data signal of biases_V - VTAWriteMappedReg(vta_compute_handle_, 0x28, 0); - // STORE @ 0x10 : Data signal of outputs_V - VTAWriteMappedReg(vta_store_handle_, 0x10, 0); + VTAWriteMappedReg(vta_fetch_handle_, VTA_FETCH_INSN_COUNT_OFFSET, insn_count); + VTAWriteMappedReg(vta_fetch_handle_, VTA_FETCH_INSN_ADDR_OFFSET, insn_phy_addr); + VTAWriteMappedReg(vta_load_handle_, VTA_LOAD_INP_ADDR_OFFSET, 0); + VTAWriteMappedReg(vta_load_handle_, VTA_LOAD_WGT_ADDR_OFFSET, 0); + VTAWriteMappedReg(vta_compute_handle_, VTA_COMPUTE_UOP_ADDR_OFFSET, 0); + VTAWriteMappedReg(vta_compute_handle_, VTA_COMPUTE_BIAS_ADDR_OFFSET, 0); + VTAWriteMappedReg(vta_store_handle_, VTA_STORE_OUT_ADDR_OFFSET, 0); // VTA start VTAWriteMappedReg(vta_fetch_handle_, 0x0, VTA_START); @@ -138,7 +129,7 @@ class VTADevice { // Loop until the VTA is done unsigned t, flag = 0; for (t = 0; t < wait_cycles; ++t) { - flag = VTAReadMappedReg(vta_compute_handle_, 0x18); + flag = VTAReadMappedReg(vta_compute_handle_, VTA_COMPUTE_DONE_RD_OFFSET); if (flag == VTA_DONE) break; std::this_thread::yield(); } diff --git a/vta/tests/hardware/common/test_lib.cc b/vta/tests/hardware/common/test_lib.cc index 26d87efb5e0d1..5a4d709d7dbb7 100644 --- a/vta/tests/hardware/common/test_lib.cc +++ b/vta/tests/hardware/common/test_lib.cc @@ -71,20 +71,13 @@ uint64_t vta( clock_gettime(CLOCK_REALTIME, &start); - // FETCH @ 0x10 : Data signal of insn_count_V - VTAWriteMappedReg(vta_fetch_handle, 0x10, insn_count); - // FETCH @ 0x18 : Data signal of insns_V - if (insns) VTAWriteMappedReg(vta_fetch_handle, 0x18, insn_phy); - // LOAD @ 0x10 : Data signal of inputs_V - if (inputs) VTAWriteMappedReg(vta_load_handle, 0x10, input_phy); - // LOAD @ 0x18 : Data signal of weight_V - if (weights) VTAWriteMappedReg(vta_load_handle, 0x18, weight_phy); - // COMPUTE @ 0x20 : Data signal of uops_V - if (uops) VTAWriteMappedReg(vta_compute_handle, 
0x20, uop_phy); - // COMPUTE @ 0x28 : Data signal of biases_V - if (biases) VTAWriteMappedReg(vta_compute_handle, 0x28, bias_phy); - // STORE @ 0x10 : Data signal of outputs_V - if (outputs) VTAWriteMappedReg(vta_store_handle, 0x10, output_phy); + VTAWriteMappedReg(vta_fetch_handle, VTA_FETCH_INSN_COUNT_OFFSET, insn_count); + if (insns) VTAWriteMappedReg(vta_fetch_handle, VTA_FETCH_INSN_ADDR_OFFSET, insn_phy); + if (inputs) VTAWriteMappedReg(vta_load_handle, VTA_LOAD_INP_ADDR_OFFSET, input_phy); + if (weights) VTAWriteMappedReg(vta_load_handle, VTA_LOAD_WGT_ADDR_OFFSET, weight_phy); + if (uops) VTAWriteMappedReg(vta_compute_handle, VTA_COMPUTE_UOP_ADDR_OFFSET, uop_phy); + if (biases) VTAWriteMappedReg(vta_compute_handle, VTA_COMPUTE_BIAS_ADDR_OFFSET, bias_phy); + if (outputs) VTAWriteMappedReg(vta_store_handle, VTA_STORE_OUT_ADDR_OFFSET, output_phy); // VTA start VTAWriteMappedReg(vta_fetch_handle, 0x0, 0x1); @@ -94,7 +87,7 @@ uint64_t vta( int flag = 0, t = 0; for (t = 0; t < 10000000; ++t) { - flag = VTAReadMappedReg(vta_compute_handle, 0x18); + flag = VTAReadMappedReg(vta_compute_handle, VTA_COMPUTE_DONE_RD_OFFSET); if (flag & VTA_DONE) break; }