diff --git a/vta/apps/tsim_example/hardware/chisel/.scalafmt.conf b/vta/apps/tsim_example/hardware/chisel/.scalafmt.conf deleted file mode 100644 index 9172d5e9854a..000000000000 --- a/vta/apps/tsim_example/hardware/chisel/.scalafmt.conf +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -maxColumn = 100 -rewrite.rules = [SortModifiers, SortImports] diff --git a/vta/apps/tsim_example/hardware/chisel/Makefile b/vta/apps/tsim_example/hardware/chisel/Makefile index 0f979450c694..342e6896b000 100644 --- a/vta/apps/tsim_example/hardware/chisel/Makefile +++ b/vta/apps/tsim_example/hardware/chisel/Makefile @@ -94,7 +94,8 @@ endif default: lint lib lint: - sbt scalafmt + cp $(vta_dir)/hardware/chisel/scalastyle-config.xml . 
+ sbt scalastyle lib: $(lib_path) $(lib_path): $(verilator_build_dir)/V$(TOP).cpp diff --git a/vta/apps/tsim_example/hardware/chisel/project/plugins.sbt b/vta/apps/tsim_example/hardware/chisel/project/plugins.sbt index e14e694d6f67..19ae5c9d49b9 100644 --- a/vta/apps/tsim_example/hardware/chisel/project/plugins.sbt +++ b/vta/apps/tsim_example/hardware/chisel/project/plugins.sbt @@ -18,4 +18,4 @@ */ logLevel := Level.Warn -addSbtPlugin("com.geirsson" % "sbt-scalafmt" % "1.5.1") +addSbtPlugin("org.scalastyle" %% "scalastyle-sbt-plugin" % "1.0.0") diff --git a/vta/apps/tsim_example/hardware/chisel/src/main/scala/accel/Accel.scala b/vta/apps/tsim_example/hardware/chisel/src/main/scala/accel/Accel.scala index b90c72943c42..7ba1e633629f 100644 --- a/vta/apps/tsim_example/hardware/chisel/src/main/scala/accel/Accel.scala +++ b/vta/apps/tsim_example/hardware/chisel/src/main/scala/accel/Accel.scala @@ -23,18 +23,18 @@ import chisel3._ import vta.dpi._ /** Add-by-one accelerator. - * - * ___________ ___________ - * | | | | - * | HostDPI | <--> | RegFile | <->| - * |_________| |_________| | - * | - * ___________ ___________ | - * | | | | | - * | MemDPI | <--> | Compute | <->| - * |_________| |_________| - * - */ + * + * ___________ ___________ + * | | | | + * | HostDPI | <--> | RegFile | <->| + * |_________| |_________| | + * | + * ___________ ___________ | + * | | | | | + * | MemDPI | <--> | Compute | <->| + * |_________| |_________| + * + */ case class AccelConfig() { val nCtrl = 1 val nECnt = 1 diff --git a/vta/apps/tsim_example/hardware/chisel/src/main/scala/accel/Compute.scala b/vta/apps/tsim_example/hardware/chisel/src/main/scala/accel/Compute.scala index 7ad965c67f86..3ef2e7e69bdb 100644 --- a/vta/apps/tsim_example/hardware/chisel/src/main/scala/accel/Compute.scala +++ b/vta/apps/tsim_example/hardware/chisel/src/main/scala/accel/Compute.scala @@ -24,17 +24,17 @@ import chisel3.util._ import vta.dpi._ /** Compute - * - * Add-by-one procedure: - * - * 1. 
Wait for launch to be asserted - * 2. Issue a read request for 8-byte value at inp_baddr address - * 3. Wait for the value - * 4. Issue a write request for 8-byte value at out_baddr address - * 5. Increment read-address and write-address for next value - * 6. Check if counter (cnt) is equal to length to assert finish, - * otherwise go to step 2. - */ + * + * Add-by-one procedure: + * + * 1. Wait for launch to be asserted + * 2. Issue a read request for 8-byte value at inp_baddr address + * 3. Wait for the value + * 4. Issue a write request for 8-byte value at out_baddr address + * 5. Increment read-address and write-address for next value + * 6. Check if counter (cnt) is equal to length to assert finish, + * otherwise go to step 2. + */ class Compute(implicit config: AccelConfig) extends Module { val io = IO(new Bundle { val launch = Input(Bool()) diff --git a/vta/apps/tsim_example/hardware/chisel/src/main/scala/accel/RegFile.scala b/vta/apps/tsim_example/hardware/chisel/src/main/scala/accel/RegFile.scala index 1982f186a0e1..2764510a68ee 100644 --- a/vta/apps/tsim_example/hardware/chisel/src/main/scala/accel/RegFile.scala +++ b/vta/apps/tsim_example/hardware/chisel/src/main/scala/accel/RegFile.scala @@ -24,29 +24,29 @@ import chisel3.util._ import vta.dpi._ /** Register File. - * - * Six 32-bit register file. - * - * ------------------------------- - * Register description | addr - * -------------------------|----- - * Control status register | 0x00 - * Cycle counter | 0x04 - * Constant value | 0x08 - * Vector length | 0x0c - * Input pointer lsb | 0x10 - * Input pointer msb | 0x14 - * Output pointer lsb | 0x18 - * Output pointer msb | 0x1c - * ------------------------------- - - * ------------------------------ - * Control status register | bit - * ------------------------------ - * Launch | 0 - * Finish | 1 - * ------------------------------ - */ + * + * Six 32-bit register file. 
+ * + * ------------------------------- + * Register description | addr + * -------------------------|----- + * Control status register | 0x00 + * Cycle counter | 0x04 + * Constant value | 0x08 + * Vector length | 0x0c + * Input pointer lsb | 0x10 + * Input pointer msb | 0x14 + * Output pointer lsb | 0x18 + * Output pointer msb | 0x1c + * ------------------------------- + * + * ------------------------------ + * Control status register | bit + * ------------------------------ + * Launch | 0 + * Finish | 1 + * ------------------------------ + */ class RegFile(implicit config: AccelConfig) extends Module { val io = IO(new Bundle { val launch = Output(Bool()) @@ -98,9 +98,8 @@ class RegFile(implicit config: AccelConfig) extends Module { } for (i <- 0 until (config.nVals + (2 * config.nPtrs))) { - when( - state === sIdle && io.host.req.valid && - io.host.req.opcode && addr(vo + i).U === io.host.req.addr) { + when(state === sIdle && io.host.req.valid && + io.host.req.opcode && addr(vo + i).U === io.host.req.addr) { reg(vo + i) := io.host.req.value } } diff --git a/vta/hardware/chisel/.scalafmt.conf b/vta/hardware/chisel/.scalafmt.conf deleted file mode 100644 index 9172d5e9854a..000000000000 --- a/vta/hardware/chisel/.scalafmt.conf +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -maxColumn = 100 -rewrite.rules = [SortModifiers, SortImports] diff --git a/vta/hardware/chisel/Makefile b/vta/hardware/chisel/Makefile index a98c52c77078..9804230074ac 100644 --- a/vta/hardware/chisel/Makefile +++ b/vta/hardware/chisel/Makefile @@ -112,7 +112,7 @@ endif default: lint lib lint: - sbt scalafmt --test + sbt scalastyle lib: $(lib_path) diff --git a/vta/hardware/chisel/project/plugins.sbt b/vta/hardware/chisel/project/plugins.sbt index e14e694d6f67..19ae5c9d49b9 100644 --- a/vta/hardware/chisel/project/plugins.sbt +++ b/vta/hardware/chisel/project/plugins.sbt @@ -18,4 +18,4 @@ */ logLevel := Level.Warn -addSbtPlugin("com.geirsson" % "sbt-scalafmt" % "1.5.1") +addSbtPlugin("org.scalastyle" %% "scalastyle-sbt-plugin" % "1.0.0") diff --git a/vta/hardware/chisel/scalastyle-config.xml b/vta/hardware/chisel/scalastyle-config.xml new file mode 100644 index 000000000000..ae7c8e6b588a --- /dev/null +++ b/vta/hardware/chisel/scalastyle-config.xml @@ -0,0 +1,128 @@ + + Scalastyle standard configuration + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 2 + 2 + 4 + + + diff --git a/vta/hardware/chisel/src/main/scala/core/Compute.scala b/vta/hardware/chisel/src/main/scala/core/Compute.scala index 7751bf7ddf48..c605a1a1a824 100644 --- a/vta/hardware/chisel/src/main/scala/core/Compute.scala +++ b/vta/hardware/chisel/src/main/scala/core/Compute.scala @@ -25,13 +25,13 @@ import vta.util.config._ import vta.shell._ /** Compute. 
- * - * The compute unit is in charge of the following: - * - Loading micro-ops from memory (loadUop module) - * - Loading biases (acc) from memory (tensorAcc module) - * - Compute ALU instructions (tensorAlu module) - * - Compute GEMM instructions (tensorGemm module) - */ + * + * The compute unit is in charge of the following: + * - Loading micro-ops from memory (loadUop module) + * - Loading biases (acc) from memory (tensorAcc module) + * - Compute ALU instructions (tensorAlu module) + * - Compute GEMM instructions (tensorGemm module) + */ class Compute(debug: Boolean = false)(implicit p: Parameters) extends Module { val mp = p(ShellKey).memParams val io = IO(new Bundle { @@ -65,10 +65,10 @@ class Compute(debug: Boolean = false)(implicit p: Parameters) extends Module { val inst_type = Cat(dec.io.isFinish, - dec.io.isAlu, - dec.io.isGemm, - dec.io.isLoadAcc, - dec.io.isLoadUop).asUInt + dec.io.isAlu, + dec.io.isGemm, + dec.io.isLoadAcc, + dec.io.isLoadUop).asUInt val sprev = inst_q.io.deq.valid & Mux(dec.io.pop_prev, s(0).io.sready, true.B) val snext = inst_q.io.deq.valid & Mux(dec.io.pop_next, s(1).io.sready, true.B) @@ -116,20 +116,14 @@ class Compute(debug: Boolean = false)(implicit p: Parameters) extends Module { loadUop.io.inst := inst_q.io.deq.bits loadUop.io.baddr := io.uop_baddr io.vme_rd(0) <> loadUop.io.vme_rd - loadUop.io.uop.idx <> Mux(dec.io.isGemm, - tensorGemm.io.uop.idx, - tensorAlu.io.uop.idx) + loadUop.io.uop.idx <> Mux(dec.io.isGemm, tensorGemm.io.uop.idx, tensorAlu.io.uop.idx) // acc tensorAcc.io.start := state === sIdle & start & dec.io.isLoadAcc tensorAcc.io.inst := inst_q.io.deq.bits tensorAcc.io.baddr := io.acc_baddr - tensorAcc.io.tensor.rd.idx <> Mux(dec.io.isGemm, - tensorGemm.io.acc.rd.idx, - tensorAlu.io.acc.rd.idx) - tensorAcc.io.tensor.wr <> Mux(dec.io.isGemm, - tensorGemm.io.acc.wr, - tensorAlu.io.acc.wr) + tensorAcc.io.tensor.rd.idx <> Mux(dec.io.isGemm, tensorGemm.io.acc.rd.idx, tensorAlu.io.acc.rd.idx) + tensorAcc.io.tensor.wr <> 
Mux(dec.io.isGemm, tensorGemm.io.acc.wr, tensorAlu.io.acc.wr) io.vme_rd(1) <> tensorAcc.io.vme_rd // gemm @@ -156,8 +150,8 @@ class Compute(debug: Boolean = false)(implicit p: Parameters) extends Module { // out io.out.rd.idx <> Mux(dec.io.isGemm, - tensorGemm.io.out.rd.idx, - tensorAlu.io.out.rd.idx) + tensorGemm.io.out.rd.idx, + tensorAlu.io.out.rd.idx) io.out.wr <> Mux(dec.io.isGemm, tensorGemm.io.out.wr, tensorAlu.io.out.wr) // semaphore @@ -178,20 +172,16 @@ class Compute(debug: Boolean = false)(implicit p: Parameters) extends Module { when(dec.io.isSync) { printf("[Compute] start sync\n") }.elsewhen(dec.io.isLoadUop) { - printf("[Compute] start load uop\n") - } - .elsewhen(dec.io.isLoadAcc) { - printf("[Compute] start load acc\n") - } - .elsewhen(dec.io.isGemm) { - printf("[Compute] start gemm\n") - } - .elsewhen(dec.io.isAlu) { - printf("[Compute] start alu\n") - } - .elsewhen(dec.io.isFinish) { - printf("[Compute] start finish\n") - } + printf("[Compute] start load uop\n") + }.elsewhen(dec.io.isLoadAcc) { + printf("[Compute] start load acc\n") + }.elsewhen(dec.io.isGemm) { + printf("[Compute] start gemm\n") + }.elsewhen(dec.io.isAlu) { + printf("[Compute] start alu\n") + }.elsewhen(dec.io.isFinish) { + printf("[Compute] start finish\n") + } } // done when(state === sSync) { @@ -202,17 +192,14 @@ class Compute(debug: Boolean = false)(implicit p: Parameters) extends Module { when(dec.io.isLoadUop) { printf("[Compute] done load uop\n") }.elsewhen(dec.io.isLoadAcc) { - printf("[Compute] done load acc\n") - } - .elsewhen(dec.io.isGemm) { - printf("[Compute] done gemm\n") - } - .elsewhen(dec.io.isAlu) { - printf("[Compute] done alu\n") - } - .elsewhen(dec.io.isFinish) { - printf("[Compute] done finish\n") - } + printf("[Compute] done load acc\n") + }.elsewhen(dec.io.isGemm) { + printf("[Compute] done gemm\n") + }.elsewhen(dec.io.isAlu) { + printf("[Compute] done alu\n") + }.elsewhen(dec.io.isFinish) { + printf("[Compute] done finish\n") + } } } } diff --git 
a/vta/hardware/chisel/src/main/scala/core/Configs.scala b/vta/hardware/chisel/src/main/scala/core/Configs.scala index de7012b5cef3..4ab7d8503a0a 100644 --- a/vta/hardware/chisel/src/main/scala/core/Configs.scala +++ b/vta/hardware/chisel/src/main/scala/core/Configs.scala @@ -22,28 +22,27 @@ package vta.core import vta.util.config._ /** CoreConfig. - * - * This is one supported configuration for VTA. This file will - * be eventually filled out with class configurations that can be - * mixed/matched with Shell configurations for different backends. - */ -class CoreConfig - extends Config((site, here, up) => { - case CoreKey => - CoreParams( - batch = 1, - blockOut = 16, - blockIn = 16, - inpBits = 8, - wgtBits = 8, - uopBits = 32, - accBits = 32, - outBits = 8, - uopMemDepth = 2048, - inpMemDepth = 2048, - wgtMemDepth = 1024, - accMemDepth = 2048, - outMemDepth = 2048, - instQueueEntries = 512 - ) - }) + * + * This is one supported configuration for VTA. This file will + * be eventually filled out with class configurations that can be + * mixed/matched with Shell configurations for different backends. 
+ */ +class CoreConfig extends Config((site, here, up) => { + case CoreKey => + CoreParams( + batch = 1, + blockOut = 16, + blockIn = 16, + inpBits = 8, + wgtBits = 8, + uopBits = 32, + accBits = 32, + outBits = 8, + uopMemDepth = 2048, + inpMemDepth = 2048, + wgtMemDepth = 1024, + accMemDepth = 2048, + outMemDepth = 2048, + instQueueEntries = 512 + ) +}) diff --git a/vta/hardware/chisel/src/main/scala/core/Core.scala b/vta/hardware/chisel/src/main/scala/core/Core.scala index a7228ee898fa..6bfffdc212b7 100644 --- a/vta/hardware/chisel/src/main/scala/core/Core.scala +++ b/vta/hardware/chisel/src/main/scala/core/Core.scala @@ -41,23 +41,23 @@ case class CoreParams( instQueueEntries: Int = 32 ) { require(uopBits % 8 == 0, - s"\n\n[VTA] [CoreParams] uopBits must be byte aligned\n\n") + s"\n\n[VTA] [CoreParams] uopBits must be byte aligned\n\n") } case object CoreKey extends Field[CoreParams] /** Core. - * - * The core defines the current VTA architecture by connecting memory and - * compute modules together such as load/store and compute. Most of the - * connections in the core are bulk (<>), and we should try to keep it this - * way, because it is easier to understand what is going on. - * - * Also, the core must be instantiated by a shell using the - * VTA Control Register (VCR) and the VTA Memory Engine (VME) interfaces. - * More info about these interfaces and modules can be found in the shell - * directory. - */ + * + * The core defines the current VTA architecture by connecting memory and + * compute modules together such as load/store and compute. Most of the + * connections in the core are bulk (<>), and we should try to keep it this + * way, because it is easier to understand what is going on. + * + * Also, the core must be instantiated by a shell using the + * VTA Control Register (VCR) and the VTA Memory Engine (VME) interfaces. + * More info about these interfaces and modules can be found in the shell + * directory. 
+ */ class Core(implicit p: Parameters) extends Module { val io = IO(new Bundle { val vcr = new VCRClient diff --git a/vta/hardware/chisel/src/main/scala/core/Decode.scala b/vta/hardware/chisel/src/main/scala/core/Decode.scala index a49ddce5367a..37f6ab40584c 100644 --- a/vta/hardware/chisel/src/main/scala/core/Decode.scala +++ b/vta/hardware/chisel/src/main/scala/core/Decode.scala @@ -25,16 +25,16 @@ import chisel3.util._ import ISA._ /** MemDecode. - * - * Decode memory instructions with a Bundle. This is similar to an union, - * therefore order matters when declaring fields. These are the instructions - * decoded with this bundle: - * - LUOP - * - LWGT - * - LINP - * - LACC - * - SOUT - */ + * + * Decode memory instructions with a Bundle. This is similar to an union, + * therefore order matters when declaring fields. These are the instructions + * decoded with this bundle: + * - LUOP + * - LWGT + * - LINP + * - LACC + * - SOUT + */ class MemDecode extends Bundle { val xpad_1 = UInt(M_PAD_BITS.W) val xpad_0 = UInt(M_PAD_BITS.W) @@ -55,10 +55,10 @@ class MemDecode extends Bundle { } /** GemmDecode. - * - * Decode GEMM instruction with a Bundle. This is similar to an union, - * therefore order matters when declaring fields. - */ + * + * Decode GEMM instruction with a Bundle. This is similar to an union, + * therefore order matters when declaring fields. + */ class GemmDecode extends Bundle { val wgt_1 = UInt(C_WIDX_BITS.W) val wgt_0 = UInt(C_WIDX_BITS.W) @@ -80,15 +80,15 @@ class GemmDecode extends Bundle { } /** AluDecode. - * - * Decode ALU instructions with a Bundle. This is similar to an union, - * therefore order matters when declaring fields. These are the instructions - * decoded with this bundle: - * - VMIN - * - VMAX - * - VADD - * - VSHX - */ + * + * Decode ALU instructions with a Bundle. This is similar to an union, + * therefore order matters when declaring fields. 
These are the instructions + * decoded with this bundle: + * - VMIN + * - VMAX + * - VADD + * - VSHX + */ class AluDecode extends Bundle { val empty_1 = Bool() val alu_imm = UInt(C_ALU_IMM_BITS.W) @@ -112,9 +112,9 @@ class AluDecode extends Bundle { } /** UopDecode. - * - * Decode micro-ops (uops). - */ + * + * Decode micro-ops (uops). + */ class UopDecode extends Bundle { val u2 = UInt(10.W) val u1 = UInt(11.W) @@ -122,9 +122,9 @@ class UopDecode extends Bundle { } /** FetchDecode. - * - * Partial decoding for dispatching instructions to Load, Compute, and Store. - */ + * + * Partial decoding for dispatching instructions to Load, Compute, and Store. + */ class FetchDecode extends Module { val io = IO(new Bundle { val inst = Input(UInt(INST_BITS.W)) @@ -159,9 +159,9 @@ class FetchDecode extends Module { } /** LoadDecode. - * - * Decode dependencies, type and sync for Load module. - */ + * + * Decode dependencies, type and sync for Load module. + */ class LoadDecode extends Module { val io = IO(new Bundle { val inst = Input(UInt(INST_BITS.W)) @@ -180,9 +180,9 @@ class LoadDecode extends Module { } /** ComputeDecode. - * - * Decode dependencies, type and sync for Compute module. - */ + * + * Decode dependencies, type and sync for Compute module. + */ class ComputeDecode extends Module { val io = IO(new Bundle { val inst = Input(UInt(INST_BITS.W)) @@ -211,9 +211,9 @@ class ComputeDecode extends Module { } /** StoreDecode. - * - * Decode dependencies, type and sync for Store module. - */ + * + * Decode dependencies, type and sync for Store module. 
+ */ class StoreDecode extends Module { val io = IO(new Bundle { val inst = Input(UInt(INST_BITS.W)) diff --git a/vta/hardware/chisel/src/main/scala/core/EventCounters.scala b/vta/hardware/chisel/src/main/scala/core/EventCounters.scala index 8990aefd6054..f9fd7f0be105 100644 --- a/vta/hardware/chisel/src/main/scala/core/EventCounters.scala +++ b/vta/hardware/chisel/src/main/scala/core/EventCounters.scala @@ -25,21 +25,20 @@ import vta.util.config._ import vta.shell._ /** EventCounters. - * - * This unit contains all the event counting logic. One common event tracked in - * hardware is the number of clock cycles taken to achieve certain task. We - * can count the total number of clock cycles spent in a VTA run by checking - * launch and finish signals. - * - * The event counter value is passed to the VCR module via the ecnt port, so - * they can be accessed by the host. The number of event counters (nECnt) is - * defined in the Shell VCR module as a parameter, see VCRParams. - * - * If one would like to add an event counter, then the value of nECnt must be - * changed in VCRParams together with the corresponding counting logic here. - */ -class EventCounters(debug: Boolean = false)(implicit p: Parameters) - extends Module { + * + * This unit contains all the event counting logic. One common event tracked in + * hardware is the number of clock cycles taken to achieve certain task. We + * can count the total number of clock cycles spent in a VTA run by checking + * launch and finish signals. + * + * The event counter value is passed to the VCR module via the ecnt port, so + * they can be accessed by the host. The number of event counters (nECnt) is + * defined in the Shell VCR module as a parameter, see VCRParams. + * + * If one would like to add an event counter, then the value of nECnt must be + * changed in VCRParams together with the corresponding counting logic here. 
+ */ +class EventCounters(debug: Boolean = false)(implicit p: Parameters) extends Module { val vp = p(ShellKey).vcrParams val io = IO(new Bundle { val launch = Input(Bool()) diff --git a/vta/hardware/chisel/src/main/scala/core/Fetch.scala b/vta/hardware/chisel/src/main/scala/core/Fetch.scala index 9baf1cc33984..0ea35a3e653a 100644 --- a/vta/hardware/chisel/src/main/scala/core/Fetch.scala +++ b/vta/hardware/chisel/src/main/scala/core/Fetch.scala @@ -25,20 +25,20 @@ import vta.util.config._ import vta.shell._ /** Fetch. - * - * The fetch unit reads instructions (tasks) from memory (i.e. DRAM), using the - * VTA Memory Engine (VME), and push them into an instruction queue called - * inst_q. Once the instruction queue is full, instructions are dispatched to - * the Load, Compute and Store module queues based on the instruction opcode. - * After draining the queue, the fetch unit checks if there are more instructions - * via the ins_count register which is written by the host. - * - * Additionally, instructions are read into two chunks (see sReadLSB and sReadMSB) - * because we are using a DRAM payload of 8-bytes or half of a VTA instruction. - * This should be configurable for larger payloads, i.e. 64-bytes, which can load - * more than one instruction at the time. Finally, the instruction queue is - * sized (entries_q), depending on the maximum burst allowed in the memory. - */ + * + * The fetch unit reads instructions (tasks) from memory (i.e. DRAM), using the + * VTA Memory Engine (VME), and push them into an instruction queue called + * inst_q. Once the instruction queue is full, instructions are dispatched to + * the Load, Compute and Store module queues based on the instruction opcode. + * After draining the queue, the fetch unit checks if there are more instructions + * via the ins_count register which is written by the host. 
+ * + * Additionally, instructions are read into two chunks (see sReadLSB and sReadMSB) + * because we are using a DRAM payload of 8-bytes or half of a VTA instruction. + * This should be configurable for larger payloads, i.e. 64-bytes, which can load + * more than one instruction at the time. Finally, the instruction queue is + * sized (entries_q), depending on the maximum burst allowed in the memory. + */ class Fetch(debug: Boolean = false)(implicit p: Parameters) extends Module { val vp = p(ShellKey).vcrParams val mp = p(ShellKey).memParams @@ -112,17 +112,16 @@ class Fetch(debug: Boolean = false)(implicit p: Parameters) extends Module { when(xrem === 0.U) { state := sIdle }.elsewhen(xrem < xmax) { - state := sReadCmd - rlen := xrem - ilen := xrem >> 1.U - xrem := 0.U - } - .otherwise { - state := sReadCmd - rlen := xmax - 1.U - ilen := (xmax >> 1.U) - 1.U - xrem := xrem - xmax - } + state := sReadCmd + rlen := xrem + ilen := xrem >> 1.U + xrem := 0.U + }.otherwise { + state := sReadCmd + rlen := xmax - 1.U + ilen := (xmax >> 1.U) - 1.U + xrem := xrem - xmax + } } } } @@ -165,12 +164,12 @@ class Fetch(debug: Boolean = false)(implicit p: Parameters) extends Module { val deq_sel = Cat(dec.io.isCompute, dec.io.isStore, dec.io.isLoad).asUInt val deq_ready = MuxLookup(deq_sel, - false.B, // default - Array( - "h_01".U -> io.inst.ld.ready, - "h_02".U -> io.inst.st.ready, - "h_04".U -> io.inst.co.ready - )) + false.B, // default + Array( + "h_01".U -> io.inst.ld.ready, + "h_02".U -> io.inst.st.ready, + "h_04".U -> io.inst.co.ready + )) // dequeue instruction inst_q.io.deq.ready := deq_ready & inst_q.io.deq.valid & state === sDrain diff --git a/vta/hardware/chisel/src/main/scala/core/ISA.scala b/vta/hardware/chisel/src/main/scala/core/ISA.scala index edc182311adb..bfe89ebb41f5 100644 --- a/vta/hardware/chisel/src/main/scala/core/ISA.scala +++ b/vta/hardware/chisel/src/main/scala/core/ISA.scala @@ -24,9 +24,9 @@ import chisel3.util._ import 
scala.collection.mutable.HashMap /** ISAConstants. - * - * These constants are used for decoding (parsing) fields on instructions. - */ + * + * These constants are used for decoding (parsing) fields on instructions. + */ trait ISAConstants { val INST_BITS = 128 @@ -70,13 +70,13 @@ trait ISAConstants { } /** ISA. - * - * This is the VTA task ISA - * - * TODO: Add VXOR to clear accumulator - * TODO: Use ISA object for decoding as well - * TODO: Eventually deprecate ISAConstants - */ + * + * This is the VTA task ISA + * + * TODO: Add VXOR to clear accumulator + * TODO: Use ISA object for decoding as well + * TODO: Eventually deprecate ISAConstants + */ object ISA { private val xLen = 128 private val depBits = 4 @@ -86,19 +86,19 @@ object ISA { private val taskId: HashMap[String, String] = HashMap(("load", "000"), - ("store", "001"), - ("gemm", "010"), - ("finish", "011"), - ("alu", "100")) + ("store", "001"), + ("gemm", "010"), + ("finish", "011"), + ("alu", "100")) private val memId: HashMap[String, String] = HashMap(("uop", "00"), ("wgt", "01"), ("inp", "10"), ("acc", "11")) private val aluId: HashMap[String, String] = HashMap(("minpool", "00"), - ("maxpool", "01"), - ("add", "10"), - ("shift", "11")) + ("maxpool", "01"), + ("add", "10"), + ("shift", "11")) private def dontCare(bits: Int): String = "?" * bits diff --git a/vta/hardware/chisel/src/main/scala/core/Load.scala b/vta/hardware/chisel/src/main/scala/core/Load.scala index 7c79498bd8f0..50c26bb8e8ed 100644 --- a/vta/hardware/chisel/src/main/scala/core/Load.scala +++ b/vta/hardware/chisel/src/main/scala/core/Load.scala @@ -25,12 +25,12 @@ import vta.util.config._ import vta.shell._ /** Load. - * - * Load inputs and weights from memory (DRAM) into scratchpads (SRAMs). - * This module instantiate the TensorLoad unit which is in charge of - * loading 1D and 2D tensors to scratchpads, so it can be used by - * other modules such as Compute. 
- */ + * + * Load inputs and weights from memory (DRAM) into scratchpads (SRAMs). + * This module instantiates the TensorLoad unit which is in charge of + * loading 1D and 2D tensors to scratchpads, so it can be used by + * other modules such as Compute. + */ class Load(debug: Boolean = false)(implicit p: Parameters) extends Module { val mp = p(ShellKey).memParams val io = IO(new Bundle { @@ -110,11 +110,10 @@ class Load(debug: Boolean = false)(implicit p: Parameters) extends Module { when(dec.io.isSync) { printf("[Load] start sync\n") }.elsewhen(dec.io.isInput) { - printf("[Load] start input\n") - } - .elsewhen(dec.io.isWeight) { - printf("[Load] start weight\n") - } + printf("[Load] start input\n") + }.elsewhen(dec.io.isWeight) { + printf("[Load] start weight\n") + } } // done when(state === sSync) { diff --git a/vta/hardware/chisel/src/main/scala/core/LoadUop.scala b/vta/hardware/chisel/src/main/scala/core/LoadUop.scala index c7d0f4439858..274581f475b3 100644 --- a/vta/hardware/chisel/src/main/scala/core/LoadUop.scala +++ b/vta/hardware/chisel/src/main/scala/core/LoadUop.scala @@ -25,11 +25,11 @@ import vta.util.config._ import vta.shell._ /** UopMaster. - * - * Uop interface used by a master module, i.e. TensorAlu or TensorGemm, - * to request a micro-op (uop) from the uop-scratchpad. The index (idx) is - * used as an address to find the uop in the uop-scratchpad. - */ + * + * Uop interface used by a master module, i.e. TensorAlu or TensorGemm, + * to request a micro-op (uop) from the uop-scratchpad. The index (idx) is + * used as an address to find the uop in the uop-scratchpad. + */ class UopMaster(implicit p: Parameters) extends Bundle { val addrBits = log2Ceil(p(CoreKey).uopMemDepth) val idx = ValidIO(UInt(addrBits.W)) @@ -38,11 +38,11 @@ class UopMaster(implicit p: Parameters) extends Bundle { } /** UopClient. - * - * Uop interface used by a client module, i.e. LoadUop, to receive - * a request from a master module, i.e. TensorAlu or TensorGemm. 
- * The index (idx) is used as an address to find the uop in the uop-scratchpad. - */ + * + * Uop interface used by a client module, i.e. LoadUop, to receive + * a request from a master module, i.e. TensorAlu or TensorGemm. + * The index (idx) is used as an address to find the uop in the uop-scratchpad. + */ class UopClient(implicit p: Parameters) extends Bundle { val addrBits = log2Ceil(p(CoreKey).uopMemDepth) val idx = Flipped(ValidIO(UInt(addrBits.W))) @@ -51,12 +51,12 @@ class UopClient(implicit p: Parameters) extends Bundle { } /** LoadUop. - * - * Load micro-ops (uops) from memory, i.e. DRAM, and store them in the - * uop-scratchpad. Currently, micro-ops are 32-bit wide and loaded in - * group of 2 given the fact that the DRAM payload is 8-bytes. This module - * should be modified later on to support different DRAM sizes efficiently. - */ + * + * Load micro-ops (uops) from memory, i.e. DRAM, and store them in the + * uop-scratchpad. Currently, micro-ops are 32-bit wide and loaded in + * group of 2 given the fact that the DRAM payload is 8-bytes. This module + * should be modified later on to support different DRAM sizes efficiently. 
+ */ class LoadUop(debug: Boolean = false)(implicit p: Parameters) extends Module { val mp = p(ShellKey).memParams val io = IO(new Bundle { @@ -113,15 +113,14 @@ class LoadUop(debug: Boolean = false)(implicit p: Parameters) extends Module { when(xrem === 0.U) { state := sIdle }.elsewhen(xrem < xmax) { - state := sReadCmd - xlen := xrem - xrem := 0.U - } - .otherwise { - state := sReadCmd - xlen := xmax - 1.U - xrem := xrem - xmax - } + state := sReadCmd + xlen := xrem + xrem := 0.U + }.otherwise { + state := sReadCmd + xlen := xmax - 1.U + xrem := xrem - xmax + } } } } @@ -166,19 +165,18 @@ class LoadUop(debug: Boolean = false)(implicit p: Parameters) extends Module { when(sizeIsEven) { wmask := "b_11".U.asTypeOf(wmask) }.elsewhen(io.vme_rd.cmd.fire()) { - when(dec.xsize === 1.U) { - wmask := "b_01".U.asTypeOf(wmask) - }.otherwise { - wmask := "b_11".U.asTypeOf(wmask) - } + when(dec.xsize === 1.U) { + wmask := "b_01".U.asTypeOf(wmask) + }.otherwise { + wmask := "b_11".U.asTypeOf(wmask) } - .elsewhen(io.vme_rd.data.fire()) { - when((xcnt === xlen - 1.U) && (xrem === 0.U)) { - wmask := "b_01".U.asTypeOf(wmask) - }.otherwise { - wmask := "b_11".U.asTypeOf(wmask) - } + }.elsewhen(io.vme_rd.data.fire()) { + when((xcnt === xlen - 1.U) && (xrem === 0.U)) { + wmask := "b_01".U.asTypeOf(wmask) + }.otherwise { + wmask := "b_11".U.asTypeOf(wmask) } + } }.otherwise { when(io.vme_rd.cmd.fire()) { wmask := "b_10".U.asTypeOf(wmask) diff --git a/vta/hardware/chisel/src/main/scala/core/Semaphore.scala b/vta/hardware/chisel/src/main/scala/core/Semaphore.scala index f268e79b228b..efc895bc673e 100644 --- a/vta/hardware/chisel/src/main/scala/core/Semaphore.scala +++ b/vta/hardware/chisel/src/main/scala/core/Semaphore.scala @@ -23,14 +23,13 @@ import chisel3._ import chisel3.util._ /** Semaphore. - * - * This semaphore is used instead of push/pop fifo, used in the initial - * version of VTA. 
This semaphore is incremented (spost) or decremented (swait) - * depending on the push and pop fields on instructions to prevent RAW and WAR - * hazards. - */ -class Semaphore(counterBits: Int = 1, counterInitValue: Int = 1) - extends Module { + * + * This semaphore is used instead of push/pop fifo, used in the initial + * version of VTA. This semaphore is incremented (spost) or decremented (swait) + * depending on the push and pop fields on instructions to prevent RAW and WAR + * hazards. + */ +class Semaphore(counterBits: Int = 1, counterInitValue: Int = 1) extends Module { val io = IO(new Bundle { val spost = Input(Bool()) val swait = Input(Bool()) diff --git a/vta/hardware/chisel/src/main/scala/core/Store.scala b/vta/hardware/chisel/src/main/scala/core/Store.scala index 04bc7f5be60b..025a0a24696b 100644 --- a/vta/hardware/chisel/src/main/scala/core/Store.scala +++ b/vta/hardware/chisel/src/main/scala/core/Store.scala @@ -25,11 +25,11 @@ import vta.util.config._ import vta.shell._ /** Store. - * - * Store results back to memory (DRAM) from scratchpads (SRAMs). - * This module instantiate the TensorStore unit which is in charge - * of storing 1D and 2D tensors to main memory. - */ + * + * Store results back to memory (DRAM) from scratchpads (SRAMs). + * This module instantiate the TensorStore unit which is in charge + * of storing 1D and 2D tensors to main memory. 
+ */ class Store(debug: Boolean = false)(implicit p: Parameters) extends Module { val mp = p(ShellKey).memParams val io = IO(new Bundle { diff --git a/vta/hardware/chisel/src/main/scala/core/TensorAlu.scala b/vta/hardware/chisel/src/main/scala/core/TensorAlu.scala index 21e57f2dab94..a6feffaa18dc 100644 --- a/vta/hardware/chisel/src/main/scala/core/TensorAlu.scala +++ b/vta/hardware/chisel/src/main/scala/core/TensorAlu.scala @@ -39,11 +39,8 @@ class Alu(implicit p: Parameters) extends Module { val m = ~ub(width - 1, 0) + 1.U val n = ub(width - 1, 0) - val fop = Seq(Mux(io.a < io.b, io.a, io.b), - Mux(io.a < io.b, io.b, io.a), - io.a + io.b, - io.a >> n, - io.a << m) + val fop = Seq(Mux(io.a < io.b, io.a, io.b), Mux(io.a < io.b, io.b, io.a), + io.a + io.b, io.a >> n, io.a << m) val opmux = Seq.tabulate(ALU_OP_NUM)(i => ALU_OP(i) -> fop(i)) io.y := MuxLookup(io.opcode, io.a, opmux) @@ -101,12 +98,12 @@ class AluVector(implicit p: Parameters) extends Module { } /** TensorAlu. - * - * This unit instantiate the ALU vector unit (AluVector) and go over the - * micro-ops (uops) which are used to read the source operands (vectors) - * from the acc-scratchpad and then they are written back the same - * acc-scratchpad. - */ + * + * This unit instantiate the ALU vector unit (AluVector) and go over the + * micro-ops (uops) which are used to read the source operands (vectors) + * from the acc-scratchpad and then they are written back the same + * acc-scratchpad. 
+ */ class TensorAlu(debug: Boolean = false)(implicit p: Parameters) extends Module { val aluBits = p(CoreKey).accBits val io = IO(new Bundle { @@ -200,18 +197,14 @@ class TensorAlu(debug: Boolean = false)(implicit p: Parameters) extends Module { dst_i := 0.U src_i := 0.U }.elsewhen(state === sReadUop && cnt_i === dec.lp_1) { - cnt_i := 0.U - dst_i := dst_o - src_i := src_o - } - .elsewhen( - state === sExe && - alu.io.out.data.valid && - uop_idx === uop_end - 1.U) { - cnt_i := cnt_i + 1.U - dst_i := dst_i + dec.dst_1 - src_i := src_i + dec.src_1 - } + cnt_i := 0.U + dst_i := dst_o + src_i := src_o + }.elsewhen(state === sExe && alu.io.out.data.valid && uop_idx === uop_end - 1.U) { + cnt_i := cnt_i + 1.U + dst_i := dst_i + dec.dst_1 + src_i := src_i + dec.src_1 + } when(state === sComputeIdx && io.uop.data.valid) { uop_dst := io.uop.data.bits.u0 + dst_i @@ -232,7 +225,7 @@ class TensorAlu(debug: Boolean = false)(implicit p: Parameters) extends Module { tensorImm.data.bits.foreach { b => b.foreach { c => c := Mux(dec.alu_imm(C_ALU_IMM_BITS - 1), - Cat(-1.S((aluBits - C_ALU_IMM_BITS).W), dec.alu_imm), dec.alu_imm) + Cat(-1.S((aluBits - C_ALU_IMM_BITS).W), dec.alu_imm), dec.alu_imm) } } @@ -244,11 +237,11 @@ class TensorAlu(debug: Boolean = false)(implicit p: Parameters) extends Module { alu.io.acc_a.data.valid := io.acc.rd.data.valid & state === sReadTensorB alu.io.acc_a.data.bits <> io.acc.rd.data.bits alu.io.acc_b.data.valid := Mux(dec.alu_use_imm, - tensorImm.data.valid, - io.acc.rd.data.valid & state === sExe) + tensorImm.data.valid, + io.acc.rd.data.valid & state === sExe) alu.io.acc_b.data.bits <> Mux(dec.alu_use_imm, - tensorImm.data.bits, - io.acc.rd.data.bits) + tensorImm.data.bits, + io.acc.rd.data.bits) // acc_o io.acc.wr.valid := alu.io.acc_y.data.valid diff --git a/vta/hardware/chisel/src/main/scala/core/TensorGemm.scala b/vta/hardware/chisel/src/main/scala/core/TensorGemm.scala index f982176c1e81..f2d295f66220 100644 --- 
a/vta/hardware/chisel/src/main/scala/core/TensorGemm.scala +++ b/vta/hardware/chisel/src/main/scala/core/TensorGemm.scala @@ -47,9 +47,9 @@ class MAC(aBits: Int = 8, bBits: Int = 8, cBits: Int = 16) extends Module { } /** PipeAdder - * - * This unit loads input bits into register and performs addition in the next cycle - */ + * + * This unit loads input bits into register and performs addition in the next cycle + */ class PipeAdder(aBits: Int = 8, bBits: Int = 8) extends Module { val outBits = Math.max(aBits, bBits) + 1 val io = IO(new Bundle { @@ -65,10 +65,10 @@ class PipeAdder(aBits: Int = 8, bBits: Int = 8) extends Module { } /** Adder - * - * This unit wires input bits to an adder directly. - * The output comes out of combinational logic without waiting for another cycle. - */ + * + * This unit wires input bits to an adder directly. + * The output comes out of combinational logic without waiting for another cycle. + */ class Adder(aBits: Int = 8, bBits: Int = 8) extends Module { val outBits = Math.max(aBits, bBits) + 1 val io = IO(new Bundle { @@ -86,8 +86,7 @@ class Adder(aBits: Int = 8, bBits: Int = 8) extends Module { } /** Pipelined DotProduct based on MAC and PipeAdder */ -class DotProduct(aBits: Int = 8, bBits: Int = 8, size: Int = 16) - extends Module { +class DotProduct(aBits: Int = 8, bBits: Int = 8, size: Int = 16) extends Module { val errorMsg = s"\n\n[VTA] [DotProduct] size must be greater than 4 and a power of 2\n\n" require(size >= 2 && isPow2(size), errorMsg) @@ -175,16 +174,15 @@ class MatrixVectorMultiplication(implicit p: Parameters) extends Module { } /** TensorGemm. - * - * This unit instantiate the MatrixVectorMultiplication and go over the - * micro-ops (uops) which are used to read inputs, weights and biases, - * and writes results back to the acc and out scratchpads. - * - * Also, the TensorGemm uses the reset field in the Gemm instruction to - * clear or zero-out the acc-scratchpad locations based on the micro-ops. 
- */ -class TensorGemm(debug: Boolean = false)(implicit p: Parameters) - extends Module { + * + * This unit instantiate the MatrixVectorMultiplication and go over the + * micro-ops (uops) which are used to read inputs, weights and biases, + * and writes results back to the acc and out scratchpads. + * + * Also, the TensorGemm uses the reset field in the Gemm instruction to + * clear or zero-out the acc-scratchpad locations based on the micro-ops. + */ +class TensorGemm(debug: Boolean = false)(implicit p: Parameters) extends Module { val io = IO(new Bundle { val start = Input(Bool()) val done = Output(Bool()) @@ -268,11 +266,10 @@ class TensorGemm(debug: Boolean = false)(implicit p: Parameters) when((state === sReadTensor) && mvc.io.acc_o.data.valid) { // issue & commit inflight := inflight }.elsewhen(state === sReadTensor) { // issue a tensor - inflight := inflight + 1.U - } - .elsewhen(mvc.io.acc_o.data.valid) { // commit a tensor - inflight := inflight - 1.U - } + inflight := inflight + 1.U + }.elsewhen(mvc.io.acc_o.data.valid) { // commit a tensor + inflight := inflight - 1.U + } } when( @@ -305,17 +302,16 @@ class TensorGemm(debug: Boolean = false)(implicit p: Parameters) inp_i := 0.U wgt_i := 0.U }.elsewhen(state === sReadUop && cnt_i === dec.lp_1) { - cnt_i := 0.U - acc_i := acc_o - inp_i := inp_o - wgt_i := wgt_o - } - .elsewhen(state === sExe && uop_idx === uop_end - 1.U) { - cnt_i := cnt_i + 1.U - acc_i := acc_i + dec.acc_1 - inp_i := inp_i + dec.inp_1 - wgt_i := wgt_i + dec.wgt_1 - } + cnt_i := 0.U + acc_i := acc_o + inp_i := inp_o + wgt_i := wgt_o + }.elsewhen(state === sExe && uop_idx === uop_end - 1.U) { + cnt_i := cnt_i + 1.U + acc_i := acc_i + dec.acc_1 + inp_i := inp_i + dec.inp_1 + wgt_i := wgt_i + dec.wgt_1 + } when(state === sComputeIdx && io.uop.data.valid) { uop_acc := io.uop.data.bits.u0 + acc_i @@ -351,9 +347,8 @@ class TensorGemm(debug: Boolean = false)(implicit p: Parameters) mvc.io.acc_i.data <> io.acc.rd.data // acc_o - io.acc.wr.valid := 
mvc.io.acc_o.data.valid & Mux(dec.reset, - true.B, - wrpipe.io.deq.valid) + io.acc.wr.valid := mvc.io.acc_o.data.valid & + Mux(dec.reset, true.B, wrpipe.io.deq.valid) io.acc.wr.bits.idx := Mux(dec.reset, uop_acc, wrpipe.io.deq.bits) io.acc.wr.bits.data <> mvc.io.acc_o.data.bits @@ -371,10 +366,7 @@ class TensorGemm(debug: Boolean = false)(implicit p: Parameters) } when(state === sReadTensor && ~dec.reset) { - printf("[TensorGemm] [uop] acc:%x inp:%x wgt:%x\n", - uop_acc, - uop_inp, - uop_wgt) + printf("[TensorGemm] [uop] acc:%x inp:%x wgt:%x\n", uop_acc, uop_inp, uop_wgt) } io.inp.rd.data.bits.zipWithIndex.foreach { diff --git a/vta/hardware/chisel/src/main/scala/core/TensorLoad.scala b/vta/hardware/chisel/src/main/scala/core/TensorLoad.scala index d184cd2c286a..f5cc849547a6 100644 --- a/vta/hardware/chisel/src/main/scala/core/TensorLoad.scala +++ b/vta/hardware/chisel/src/main/scala/core/TensorLoad.scala @@ -25,13 +25,13 @@ import vta.util.config._ import vta.shell._ /** TensorLoad. - * - * Load 1D and 2D tensors from main memory (DRAM) to input/weight - * scratchpads (SRAM). Also, there is support for zero padding, while - * doing the load. Zero-padding works on the y and x axis, and it is - * managed by TensorPadCtrl. The TensorDataCtrl is in charge of - * handling the way tensors are stored on the scratchpads. - */ + * + * Load 1D and 2D tensors from main memory (DRAM) to input/weight + * scratchpads (SRAM). Also, there is support for zero padding, while + * doing the load. Zero-padding works on the y and x axis, and it is + * managed by TensorPadCtrl. The TensorDataCtrl is in charge of + * handling the way tensors are stored on the scratchpads. 
+ */ class TensorLoad(tensorType: String = "none", debug: Boolean = false)( implicit p: Parameters) extends Module { @@ -71,11 +71,10 @@ class TensorLoad(tensorType: String = "none", debug: Boolean = false)( when(dec.ypad_0 =/= 0.U) { state := sYPad0 }.elsewhen(dec.xpad_0 =/= 0.U) { - state := sXPad0 - } - .otherwise { - state := sReadCmd - } + state := sXPad0 + }.otherwise { + state := sReadCmd + } } } is(sYPad0) { @@ -213,13 +212,12 @@ class TensorLoad(tensorType: String = "none", debug: Boolean = false)( waddr_cur := dec.sram_offset waddr_nxt := dec.sram_offset }.elsewhen((io.vme_rd.data - .fire() || isZeroPad) && set === (tp.tensorLength - 1).U && tag === (tp.numMemBlock - 1).U) { - waddr_cur := waddr_cur + 1.U - } - .elsewhen(dataCtrl.io.stride) { - waddr_cur := waddr_nxt + dec.xsize - waddr_nxt := waddr_nxt + dec.xsize - } + .fire() || isZeroPad) && set === (tp.tensorLength - 1).U && tag === (tp.numMemBlock - 1).U) { + waddr_cur := waddr_cur + 1.U + }.elsewhen(dataCtrl.io.stride) { + waddr_cur := waddr_nxt + dec.xsize + waddr_nxt := waddr_nxt + dec.xsize + } val tensorFile = Seq.fill(tp.tensorLength) { SyncReadMem(tp.memDepth, Vec(tp.numMemBlock, UInt(tp.memBlockBits.W))) @@ -241,8 +239,8 @@ class TensorLoad(tensorType: String = "none", debug: Boolean = false)( val tdata = io.tensor.wr.bits.data(i).asUInt.asTypeOf(wdata(i)) val muxWen = Mux(state === sIdle, - io.tensor.wr.valid, - (io.vme_rd.data.fire() | isZeroPad) & set === i.U) + io.tensor.wr.valid, + (io.vme_rd.data.fire() | isZeroPad) & set === i.U) val muxWaddr = Mux(state === sIdle, io.tensor.wr.bits.idx, waddr_cur) val muxWdata = Mux(state === sIdle, tdata, wdata(i)) val muxWmask = Mux(state === sIdle, no_mask, wmask(i)) @@ -274,8 +272,8 @@ class TensorLoad(tensorType: String = "none", debug: Boolean = false)( if (tensorType == "inp") { when(io.vme_rd.cmd.fire()) { printf("[TensorLoad] [inp] cmd addr:%x len:%x\n", - dataCtrl.io.addr, - dataCtrl.io.len) + dataCtrl.io.addr, + dataCtrl.io.len) } 
when(state === sYPad0) { printf("[TensorLoad] [inp] sYPad0\n") @@ -292,14 +290,14 @@ class TensorLoad(tensorType: String = "none", debug: Boolean = false)( } else if (tensorType == "wgt") { when(io.vme_rd.cmd.fire()) { printf("[TensorLoad] [wgt] cmd addr:%x len:%x\n", - dataCtrl.io.addr, - dataCtrl.io.len) + dataCtrl.io.addr, + dataCtrl.io.len) } } else if (tensorType == "acc") { when(io.vme_rd.cmd.fire()) { printf("[TensorLoad] [acc] cmd addr:%x len:%x\n", - dataCtrl.io.addr, - dataCtrl.io.len) + dataCtrl.io.addr, + dataCtrl.io.len) } } } diff --git a/vta/hardware/chisel/src/main/scala/core/TensorStore.scala b/vta/hardware/chisel/src/main/scala/core/TensorStore.scala index 083a70c5b119..439023be0934 100644 --- a/vta/hardware/chisel/src/main/scala/core/TensorStore.scala +++ b/vta/hardware/chisel/src/main/scala/core/TensorStore.scala @@ -25,9 +25,9 @@ import vta.util.config._ import vta.shell._ /** TensorStore. - * - * Store 1D and 2D tensors from out-scratchpad (SRAM) to main memory (DRAM). - */ + * + * Store 1D and 2D tensors from out-scratchpad (SRAM) to main memory (DRAM). 
+ */ class TensorStore(tensorType: String = "none", debug: Boolean = false)( implicit p: Parameters) extends Module { @@ -112,15 +112,14 @@ class TensorStore(tensorType: String = "none", debug: Boolean = false)( } } }.elsewhen(xrem < xmax) { - state := sWriteCmd - xlen := xrem - xrem := 0.U - } - .otherwise { - state := sWriteCmd - xlen := xmax - 1.U - xrem := xrem - xmax - } + state := sWriteCmd + xlen := xrem + xrem := 0.U + }.otherwise { + state := sWriteCmd + xlen := xmax - 1.U + xrem := xrem - xmax + } } } } @@ -176,13 +175,12 @@ class TensorStore(tensorType: String = "none", debug: Boolean = false)( raddr_cur := dec.sram_offset raddr_nxt := dec.sram_offset }.elsewhen(io.vme_wr.data - .fire() && set === (tensorLength - 1).U && tag === (numMemBlock - 1).U) { - raddr_cur := raddr_cur + 1.U - } - .elsewhen(stride) { - raddr_cur := raddr_nxt + dec.xsize - raddr_nxt := raddr_nxt + dec.xsize - } + .fire() && set === (tensorLength - 1).U && tag === (numMemBlock - 1).U) { + raddr_cur := raddr_cur + 1.U + }.elsewhen(stride) { + raddr_cur := raddr_nxt + dec.xsize + raddr_nxt := raddr_nxt + dec.xsize + } val tread = Seq.tabulate(tensorLength) { i => i.U -> @@ -199,14 +197,11 @@ class TensorStore(tensorType: String = "none", debug: Boolean = false)( waddr_nxt := io.baddr | (maskOffset & (dec.dram_offset << log2Ceil( elemBytes))) }.elsewhen(state === sWriteAck && io.vme_wr.ack && xrem =/= 0.U) { - waddr_cur := waddr_cur + xmax_bytes - } - .elsewhen(stride) { - waddr_cur := waddr_nxt + (dec.xstride << log2Ceil( - tensorLength * tensorWidth)) - waddr_nxt := waddr_nxt + (dec.xstride << log2Ceil( - tensorLength * tensorWidth)) - } + waddr_cur := waddr_cur + xmax_bytes + }.elsewhen(stride) { + waddr_cur := waddr_nxt + (dec.xstride << log2Ceil(tensorLength * tensorWidth)) + waddr_nxt := waddr_nxt + (dec.xstride << log2Ceil(tensorLength * tensorWidth)) + } io.vme_wr.cmd.valid := state === sWriteCmd io.vme_wr.cmd.bits.addr := waddr_cur @@ -231,12 +226,7 @@ class 
TensorStore(tensorType: String = "none", debug: Boolean = false)( if (debug) { when(io.vme_wr.cmd.fire()) { printf("[TensorStore] ysize:%x ycnt:%x raddr:%x waddr:%x len:%x rem:%x\n", - ysize, - ycnt, - raddr_cur, - waddr_cur, - xlen, - xrem) + ysize, ycnt, raddr_cur, waddr_cur, xlen, xrem) } when(io.vme_wr.data.fire()) { printf("[TensorStore] data:%x\n", io.vme_wr.data.bits) diff --git a/vta/hardware/chisel/src/main/scala/core/TensorUtil.scala b/vta/hardware/chisel/src/main/scala/core/TensorUtil.scala index 99e9012b71b8..6e6f7e776c0e 100644 --- a/vta/hardware/chisel/src/main/scala/core/TensorUtil.scala +++ b/vta/hardware/chisel/src/main/scala/core/TensorUtil.scala @@ -25,19 +25,18 @@ import vta.util.config._ import vta.shell._ /** TensorParams. - * - * This Bundle derives parameters for each tensorType, including inputs (inp), - * weights (wgt), biases (acc), and outputs (out). This is used to avoid - * doing the same boring calculations over and over again. - */ -class TensorParams(tensorType: String = "none")(implicit p: Parameters) - extends Bundle { + * + * This Bundle derives parameters for each tensorType, including inputs (inp), + * weights (wgt), biases (acc), and outputs (out). This is used to avoid + * doing the same boring calculations over and over again. + */ +class TensorParams(tensorType: String = "none")(implicit p: Parameters) extends Bundle { val errorMsg = s"\n\n[VTA] [TensorParams] only inp, wgt, acc, and out supported\n\n" require(tensorType == "inp" || tensorType == "wgt" - || tensorType == "acc" || tensorType == "out", - errorMsg) + || tensorType == "acc" || tensorType == "out", + errorMsg) val (tensorLength, tensorWidth, tensorElemBits) = if (tensorType == "inp") @@ -66,14 +65,14 @@ class TensorParams(tensorType: String = "none")(implicit p: Parameters) } /** TensorMaster. - * - * This interface issue read and write tensor-requests to scratchpads. 
For example, - * The TensorGemm unit uses this interface for managing the inputs (inp), weights (wgt), - * biases (acc), and outputs (out). - * - */ -class TensorMaster(tensorType: String = "none")(implicit p: Parameters) - extends TensorParams(tensorType) { + * + * This interface issue read and write tensor-requests to scratchpads. For example, + * The TensorGemm unit uses this interface for managing the inputs (inp), weights (wgt), + * biases (acc), and outputs (out). + * + */ +class TensorMaster(tensorType: String = "none") + (implicit p: Parameters) extends TensorParams(tensorType) { val rd = new Bundle { val idx = ValidIO(UInt(memAddrBits.W)) val data = Flipped( @@ -101,13 +100,13 @@ class TensorMaster(tensorType: String = "none")(implicit p: Parameters) } /** TensorClient. - * - * This interface receives read and write tensor-requests to scratchpads. For example, - * The TensorLoad unit uses this interface for receiving read and write requests from - * the TensorGemm unit. - */ -class TensorClient(tensorType: String = "none")(implicit p: Parameters) - extends TensorParams(tensorType) { + * + * This interface receives read and write tensor-requests to scratchpads. For example, + * The TensorLoad unit uses this interface for receiving read and write requests from + * the TensorGemm unit. + */ +class TensorClient(tensorType: String = "none") + (implicit p: Parameters) extends TensorParams(tensorType) { val rd = new Bundle { val idx = Flipped(ValidIO(UInt(memAddrBits.W))) val data = ValidIO( @@ -130,13 +129,13 @@ class TensorClient(tensorType: String = "none")(implicit p: Parameters) } /** TensorMasterData. - * - * This interface is only used for datapath only purposes and the direction convention - * is based on the TensorMaster interface, which means this is an input. This interface - * is used on datapath only module such MatrixVectorCore or AluVector. 
- */ -class TensorMasterData(tensorType: String = "none")(implicit p: Parameters) - extends TensorParams(tensorType) { + * + * This interface is only used for datapath only purposes and the direction convention + * is based on the TensorMaster interface, which means this is an input. This interface + * is used on datapath only module such MatrixVectorCore or AluVector. + */ +class TensorMasterData(tensorType: String = "none") + (implicit p: Parameters) extends TensorParams(tensorType) { val data = Flipped( ValidIO(Vec(tensorLength, Vec(tensorWidth, UInt(tensorElemBits.W))))) override def cloneType = @@ -144,13 +143,13 @@ class TensorMasterData(tensorType: String = "none")(implicit p: Parameters) } /** TensorClientData. - * - * This interface is only used for datapath only purposes and the direction convention - * is based on the TensorClient interface, which means this is an output. This interface - * is used on datapath only module such MatrixVectorCore or AluVector. - */ -class TensorClientData(tensorType: String = "none")(implicit p: Parameters) - extends TensorParams(tensorType) { + * + * This interface is only used for datapath only purposes and the direction convention + * is based on the TensorClient interface, which means this is an output. This interface + * is used on datapath only module such MatrixVectorCore or AluVector. + */ +class TensorClientData(tensorType: String = "none") + (implicit p: Parameters) extends TensorParams(tensorType) { val data = ValidIO( Vec(tensorLength, Vec(tensorWidth, UInt(tensorElemBits.W)))) override def cloneType = @@ -158,13 +157,12 @@ class TensorClientData(tensorType: String = "none")(implicit p: Parameters) } /** TensorPadCtrl. Zero-padding controller for TensorLoad. 
*/ -class TensorPadCtrl(padType: String = "none", sizeFactor: Int = 1) - extends Module { +class TensorPadCtrl(padType: String = "none", sizeFactor: Int = 1) extends Module { val errorMsg = s"\n\n\n[VTA-ERROR] only YPad0, YPad1, XPad0, or XPad1 supported\n\n\n" require(padType == "YPad0" || padType == "YPad1" - || padType == "XPad0" || padType == "XPad1", - errorMsg) + || padType == "XPad0" || padType == "XPad1", + errorMsg) val io = IO(new Bundle { val start = Input(Bool()) @@ -233,9 +231,7 @@ class TensorPadCtrl(padType: String = "none", sizeFactor: Int = 1) /** TensorDataCtrl. Data controller for TensorLoad. */ class TensorDataCtrl(tensorType: String = "none", - sizeFactor: Int = 1, - strideFactor: Int = 1)(implicit p: Parameters) - extends Module { + sizeFactor: Int = 1, strideFactor: Int = 1)(implicit p: Parameters) extends Module { val mp = p(ShellKey).memParams val io = IO(new Bundle { val start = Input(Bool()) diff --git a/vta/hardware/chisel/src/main/scala/dpi/VTAHostDPI.scala b/vta/hardware/chisel/src/main/scala/dpi/VTAHostDPI.scala index 3318251fa454..a42891661851 100644 --- a/vta/hardware/chisel/src/main/scala/dpi/VTAHostDPI.scala +++ b/vta/hardware/chisel/src/main/scala/dpi/VTAHostDPI.scala @@ -32,9 +32,9 @@ trait VTAHostDPIParams { } /** Host master interface. - * - * This interface is tipically used by the Host - */ + * + * This interface is tipically used by the Host + */ class VTAHostDPIMaster extends Bundle with VTAHostDPIParams { val req = new Bundle { val valid = Output(Bool()) @@ -47,9 +47,9 @@ class VTAHostDPIMaster extends Bundle with VTAHostDPIParams { } /** Host client interface. - * - * This interface is tipically used by the Accelerator - */ + * + * This interface is tipically used by the Accelerator + */ class VTAHostDPIClient extends Bundle with VTAHostDPIParams { val req = new Bundle { val valid = Input(Bool()) @@ -62,9 +62,9 @@ class VTAHostDPIClient extends Bundle with VTAHostDPIParams { } /** Host DPI module. 
- * - * Wrapper for Host Verilog DPI module. - */ + * + * Wrapper for Host Verilog DPI module. + */ class VTAHostDPI extends BlackBox with HasBlackBoxResource { val io = IO(new Bundle { val clock = Input(Clock()) @@ -75,11 +75,10 @@ class VTAHostDPI extends BlackBox with HasBlackBoxResource { } /** Host DPI to AXI Converter. - * - * Convert Host DPI to AXI for VTAShell - */ -class VTAHostDPIToAXI(debug: Boolean = false)(implicit p: Parameters) - extends Module { + * + * Convert Host DPI to AXI for VTAShell + */ +class VTAHostDPIToAXI(debug: Boolean = false)(implicit p: Parameters) extends Module { val io = IO(new Bundle { val dpi = new VTAHostDPIClient val axi = new AXILiteMaster(p(ShellKey).hostParams) diff --git a/vta/hardware/chisel/src/main/scala/dpi/VTAMemDPI.scala b/vta/hardware/chisel/src/main/scala/dpi/VTAMemDPI.scala index f46b778966fc..bffbc1c651cf 100644 --- a/vta/hardware/chisel/src/main/scala/dpi/VTAMemDPI.scala +++ b/vta/hardware/chisel/src/main/scala/dpi/VTAMemDPI.scala @@ -33,9 +33,9 @@ trait VTAMemDPIParams { } /** Memory master interface. - * - * This interface is tipically used by the Accelerator - */ + * + * This interface is tipically used by the Accelerator + */ class VTAMemDPIMaster extends Bundle with VTAMemDPIParams { val req = new Bundle { val valid = Output(Bool()) @@ -48,9 +48,9 @@ class VTAMemDPIMaster extends Bundle with VTAMemDPIParams { } /** Memory client interface. - * - * This interface is tipically used by the Host - */ + * + * This interface is tipically used by the Host + */ class VTAMemDPIClient extends Bundle with VTAMemDPIParams { val req = new Bundle { val valid = Input(Bool()) @@ -63,9 +63,9 @@ class VTAMemDPIClient extends Bundle with VTAMemDPIParams { } /** Memory DPI module. - * - * Wrapper for Memory Verilog DPI module. - */ + * + * Wrapper for Memory Verilog DPI module. 
+ */ class VTAMemDPI extends BlackBox with HasBlackBoxResource { val io = IO(new Bundle { val clock = Input(Clock()) @@ -75,8 +75,7 @@ class VTAMemDPI extends BlackBox with HasBlackBoxResource { setResource("/verilog/VTAMemDPI.v") } -class VTAMemDPIToAXI(debug: Boolean = false)(implicit p: Parameters) - extends Module { +class VTAMemDPIToAXI(debug: Boolean = false)(implicit p: Parameters) extends Module { val io = IO(new Bundle { val dpi = new VTAMemDPIMaster val axi = new AXIClient(p(ShellKey).memParams) @@ -173,13 +172,13 @@ class VTAMemDPIToAXI(debug: Boolean = false)(implicit p: Parameters) } when(io.axi.r.fire()) { printf("[VTAMemDPIToAXI] [R] last:%x data:%x\n", - io.axi.r.bits.last, - io.axi.r.bits.data) + io.axi.r.bits.last, + io.axi.r.bits.data) } when(io.axi.w.fire()) { printf("[VTAMemDPIToAXI] [W] last:%x data:%x\n", - io.axi.w.bits.last, - io.axi.w.bits.data) + io.axi.w.bits.last, + io.axi.w.bits.data) } } } diff --git a/vta/hardware/chisel/src/main/scala/dpi/VTASimDPI.scala b/vta/hardware/chisel/src/main/scala/dpi/VTASimDPI.scala index 33b1101e55f8..2f2532804eec 100644 --- a/vta/hardware/chisel/src/main/scala/dpi/VTASimDPI.scala +++ b/vta/hardware/chisel/src/main/scala/dpi/VTASimDPI.scala @@ -26,9 +26,9 @@ import vta.interface.axi._ import vta.shell._ /** Sim DPI module. - * - * Wrapper for Sim Verilog DPI module. - */ + * + * Wrapper for Sim Verilog DPI module. 
+ */ class VTASimDPI extends BlackBox with HasBlackBoxResource { val io = IO(new Bundle { val clock = Input(Clock()) diff --git a/vta/hardware/chisel/src/main/scala/interface/axi/AXI.scala b/vta/hardware/chisel/src/main/scala/interface/axi/AXI.scala index 8fd0fa896b61..515159075602 100644 --- a/vta/hardware/chisel/src/main/scala/interface/axi/AXI.scala +++ b/vta/hardware/chisel/src/main/scala/interface/axi/AXI.scala @@ -55,7 +55,7 @@ case class AXIParams( } abstract class AXIBase(params: AXIParams) - extends GenericParameterizedBundle(params) + extends GenericParameterizedBundle(params) // AXILite diff --git a/vta/hardware/chisel/src/main/scala/shell/Configs.scala b/vta/hardware/chisel/src/main/scala/shell/Configs.scala index 3c271f56fa17..b0c54029b1a4 100644 --- a/vta/hardware/chisel/src/main/scala/shell/Configs.scala +++ b/vta/hardware/chisel/src/main/scala/shell/Configs.scala @@ -25,59 +25,56 @@ import vta.util.config._ import vta.interface.axi._ /** PynqConfig. Shell configuration for Pynq */ -class PynqConfig - extends Config((site, here, up) => { - case ShellKey => - ShellParams( - hostParams = AXIParams(coherent = false, - addrBits = 16, - dataBits = 32, - lenBits = 8, - userBits = 1), - memParams = AXIParams(coherent = true, - addrBits = 32, - dataBits = 64, - lenBits = 8, - userBits = 1), - vcrParams = VCRParams(), - vmeParams = VMEParams() - ) - }) +class PynqConfig extends Config((site, here, up) => { + case ShellKey => + ShellParams( + hostParams = AXIParams(coherent = false, + addrBits = 16, + dataBits = 32, + lenBits = 8, + userBits = 1), + memParams = AXIParams(coherent = true, + addrBits = 32, + dataBits = 64, + lenBits = 8, + userBits = 1), + vcrParams = VCRParams(), + vmeParams = VMEParams() + ) +}) /** F1Config. 
Shell configuration for F1 */ -class F1Config - extends Config((site, here, up) => { - case ShellKey => - ShellParams( - hostParams = AXIParams(coherent = false, - addrBits = 16, - dataBits = 32, - lenBits = 8, - userBits = 1), - memParams = AXIParams(coherent = false, - addrBits = 64, - dataBits = 64, - lenBits = 8, - userBits = 1), - vcrParams = VCRParams(), - vmeParams = VMEParams() - ) - }) +class F1Config extends Config((site, here, up) => { + case ShellKey => + ShellParams( + hostParams = AXIParams(coherent = false, + addrBits = 16, + dataBits = 32, + lenBits = 8, + userBits = 1), + memParams = AXIParams(coherent = false, + addrBits = 64, + dataBits = 64, + lenBits = 8, + userBits = 1), + vcrParams = VCRParams(), + vmeParams = VMEParams() + ) +}) /** De10Config. Shell configuration for De10 */ -class De10Config - extends Config((site, here, up) => { - case ShellKey => - ShellParams( - hostParams = - AXIParams(addrBits = 16, dataBits = 32, idBits = 13, lenBits = 4), - memParams = AXIParams( - addrBits = 32, - dataBits = 64, - userBits = 5, - lenBits = 4, // limit to 16 beats, instead of 256 beats in AXI4 - coherent = true), - vcrParams = VCRParams(), - vmeParams = VMEParams() - ) - }) +class De10Config extends Config((site, here, up) => { + case ShellKey => + ShellParams( + hostParams = + AXIParams(addrBits = 16, dataBits = 32, idBits = 13, lenBits = 4), + memParams = AXIParams( + addrBits = 32, + dataBits = 64, + userBits = 5, + lenBits = 4, // limit to 16 beats, instead of 256 beats in AXI4 + coherent = true), + vcrParams = VCRParams(), + vmeParams = VMEParams() + ) +}) diff --git a/vta/hardware/chisel/src/main/scala/shell/IntelShell.scala b/vta/hardware/chisel/src/main/scala/shell/IntelShell.scala index 6eb22224e8d7..e1b6995decd0 100644 --- a/vta/hardware/chisel/src/main/scala/shell/IntelShell.scala +++ b/vta/hardware/chisel/src/main/scala/shell/IntelShell.scala @@ -25,10 +25,10 @@ import vta.interface.axi._ import vta.core._ /** IntelShell. 
- * - * The IntelShell is based on a VME, VCR and core. This creates a complete VTA - * system that can be used for simulation or real hardware. - */ + * + * The IntelShell is based on a VME, VCR and core. This creates a complete VTA + * system that can be used for simulation or real hardware. + */ class IntelShell(implicit p: Parameters) extends Module { val io = IO(new Bundle { val host = new AXIClient(p(ShellKey).hostParams) diff --git a/vta/hardware/chisel/src/main/scala/shell/SimShell.scala b/vta/hardware/chisel/src/main/scala/shell/SimShell.scala index 30b84d63dbc4..0909d1bfe47e 100644 --- a/vta/hardware/chisel/src/main/scala/shell/SimShell.scala +++ b/vta/hardware/chisel/src/main/scala/shell/SimShell.scala @@ -27,11 +27,11 @@ import vta.shell._ import vta.dpi._ /** VTAHost. - * - * This module translate the DPI protocol into AXI. This is a simulation only - * module and used to test host-to-VTA communication. This module should be updated - * for testing hosts using a different bus protocol, other than AXI. - */ + * + * This module translate the DPI protocol into AXI. This is a simulation only + * module and used to test host-to-VTA communication. This module should be updated + * for testing hosts using a different bus protocol, other than AXI. + */ class VTAHost(implicit p: Parameters) extends Module { val io = IO(new Bundle { val axi = new AXILiteMaster(p(ShellKey).hostParams) @@ -45,11 +45,11 @@ class VTAHost(implicit p: Parameters) extends Module { } /** VTAMem. - * - * This module translate the DPI protocol into AXI. This is a simulation only - * module and used to test VTA-to-memory communication. This module should be updated - * for testing memories using a different bus protocol, other than AXI. - */ + * + * This module translate the DPI protocol into AXI. This is a simulation only + * module and used to test VTA-to-memory communication. This module should be updated + * for testing memories using a different bus protocol, other than AXI. 
+ */ class VTAMem(implicit p: Parameters) extends Module { val io = IO(new Bundle { val axi = new AXIClient(p(ShellKey).memParams) @@ -63,12 +63,12 @@ class VTAMem(implicit p: Parameters) extends Module { } /** VTASim. - * - * This module is used to handle hardware simulation thread, such as halting - * or terminating the simulation thread. The sim_wait port is used to halt - * the simulation thread when it is asserted and resume it when it is - * de-asserted. - */ + * + * This module is used to handle the hardware simulation thread, such as halting + * or terminating the simulation thread. The sim_wait port is used to halt + * the simulation thread when it is asserted and resume it when it is + * de-asserted. + */ class VTASim(implicit p: Parameters) extends MultiIOModule { val sim_wait = IO(Output(Bool())) val sim = Module(new VTASimDPI) @@ -78,11 +78,11 @@ class VTASim(implicit p: Parameters) extends MultiIOModule { } /** SimShell. - * - * The simulation shell instantiate the sim, host and memory DPI modules that - * are connected to the VTAShell. An extra clock, sim_clock, is used to eval - * the VTASim DPI function when the main simulation clock is on halt state. - */ + * + * The simulation shell instantiates the sim, host and memory DPI modules that + * are connected to the VTAShell. An extra clock, sim_clock, is used to eval + * the VTASim DPI function when the main simulation clock is on halt state. + */ class SimShell(implicit p: Parameters) extends MultiIOModule { val mem = IO(new AXIClient(p(ShellKey).memParams)) val host = IO(new AXILiteMaster(p(ShellKey).hostParams)) diff --git a/vta/hardware/chisel/src/main/scala/shell/VCR.scala b/vta/hardware/chisel/src/main/scala/shell/VCR.scala index 517f58179b5c..3e74a256d537 100644 --- a/vta/hardware/chisel/src/main/scala/shell/VCR.scala +++ b/vta/hardware/chisel/src/main/scala/shell/VCR.scala @@ -26,9 +26,9 @@ import vta.util.genericbundle._ import vta.interface.axi._ /** VCR parameters.
- * - * These parameters are used on VCR interfaces and modules. - */ + * + * These parameters are used on VCR interfaces and modules. + */ case class VCRParams() { val nCtrl = 1 val nECnt = 1 @@ -38,14 +38,13 @@ case class VCRParams() { } /** VCRBase. Parametrize base class. */ -abstract class VCRBase(implicit p: Parameters) - extends GenericParameterizedBundle(p) +abstract class VCRBase(implicit p: Parameters) extends GenericParameterizedBundle(p) /** VCRMaster. - * - * This is the master interface used by VCR in the VTAShell to control - * the Core unit. - */ + * + * This is the master interface used by VCR in the VTAShell to control + * the Core unit. + */ class VCRMaster(implicit p: Parameters) extends VCRBase { val vp = p(ShellKey).vcrParams val mp = p(ShellKey).memParams @@ -57,10 +56,10 @@ class VCRMaster(implicit p: Parameters) extends VCRBase { } /** VCRClient. - * - * This is the client interface used by the Core module to communicate - * to the VCR in the VTAShell. - */ + * + * This is the client interface used by the Core module to communicate + * to the VCR in the VTAShell. + */ class VCRClient(implicit p: Parameters) extends VCRBase { val vp = p(ShellKey).vcrParams val mp = p(ShellKey).memParams @@ -72,12 +71,12 @@ class VCRClient(implicit p: Parameters) extends VCRBase { } /** VTA Control Registers (VCR). - * - * This unit provides control registers (32 and 64 bits) to be used by a control' - * unit, typically a host processor. These registers are read-only by the core - * at the moment but this will likely change once we add support to general purpose - * registers that could be used as event counters by the Core unit. - */ + * + * This unit provides control registers (32 and 64 bits) to be used by a control + * unit, typically a host processor. These registers are read-only by the core + * at the moment but this will likely change once we add support for general purpose + * registers that could be used as event counters by the Core unit.
+ */ class VCR(implicit p: Parameters) extends Module { val io = IO(new Bundle { val host = new AXILiteClient(p(ShellKey).hostParams) diff --git a/vta/hardware/chisel/src/main/scala/shell/VME.scala b/vta/hardware/chisel/src/main/scala/shell/VME.scala index 949929a179fc..41b24d1ba7aa 100644 --- a/vta/hardware/chisel/src/main/scala/shell/VME.scala +++ b/vta/hardware/chisel/src/main/scala/shell/VME.scala @@ -26,27 +26,26 @@ import vta.util.genericbundle._ import vta.interface.axi._ /** VME parameters. - * - * These parameters are used on VME interfaces and modules. - */ + * + * These parameters are used on VME interfaces and modules. + */ case class VMEParams() { val nReadClients: Int = 5 val nWriteClients: Int = 1 require(nReadClients > 0, - s"\n\n[VTA] [VMEParams] nReadClients must be larger than 0\n\n") + s"\n\n[VTA] [VMEParams] nReadClients must be larger than 0\n\n") require( nWriteClients == 1, s"\n\n[VTA] [VMEParams] nWriteClients must be 1, only one-write-client support atm\n\n") } /** VMEBase. Parametrize base class. */ -abstract class VMEBase(implicit p: Parameters) - extends GenericParameterizedBundle(p) +abstract class VMEBase(implicit p: Parameters) extends GenericParameterizedBundle(p) /** VMECmd. - * - * This interface is used for creating write and read requests to memory. - */ + * + * This interface is used for creating write and read requests to memory. + */ class VMECmd(implicit p: Parameters) extends VMEBase { val addrBits = p(ShellKey).memParams.addrBits val lenBits = p(ShellKey).memParams.lenBits @@ -55,10 +54,10 @@ class VMECmd(implicit p: Parameters) extends VMEBase { } /** VMEReadMaster. - * - * This interface is used by modules inside the core to generate read requests - * and receive responses from VME. - */ + * + * This interface is used by modules inside the core to generate read requests + * and receive responses from VME. 
+ */ class VMEReadMaster(implicit p: Parameters) extends Bundle { val dataBits = p(ShellKey).memParams.dataBits val cmd = Decoupled(new VMECmd) @@ -68,10 +67,10 @@ class VMEReadMaster(implicit p: Parameters) extends Bundle { } /** VMEReadClient. - * - * This interface is used by the VME to receive read requests and generate - * responses to modules inside the core. - */ + * + * This interface is used by the VME to receive read requests and generate + * responses to modules inside the core. + */ class VMEReadClient(implicit p: Parameters) extends Bundle { val dataBits = p(ShellKey).memParams.dataBits val cmd = Flipped(Decoupled(new VMECmd)) @@ -81,10 +80,10 @@ class VMEReadClient(implicit p: Parameters) extends Bundle { } /** VMEWriteMaster. - * - * This interface is used by modules inside the core to generate write requests - * to the VME. - */ + * + * This interface is used by modules inside the core to generate write requests + * to the VME. + */ class VMEWriteMaster(implicit p: Parameters) extends Bundle { val dataBits = p(ShellKey).memParams.dataBits val cmd = Decoupled(new VMECmd) @@ -95,10 +94,10 @@ class VMEWriteMaster(implicit p: Parameters) extends Bundle { } /** VMEWriteClient. - * - * This interface is used by the VME to handle write requests from modules inside - * the core. - */ + * + * This interface is used by the VME to handle write requests from modules inside + * the core. + */ class VMEWriteClient(implicit p: Parameters) extends Bundle { val dataBits = p(ShellKey).memParams.dataBits val cmd = Flipped(Decoupled(new VMECmd)) @@ -109,10 +108,10 @@ class VMEWriteClient(implicit p: Parameters) extends Bundle { } /** VMEMaster. - * - * Pack nRd number of VMEReadMaster interfaces and nWr number of VMEWriteMaster - * interfaces. - */ + * + * Pack nRd number of VMEReadMaster interfaces and nWr number of VMEWriteMaster + * interfaces. 
+ */ class VMEMaster(implicit p: Parameters) extends Bundle { val nRd = p(ShellKey).vmeParams.nReadClients val nWr = p(ShellKey).vmeParams.nWriteClients @@ -121,10 +120,10 @@ class VMEMaster(implicit p: Parameters) extends Bundle { } /** VMEClient. - * - * Pack nRd number of VMEReadClient interfaces and nWr number of VMEWriteClient - * interfaces. - */ + * + * Pack nRd number of VMEReadClient interfaces and nWr number of VMEWriteClient + * interfaces. + */ class VMEClient(implicit p: Parameters) extends Bundle { val nRd = p(ShellKey).vmeParams.nReadClients val nWr = p(ShellKey).vmeParams.nWriteClients @@ -133,10 +132,10 @@ class VMEClient(implicit p: Parameters) extends Bundle { } /** VTA Memory Engine (VME). - * - * This unit multiplexes the memory controller interface for the Core. Currently, - * it supports single-writer and multiple-reader mode and it is also based on AXI. - */ + * + * This unit multiplexes the memory controller interface for the Core. Currently, + * it supports single-writer and multiple-reader mode and it is also based on AXI. + */ class VME(implicit p: Parameters) extends Module { val io = IO(new Bundle { val mem = new AXIMaster(p(ShellKey).memParams) diff --git a/vta/hardware/chisel/src/main/scala/shell/VTAShell.scala b/vta/hardware/chisel/src/main/scala/shell/VTAShell.scala index 782aeae351e1..650a1c56c352 100644 --- a/vta/hardware/chisel/src/main/scala/shell/VTAShell.scala +++ b/vta/hardware/chisel/src/main/scala/shell/VTAShell.scala @@ -35,10 +35,10 @@ case class ShellParams( case object ShellKey extends Field[ShellParams] /** VTAShell. - * - * The VTAShell is based on a VME, VCR and core. This creates a complete VTA - * system that can be used for simulation or real hardware. - */ + * + * The VTAShell is based on a VME, VCR and core. This creates a complete VTA + * system that can be used for simulation or real hardware. 
+ */ class VTAShell(implicit p: Parameters) extends Module { val io = IO(new Bundle { val host = new AXILiteClient(p(ShellKey).hostParams) diff --git a/vta/hardware/chisel/src/main/scala/shell/XilinxShell.scala b/vta/hardware/chisel/src/main/scala/shell/XilinxShell.scala index ec7bffb89657..28f95ea36bc4 100644 --- a/vta/hardware/chisel/src/main/scala/shell/XilinxShell.scala +++ b/vta/hardware/chisel/src/main/scala/shell/XilinxShell.scala @@ -25,10 +25,10 @@ import vta.util.config._ import vta.interface.axi._ /** XilinxShell. - * - * This is a wrapper shell mostly used to match Xilinx convention naming, - * therefore we can pack VTA as an IP for IPI based flows. - */ + * + * This is a wrapper shell mostly used to match Xilinx convention naming, + * therefore we can pack VTA as an IP for IPI based flows. + */ class XilinxShell(implicit p: Parameters) extends RawModule { val hp = p(ShellKey).hostParams diff --git a/vta/hardware/chisel/src/main/scala/util/Config.scala b/vta/hardware/chisel/src/main/scala/util/Config.scala index 41104c44810a..d63d95665571 100644 --- a/vta/hardware/chisel/src/main/scala/util/Config.scala +++ b/vta/hardware/chisel/src/main/scala/util/Config.scala @@ -46,7 +46,7 @@ abstract class Parameters extends View { new ChainParameters(this, x) final def alter( - f: (View, View, View) => PartialFunction[Any, Any]): Parameters = + f: (View, View, View) => PartialFunction[Any, Any]): Parameters = Parameters(f) ++ this final def alterPartial(f: PartialFunction[Any, Any]): Parameters = @@ -56,8 +56,8 @@ abstract class Parameters extends View { new MapParameters(m) ++ this protected[config] def chain[T](site: View, - tail: View, - pname: Field[T]): Option[T] + tail: View, + pname: Field[T]): Option[T] protected[config] def find[T](pname: Field[T], site: View) = chain(site, new TerminalView, pname) } diff --git a/vta/hardware/chisel/src/main/scala/util/GenericParameterizedBundle.scala 
b/vta/hardware/chisel/src/main/scala/util/GenericParameterizedBundle.scala index db8f5d241222..063e76673396 100644 --- a/vta/hardware/chisel/src/main/scala/util/GenericParameterizedBundle.scala +++ b/vta/hardware/chisel/src/main/scala/util/GenericParameterizedBundle.scala @@ -23,8 +23,8 @@ package vta.util.genericbundle import chisel3._ -abstract class GenericParameterizedBundle[+T <: Object](val params: T) - extends Bundle { +abstract class GenericParameterizedBundle[+T <: Object] + (val params: T) extends Bundle { override def cloneType = { try { this.getClass.getConstructors.head diff --git a/vta/hardware/chisel/src/main/scala/vta/Configs.scala b/vta/hardware/chisel/src/main/scala/vta/Configs.scala index f137ab6c5330..350379b2ec22 100644 --- a/vta/hardware/chisel/src/main/scala/vta/Configs.scala +++ b/vta/hardware/chisel/src/main/scala/vta/Configs.scala @@ -26,11 +26,11 @@ import vta.core._ import vta.test._ /** VTA. - * - * This file contains all the configurations supported by VTA. - * These configurations are built in a mix/match form based on core - * and shell configurations. - */ + * + * This file contains all the configurations supported by VTA. + * These configurations are built in a mix/match form based on core + * and shell configurations. + */ class DefaultPynqConfig extends Config(new CoreConfig ++ new PynqConfig) class DefaultF1Config extends Config(new CoreConfig ++ new F1Config) class DefaultDe10Config extends Config(new CoreConfig ++ new De10Config)