diff --git a/.clang-format b/.clang-format new file mode 100644 index 00000000..b7a72ce3 --- /dev/null +++ b/.clang-format @@ -0,0 +1,5 @@ +Language: Cpp +BasedOnStyle: LLVM +IndentWidth: 4 +TabWidth: 4 +UseTab: Never diff --git a/cosim/axi3-perf-example/ps.cpp b/cosim/axi3-perf-example/ps.cpp index 3dc294f5..d9ebbe13 100644 --- a/cosim/axi3-perf-example/ps.cpp +++ b/cosim/axi3-perf-example/ps.cpp @@ -4,148 +4,148 @@ // the API we provide abstracts away the // communication plumbing differences. -#include -#include -#include "bsg_zynq_pl.h" -#include "bsg_printing.h" #include "bsg_argparse.h" +#include "bsg_printing.h" +#include "bsg_zynq_pl.h" +#include +#include #include -uint64_t get_microseconds() -{ +uint64_t get_microseconds() { struct timeval tv; - gettimeofday(&tv,NULL); - return tv.tv_sec*(uint64_t)1000000+tv.tv_usec; + gettimeofday(&tv, NULL); + return tv.tv_sec * (uint64_t)1000000 + tv.tv_usec; } int ps_main(int argc, char **argv) { - bsg_zynq_pl *zpl = new bsg_zynq_pl(argc, argv); - - // the read memory map is essentially - // - // 0,4,8,C: registers - // 10, 14: output fifo heads - // 18, 1C: output fifo counts - // 20,24,28,2C: input fifo counts - // 30: last address of write - - // the write memory map is essentially - // - // 0,4,8,C: registers - // 10,14,18,1C: input fifo - - int val1 = 0xDEADBEEF; - int val2 = 0xCAFEBABE; - int val3 = 0x0000CADE; - int val4 = 0xC0DE0000; - int mask1 = 0xf; - int mask2 = 0xf; - - // NEON - { - uint32_t words_per_xfer = 4; - const char *label="neon 4x32:"; - uint64_t start=get_microseconds(); - int limit = 100000; - for (int i = 0; i < limit/4; i++) { - zpl->shell_write(GP0_ADDR_BASE, 23, 0xf); - zpl->shell_write(GP0_ADDR_BASE, 12, 0xf); - zpl->shell_write(GP0_ADDR_BASE, 1, 0xf); - zpl->shell_write(GP0_ADDR_BASE, 43, 0xf); + bsg_zynq_pl *zpl = new bsg_zynq_pl(argc, argv); + + // the read memory map is essentially + // + // 0,4,8,C: registers + // 10, 14: output fifo heads + // 18, 1C: output fifo counts + // 20,24,28,2C: input fifo counts + // 30: last address of write + + // the write memory map is essentially + // + // 0,4,8,C: registers + // 10,14,18,1C: input fifo + + int val1 = 0xDEADBEEF; + int val2 = 0xCAFEBABE; + int val3 = 0x0000CADE; + int val4 = 0xC0DE0000; + int mask1 = 0xf; + int mask2 = 0xf; + + // NEON + { + uint32_t words_per_xfer = 4; + const char *label = "neon 4x32:"; + uint64_t start = get_microseconds(); + int limit = 100000; + for (int i = 0; i < limit / 4; i++) { + zpl->shell_write(GP0_ADDR_BASE, 23, 0xf); + zpl->shell_write(GP0_ADDR_BASE, 12, 0xf); + zpl->shell_write(GP0_ADDR_BASE, 1, 0xf); + zpl->shell_write(GP0_ADDR_BASE, 43, 0xf); + } + uint64_t net = get_microseconds() - start; + printf("%s: %llu microseconds for %d xfers: %f words per microsecond\n", + label, net, limit, ((double)limit) / ((double)net)); + } + + // Single AXI request + { + uint32_t words_per_xfer = 1; + const char *label = "int32 :"; + uint64_t start = get_microseconds(); + int limit = 100000; + for (int i = 0; i < limit; i++) { + zpl->shell_write4(GP0_ADDR_BASE, 23, 12, 1, 43); + } + uint64_t net = get_microseconds() - start; + printf("%s: %llu microseconds for %d xfers: %f words per microsecond\n", + label, net, limit, ((double)limit) / ((double)net)); } - uint64_t net=get_microseconds()-start; - printf("%s: %llu microseconds for %d xfers: %f words per microsecond\n", label, net, limit, ((double) limit) / ((double) net) ); - } - - // Single AXI request - { - uint32_t words_per_xfer = 1; - const char *label="int32 :"; - uint64_t start=get_microseconds(); - int limit = 100000; - for (int i = 0; i < limit; i++) { - zpl->shell_write4(GP0_ADDR_BASE, 23, 12, 1, 43); + + if (1) { + // write to two registers, checking our address snoop to see + // actual address that was received over the AXI bus + zpl->shell_write(0x0 + GP0_ADDR_BASE, val1, mask1); + assert(zpl->shell_read(0x30 + GP0_ADDR_BASE) == 0x0); + zpl->shell_write(0x4 + GP0_ADDR_BASE, val2, mask2); + assert(zpl->shell_read(0x30 + GP0_ADDR_BASE) == 0x4); + // 8,12 + + // check output fifo counters + assert((zpl->shell_read(0x18 + GP0_ADDR_BASE) == 0)); + assert((zpl->shell_read(0x1C + GP0_ADDR_BASE) == 0)); + + // check input fifo counters + bsg_pr_dbg_ps("%d\n", zpl->shell_read(0x20 + GP0_ADDR_BASE)); + assert((zpl->shell_read(0x20 + GP0_ADDR_BASE) == 4)); + assert((zpl->shell_read(0x24 + GP0_ADDR_BASE) == 4)); + assert((zpl->shell_read(0x28 + GP0_ADDR_BASE) == 4)); + assert((zpl->shell_read(0x2C + GP0_ADDR_BASE) == 4)); + + // write to fifos + zpl->shell_write(0x10 + GP0_ADDR_BASE, val3, mask1); + + // checker counters + assert((zpl->shell_read(0x20 + GP0_ADDR_BASE) == (3))); + assert((zpl->shell_read(0x24 + GP0_ADDR_BASE) == (4))); + + // write to fifo + zpl->shell_write(0x10 + GP0_ADDR_BASE, val1, mask1); + // checker counters + assert((zpl->shell_read(0x20 + GP0_ADDR_BASE) == (2))); + assert((zpl->shell_read(0x24 + GP0_ADDR_BASE) == (4))); + + zpl->shell_write(0x14 + GP0_ADDR_BASE, val4, mask2); + zpl->shell_write(0x14 + GP0_ADDR_BASE, val2, mask2); + + // checker counters + assert((zpl->shell_read(0x20 + GP0_ADDR_BASE) == (4))); + assert((zpl->shell_read(0x24 + GP0_ADDR_BASE) == (4))); + + // check register writes + assert((zpl->shell_read(0x0 + GP0_ADDR_BASE) == (val1))); + assert((zpl->shell_read(0x4 + GP0_ADDR_BASE) == (val2))); + + // checker output counters + assert((zpl->shell_read(0x18 + GP0_ADDR_BASE) == (2))); + assert((zpl->shell_read(0x1C + GP0_ADDR_BASE) == (0))); + + // check that the output fifo has the sum of the input fifos + assert((zpl->shell_read(0x10 + GP0_ADDR_BASE) == (val3 + val4))); + assert((zpl->shell_read(0x10 + GP0_ADDR_BASE) == (val1 + val2))); + + // checker output counters + assert((zpl->shell_read(0x18 + GP0_ADDR_BASE) == (0))); + assert((zpl->shell_read(0x1C + GP0_ADDR_BASE) == (0))); + + // try a different set of input and output fifos + zpl->shell_write(0x18 + GP0_ADDR_BASE, val1, mask1); + zpl->shell_write(0x1C + GP0_ADDR_BASE, val2, mask2); + + // checker output counters + assert((zpl->shell_read(0x18 + GP0_ADDR_BASE) == (0))); + assert((zpl->shell_read(0x1C + GP0_ADDR_BASE) == (1))); + + // read value out of fifo + assert((zpl->shell_read(0x14 + GP0_ADDR_BASE) == (val1 + val2))); + + // checker output counters + assert((zpl->shell_read(0x18 + GP0_ADDR_BASE) == (0))); + assert((zpl->shell_read(0x1C + GP0_ADDR_BASE) == (0))); } - uint64_t net=get_microseconds()-start; - printf("%s: %llu microseconds for %d xfers: %f words per microsecond\n", label, net, limit, ((double) limit) / ((double) net) ); - } - - if (1) { - // write to two registers, checking our address snoop to see - // actual address that was received over the AXI bus - zpl->shell_write(0x0 + GP0_ADDR_BASE, val1, mask1); - assert(zpl->shell_read(0x30 + GP0_ADDR_BASE) == 0x0); - zpl->shell_write(0x4 + GP0_ADDR_BASE, val2, mask2); - assert(zpl->shell_read(0x30 + GP0_ADDR_BASE) == 0x4); - // 8,12 - - // check output fifo counters - assert((zpl->shell_read(0x18 + GP0_ADDR_BASE) == 0)); - assert((zpl->shell_read(0x1C + GP0_ADDR_BASE) == 0)); - - // check input fifo counters - bsg_pr_dbg_ps("%d\n", zpl->shell_read(0x20 + GP0_ADDR_BASE)); - assert((zpl->shell_read(0x20 + GP0_ADDR_BASE) == 4)); - assert((zpl->shell_read(0x24 + GP0_ADDR_BASE) == 4)); - assert((zpl->shell_read(0x28 + GP0_ADDR_BASE) == 4)); - assert((zpl->shell_read(0x2C + GP0_ADDR_BASE) == 4)); - - // write to fifos - zpl->shell_write(0x10 + GP0_ADDR_BASE, val3, mask1); - - // checker counters - assert((zpl->shell_read(0x20 + GP0_ADDR_BASE) == (3))); - assert((zpl->shell_read(0x24 + GP0_ADDR_BASE) == (4))); - - // write to fifo - zpl->shell_write(0x10 + GP0_ADDR_BASE, val1, mask1); - // checker counters - assert((zpl->shell_read(0x20 + GP0_ADDR_BASE) == (2))); - assert((zpl->shell_read(0x24 + GP0_ADDR_BASE) == (4))); - - zpl->shell_write(0x14 + GP0_ADDR_BASE, val4, mask2); - zpl->shell_write(0x14 + GP0_ADDR_BASE, val2, mask2); - - // checker counters - assert((zpl->shell_read(0x20 + GP0_ADDR_BASE) == (4))); - assert((zpl->shell_read(0x24 + GP0_ADDR_BASE) == (4))); - - // check register writes - assert((zpl->shell_read(0x0 + GP0_ADDR_BASE) == (val1))); - assert((zpl->shell_read(0x4 + GP0_ADDR_BASE) == (val2))); - - // checker output counters - assert((zpl->shell_read(0x18 + GP0_ADDR_BASE) == (2))); - assert((zpl->shell_read(0x1C + GP0_ADDR_BASE) == (0))); - - // check that the output fifo has the sum of the input fifos - assert((zpl->shell_read(0x10 + GP0_ADDR_BASE) == (val3 + val4))); - assert((zpl->shell_read(0x10 + GP0_ADDR_BASE) == (val1 + val2))); - - // checker output counters - assert((zpl->shell_read(0x18 + GP0_ADDR_BASE) == (0))); - assert((zpl->shell_read(0x1C + GP0_ADDR_BASE) == (0))); - - // try a different set of input and output fifos - zpl->shell_write(0x18 + GP0_ADDR_BASE, val1, mask1); - zpl->shell_write(0x1C + GP0_ADDR_BASE, val2, mask2); - - // checker output counters - assert((zpl->shell_read(0x18 + GP0_ADDR_BASE) == (0))); - assert((zpl->shell_read(0x1C + GP0_ADDR_BASE) == (1))); - - // read value out of fifo - assert((zpl->shell_read(0x14 + GP0_ADDR_BASE) == (val1 + val2))); - - // checker output counters - assert((zpl->shell_read(0x18 + GP0_ADDR_BASE) == (0))); - assert((zpl->shell_read(0x1C + GP0_ADDR_BASE) == (0))); - - } - printf("## everything passed; at end of test\n"); - zpl->done(); - - delete zpl; + printf("## everything passed; at end of test\n"); + zpl->done(); + + delete zpl; } diff --git a/cosim/axis-example/ps.cpp b/cosim/axis-example/ps.cpp index 22330711..668e43d7 100644 --- a/cosim/axis-example/ps.cpp +++ b/cosim/axis-example/ps.cpp @@ -4,42 +4,45 @@ // the API we provide abstracts away the // communication plumbing differences. -#include -#include -#include "bsg_zynq_pl.h" -#include "bsg_printing.h" #include "bsg_argparse.h" +#include "bsg_printing.h" +#include "bsg_zynq_pl.h" +#include +#include #include #include "ps.hpp" int ps_main(int argc, char **argv) { - bsg_zynq_pl *zpl = new bsg_zynq_pl(argc, argv); - zpl->start(); - - zpl->shell_write(GP0_WR_CSR_TINIT, 0, 0xf); - assert(zpl->shell_read(GP0_RD_CSR_TINIT) == 0); - - for (int i = 0; i < 16; i++) { - while (!zpl->shell_read(GP0_RD_PS2PL_FIFO_CTRS)); - zpl->shell_write(GP0_WR_PS2PL_FIFO_DATA, i, 0xf); - } - - // Make sure the fifo data is correct - for (int i = 0; i < 16; i++) { - while (!zpl->shell_read(GP0_RD_PL2PS_FIFO_CTRS)); - assert(zpl->shell_read(GP0_RD_PL2PS_FIFO_DATA) == i); - } - - // Make sure TLAST was set correctly - assert(zpl->shell_read(GP0_RD_CSR_TSTATUS) == 1); - - printf("## everything passed; at end of test\n"); - for (int i = 0; i < 50; i++) zpl->tick(); - zpl->stop(); - zpl->done(); - - delete zpl; - return 0; + bsg_zynq_pl *zpl = new bsg_zynq_pl(argc, argv); + zpl->start(); + + zpl->shell_write(GP0_WR_CSR_TINIT, 0, 0xf); + assert(zpl->shell_read(GP0_RD_CSR_TINIT) == 0); + + for (int i = 0; i < 16; i++) { + while (!zpl->shell_read(GP0_RD_PS2PL_FIFO_CTRS)) + ; + zpl->shell_write(GP0_WR_PS2PL_FIFO_DATA, i, 0xf); + } + + // Make sure the fifo data is correct + for (int i = 0; i < 16; i++) { + while (!zpl->shell_read(GP0_RD_PL2PS_FIFO_CTRS)) + ; + assert(zpl->shell_read(GP0_RD_PL2PS_FIFO_DATA) == i); + } + + // Make sure TLAST was set correctly + assert(zpl->shell_read(GP0_RD_CSR_TSTATUS) == 1); + + printf("## everything passed; at end of test\n"); + for (int i = 0; i < 50; i++) + zpl->tick(); + zpl->stop(); + zpl->done(); + + delete zpl; + return 0; } diff --git a/cosim/black-parrot-example/ps.cpp b/cosim/black-parrot-example/ps.cpp index f3a5ecd6..137d67c9 100644 --- a/cosim/black-parrot-example/ps.cpp +++ b/cosim/black-parrot-example/ps.cpp @@ -4,20 +4,20 @@ // the API we provide abstracts away the // communication plumbing differences. -#include -#include +#include #include #include -#include #include +#include +#include +#include #include -#include #include "ps.hpp" +#include "bsg_printing.h" #include "bsg_tag_bitbang.h" #include "bsg_zynq_pl.h" -#include "bsg_printing.h" #ifndef FREE_DRAM #define FREE_DRAM 0 @@ -49,435 +49,446 @@ std::queue getchar_queue; std::bitset done_vec; void *monitor(void *vargp) { - char c; - while(1) { - c = getchar(); - if(c != -1) - getchar_queue.push(c); - } - bsg_pr_info("Exiting from pthread\n"); - - return NULL; + char c; + while (1) { + c = getchar(); + if (c != -1) + getchar_queue.push(c); + } + bsg_pr_info("Exiting from pthread\n"); + + return NULL; } inline uint64_t get_counter_64(bsg_zynq_pl *zpl, uint64_t addr) { - uint64_t val; - do { - uint64_t val_hi = zpl->shell_read(addr + 4); - uint64_t val_lo = zpl->shell_read(addr + 0); - uint64_t val_hi2 = zpl->shell_read(addr + 4); - if (val_hi == val_hi2) { - val = val_hi << 32; - val += val_lo; - return val; - } else - bsg_pr_err("ps.cpp: timer wrapover!\n"); - } while (1); + uint64_t val; + do { + uint64_t val_hi = zpl->shell_read(addr + 4); + uint64_t val_lo = zpl->shell_read(addr + 0); + uint64_t val_hi2 = zpl->shell_read(addr + 4); + if (val_hi == val_hi2) { + val = val_hi << 32; + val += val_lo; + return val; + } else + bsg_pr_err("ps.cpp: timer wrapover!\n"); + } while (1); } int ps_main(int argc, char **argv) { - bsg_zynq_pl *zpl = new bsg_zynq_pl(argc, argv); - - long data; - long val1 = 0x1; - long val2 = 0x0; - long mask1 = 0xf; - long mask2 = 0xf; - - pthread_t thread_id; - - int32_t val; - bsg_pr_info("ps.cpp: reading four base registers\n"); - bsg_pr_info("ps.cpp: reset(lo)=%d, bitbang=%d, dram_init=%d, dram_base=%d\n", - zpl->shell_read(GP0_RD_CSR_SYS_RESETN), - zpl->shell_read(GP0_RD_CSR_TAG_BITBANG), - zpl->shell_read(GP0_RD_CSR_DRAM_INITED), - val = zpl->shell_read(GP0_RD_CSR_DRAM_BASE)); - - bsg_pr_info("ps.cpp: attempting to write and read register 0x8\n"); - - zpl->shell_write(GP0_WR_CSR_DRAM_BASE, 0xDEADBEEF, mask1); - assert((zpl->shell_read(GP0_RD_CSR_DRAM_BASE) == (0xDEADBEEF))); - zpl->shell_write(GP0_WR_CSR_DRAM_BASE, val, mask1); - assert((zpl->shell_read(GP0_RD_CSR_DRAM_BASE) == (val))); - - bsg_pr_info("ps.cpp: successfully wrote and read registers in bsg_zynq_shell " - "(verified ARM GP0 connection)\n"); - - bsg_tag_bitbang *btb = new bsg_tag_bitbang(zpl, GP0_WR_CSR_TAG_BITBANG, TAG_NUM_CLIENTS, TAG_MAX_LEN); - bsg_tag_client *pl_reset_client = new bsg_tag_client(TAG_CLIENT_PL_RESET_ID, TAG_CLIENT_PL_RESET_WIDTH); - bsg_tag_client *wd_reset_client = new bsg_tag_client(TAG_CLIENT_WD_RESET_ID, TAG_CLIENT_WD_RESET_WIDTH); - - // Reset the bsg tag master - btb->reset_master(); - // Reset bsg client0 - btb->reset_client(pl_reset_client); - // Reset bsg client1 - btb->reset_client(wd_reset_client); - // Set bsg client0 to 1 (assert BP reset) - btb->set_client(pl_reset_client, 0x1); - // Set bsg client1 to 1 (assert WD reset) - btb->set_client(wd_reset_client, 0x1); - // Set bsg client0 to 0 (deassert BP reset) - btb->set_client(pl_reset_client, 0x0); - - // We need some additional toggles for data to propagate through - btb->idle(50); - // Deassert the active-low system reset as we finish initializing the whole system - zpl->shell_write(GP0_RD_CSR_SYS_RESETN, 0x1, 0xF); - - unsigned long phys_ptr; - volatile int *buf; - data = zpl->shell_read(GP0_RD_CSR_DRAM_INITED); - if (data == 0) { - bsg_pr_info( - "ps.cpp: CSRs do not contain a DRAM base pointer; calling allocate " - "dram with size %ld\n", - (long)DRAM_ALLOCATE_SIZE); - buf = (volatile int*)zpl->allocate_dram(DRAM_ALLOCATE_SIZE, &phys_ptr); - bsg_pr_info("ps.cpp: received %p (phys = %lx)\n", buf, phys_ptr); - zpl->shell_write(GP0_WR_CSR_DRAM_BASE, phys_ptr, mask1); - assert((zpl->shell_read(GP0_RD_CSR_DRAM_BASE) == (int32_t)(phys_ptr))); - bsg_pr_info("ps.cpp: wrote and verified base register\n"); - zpl->shell_write(GP0_WR_CSR_DRAM_INITED, 0x1, mask2); - assert(zpl->shell_read(GP0_RD_CSR_DRAM_INITED) == 1); - } else { - bsg_pr_info("ps.cpp: reusing dram base pointer %x\n", - zpl->shell_read(GP0_RD_CSR_DRAM_BASE)); - } - - int outer = 1024 / 4; - - if (argc == 1) { - bsg_pr_warn( - "No nbf file specified, sleeping for 2^31 seconds (this will hold " - "onto allocated DRAM)\n"); - sleep(1U << 31); - delete zpl; - } + bsg_zynq_pl *zpl = new bsg_zynq_pl(argc, argv); - bsg_pr_info("ps.cpp: attempting to read mtime reg in BP CFG space, should " - "increase monotonically (testing ARM GP1 connections)\n"); + long data; + long val1 = 0x1; + long val2 = 0x0; + long mask1 = 0xf; + long mask2 = 0xf; - for (int q = 0; q < 10; q++) { - int z = get_counter_64(zpl, GP1_CSR_BASE_ADDR + 0x30bff8); - bsg_pr_dbg_ps("ps.cpp: %d-%d\n", q, z); - } + pthread_t thread_id; - bsg_pr_info("ps.cpp: attempting to read and write mtime reg in BP CFG space " - "(testing ARM GP1 connections)\n"); + int32_t val; + bsg_pr_info("ps.cpp: reading four base registers\n"); + bsg_pr_info( + "ps.cpp: reset(lo)=%d, bitbang=%d, dram_init=%d, dram_base=%d\n", + zpl->shell_read(GP0_RD_CSR_SYS_RESETN), + zpl->shell_read(GP0_RD_CSR_TAG_BITBANG), + zpl->shell_read(GP0_RD_CSR_DRAM_INITED), + val = zpl->shell_read(GP0_RD_CSR_DRAM_BASE)); - bsg_pr_info("ps.cpp: reading mtimecmp\n"); - int y = zpl->shell_read(GP1_CSR_BASE_ADDR + 0x304000); + bsg_pr_info("ps.cpp: attempting to write and read register 0x8\n"); - bsg_pr_info("ps.cpp: writing mtimecmp\n"); - zpl->shell_write(GP1_CSR_BASE_ADDR + 0x304000, y + 1, mask1); + zpl->shell_write(GP0_WR_CSR_DRAM_BASE, 0xDEADBEEF, mask1); + assert((zpl->shell_read(GP0_RD_CSR_DRAM_BASE) == (0xDEADBEEF))); + zpl->shell_write(GP0_WR_CSR_DRAM_BASE, val, mask1); + assert((zpl->shell_read(GP0_RD_CSR_DRAM_BASE) == (val))); - bsg_pr_info("ps.cpp: reading mtimecmp\n"); - assert(zpl->shell_read(GP1_CSR_BASE_ADDR + 0x304000) == y + 1); + bsg_pr_info( + "ps.cpp: successfully wrote and read registers in bsg_zynq_shell " + "(verified ARM GP0 connection)\n"); + + bsg_tag_bitbang *btb = new bsg_tag_bitbang(zpl, GP0_WR_CSR_TAG_BITBANG, + TAG_NUM_CLIENTS, TAG_MAX_LEN); + bsg_tag_client *pl_reset_client = + new bsg_tag_client(TAG_CLIENT_PL_RESET_ID, TAG_CLIENT_PL_RESET_WIDTH); + bsg_tag_client *wd_reset_client = + new bsg_tag_client(TAG_CLIENT_WD_RESET_ID, TAG_CLIENT_WD_RESET_WIDTH); + + // Reset the bsg tag master + btb->reset_master(); + // Reset bsg client0 + btb->reset_client(pl_reset_client); + // Reset bsg client1 + btb->reset_client(wd_reset_client); + // Set bsg client0 to 1 (assert BP reset) + btb->set_client(pl_reset_client, 0x1); + // Set bsg client1 to 1 (assert WD reset) + btb->set_client(wd_reset_client, 0x1); + // Set bsg client0 to 0 (deassert BP reset) + btb->set_client(pl_reset_client, 0x0); + + // We need some additional toggles for data to propagate through + btb->idle(50); + // Deassert the active-low system reset as we finish initializing the whole + // system + zpl->shell_write(GP0_RD_CSR_SYS_RESETN, 0x1, 0xF); + + unsigned long phys_ptr; + volatile int *buf; + data = zpl->shell_read(GP0_RD_CSR_DRAM_INITED); + if (data == 0) { + bsg_pr_info( + "ps.cpp: CSRs do not contain a DRAM base pointer; calling allocate " + "dram with size %ld\n", + (long)DRAM_ALLOCATE_SIZE); + buf = (volatile int *)zpl->allocate_dram(DRAM_ALLOCATE_SIZE, &phys_ptr); + bsg_pr_info("ps.cpp: received %p (phys = %lx)\n", buf, phys_ptr); + zpl->shell_write(GP0_WR_CSR_DRAM_BASE, phys_ptr, mask1); + assert((zpl->shell_read(GP0_RD_CSR_DRAM_BASE) == (int32_t)(phys_ptr))); + bsg_pr_info("ps.cpp: wrote and verified base register\n"); + zpl->shell_write(GP0_WR_CSR_DRAM_INITED, 0x1, mask2); + assert(zpl->shell_read(GP0_RD_CSR_DRAM_INITED) == 1); + } else { + bsg_pr_info("ps.cpp: reusing dram base pointer %x\n", + zpl->shell_read(GP0_RD_CSR_DRAM_BASE)); + } -#ifdef DRAM_TEST + int outer = 1024 / 4; - long num_times = DRAM_ALLOCATE_SIZE / 32768; - bsg_pr_info( - "ps.cpp: attempting to write L2 %ld times over %ld MB (testing ARM GP1 " - "and HP0 connections)\n", - num_times * outer, (long)((DRAM_ALLOCATE_SIZE) >> 20)); - zpl->shell_write(GP1_DRAM_BASE_ADDR, 0x12345678, mask1); - - for (int s = 0; s < outer; s++) - for (int t = 0; t < num_times; t++) { - zpl->shell_write(GP1_DRAM_BASE_ADDR + 32768 * t + s * 4, 0x1ADACACA + t + s, - mask1); + if (argc == 1) { + bsg_pr_warn( + "No nbf file specified, sleeping for 2^31 seconds (this will hold " + "onto allocated DRAM)\n"); + sleep(1U << 31); + delete zpl; } - bsg_pr_info("ps.cpp: finished write L2 %ld times over %ld MB\n", - num_times * outer, (long)((DRAM_ALLOCATE_SIZE) >> 20)); - int mismatches = 0; - int matches = 0; + bsg_pr_info("ps.cpp: attempting to read mtime reg in BP CFG space, should " + "increase monotonically (testing ARM GP1 connections)\n"); + + for (int q = 0; q < 10; q++) { + int z = get_counter_64(zpl, GP1_CSR_BASE_ADDR + 0x30bff8); + bsg_pr_dbg_ps("ps.cpp: %d-%d\n", q, z); + } + + bsg_pr_info( + "ps.cpp: attempting to read and write mtime reg in BP CFG space " + "(testing ARM GP1 connections)\n"); + + bsg_pr_info("ps.cpp: reading mtimecmp\n"); + int y = zpl->shell_read(GP1_CSR_BASE_ADDR + 0x304000); + + bsg_pr_info("ps.cpp: writing mtimecmp\n"); + zpl->shell_write(GP1_CSR_BASE_ADDR + 0x304000, y + 1, mask1); + + bsg_pr_info("ps.cpp: reading mtimecmp\n"); + assert(zpl->shell_read(GP1_CSR_BASE_ADDR + 0x304000) == y + 1); + +#ifdef DRAM_TEST + + long num_times = DRAM_ALLOCATE_SIZE / 32768; + bsg_pr_info( + "ps.cpp: attempting to write L2 %ld times over %ld MB (testing ARM GP1 " + "and HP0 connections)\n", + num_times * outer, (long)((DRAM_ALLOCATE_SIZE) >> 20)); + zpl->shell_write(GP1_DRAM_BASE_ADDR, 0x12345678, mask1); + + for (int s = 0; s < outer; s++) + for (int t = 0; t < num_times; t++) { + zpl->shell_write(GP1_DRAM_BASE_ADDR + 32768 * t + s * 4, + 0x1ADACACA + t + s, mask1); + } + bsg_pr_info("ps.cpp: finished write L2 %ld times over %ld MB\n", + num_times * outer, (long)((DRAM_ALLOCATE_SIZE) >> 20)); + + int mismatches = 0; + int matches = 0; #ifdef ZYNQ - for (int s = 0; s < outer; s++) - for (int t = 0; t < num_times; t++) - if (buf[(32768 * t + s * 4) / 4] == 0x1ADACACA + t + s) - matches++; - else - mismatches++; - - bsg_pr_info("ps.cpp: DIRECT access from ARM to DDR (some L1/L2 coherence " - "mismatches expected) %d matches, %d mismatches, %f\n", - matches, mismatches, - ((float)matches) / (float)(mismatches + matches)); + for (int s = 0; s < outer; s++) + for (int t = 0; t < num_times; t++) + if (buf[(32768 * t + s * 4) / 4] == 0x1ADACACA + t + s) + matches++; + else + mismatches++; + + bsg_pr_info("ps.cpp: DIRECT access from ARM to DDR (some L1/L2 coherence " + "mismatches expected) %d matches, %d mismatches, %f\n", + matches, mismatches, + ((float)matches) / (float)(mismatches + matches)); #endif - bsg_pr_info( - "ps.cpp: attempting to read L2 %ld times over %ld MB (testing ARM GP1 " - "and HP0 connections)\n", - num_times * outer, (long)((DRAM_ALLOCATE_SIZE) >> 20)); - for (int s = 0; s < outer; s++) - for (int t = 0; t < num_times; t++) - if (zpl->shell_read(GP1_DRAM_BASE_ADDR + 32768 * t + s * 4) == 0x1ADACACA + t + s) - matches++; - else - mismatches++; - - bsg_pr_info("ps.cpp: READ access through BP (some L1 coherence mismatch " - "expected): %d matches, %d mismatches, %f\n", - matches, mismatches, - ((float)matches) / (float)(mismatches + matches)); + bsg_pr_info( + "ps.cpp: attempting to read L2 %ld times over %ld MB (testing ARM GP1 " + "and HP0 connections)\n", + num_times * outer, (long)((DRAM_ALLOCATE_SIZE) >> 20)); + for (int s = 0; s < outer; s++) + for (int t = 0; t < num_times; t++) + if (zpl->shell_read(GP1_DRAM_BASE_ADDR + 32768 * t + s * 4) == + 0x1ADACACA + t + s) + matches++; + else + mismatches++; + + bsg_pr_info("ps.cpp: READ access through BP (some L1 coherence mismatch " + "expected): %d matches, %d mismatches, %f\n", + matches, mismatches, + ((float)matches) / (float)(mismatches + matches)); #endif // DRAM_TEST - // Must zero DRAM for FPGA Linux boot, because opensbi payload mode - // obliterates the section names of the payload (Linux) - if (ZERO_DRAM) { - bsg_pr_info("ps.cpp: Zero-ing DRAM (%d bytes)\n", (int)DRAM_ALLOCATE_SIZE); - for (int i = 0; i < DRAM_ALLOCATE_SIZE; i+=4) { - if (i % (1024*1024) == 0) bsg_pr_info("ps.cpp: zero-d %d MB\n", i/(1024*1024)); - zpl->shell_write(gp1_addr_base + i, 0x0, mask1); + // Must zero DRAM for FPGA Linux boot, because opensbi payload mode + // obliterates the section names of the payload (Linux) + if (ZERO_DRAM) { + bsg_pr_info("ps.cpp: Zero-ing DRAM (%d bytes)\n", + (int)DRAM_ALLOCATE_SIZE); + for (int i = 0; i < DRAM_ALLOCATE_SIZE; i += 4) { + if (i % (1024 * 1024) == 0) + bsg_pr_info("ps.cpp: zero-d %d MB\n", i / (1024 * 1024)); + zpl->shell_write(gp1_addr_base + i, 0x0, mask1); + } } - } - - bsg_pr_info("ps.cpp: beginning nbf load\n"); - nbf_load(zpl, argv[1]); - struct timespec start, end; - clock_gettime(CLOCK_MONOTONIC, &start); - unsigned long long minstret_start = get_counter_64(zpl, GP0_RD_MINSTRET); - unsigned long long mtime_start = get_counter_64(zpl, GP1_CSR_BASE_ADDR + 0x30bff8); - bsg_pr_info("ps.cpp: finished nbf load\n"); - - // Set bsg client1 to 0 (deassert WD reset) - btb->set_client(wd_reset_client, 0x1); - bsg_pr_info("ps.cpp: starting watchdog\n"); - // We need some additional toggles for data to propagate through - btb->idle(50); - zpl->start(); - - bsg_pr_info("ps.cpp: Starting scan thread\n"); - pthread_create(&thread_id, NULL, monitor, NULL); - - bsg_pr_info("ps.cpp: Starting i/o polling thread\n"); - while (1) { - // keep reading as long as there is data - if (zpl->shell_read(GP0_RD_PL2PS_FIFO_CTRS) != 0) { - decode_bp_output(zpl, zpl->shell_read(GP0_RD_PL2PS_FIFO_DATA)); + + bsg_pr_info("ps.cpp: beginning nbf load\n"); + nbf_load(zpl, argv[1]); + struct timespec start, end; + clock_gettime(CLOCK_MONOTONIC, &start); + unsigned long long minstret_start = get_counter_64(zpl, GP0_RD_MINSTRET); + unsigned long long mtime_start = + get_counter_64(zpl, GP1_CSR_BASE_ADDR + 0x30bff8); + bsg_pr_info("ps.cpp: finished nbf load\n"); + + // Set bsg client1 to 0 (deassert WD reset) + btb->set_client(wd_reset_client, 0x1); + bsg_pr_info("ps.cpp: starting watchdog\n"); + // We need some additional toggles for data to propagate through + btb->idle(50); + zpl->start(); + + bsg_pr_info("ps.cpp: Starting scan thread\n"); + pthread_create(&thread_id, NULL, monitor, NULL); + + bsg_pr_info("ps.cpp: Starting i/o polling thread\n"); + while (1) { + // keep reading as long as there is data + if (zpl->shell_read(GP0_RD_PL2PS_FIFO_CTRS) != 0) { + decode_bp_output(zpl, zpl->shell_read(GP0_RD_PL2PS_FIFO_DATA)); + } + // break loop when all cores done + if (done_vec.all()) { + break; + } } - // break loop when all cores done - if (done_vec.all()) { - break; + + // Set bsg client1 to 1 (assert WD reset) + btb->set_client(wd_reset_client, 0x1); + bsg_pr_info("ps.cpp: stopping watchdog\n"); + // We need some additional toggles for data to propagate through + btb->idle(50); + zpl->stop(); + + unsigned long long minstret_stop = get_counter_64(zpl, GP0_RD_MINSTRET); + unsigned long long mtime_stop = + get_counter_64(zpl, GP1_CSR_BASE_ADDR + 0x30bff8); + // test delay for reading counter + unsigned long long counter_data = get_counter_64(zpl, GP0_RD_MINSTRET); + clock_gettime(CLOCK_MONOTONIC, &end); + setlocale(LC_NUMERIC, ""); + bsg_pr_info("ps.cpp: end polling i/o\n"); + bsg_pr_info("ps.cpp: minstret (instructions retired): %'16llu (%16llx)\n", + minstret_start, minstret_start); + bsg_pr_info("ps.cpp: minstret (instructions retired): %'16llu (%16llx)\n", + minstret_stop, minstret_stop); + unsigned long long minstret_delta = minstret_stop - minstret_start; + bsg_pr_info("ps.cpp: minstret delta: %'16llu (%16llx)\n", + minstret_delta, minstret_delta); + bsg_pr_info("ps.cpp: MTIME start: %'16llu (%16llx)\n", + mtime_start, mtime_start); + bsg_pr_info("ps.cpp: MTIME stop: %'16llu (%16llx)\n", + mtime_stop, mtime_stop); + unsigned long long mtime_delta = mtime_stop - mtime_start; + bsg_pr_info("ps.cpp: MTIME delta (=1/8 BP cycles): %'16llu (%16llx)\n", + mtime_delta, mtime_delta); + bsg_pr_info("ps.cpp: IPC : %'16f\n", + ((double)minstret_delta) / ((double)(mtime_delta)) / 8.0); + bsg_pr_info("ps.cpp: minstret (instructions retired): %'16llu (%16llx)\n", + counter_data, counter_data); + unsigned long long diff_ns = + 1000LL * 1000LL * 1000LL * + ((unsigned long long)(end.tv_sec - start.tv_sec)) + + (end.tv_nsec - start.tv_nsec); + bsg_pr_info( + "ps.cpp: wall clock time : %'16llu (%16llx) ns\n", + diff_ns, diff_ns); + bsg_pr_info("ps.cpp: sim/emul speed : %'16.2f BP cycles " + "per minute\n", + mtime_delta * 8 / + ((double)(diff_ns) / (60.0 * 1000.0 * 1000.0 * 1000.0))); + + bsg_pr_info( + "ps.cpp: BP DRAM USAGE MASK (each bit is 8 MB): " + "%-8.8d%-8.8d%-8.8d%-8.8d\n", + zpl->shell_read(GP0_RD_MEM_PROF_3), zpl->shell_read(GP0_RD_MEM_PROF_2), + zpl->shell_read(GP0_RD_MEM_PROF_1), zpl->shell_read(GP0_RD_MEM_PROF_0)); + // in general we do not want to free the dram; the Xilinx allocator has a + // tendency to + // fail after many allocate/fail cycle. instead we keep a pointer to the + // dram in a CSR in the accelerator, and if we reload the bitstream, we copy + // the pointer back in. + + if (FREE_DRAM) { + bsg_pr_info("ps.cpp: freeing DRAM buffer\n"); + zpl->free_dram((void *)buf); + zpl->shell_write(GP0_WR_CSR_DRAM_INITED, 0x0, mask2); } - } - - // Set bsg client1 to 1 (assert WD reset) - btb->set_client(wd_reset_client, 0x1); - bsg_pr_info("ps.cpp: stopping watchdog\n"); - // We need some additional toggles for data to propagate through - btb->idle(50); - zpl->stop(); - - unsigned long long minstret_stop = get_counter_64(zpl, GP0_RD_MINSTRET); - unsigned long long mtime_stop = get_counter_64(zpl, GP1_CSR_BASE_ADDR + 0x30bff8); - // test delay for reading counter - unsigned long long counter_data = get_counter_64(zpl, GP0_RD_MINSTRET); - clock_gettime(CLOCK_MONOTONIC, &end); - setlocale(LC_NUMERIC, ""); - bsg_pr_info("ps.cpp: end polling i/o\n"); - bsg_pr_info("ps.cpp: minstret (instructions retired): %'16llu (%16llx)\n", - minstret_start, minstret_start); - bsg_pr_info("ps.cpp: minstret (instructions retired): %'16llu (%16llx)\n", - minstret_stop, minstret_stop); - unsigned long long minstret_delta = minstret_stop - minstret_start; - bsg_pr_info("ps.cpp: minstret delta: %'16llu (%16llx)\n", - minstret_delta, minstret_delta); - bsg_pr_info("ps.cpp: MTIME start: %'16llu (%16llx)\n", - mtime_start, mtime_start); - bsg_pr_info("ps.cpp: MTIME stop: %'16llu (%16llx)\n", - mtime_stop, mtime_stop); - unsigned long long mtime_delta = mtime_stop - mtime_start; - bsg_pr_info("ps.cpp: MTIME delta (=1/8 BP cycles): %'16llu (%16llx)\n", - mtime_delta, mtime_delta); - bsg_pr_info("ps.cpp: IPC : %'16f\n", - ((double)minstret_delta) / ((double)(mtime_delta)) / 8.0); - bsg_pr_info("ps.cpp: minstret (instructions retired): %'16llu (%16llx)\n", - counter_data, counter_data); - unsigned long long diff_ns = - 1000LL * 1000LL * 1000LL * - ((unsigned long long)(end.tv_sec - start.tv_sec)) + - (end.tv_nsec - start.tv_nsec); - bsg_pr_info("ps.cpp: wall clock time : %'16llu (%16llx) ns\n", - diff_ns, diff_ns); - bsg_pr_info( - "ps.cpp: sim/emul speed : %'16.2f BP cycles per minute\n", - mtime_delta * 8 / - ((double)(diff_ns) / (60.0 * 1000.0 * 1000.0 * 1000.0))); - - bsg_pr_info("ps.cpp: BP DRAM USAGE MASK (each bit is 8 MB): " - "%-8.8d%-8.8d%-8.8d%-8.8d\n", - zpl->shell_read(GP0_RD_MEM_PROF_3), - zpl->shell_read(GP0_RD_MEM_PROF_2), - zpl->shell_read(GP0_RD_MEM_PROF_1), - zpl->shell_read(GP0_RD_MEM_PROF_0)); - // in general we do not want to free the dram; the Xilinx allocator has a - // tendency to - // fail after many allocate/fail cycle. instead we keep a pointer to the dram - // in a CSR - // in the accelerator, and if we reload the bitstream, we copy the pointer - // back in. - - if (FREE_DRAM) { - bsg_pr_info("ps.cpp: freeing DRAM buffer\n"); - zpl->free_dram((void *)buf); - zpl->shell_write(GP0_WR_CSR_DRAM_INITED, 0x0, mask2); - } - - zpl->done(); - delete zpl; - return 0; + + zpl->done(); + delete zpl; + return 0; } std::uint32_t rotl(std::uint32_t v, std::int32_t shift) { - std::int32_t s = shift>=0? shift%32 : -((-shift)%32); - return (v<>(32-s)); + std::int32_t s = shift >= 0 ? shift % 32 : -((-shift) % 32); + return (v << s) | (v >> (32 - s)); } void nbf_load(bsg_zynq_pl *zpl, char *nbf_filename) { - string nbf_command; - string tmp; - string delimiter = "_"; - - long long int nbf[3]; - int pos = 0; - long unsigned int base_addr; - int data; - ifstream nbf_file(nbf_filename); - - if (!nbf_file.is_open()) { - bsg_pr_err("ps.cpp: error opening nbf file.\n"); - delete zpl; - return; - } - - int line_count = 0; - while (getline(nbf_file, nbf_command)) { - line_count++; - int i = 0; - while ((pos = nbf_command.find(delimiter)) != std::string::npos) { - tmp = nbf_command.substr(0, pos); - nbf[i] = std::stoull(tmp, nullptr, 16); - nbf_command.erase(0, pos + 1); - i++; - } - nbf[i] = std::stoull(nbf_command, nullptr, 16); - - if (nbf[0] == 0x3 || nbf[0] == 0x2 || nbf[0] == 0x1 || nbf[0] == 0x0) { - // we map BP physical addresses for DRAM (0x8000_0000 - 0x9FFF_FFFF) (256MB) - // to the same ARM physical addresses - // see top_zynq.v for more details - - // we map BP physical address for CSRs etc (0x0000_0000 - 0x0FFF_FFFF) - // to ARM address to 0xA0000_0000 - 0xAFFF_FFFF (256MB) - if (nbf[1] >= DRAM_BASE_ADDR) - base_addr = gp1_addr_base - DRAM_BASE_ADDR; - else - base_addr = GP1_CSR_BASE_ADDR; - - if (nbf[0] == 0x3) { - zpl->shell_write(base_addr + nbf[1], nbf[2], 0xf); - zpl->shell_write(base_addr + nbf[1] + 4, nbf[2] >> 32, 0xf); - } - else if (nbf[0] == 0x2) { - zpl->shell_write(base_addr + nbf[1], nbf[2], 0xf); - } - else if (nbf[0] == 0x1) { - int offset = nbf[1] % 4; - int shift = 2 * offset; - data = zpl->shell_read(base_addr + nbf[1] - offset); - data = data & rotl((uint32_t)0xffff0000,shift) + nbf[2] & ((uint32_t)0x0000ffff << shift); - zpl->shell_write(base_addr + nbf[1] - offset, data, 0xf); - } - else { - int offset = nbf[1] % 4; - int shift = 2 * offset; - data = zpl->shell_read(base_addr + nbf[1] - offset); - data = data & rotl((uint32_t)0xffffff00,shift) + nbf[2] & ((uint32_t)0x000000ff << shift); - zpl->shell_write(base_addr + nbf[1] - offset, data, 0xf); - } + string nbf_command; + string tmp; + string delimiter = "_"; + + long long int nbf[3]; + int pos = 0; + long unsigned int base_addr; + int data; + ifstream nbf_file(nbf_filename); + + if (!nbf_file.is_open()) { + bsg_pr_err("ps.cpp: error opening nbf file.\n"); + delete zpl; + return; } - else if (nbf[0] == 0xfe) { - continue; - } else if (nbf[0] == 0xff) { - bsg_pr_dbg_ps("ps.cpp: nbf finish command, line %d\n", line_count); - continue; - } else { - bsg_pr_dbg_ps("ps.cpp: unrecognized nbf command, line %d : %llx\n", - line_count, nbf[0]); - return; + + int line_count = 0; + while (getline(nbf_file, nbf_command)) { + line_count++; + int i = 0; + while ((pos = nbf_command.find(delimiter)) != std::string::npos) { + tmp = nbf_command.substr(0, pos); + nbf[i] = std::stoull(tmp, nullptr, 16); + nbf_command.erase(0, pos + 1); + i++; + } + nbf[i] = std::stoull(nbf_command, nullptr, 16); + + if (nbf[0] == 0x3 || nbf[0] == 0x2 || nbf[0] == 0x1 || nbf[0] == 0x0) { + // we map BP physical addresses for DRAM (0x8000_0000 - 0x9FFF_FFFF) + // (256MB) + // to the same ARM physical addresses + // see top_zynq.v for more details + + // we map BP physical address for CSRs etc (0x0000_0000 - + // 0x0FFF_FFFF) to ARM address to 0xA0000_0000 - 0xAFFF_FFFF + // (256MB) + if (nbf[1] >= DRAM_BASE_ADDR) + base_addr = gp1_addr_base - DRAM_BASE_ADDR; + else + base_addr = GP1_CSR_BASE_ADDR; + + if (nbf[0] == 0x3) { + zpl->shell_write(base_addr + nbf[1], nbf[2], 0xf); + zpl->shell_write(base_addr + nbf[1] + 4, nbf[2] >> 32, 0xf); + } else if (nbf[0] == 0x2) { + zpl->shell_write(base_addr + nbf[1], nbf[2], 0xf); + } else if (nbf[0] == 0x1) { + int offset = nbf[1] % 4; + int shift = 2 * offset; + data = zpl->shell_read(base_addr + nbf[1] - offset); + data = data & rotl((uint32_t)0xffff0000, shift) + nbf[2] & + ((uint32_t)0x0000ffff << shift); + zpl->shell_write(base_addr + nbf[1] - offset, data, 0xf); + } else { + int offset = nbf[1] % 4; + int shift = 2 * offset; + data = zpl->shell_read(base_addr + nbf[1] - offset); + data = data & rotl((uint32_t)0xffffff00, shift) + nbf[2] & + ((uint32_t)0x000000ff << shift); + zpl->shell_write(base_addr + nbf[1] - offset, data, 0xf); + } + } else if (nbf[0] == 0xfe) { + continue; + } else if (nbf[0] == 0xff) { + bsg_pr_dbg_ps("ps.cpp: nbf finish command, line %d\n", line_count); + continue; + } else { + bsg_pr_dbg_ps("ps.cpp: unrecognized nbf command, line %d : %llx\n", + line_count, nbf[0]); + return; + } } - } - bsg_pr_dbg_ps("ps.cpp: finished loading %d lines of nbf.\n", line_count); + bsg_pr_dbg_ps("ps.cpp: finished loading %d lines of nbf.\n", line_count); } bool decode_bp_output(bsg_zynq_pl *zpl, long data) { - long rd_wr = data >> 31; - long address = (data >> 8) & 0x7FFFFC; - char print_data = data & 0xFF; - char core = (address-0x102000) >> 3; - // write from BP - if (rd_wr) { - if (address == 0x101000) { - printf("%c", print_data); - fflush(stdout); - } else if (address >= 0x102000 && address < 0x103000) { - done_vec[core] = true; - if (print_data == 0) { - bsg_pr_info("CORE[%d] PASS\n", core); - } else { - bsg_pr_info("CORE[%d] FAIL\n", core); - } - } else if (address == 0x103000) { - bsg_pr_dbg_ps("ps.cpp: Watchdog tick\n"); - } else { - bsg_pr_err("ps.cpp: Errant write to %lx\n", address); - return false; + long rd_wr = data >> 31; + long address = (data >> 8) & 0x7FFFFC; + char print_data = data & 0xFF; + char core = (address - 0x102000) >> 3; + // write from BP + if (rd_wr) { + if (address == 0x101000) { + printf("%c", print_data); + fflush(stdout); + } else if (address >= 0x102000 && address < 0x103000) { + done_vec[core] = true; + if (print_data == 0) { + bsg_pr_info("CORE[%d] PASS\n", core); + } else { + bsg_pr_info("CORE[%d] FAIL\n", core); + } + } else if (address == 0x103000) { + bsg_pr_dbg_ps("ps.cpp: Watchdog tick\n"); + } else { + bsg_pr_err("ps.cpp: Errant write to %lx\n", address); + return false; + } } - } - // read from BP - else { - // getchar - if (address == 0x100000) { - if (getchar_queue.empty()) { - zpl->shell_write(GP0_WR_PS2PL_FIFO_DATA, -1, 0xf); - } else { - zpl->shell_write(GP0_WR_PS2PL_FIFO_DATA, getchar_queue.front(), 0xf); - getchar_queue.pop(); - } + // read from BP + else { + // getchar + if (address == 0x100000) { + if (getchar_queue.empty()) { + zpl->shell_write(GP0_WR_PS2PL_FIFO_DATA, -1, 0xf); + } else { + zpl->shell_write(GP0_WR_PS2PL_FIFO_DATA, getchar_queue.front(), + 0xf); + getchar_queue.pop(); + } + } + // parameter ROM, only partially implemented + else if (address >= 0x120000 && address <= 0x120128) { + bsg_pr_dbg_ps("ps.cpp: PARAM ROM read from (%lx)\n", address); + int offset = address - 0x120000; + // CC_X_DIM, return number of cores + if (offset == 0x0) { + zpl->shell_write(GP0_WR_PS2PL_FIFO_DATA, BP_NCPUS, 0xf); + } + // CC_Y_DIM, just return 1 so X*Y == number of cores + else if (offset == 0x4) { + zpl->shell_write(GP0_WR_PS2PL_FIFO_DATA, 1, 0xf); + } + } else if (address >= 0x110000 && address < 0x111000) { + int bootrom_addr = (address >> 2) & 0xfff; + zpl->shell_write(GP0_WR_CSR_BOOTROM_ADDR, bootrom_addr, 0xf); + int bootrom_data = zpl->shell_read(GP0_RD_BOOTROM_DATA); + zpl->shell_write(GP0_WR_PS2PL_FIFO_DATA, bootrom_data, 0xf); + // if not implemented, print error + } else { + bsg_pr_err("ps.cpp: Errant read from (%lx)\n", address); + return false; + } } - // parameter ROM, only partially implemented - else if (address >= 0x120000 && address <= 0x120128) { - bsg_pr_dbg_ps("ps.cpp: PARAM ROM read from (%lx)\n", address); - int offset = address - 0x120000; - // CC_X_DIM, return number of cores - if (offset == 0x0) { - zpl->shell_write(GP0_WR_PS2PL_FIFO_DATA, BP_NCPUS, 0xf); - } - // CC_Y_DIM, just return 1 so X*Y == number of cores - else if (offset == 0x4) { - zpl->shell_write(GP0_WR_PS2PL_FIFO_DATA, 1, 0xf); - } - } else if (address >= 0x110000 && address < 0x111000) { - int bootrom_addr = (address >> 2) & 0xfff; - zpl->shell_write(GP0_WR_CSR_BOOTROM_ADDR, bootrom_addr, 0xf); - int bootrom_data = zpl->shell_read(GP0_RD_BOOTROM_DATA); - zpl->shell_write(GP0_WR_PS2PL_FIFO_DATA, bootrom_data, 0xf); - // if not implemented, print error - } else { - bsg_pr_err("ps.cpp: Errant read from (%lx)\n", address); - return false; - } - } - return true; + return true; } - diff --git a/cosim/black-parrot-example/vivado/.gitignore b/cosim/black-parrot-example/v/.gitignore similarity index 100% rename from cosim/black-parrot-example/vivado/.gitignore rename to cosim/black-parrot-example/v/.gitignore diff --git a/cosim/black-parrot-example/vivado/Makefile b/cosim/black-parrot-example/vivado/Makefile index 02fba0c9..fba5d721 100644 --- a/cosim/black-parrot-example/vivado/Makefile +++ b/cosim/black-parrot-example/vivado/Makefile @@ -31,9 +31,9 @@ $(FLIST): $(BP_FLIST) $(BASE_FLIST) sed -i "/bsg_icg_pos.sv/d" $@ sed -i "/test_bsg_clock_params.sv/d" $@ # Fixing recursive instantiation bug in vivado 2022.2 - sed "s/\`BSG_INV_PARAM(width_p)/width_p = 1/" $(BASEJUMP_STL_DIR)/bsg_misc/bsg_popcount.sv > bsg_popcount.sv + sed "s/\`BSG_INV_PARAM(width_p)/width_p = 1/" $(BASEJUMP_STL_DIR)/bsg_misc/bsg_popcount.sv > $(CURR_VSRC_DIR)/bsg_popcount.sv sed -i "/bsg_popcount.sv/d" $@ - echo bsg_popcount.sv >> $@ + echo $(CURR_VSRC_DIR)/bsg_popcount.sv >> $@ include $(TOP)/cosim/mk/Makefile.vivado diff --git a/cosim/black-parrot-minimal-example/ps.cpp b/cosim/black-parrot-minimal-example/ps.cpp index d5465987..7d61a282 100644 --- a/cosim/black-parrot-minimal-example/ps.cpp +++ b/cosim/black-parrot-minimal-example/ps.cpp @@ -4,21 +4,21 @@ // the API we provide abstracts away the // communication plumbing differences. -#include -#include +#include #include #include -#include #include +#include +#include +#include #include -#include #include "ps.hpp" #include "bp_bedrock_packet.h" -#include "bsg_zynq_pl.h" -#include "bsg_printing.h" #include "bsg_argparse.h" +#include "bsg_printing.h" +#include "bsg_zynq_pl.h" #ifndef FREE_DRAM #define FREE_DRAM 0 @@ -50,416 +50,426 @@ std::queue getchar_queue; std::bitset done_vec; inline void send_bp_fwd_packet(bsg_zynq_pl *zpl, bp_bedrock_packet *packet) { - int axil_len = sizeof(bp_bedrock_packet) / 4; - - uint32_t *pkt_data = reinterpret_cast(packet); - for (int i = 0; i < axil_len; i++) { - while (!zpl->shell_read(GP0_RD_PS2PL_FIFO_CTRS)); - zpl->shell_write(GP0_WR_PS2PL_FIFO_DATA, pkt_data[i], 0xf); - } + int axil_len = sizeof(bp_bedrock_packet) / 4; + + uint32_t *pkt_data = reinterpret_cast(packet); + for (int i = 0; i < axil_len; i++) { + while (!zpl->shell_read(GP0_RD_PS2PL_FIFO_CTRS)) + ; + zpl->shell_write(GP0_WR_PS2PL_FIFO_DATA, pkt_data[i], 0xf); + } } inline void recv_bp_rev_packet(bsg_zynq_pl *zpl, bp_bedrock_packet *packet) { - int axil_len = sizeof(bp_bedrock_packet) / 4; + int axil_len = sizeof(bp_bedrock_packet) / 4; - uint32_t *pkt_data = reinterpret_cast(packet); - for (int i = 0; i < axil_len; i++) { - while (!zpl->shell_read(GP0_RD_PL2PS_FIFO_CTRS)); - pkt_data[i] = zpl->shell_read(GP0_RD_PL2PS_FIFO_DATA); - } + uint32_t *pkt_data = reinterpret_cast(packet); + for (int i = 0; i < axil_len; i++) { + while (!zpl->shell_read(GP0_RD_PL2PS_FIFO_CTRS)) + ; + pkt_data[i] = zpl->shell_read(GP0_RD_PL2PS_FIFO_DATA); + } } inline void recv_bp_fwd_packet(bsg_zynq_pl *zpl, bp_bedrock_packet *packet) { - int axil_len = sizeof(bp_bedrock_packet) / 4; + int axil_len = sizeof(bp_bedrock_packet) / 4; - uint32_t *pkt_data = reinterpret_cast(packet); - for (int i = 0; i < axil_len; i++) { - while (!zpl->shell_read(GP0_RD_PL2PS_FIFO_CTRS+4)); - pkt_data[i] = zpl->shell_read(GP0_RD_PL2PS_FIFO_DATA+4); - } + uint32_t *pkt_data = reinterpret_cast(packet); + for (int i = 0; i < axil_len; i++) { + while (!zpl->shell_read(GP0_RD_PL2PS_FIFO_CTRS + 4)) + ; + pkt_data[i] = zpl->shell_read(GP0_RD_PL2PS_FIFO_DATA + 4); + } } inline void send_bp_rev_packet(bsg_zynq_pl *zpl, bp_bedrock_packet *packet) { - int axil_len = sizeof(bp_bedrock_packet) / 4; + int axil_len = sizeof(bp_bedrock_packet) / 4; - uint32_t *pkt_data = reinterpret_cast(packet); - for (int i = 0; i < axil_len; i++) { - while (!zpl->shell_read(GP0_RD_PS2PL_FIFO_CTRS+4)); - zpl->shell_write(GP0_WR_PS2PL_FIFO_DATA+4, pkt_data[i], 0xf); - } + uint32_t *pkt_data = reinterpret_cast(packet); + for (int i = 0; i < axil_len; i++) { + while (!zpl->shell_read(GP0_RD_PS2PL_FIFO_CTRS + 4)) + ; + zpl->shell_write(GP0_WR_PS2PL_FIFO_DATA + 4, pkt_data[i], 0xf); + } } int j = 0; -inline void send_bp_write(bsg_zynq_pl *zpl, uint64_t addr, int64_t data, uint8_t wmask) { - bp_bedrock_packet fwd_packet; - bp_bedrock_mem_payload payload; - - payload.did = 0xfff; - - fwd_packet.msg_type = BEDROCK_MEM_WR; - fwd_packet.subop = BEDROCK_STORE; - fwd_packet.addr0 = (addr >> 0 ) & 0xffffffff; - fwd_packet.addr1 = (addr >> 32) & 0xffffffff; - fwd_packet.size = (wmask == 0xff) ? 3 : 2; // Only support 32/64 currently - fwd_packet.payload = payload; - fwd_packet.data0 = (data >> 0 ) & 0xffffffff; - fwd_packet.data1 = (data >> 32) & 0xffffffff; - - send_bp_fwd_packet(zpl, &fwd_packet); +inline void send_bp_write(bsg_zynq_pl *zpl, uint64_t addr, int64_t data, + uint8_t wmask) { + bp_bedrock_packet fwd_packet; + bp_bedrock_mem_payload payload; + + payload.did = 0xfff; + + fwd_packet.msg_type = BEDROCK_MEM_WR; + fwd_packet.subop = BEDROCK_STORE; + fwd_packet.addr0 = (addr >> 0) & 0xffffffff; + fwd_packet.addr1 = (addr >> 32) & 0xffffffff; + fwd_packet.size = (wmask == 0xff) ? 3 : 2; // Only support 32/64 currently + fwd_packet.payload = payload; + fwd_packet.data0 = (data >> 0) & 0xffffffff; + fwd_packet.data1 = (data >> 32) & 0xffffffff; + + send_bp_fwd_packet(zpl, &fwd_packet); } inline int64_t send_bp_read(bsg_zynq_pl *zpl, uint64_t addr) { - bp_bedrock_packet fwd_packet; - bp_bedrock_mem_payload payload; + bp_bedrock_packet fwd_packet; + bp_bedrock_mem_payload payload; - payload.did = 0xfff; + payload.did = 0xfff; - fwd_packet.msg_type = BEDROCK_MEM_RD; - fwd_packet.subop = BEDROCK_STORE; - fwd_packet.addr0 = (addr >> 0 ) & 0xffffffff; - fwd_packet.addr1 = (addr >> 32) & 0xffffffff; - fwd_packet.size = 3; // Only support 64b currently - fwd_packet.payload = payload; + fwd_packet.msg_type = BEDROCK_MEM_RD; + fwd_packet.subop = BEDROCK_STORE; + fwd_packet.addr0 = (addr >> 0) & 0xffffffff; + fwd_packet.addr1 = (addr >> 32) & 0xffffffff; + fwd_packet.size = 3; // Only support 64b currently + fwd_packet.payload = payload; - send_bp_fwd_packet(zpl, &fwd_packet); + send_bp_fwd_packet(zpl, &fwd_packet); - bp_bedrock_packet rev_packet; - recv_bp_rev_packet(zpl, &rev_packet); + bp_bedrock_packet rev_packet; + recv_bp_rev_packet(zpl, &rev_packet); - int64_t return_data = 0; - return_data |= (rev_packet.data0 << 0); - return_data |= (rev_packet.data0 << 32); + int64_t return_data = 0; + return_data |= (rev_packet.data0 << 0); + return_data |= (rev_packet.data0 << 32); - return return_data; + return return_data; } void *monitor(void *vargp) { - char c; - while(1) { - c = getchar(); - if(c != -1) - getchar_queue.push(c); - } - bsg_pr_info("Exiting from pthread\n"); - - return NULL; + char c; + while (1) { + c = getchar(); + if (c != -1) + getchar_queue.push(c); + } + bsg_pr_info("Exiting from pthread\n"); + + return NULL; } -inline uint64_t get_counter_64(bsg_zynq_pl *zpl, uint64_t addr, bool bp_not_shell) { - uint64_t val, val_hi, val_lo, val_hi2; - if (bp_not_shell) { - return (uint64_t) send_bp_read(zpl, addr + 4); - } - - do { - val_hi = zpl->shell_read(addr + 4); - val_lo = zpl->shell_read(addr + 0); - val_hi2 = zpl->shell_read(addr + 4); - if (val_hi == val_hi2) { - val = val_hi << 32; - val += val_lo; - return val; - } else { - bsg_pr_err("ps.cpp: timer wrapover!\n"); +inline uint64_t get_counter_64(bsg_zynq_pl *zpl, uint64_t addr, + bool bp_not_shell) { + uint64_t val, val_hi, val_lo, val_hi2; + if (bp_not_shell) { + return (uint64_t)send_bp_read(zpl, addr + 4); } - } while (1); + + do { + val_hi = zpl->shell_read(addr + 4); + val_lo = zpl->shell_read(addr + 0); + val_hi2 = zpl->shell_read(addr + 4); + if (val_hi == val_hi2) { + val = val_hi << 32; + val += val_lo; + return val; + } else { + bsg_pr_err("ps.cpp: timer wrapover!\n"); + } + } while (1); } int ps_main(int argc, char **argv) { - bsg_zynq_pl *zpl = new bsg_zynq_pl(argc, argv); + bsg_zynq_pl *zpl = new bsg_zynq_pl(argc, argv); - long data; - long val1 = 0x1; - long val2 = 0x0; - long mask1 = 0xf; - long mask2 = 0xf; + long data; + long val1 = 0x1; + long val2 = 0x0; + long mask1 = 0xf; + long mask2 = 0xf; - pthread_t thread_id; - long allocated_dram = DRAM_ALLOCATE_SIZE; + pthread_t thread_id; + long allocated_dram = DRAM_ALLOCATE_SIZE; - int32_t val; - bsg_pr_info("ps.cpp: reading three base registers\n"); - bsg_pr_info("ps.cpp: reset(lo)=%d, dram_init=%d, dram_base=%d\n", - zpl->shell_read(GP0_RD_CSR_SYS_RESETN), - zpl->shell_read(GP0_RD_CSR_DRAM_INITED), - val = zpl->shell_read(GP0_RD_CSR_DRAM_BASE)); + int32_t val; + bsg_pr_info("ps.cpp: reading three base registers\n"); + bsg_pr_info("ps.cpp: reset(lo)=%d, dram_init=%d, dram_base=%d\n", + zpl->shell_read(GP0_RD_CSR_SYS_RESETN), + zpl->shell_read(GP0_RD_CSR_DRAM_INITED), + val = zpl->shell_read(GP0_RD_CSR_DRAM_BASE)); - bsg_pr_info("ps.cpp: attempting to write and read register 0x8\n"); + bsg_pr_info("ps.cpp: attempting to write and read register 0x8\n"); - zpl->shell_write(GP0_WR_CSR_DRAM_BASE, 0xDEADBEEF, mask1); - assert((zpl->shell_read(GP0_RD_CSR_DRAM_BASE) == (0xDEADBEEF))); - zpl->shell_write(GP0_WR_CSR_DRAM_BASE, val, mask1); - assert((zpl->shell_read(GP0_RD_CSR_DRAM_BASE) == (val))); + zpl->shell_write(GP0_WR_CSR_DRAM_BASE, 0xDEADBEEF, mask1); + assert((zpl->shell_read(GP0_RD_CSR_DRAM_BASE) == (0xDEADBEEF))); + zpl->shell_write(GP0_WR_CSR_DRAM_BASE, val, mask1); + assert((zpl->shell_read(GP0_RD_CSR_DRAM_BASE) == (val))); - bsg_pr_info("ps.cpp: successfully wrote and read registers in bsg_zynq_shell " - "(verified ARM GP0 connection)\n"); - - // Freeze processor - zpl->shell_write(GP0_WR_CSR_FREEZEN, 0x1, 0xF); - - // Deassert the active-low system reset as we finish initializing the whole system - zpl->shell_write(GP0_RD_CSR_SYS_RESETN, 0x1, 0xF); - - // Put processor into debug mode - zpl->shell_write(GP0_WR_CSR_DEBUG_IRQ, 0x1, 0xF); - for (int i = 0; i < 10; i++) zpl->tick(); - zpl->shell_write(GP0_WR_CSR_DEBUG_IRQ, 0x0, 0xF); - - unsigned long phys_ptr; - volatile int32_t *buf; - data = zpl->shell_read(GP0_RD_CSR_DRAM_INITED); - if (data == 0) { bsg_pr_info( - "ps.cpp: CSRs do not contain a DRAM base pointer; calling allocate " - "dram with size %ld\n", - allocated_dram); - buf = (volatile int32_t *)zpl->allocate_dram(allocated_dram, &phys_ptr); - bsg_pr_info("ps.cpp: received %p (phys = %lx)\n", buf, phys_ptr); - zpl->shell_write(GP0_WR_CSR_DRAM_BASE, phys_ptr, mask1); - assert((zpl->shell_read(GP0_RD_CSR_DRAM_BASE) == (int32_t)(phys_ptr))); - bsg_pr_info("ps.cpp: wrote and verified base register\n"); - zpl->shell_write(GP0_RD_CSR_DRAM_INITED, 1, mask1); - assert(zpl->shell_read(GP0_RD_CSR_DRAM_INITED) == 1); - } else - bsg_pr_info("ps.cpp: reusing dram base pointer %x\n", - zpl->shell_read(GP0_RD_CSR_DRAM_BASE)); - - int outer = 1024 / 4; - - if (argc == 1) { - bsg_pr_warn( - "No nbf file specified, sleeping for 2^31 seconds (this will hold " - "onto allocated DRAM)\n"); - sleep(1U << 31); - delete zpl; - return -1; - } + "ps.cpp: successfully wrote and read registers in bsg_zynq_shell " + "(verified ARM GP0 connection)\n"); + + // Freeze processor + zpl->shell_write(GP0_WR_CSR_FREEZEN, 0x1, 0xF); + + // Deassert the active-low system reset as we finish initializing the whole + // system + zpl->shell_write(GP0_RD_CSR_SYS_RESETN, 0x1, 0xF); + + // Put processor into debug mode + zpl->shell_write(GP0_WR_CSR_DEBUG_IRQ, 0x1, 0xF); + for (int i = 0; i < 10; i++) + zpl->tick(); + zpl->shell_write(GP0_WR_CSR_DEBUG_IRQ, 0x0, 0xF); + + unsigned long phys_ptr; + volatile int32_t *buf; + data = zpl->shell_read(GP0_RD_CSR_DRAM_INITED); + if (data == 0) { + bsg_pr_info( + "ps.cpp: CSRs do not contain a DRAM base pointer; calling allocate " + "dram with size %ld\n", + allocated_dram); + buf = (volatile int32_t *)zpl->allocate_dram(allocated_dram, &phys_ptr); + bsg_pr_info("ps.cpp: received %p (phys = %lx)\n", buf, phys_ptr); + zpl->shell_write(GP0_WR_CSR_DRAM_BASE, phys_ptr, mask1); + assert((zpl->shell_read(GP0_RD_CSR_DRAM_BASE) == (int32_t)(phys_ptr))); + bsg_pr_info("ps.cpp: wrote and verified base register\n"); + zpl->shell_write(GP0_RD_CSR_DRAM_INITED, 1, mask1); + assert(zpl->shell_read(GP0_RD_CSR_DRAM_INITED) == 1); + } else + bsg_pr_info("ps.cpp: reusing dram base pointer %x\n", + zpl->shell_read(GP0_RD_CSR_DRAM_BASE)); + + int outer = 1024 / 4; + + if (argc == 1) { + bsg_pr_warn( + "No nbf file specified, sleeping for 2^31 seconds (this will hold " + "onto allocated DRAM)\n"); + sleep(1U << 31); + delete zpl; + return -1; + } #ifdef ZYNQ - // Must zero DRAM for FPGA Linux boot, because opensbi payload mode - // obliterates the section names of the payload (Linux) - if (ZERO_DRAM) { - bsg_pr_info("ps.cpp: Zero-ing DRAM (%d bytes)\n", DRAM_ALLOCATE_SIZE); - for (int i = 0; i < DRAM_ALLOCATE_SIZE; i+=4) { - if (i % (1024*1024) == 0) bsg_pr_info("ps.cpp: zero-d %d MB\n", i/(1024*1024)); - send_bp_write(zpl, gp1_addr_base + i, 0x0, mask1); + // Must zero DRAM for FPGA Linux boot, because opensbi payload mode + // obliterates the section names of the payload (Linux) + if (ZERO_DRAM) { + bsg_pr_info("ps.cpp: Zero-ing DRAM (%d bytes)\n", DRAM_ALLOCATE_SIZE); + for (int i = 0; i < DRAM_ALLOCATE_SIZE; i += 4) { + if (i % (1024 * 1024) == 0) + bsg_pr_info("ps.cpp: zero-d %d MB\n", i / (1024 * 1024)); + send_bp_write(zpl, gp1_addr_base + i, 0x0, mask1); + } } - } #endif #ifdef DRAM_TEST - long num_times = allocated_dram / 32768; - bsg_pr_info( - "ps.cpp: attempting to write L2 %ld times over %ld MB (testing ARM GP1 " - "and HP0 connections)\n", - num_times * outer, (allocated_dram) >> 20); - send_bp_write(zpl, DRAM_BASE_ADDR, 0x12345678, mask1); - - for (int s = 0; s < outer; s++) - for (int t = 0; t < num_times; t++) { - send_bp_write(zpl, DRAM_BASE_ADDR + 32768 * t + s * 4, 0x1ADACACA + t + s, - mask1); - } - bsg_pr_info("ps.cpp: finished write L2 %ld times over %ld MB\n", - num_times * outer, (allocated_dram) >> 20); - - int mismatches = 0; - int matches = 0; + long num_times = allocated_dram / 32768; + bsg_pr_info( + "ps.cpp: attempting to write L2 %ld times over %ld MB (testing ARM GP1 " + "and HP0 connections)\n", + num_times * outer, (allocated_dram) >> 20); + send_bp_write(zpl, DRAM_BASE_ADDR, 0x12345678, mask1); + + for (int s = 0; s < outer; s++) + for (int t = 0; t < num_times; t++) { + send_bp_write(zpl, DRAM_BASE_ADDR + 32768 * t + s * 4, + 0x1ADACACA + t + s, mask1); + } + bsg_pr_info("ps.cpp: finished write L2 %ld times over %ld MB\n", + num_times * outer, (allocated_dram) >> 20); + + int mismatches = 0; + int matches = 0; #ifdef ZYNQ - for (int s = 0; s < outer; s++) - for (int t = 0; t < num_times; t++) - if (buf[(32768 * t + s * 4) / 4] == 0x1ADACACA + t + s) - matches++; - else - mismatches++; - - bsg_pr_info("ps.cpp: DIRECT access from ARM to DDR (some L1/L2 coherence " - "mismatches expected) %d matches, %d mismatches, %f\n", - matches, mismatches, - ((float)matches) / (float)(mismatches + matches)); + for (int s = 0; s < outer; s++) + for (int t = 0; t < num_times; t++) + if (buf[(32768 * t + s * 4) / 4] == 0x1ADACACA + t + s) + matches++; + else + mismatches++; + + bsg_pr_info("ps.cpp: DIRECT access from ARM to DDR (some L1/L2 coherence " + "mismatches expected) %d matches, %d mismatches, %f\n", + matches, mismatches, + ((float)matches) / (float)(mismatches + matches)); #endif - bsg_pr_info( - "ps.cpp: attempting to read L2 %ld times over %ld MB (testing ARM GP1 " - "and HP0 connections)\n", - num_times * outer, (allocated_dram) >> 20); - for (int s = 0; s < outer; s++) - for (int t = 0; t < num_times; t++) - if (zpl->shell_read(DRAM_BASE_ADDR + 32768 * t + s * 4) == 0x1ADACACA + t + s) - matches++; - else - mismatches++; - - bsg_pr_info("ps.cpp: READ access through BP (some L1 coherence mismatch " - "expected): %d matches, %d mismatches, %f\n", - matches, mismatches, - ((float)matches) / (float)(mismatches + matches)); + bsg_pr_info( + "ps.cpp: attempting to read L2 %ld times over %ld MB (testing ARM GP1 " + "and HP0 connections)\n", + num_times * outer, (allocated_dram) >> 20); + for (int s = 0; s < outer; s++) + for (int t = 0; t < num_times; t++) + if (zpl->shell_read(DRAM_BASE_ADDR + 32768 * t + s * 4) == + 0x1ADACACA + t + s) + matches++; + else + mismatches++; + + bsg_pr_info("ps.cpp: READ access through BP (some L1 coherence mismatch " + "expected): %d matches, %d mismatches, %f\n", + matches, mismatches, + ((float)matches) / (float)(mismatches + matches)); #endif // DRAM_TEST - bsg_pr_info("ps.cpp: beginning nbf load\n"); - nbf_load(zpl, argv[1]); - struct timespec start, end; - clock_gettime(CLOCK_MONOTONIC, &start); - unsigned long long minstret_start = get_counter_64(zpl, GP0_RD_MINSTRET, 0); - bsg_pr_dbg_ps("ps.cpp: finished nbf load\n"); - - bsg_pr_info("ps.cpp: Starting scan thread\n"); - pthread_create(&thread_id, NULL, monitor, NULL); - - // Freeze processor - zpl->shell_write(GP0_WR_CSR_FREEZEN, 0x0, 0xF); - - bsg_pr_info("ps.cpp: Starting i/o polling thread\n"); - int axil_len = sizeof(bp_bedrock_packet) / 4; - while (1) { - decode_bp_output(zpl); - // break loop when all cores done - if (done_vec.all()) { - break; + bsg_pr_info("ps.cpp: beginning nbf load\n"); + nbf_load(zpl, argv[1]); + struct timespec start, end; + clock_gettime(CLOCK_MONOTONIC, &start); + unsigned long long minstret_start = get_counter_64(zpl, GP0_RD_MINSTRET, 0); + bsg_pr_dbg_ps("ps.cpp: finished nbf load\n"); + + bsg_pr_info("ps.cpp: Starting scan thread\n"); + pthread_create(&thread_id, NULL, monitor, NULL); + + // Freeze processor + zpl->shell_write(GP0_WR_CSR_FREEZEN, 0x0, 0xF); + + bsg_pr_info("ps.cpp: Starting i/o polling thread\n"); + int axil_len = sizeof(bp_bedrock_packet) / 4; + while (1) { + decode_bp_output(zpl); + // break loop when all cores done + if (done_vec.all()) { + break; + } } - } - - unsigned long long minstret_stop = get_counter_64(zpl, GP0_RD_MINSTRET, 0); - // test delay for reading counter - unsigned long long counter_data = get_counter_64(zpl, GP0_RD_MINSTRET, 0); - clock_gettime(CLOCK_MONOTONIC, &end); - setlocale(LC_NUMERIC, ""); - bsg_pr_info("ps.cpp: end polling i/o\n"); - bsg_pr_info("ps.cpp: minstret (instructions retired): %'16llu (%16llx)\n", - minstret_start, minstret_start); - bsg_pr_info("ps.cpp: minstret (instructions retired): %'16llu (%16llx)\n", - minstret_stop, minstret_stop); - unsigned long long minstret_delta = minstret_stop - minstret_start; - bsg_pr_info("ps.cpp: minstret delta: %'16llu (%16llx)\n", - minstret_delta, minstret_delta); - unsigned long long diff_ns = - 1000LL * 1000LL * 1000LL * - ((unsigned long long)(end.tv_sec - start.tv_sec)) + - (end.tv_nsec - start.tv_nsec); - bsg_pr_info("ps.cpp: wall clock time : %'16llu (%16llx) ns\n", - diff_ns, diff_ns); + + unsigned long long minstret_stop = get_counter_64(zpl, GP0_RD_MINSTRET, 0); + // test delay for reading counter + unsigned long long counter_data = get_counter_64(zpl, GP0_RD_MINSTRET, 0); + clock_gettime(CLOCK_MONOTONIC, &end); + setlocale(LC_NUMERIC, ""); + bsg_pr_info("ps.cpp: end polling i/o\n"); + bsg_pr_info("ps.cpp: minstret (instructions retired): %'16llu (%16llx)\n", + minstret_start, minstret_start); + bsg_pr_info("ps.cpp: minstret (instructions retired): %'16llu (%16llx)\n", + minstret_stop, minstret_stop); + unsigned long long minstret_delta = minstret_stop - minstret_start; + bsg_pr_info("ps.cpp: minstret delta: %'16llu (%16llx)\n", + minstret_delta, minstret_delta); + unsigned long long diff_ns = + 1000LL * 1000LL * 1000LL * + ((unsigned long long)(end.tv_sec - start.tv_sec)) + + (end.tv_nsec - start.tv_nsec); + bsg_pr_info( + "ps.cpp: wall clock time : %'16llu (%16llx) ns\n", + diff_ns, diff_ns); #ifdef ZYNQ - // in general we do not want to free the dram; the Xilinx allocator has a - // tendency to - // fail after many allocate/fail cycle. instead we keep a pointer to the dram - // in a CSR - // in the accelerator, and if we reload the bitstream, we copy the pointer - // back in.s - - if (FREE_DRAM) { - bsg_pr_info("ps.cpp: freeing DRAM buffer\n"); - zpl->free_dram((void *)buf); - zpl->shell_write(GP0_WR_CSR_DRAM_INITED, 0x0, mask2); - } + // in general we do not want to free the dram; the Xilinx allocator has a + // tendency to + // fail after many allocate/fail cycle. instead we keep a pointer to the + // dram in a CSR in the accelerator, and if we reload the bitstream, we copy + // the pointer back in.s + + if (FREE_DRAM) { + bsg_pr_info("ps.cpp: freeing DRAM buffer\n"); + zpl->free_dram((void *)buf); + zpl->shell_write(GP0_WR_CSR_DRAM_INITED, 0x0, mask2); + } #endif - zpl->done(); - delete zpl; - return -1; + zpl->done(); + delete zpl; + return -1; } std::uint32_t rotl(std::uint32_t v, std::int32_t shift) { - std::int32_t s = shift>=0? shift%32 : -((-shift)%32); - return (v<>(32-s)); + std::int32_t s = shift >= 0 ? shift % 32 : -((-shift) % 32); + return (v << s) | (v >> (32 - s)); } void nbf_load(bsg_zynq_pl *zpl, char *nbf_filename) { - string nbf_command; - string tmp; - string delimiter = "_"; - - long long int nbf[3]; - int pos = 0; - long unsigned int base_addr; - int data; - ifstream nbf_file(nbf_filename); - - if (!nbf_file.is_open()) { - bsg_pr_err("ps.cpp: error opening nbf file.\n"); - return; - } - - int line_count = 0; - int credit_count = 0; - while (getline(nbf_file, nbf_command)) { - line_count++; - int i = 0; - while ((pos = nbf_command.find(delimiter)) != std::string::npos) { - tmp = nbf_command.substr(0, pos); - nbf[i] = std::stoull(tmp, nullptr, 16); - nbf_command.erase(0, pos + 1); - i++; + string nbf_command; + string tmp; + string delimiter = "_"; + + long long int nbf[3]; + int pos = 0; + long unsigned int base_addr; + int data; + ifstream nbf_file(nbf_filename); + + if (!nbf_file.is_open()) { + bsg_pr_err("ps.cpp: error opening nbf file.\n"); + return; } - nbf[i] = std::stoull(nbf_command, nullptr, 16); - - if (nbf[0] == 0x3 || nbf[0] == 0x2 || nbf[0] == 0x1 || nbf[0] == 0x0) { - if (nbf[0] == 0x3) { - send_bp_write(zpl, nbf[1], nbf[2], 0xff); - } - else if (nbf[0] == 0x2) { - send_bp_write(zpl, nbf[1], nbf[2], 0xf); - } - } else if (nbf[0] == 0xfe) { - continue; - } else if (nbf[0] == 0xff) { - bsg_pr_dbg_ps("ps.cpp: nbf finish command, line %d\n", line_count); - continue; - } else { - bsg_pr_dbg_ps("ps.cpp: unrecognized nbf command, line %d : %llx\n", - line_count, nbf[0]); - return; + + int line_count = 0; + int credit_count = 0; + while (getline(nbf_file, nbf_command)) { + line_count++; + int i = 0; + while ((pos = nbf_command.find(delimiter)) != std::string::npos) { + tmp = nbf_command.substr(0, pos); + nbf[i] = std::stoull(tmp, nullptr, 16); + nbf_command.erase(0, pos + 1); + i++; + } + nbf[i] = std::stoull(nbf_command, nullptr, 16); + + if (nbf[0] == 0x3 || nbf[0] == 0x2 || nbf[0] == 0x1 || nbf[0] == 0x0) { + if (nbf[0] == 0x3) { + send_bp_write(zpl, nbf[1], nbf[2], 0xff); + } else if (nbf[0] == 0x2) { + send_bp_write(zpl, nbf[1], nbf[2], 0xf); + } + } else if (nbf[0] == 0xfe) { + continue; + } else if (nbf[0] == 0xff) { + bsg_pr_dbg_ps("ps.cpp: nbf finish command, line %d\n", line_count); + continue; + } else { + bsg_pr_dbg_ps("ps.cpp: unrecognized nbf command, line %d : %llx\n", + line_count, nbf[0]); + return; + } } - } - bsg_pr_dbg_ps("ps.cpp: waiting for credit returns.\n", credit_count); - while (zpl->shell_read(GP0_RD_CREDITS)); + bsg_pr_dbg_ps("ps.cpp: waiting for credit returns.\n", credit_count); + while (zpl->shell_read(GP0_RD_CREDITS)) + ; - bsg_pr_dbg_ps("ps.cpp: finished loading %d lines of nbf.\n", line_count); + bsg_pr_dbg_ps("ps.cpp: finished loading %d lines of nbf.\n", line_count); } bool decode_bp_output(bsg_zynq_pl *zpl) { - bp_bedrock_packet fwd_packet; - recv_bp_fwd_packet(zpl, &fwd_packet); - - uint32_t address = fwd_packet.addr0 & 0xFFFFFFFC; - uint64_t data = 0; - data |= fwd_packet.data0 << 0; - data |= fwd_packet.data1 << 32; - char print_data = data & 0xFF; - char core = (address-0x102000) >> 3; - // write from BP - if (address == 0x101000) { - printf("%c", print_data); - fflush(stdout); - } else if (address >= 0x102000 && address < 0x103000) { - done_vec[core] = true; - if (print_data == 0) { - bsg_pr_info("CORE[%d] PASS\n", core); + bp_bedrock_packet fwd_packet; + recv_bp_fwd_packet(zpl, &fwd_packet); + + uint32_t address = fwd_packet.addr0 & 0xFFFFFFFC; + uint64_t data = 0; + data |= fwd_packet.data0 << 0; + data |= fwd_packet.data1 << 32; + char print_data = data & 0xFF; + char core = (address - 0x102000) >> 3; + // write from BP + if (address == 0x101000) { + printf("%c", print_data); + fflush(stdout); + } else if (address >= 0x102000 && address < 0x103000) { + done_vec[core] = true; + if (print_data == 0) { + bsg_pr_info("CORE[%d] PASS\n", core); + } else { + bsg_pr_info("CORE[%d] FAIL\n", core); + } + } else if (address == 0x103000) { + bsg_pr_dbg_ps("ps.cpp: Watchdog tick\n"); + } else if (address >= 0x110000 && address < 0x111000) { + int bootrom_addr0 = (address >> 2) & 0x1ff; + zpl->shell_write(GP0_WR_CSR_BOOTROM_ADDR, bootrom_addr0, 0xf); + int bootrom_data0 = zpl->shell_read(GP0_RD_BOOTROM_DATA); + int bootrom_addr1 = bootrom_addr0 + 1; + zpl->shell_write(GP0_WR_CSR_BOOTROM_ADDR, bootrom_addr1, 0xf); + int bootrom_data1 = zpl->shell_read(GP0_RD_BOOTROM_DATA); + + bp_bedrock_packet rev_packet = fwd_packet; + rev_packet.data0 = bootrom_data0; + rev_packet.data1 = bootrom_data1; + send_bp_rev_packet(zpl, &rev_packet); } else { - bsg_pr_info("CORE[%d] FAIL\n", core); + bsg_pr_err("ps.cpp: Errant write to %lx\n", address); + return false; } - } else if (address == 0x103000) { - bsg_pr_dbg_ps("ps.cpp: Watchdog tick\n"); - } else if (address >= 0x110000 && address < 0x111000) { - int bootrom_addr0 = (address >> 2) & 0x1ff; - zpl->shell_write(GP0_WR_CSR_BOOTROM_ADDR, bootrom_addr0, 0xf); - int bootrom_data0 = zpl->shell_read(GP0_RD_BOOTROM_DATA); - int bootrom_addr1 = bootrom_addr0 + 1; - zpl->shell_write(GP0_WR_CSR_BOOTROM_ADDR, bootrom_addr1, 0xf); - int bootrom_data1 = zpl->shell_read(GP0_RD_BOOTROM_DATA); - - bp_bedrock_packet rev_packet = fwd_packet; - rev_packet.data0 = bootrom_data0; - rev_packet.data1 = bootrom_data1; - send_bp_rev_packet(zpl, &rev_packet); - } else { - bsg_pr_err("ps.cpp: Errant write to %lx\n", address); - return false; - } - - return true; -} + return true; +} diff --git a/cosim/black-parrot-minimal-example/v/.gitignore b/cosim/black-parrot-minimal-example/v/.gitignore new file mode 100644 index 00000000..a882545c --- /dev/null +++ b/cosim/black-parrot-minimal-example/v/.gitignore @@ -0,0 +1 @@ +bsg_popcount.sv diff --git a/cosim/black-parrot-minimal-example/vivado/Makefile b/cosim/black-parrot-minimal-example/vivado/Makefile index f9bb94a1..028d6537 100644 --- a/cosim/black-parrot-minimal-example/vivado/Makefile +++ b/cosim/black-parrot-minimal-example/vivado/Makefile @@ -32,9 +32,9 @@ $(FLIST): $(BP_FLIST) $(BASE_FLIST) sed -i "/bsg_icg_pos.sv/d" $@ sed -i "/test_bsg_clock_params.sv/d" $@ # Fixing recursive instantiation bug in vivado 2022.2 - sed "s/BSG_INV_PARAM(width_p)/width_p = 1/" $(BASEJUMP_STL_DIR)/bsg_misc/bsg_popcount.sv >> bsg_popcount.sv + sed "s/BSG_INV_PARAM(width_p)/width_p = 1/" $(BASEJUMP_STL_DIR)/bsg_misc/bsg_popcount.sv >> $(DESIGN_VSRC_DIR)/bsg_popcount.sv sed -i "/bsg_popcount.sv/d" $@ - echo bsg_popcount.sv >> $@ + echo $(DESIGN_VSRC_DIR)/bsg_popcount.sv >> $@ include $(TOP)/cosim/mk/Makefile.vivado diff --git a/cosim/double-shell-example/ps.cpp b/cosim/double-shell-example/ps.cpp index 3414958c..d9caad42 100644 --- a/cosim/double-shell-example/ps.cpp +++ b/cosim/double-shell-example/ps.cpp @@ -4,161 +4,161 @@ // the API we provide abstracts away the // communication plumbing differences. -#include -#include -#include "bsg_zynq_pl.h" #include "bsg_printing.h" +#include "bsg_zynq_pl.h" +#include +#include int ps_main(int argc, char **argv) { - bsg_zynq_pl *zpl = new bsg_zynq_pl(argc, argv); - - // the read memory map is essentially - // - // 0,4: registers - // 8, : pl_to_ps fifo data - // C, : pl_to_ps fifo counts - // 10: ps_to_pl fifo counts - - // the write memory map is essentially - // - // 0,4: registers - // 8: ps_to_pl data - - int val1 = 0xDEADBEEF; - int val2 = 0xCAFEBABE; - int val3 = 0x0000CADE; - int val4 = 0xC0DE0000; - int val5 = 0xBEBEBEBE; - int val6 = 0xDEFACADE; - int mask1 = 0xf; - int mask2 = 0xf; - - // write to two registers - zpl->shell_write(0x0 + GP0_ADDR_BASE, val1, mask1); - zpl->shell_write(0x4 + GP0_ADDR_BASE, val2, mask2); - - zpl->shell_write(0x0 + GP1_ADDR_BASE, val3, mask1); - zpl->shell_write(0x4 + GP1_ADDR_BASE, val4, mask2); - - // verify the writes worked by reading - // assert ( (zpl->shell_read(0x0 + GP0_ADDR_BASE) == val1) ); - (zpl->shell_read(0x0 + GP0_ADDR_BASE) == val1); - assert((zpl->shell_read(0x4 + GP0_ADDR_BASE) == val2)); - assert((zpl->shell_read(0x0 + GP1_ADDR_BASE) == val3)); - assert((zpl->shell_read(0x4 + GP1_ADDR_BASE) == val4)); - - // check pl_to_ps fifo counters are zero (no data) - assert((zpl->shell_read(0xC + GP0_ADDR_BASE) == 0)); - assert((zpl->shell_read(0xC + GP1_ADDR_BASE) == 0)); - - // check ps_to_pl fifo credits (4 credits avail) - - assert((zpl->shell_read(0x10 + GP0_ADDR_BASE) == 4)); - assert((zpl->shell_read(0x10 + GP1_ADDR_BASE) == 4)); - - // write to fifo - zpl->shell_write(0x8 + GP0_ADDR_BASE, val5, mask1); - - // check counters (rememder the FIFOs cross over) - assert((zpl->shell_read(0xC + GP1_ADDR_BASE) == (1))); - assert((zpl->shell_read(0xC + GP0_ADDR_BASE) == (0))); - - // write to fifo - zpl->shell_write(0x8 + GP1_ADDR_BASE, val6, mask1); - - // check counters (rememder the FIFOs cross over) - assert((zpl->shell_read(0xC + GP1_ADDR_BASE) == (1))); - assert((zpl->shell_read(0xC + GP0_ADDR_BASE) == (1))); - assert((zpl->shell_read(0x10 + GP0_ADDR_BASE) == 4)); - assert((zpl->shell_read(0x10 + GP1_ADDR_BASE) == 4)); - - // read data coming from pl to ps - assert((zpl->shell_read(0x8 + GP0_ADDR_BASE) == (val6))); - assert((zpl->shell_read(0x8 + GP1_ADDR_BASE) == (val5))); - - bsg_pr_info("bsg_zynq: data communicated between two AXI slave regions.\n"); - - zpl->shell_write(0x8 + GP0_ADDR_BASE, val1, mask1); - zpl->shell_write(0x8 + GP0_ADDR_BASE, val2, mask1); - zpl->shell_write(0x8 + GP0_ADDR_BASE, val3, mask1); - zpl->shell_write(0x8 + GP0_ADDR_BASE, val4, mask1); - assert((zpl->shell_read(0xC + GP1_ADDR_BASE) == (4))); - assert((zpl->shell_read(0xC + GP0_ADDR_BASE) == (0))); - assert((zpl->shell_read(0x10 + GP1_ADDR_BASE) == (4))); - assert((zpl->shell_read(0x10 + GP0_ADDR_BASE) == (4))); - - bsg_pr_info("bsg_zynq filled up fifo to GP1.\n"); - - zpl->shell_write(0x8 + GP1_ADDR_BASE, val4, mask1); - zpl->shell_write(0x8 + GP1_ADDR_BASE, val3, mask1); - zpl->shell_write(0x8 + GP1_ADDR_BASE, val2, mask1); - zpl->shell_write(0x8 + GP1_ADDR_BASE, val1, mask1); - - assert((zpl->shell_read(0xC + GP1_ADDR_BASE) == (4))); - assert((zpl->shell_read(0xC + GP0_ADDR_BASE) == (4))); - assert((zpl->shell_read(0x10 + GP1_ADDR_BASE) == (4))); - assert((zpl->shell_read(0x10 + GP0_ADDR_BASE) == (4))); - - bsg_pr_info("bsg_zynq filled up fifo to GP0.\n"); - - zpl->shell_write(0x8 + GP1_ADDR_BASE, val5, mask1); - zpl->shell_write(0x8 + GP1_ADDR_BASE, val6, mask1); - zpl->shell_write(0x8 + GP1_ADDR_BASE, val6, mask1); - zpl->shell_write(0x8 + GP1_ADDR_BASE, val1, mask1); - - assert((zpl->shell_read(0xC + GP1_ADDR_BASE) == (4))); - assert((zpl->shell_read(0xC + GP0_ADDR_BASE) == (4))); - assert((zpl->shell_read(0x10 + GP1_ADDR_BASE) == (0))); - assert((zpl->shell_read(0x10 + GP0_ADDR_BASE) == (4))); - - bsg_pr_info("bsg_zynq filled up sequential fifo pair successfully.\n"); - - zpl->shell_write(0x8 + GP0_ADDR_BASE, val5, mask1); - zpl->shell_write(0x8 + GP0_ADDR_BASE, val6, mask1); - zpl->shell_write(0x8 + GP0_ADDR_BASE, val6, mask1); - zpl->shell_write(0x8 + GP0_ADDR_BASE, val5, mask1); - - assert((zpl->shell_read(0xC + GP1_ADDR_BASE) == (4))); - assert((zpl->shell_read(0xC + GP0_ADDR_BASE) == (4))); - assert((zpl->shell_read(0x10 + GP1_ADDR_BASE) == (0))); // no free space - assert((zpl->shell_read(0x10 + GP0_ADDR_BASE) == (0))); // no free space - - bsg_pr_info("bsg_zynq filled up all fifos successfully.\n"); - - assert((zpl->shell_read(0x8 + GP0_ADDR_BASE) == (val4))); - assert((zpl->shell_read(0x8 + GP0_ADDR_BASE) == (val3))); - assert((zpl->shell_read(0x8 + GP0_ADDR_BASE) == (val2))); - assert((zpl->shell_read(0x8 + GP0_ADDR_BASE) == (val1))); - assert((zpl->shell_read(0x8 + GP0_ADDR_BASE) == (val5))); - assert((zpl->shell_read(0x8 + GP0_ADDR_BASE) == (val6))); - assert((zpl->shell_read(0x8 + GP0_ADDR_BASE) == (val6))); - assert((zpl->shell_read(0x8 + GP0_ADDR_BASE) == (val1))); - - bsg_pr_info("bsg_zynq read out sequential fifo pair successfully.\n"); - - assert((zpl->shell_read(0x8 + GP1_ADDR_BASE) == (val1))); - assert((zpl->shell_read(0x8 + GP1_ADDR_BASE) == (val2))); - assert((zpl->shell_read(0x8 + GP1_ADDR_BASE) == (val3))); - assert((zpl->shell_read(0x8 + GP1_ADDR_BASE) == (val4))); - assert((zpl->shell_read(0x8 + GP1_ADDR_BASE) == (val5))); - assert((zpl->shell_read(0x8 + GP1_ADDR_BASE) == (val6))); - assert((zpl->shell_read(0x8 + GP1_ADDR_BASE) == (val6))); - assert((zpl->shell_read(0x8 + GP1_ADDR_BASE) == (val5))); + bsg_zynq_pl *zpl = new bsg_zynq_pl(argc, argv); + + // the read memory map is essentially + // + // 0,4: registers + // 8, : pl_to_ps fifo data + // C, : pl_to_ps fifo counts + // 10: ps_to_pl fifo counts + + // the write memory map is essentially + // + // 0,4: registers + // 8: ps_to_pl data + + int val1 = 0xDEADBEEF; + int val2 = 0xCAFEBABE; + int val3 = 0x0000CADE; + int val4 = 0xC0DE0000; + int val5 = 0xBEBEBEBE; + int val6 = 0xDEFACADE; + int mask1 = 0xf; + int mask2 = 0xf; + + // write to two registers + zpl->shell_write(0x0 + GP0_ADDR_BASE, val1, mask1); + zpl->shell_write(0x4 + GP0_ADDR_BASE, val2, mask2); + + zpl->shell_write(0x0 + GP1_ADDR_BASE, val3, mask1); + zpl->shell_write(0x4 + GP1_ADDR_BASE, val4, mask2); + + // verify the writes worked by reading + // assert ( (zpl->shell_read(0x0 + GP0_ADDR_BASE) == val1) ); + (zpl->shell_read(0x0 + GP0_ADDR_BASE) == val1); + assert((zpl->shell_read(0x4 + GP0_ADDR_BASE) == val2)); + assert((zpl->shell_read(0x0 + GP1_ADDR_BASE) == val3)); + assert((zpl->shell_read(0x4 + GP1_ADDR_BASE) == val4)); + + // check pl_to_ps fifo counters are zero (no data) + assert((zpl->shell_read(0xC + GP0_ADDR_BASE) == 0)); + assert((zpl->shell_read(0xC + GP1_ADDR_BASE) == 0)); + + // check ps_to_pl fifo credits (4 credits avail) + + assert((zpl->shell_read(0x10 + GP0_ADDR_BASE) == 4)); + assert((zpl->shell_read(0x10 + GP1_ADDR_BASE) == 4)); + + // write to fifo + zpl->shell_write(0x8 + GP0_ADDR_BASE, val5, mask1); + + // check counters (rememder the FIFOs cross over) + assert((zpl->shell_read(0xC + GP1_ADDR_BASE) == (1))); + assert((zpl->shell_read(0xC + GP0_ADDR_BASE) == (0))); + + // write to fifo + zpl->shell_write(0x8 + GP1_ADDR_BASE, val6, mask1); + + // check counters (rememder the FIFOs cross over) + assert((zpl->shell_read(0xC + GP1_ADDR_BASE) == (1))); + assert((zpl->shell_read(0xC + GP0_ADDR_BASE) == (1))); + assert((zpl->shell_read(0x10 + GP0_ADDR_BASE) == 4)); + assert((zpl->shell_read(0x10 + GP1_ADDR_BASE) == 4)); + + // read data coming from pl to ps + assert((zpl->shell_read(0x8 + GP0_ADDR_BASE) == (val6))); + assert((zpl->shell_read(0x8 + GP1_ADDR_BASE) == (val5))); + + bsg_pr_info("bsg_zynq: data communicated between two AXI slave regions.\n"); + + zpl->shell_write(0x8 + GP0_ADDR_BASE, val1, mask1); + zpl->shell_write(0x8 + GP0_ADDR_BASE, val2, mask1); + zpl->shell_write(0x8 + GP0_ADDR_BASE, val3, mask1); + zpl->shell_write(0x8 + GP0_ADDR_BASE, val4, mask1); + assert((zpl->shell_read(0xC + GP1_ADDR_BASE) == (4))); + assert((zpl->shell_read(0xC + GP0_ADDR_BASE) == (0))); + assert((zpl->shell_read(0x10 + GP1_ADDR_BASE) == (4))); + assert((zpl->shell_read(0x10 + GP0_ADDR_BASE) == (4))); + + bsg_pr_info("bsg_zynq filled up fifo to GP1.\n"); + + zpl->shell_write(0x8 + GP1_ADDR_BASE, val4, mask1); + zpl->shell_write(0x8 + GP1_ADDR_BASE, val3, mask1); + zpl->shell_write(0x8 + GP1_ADDR_BASE, val2, mask1); + zpl->shell_write(0x8 + GP1_ADDR_BASE, val1, mask1); + + assert((zpl->shell_read(0xC + GP1_ADDR_BASE) == (4))); + assert((zpl->shell_read(0xC + GP0_ADDR_BASE) == (4))); + assert((zpl->shell_read(0x10 + GP1_ADDR_BASE) == (4))); + assert((zpl->shell_read(0x10 + GP0_ADDR_BASE) == (4))); + + bsg_pr_info("bsg_zynq filled up fifo to GP0.\n"); + + zpl->shell_write(0x8 + GP1_ADDR_BASE, val5, mask1); + zpl->shell_write(0x8 + GP1_ADDR_BASE, val6, mask1); + zpl->shell_write(0x8 + GP1_ADDR_BASE, val6, mask1); + zpl->shell_write(0x8 + GP1_ADDR_BASE, val1, mask1); + + assert((zpl->shell_read(0xC + GP1_ADDR_BASE) == (4))); + assert((zpl->shell_read(0xC + GP0_ADDR_BASE) == (4))); + assert((zpl->shell_read(0x10 + GP1_ADDR_BASE) == (0))); + assert((zpl->shell_read(0x10 + GP0_ADDR_BASE) == (4))); + + bsg_pr_info("bsg_zynq filled up sequential fifo pair successfully.\n"); + + zpl->shell_write(0x8 + GP0_ADDR_BASE, val5, mask1); + zpl->shell_write(0x8 + GP0_ADDR_BASE, val6, mask1); + zpl->shell_write(0x8 + GP0_ADDR_BASE, val6, mask1); + zpl->shell_write(0x8 + GP0_ADDR_BASE, val5, mask1); + + assert((zpl->shell_read(0xC + GP1_ADDR_BASE) == (4))); + assert((zpl->shell_read(0xC + GP0_ADDR_BASE) == (4))); + assert((zpl->shell_read(0x10 + GP1_ADDR_BASE) == (0))); // no free space + assert((zpl->shell_read(0x10 + GP0_ADDR_BASE) == (0))); // no free space + + bsg_pr_info("bsg_zynq filled up all fifos successfully.\n"); + + assert((zpl->shell_read(0x8 + GP0_ADDR_BASE) == (val4))); + assert((zpl->shell_read(0x8 + GP0_ADDR_BASE) == (val3))); + assert((zpl->shell_read(0x8 + GP0_ADDR_BASE) == (val2))); + assert((zpl->shell_read(0x8 + GP0_ADDR_BASE) == (val1))); + assert((zpl->shell_read(0x8 + GP0_ADDR_BASE) == (val5))); + assert((zpl->shell_read(0x8 + GP0_ADDR_BASE) == (val6))); + assert((zpl->shell_read(0x8 + GP0_ADDR_BASE) == (val6))); + assert((zpl->shell_read(0x8 + GP0_ADDR_BASE) == (val1))); + + bsg_pr_info("bsg_zynq read out sequential fifo pair successfully.\n"); - bsg_pr_info("bsg_zynq read out second sequential fifo pair successfully.\n"); + assert((zpl->shell_read(0x8 + GP1_ADDR_BASE) == (val1))); + assert((zpl->shell_read(0x8 + GP1_ADDR_BASE) == (val2))); + assert((zpl->shell_read(0x8 + GP1_ADDR_BASE) == (val3))); + assert((zpl->shell_read(0x8 + GP1_ADDR_BASE) == (val4))); + assert((zpl->shell_read(0x8 + GP1_ADDR_BASE) == (val5))); + assert((zpl->shell_read(0x8 + GP1_ADDR_BASE) == (val6))); + assert((zpl->shell_read(0x8 + GP1_ADDR_BASE) == (val6))); + assert((zpl->shell_read(0x8 + GP1_ADDR_BASE) == (val5))); - // check pl_to_ps fifo counters are zero (no data) - assert((zpl->shell_read(0xC + GP0_ADDR_BASE) == 0)); - assert((zpl->shell_read(0xC + GP1_ADDR_BASE) == 0)); + bsg_pr_info( + "bsg_zynq read out second sequential fifo pair successfully.\n"); - // check ps_to_pl fifo credits (4 credits avail) + // check pl_to_ps fifo counters are zero (no data) + assert((zpl->shell_read(0xC + GP0_ADDR_BASE) == 0)); + assert((zpl->shell_read(0xC + GP1_ADDR_BASE) == 0)); - assert((zpl->shell_read(0x10 + GP0_ADDR_BASE) == 4)); - assert((zpl->shell_read(0x10 + GP1_ADDR_BASE) == 4)); + // check ps_to_pl fifo credits (4 credits avail) + + assert((zpl->shell_read(0x10 + GP0_ADDR_BASE) == 4)); + assert((zpl->shell_read(0x10 + GP1_ADDR_BASE) == 4)); + + zpl->done(); - zpl->done(); - - delete zpl; - return 0; + delete zpl; + return 0; } - diff --git a/cosim/dram-example/ps.cpp b/cosim/dram-example/ps.cpp index e91b701d..1c8f0417 100644 --- a/cosim/dram-example/ps.cpp +++ b/cosim/dram-example/ps.cpp @@ -12,39 +12,39 @@ // This test is incomplete, the PL does not actually currently access the DRAM. // -#include -#include #include "bsg_zynq_pl.h" +#include +#include #define DRAM_ALLOC_SIZE_BYTES 16384 int ps_main(int argc, char **argv) { - bsg_zynq_pl *zpl = new bsg_zynq_pl(argc, argv); + bsg_zynq_pl *zpl = new bsg_zynq_pl(argc, argv); - int mask1 = 0xf; - unsigned long phys_ptr; + int mask1 = 0xf; + unsigned long phys_ptr; - volatile int *buf; + volatile int *buf; - buf = (volatile int *)zpl->allocate_dram(DRAM_ALLOC_SIZE_BYTES, &phys_ptr); + buf = (volatile int *)zpl->allocate_dram(DRAM_ALLOC_SIZE_BYTES, &phys_ptr); - // write all of the dram - for (int i = 0; i < DRAM_ALLOC_SIZE_BYTES / 4; i++) - buf[i] = i; + // write all of the dram + for (int i = 0; i < DRAM_ALLOC_SIZE_BYTES / 4; i++) + buf[i] = i; - // read all of the dram - for (int i = 0; i < DRAM_ALLOC_SIZE_BYTES / 4; i++) - assert(buf[i] == i); + // read all of the dram + for (int i = 0; i < DRAM_ALLOC_SIZE_BYTES / 4; i++) + assert(buf[i] == i); - zpl->shell_write(0x0 + GP0_ADDR_BASE, phys_ptr, mask1); + zpl->shell_write(0x0 + GP0_ADDR_BASE, phys_ptr, mask1); - assert((zpl->shell_read(0x0 + GP0_ADDR_BASE) == (phys_ptr))); + assert((zpl->shell_read(0x0 + GP0_ADDR_BASE) == (phys_ptr))); - if (argc == 1) - zpl->free_dram((void *)buf); + if (argc == 1) + zpl->free_dram((void *)buf); - zpl->done(); + zpl->done(); - delete zpl; - exit(EXIT_SUCCESS); + delete zpl; + exit(EXIT_SUCCESS); } diff --git a/cosim/hammerblade-example/ps.cpp b/cosim/hammerblade-example/ps.cpp index e0b2aabd..5a5746e6 100644 --- a/cosim/hammerblade-example/ps.cpp +++ b/cosim/hammerblade-example/ps.cpp @@ -4,22 +4,22 @@ // the API we provide abstracts away the // communication plumbing differences. -#include -#include +#include +#include #include #include -#include #include +#include +#include +#include #include -#include -#include #include "ps.hpp" -#include "bsg_zynq_pl.h" +#include "bsg_argparse.h" #include "bsg_printing.h" #include "bsg_tag_bitbang.h" -#include "bsg_argparse.h" +#include "bsg_zynq_pl.h" #include "bsg_manycore_machine.h" #include "bsg_manycore_packet.h" @@ -37,322 +37,345 @@ void configure_blackparrot(bsg_zynq_pl *zpl); void nbf_load(bsg_zynq_pl *zpl, char *filename); -inline void send_mc_request_packet(bsg_zynq_pl *zpl, hb_mc_request_packet_t *packet) { - int axil_len = sizeof(hb_mc_request_packet_t) / 4; +inline void send_mc_request_packet(bsg_zynq_pl *zpl, + hb_mc_request_packet_t *packet) { + int axil_len = sizeof(hb_mc_request_packet_t) / 4; - uint32_t *pkt_data = reinterpret_cast(packet); - for (int i = 0; i < axil_len; i++) { - while (!zpl->shell_read(GP0_RD_EP_REQ_FIFO_CTR)); - zpl->shell_write(GP0_WR_EP_REQ_FIFO_DATA, pkt_data[i], 0xf); - } + uint32_t *pkt_data = reinterpret_cast(packet); + for (int i = 0; i < axil_len; i++) { + while (!zpl->shell_read(GP0_RD_EP_REQ_FIFO_CTR)) + ; + zpl->shell_write(GP0_WR_EP_REQ_FIFO_DATA, pkt_data[i], 0xf); + } } -inline void recv_mc_response_packet(bsg_zynq_pl *zpl, hb_mc_response_packet_t *packet) { - int axil_len = sizeof(hb_mc_response_packet_t) / 4; +inline void recv_mc_response_packet(bsg_zynq_pl *zpl, + hb_mc_response_packet_t *packet) { + int axil_len = sizeof(hb_mc_response_packet_t) / 4; - uint32_t *pkt_data = reinterpret_cast(packet); - for (int i = 0; i < axil_len; i++) { - while (!zpl->shell_read(GP0_RD_MC_RSP_FIFO_CTR)); - pkt_data[i] = zpl->shell_read(GP0_RD_MC_RSP_FIFO_DATA); - } + uint32_t *pkt_data = reinterpret_cast(packet); + for (int i = 0; i < axil_len; i++) { + while (!zpl->shell_read(GP0_RD_MC_RSP_FIFO_CTR)) + ; + pkt_data[i] = zpl->shell_read(GP0_RD_MC_RSP_FIFO_DATA); + } } -inline void recv_mc_request_packet(bsg_zynq_pl *zpl, hb_mc_request_packet_t *packet) { - int axil_len = sizeof(hb_mc_request_packet_t) / 4; +inline void recv_mc_request_packet(bsg_zynq_pl *zpl, + hb_mc_request_packet_t *packet) { + int axil_len = sizeof(hb_mc_request_packet_t) / 4; - uint32_t *pkt_data = reinterpret_cast(packet); - for (int i = 0; i < axil_len; i++) { - while (!zpl->shell_read(GP0_RD_MC_REQ_FIFO_CTR)); - pkt_data[i] = zpl->shell_read(GP0_RD_MC_REQ_FIFO_DATA); - } + uint32_t *pkt_data = reinterpret_cast(packet); + for (int i = 0; i < axil_len; i++) { + while (!zpl->shell_read(GP0_RD_MC_REQ_FIFO_CTR)) + ; + pkt_data[i] = zpl->shell_read(GP0_RD_MC_REQ_FIFO_DATA); + } } -inline void send_mc_response_packet(bsg_zynq_pl *zpl, hb_mc_response_packet_t *packet) { - int axil_len = sizeof(hb_mc_response_packet_t) / 4; +inline void send_mc_response_packet(bsg_zynq_pl *zpl, + hb_mc_response_packet_t *packet) { + int axil_len = sizeof(hb_mc_response_packet_t) / 4; - uint32_t *pkt_data = reinterpret_cast(packet); - for (int i = 0; i < axil_len; i++) { - while (!zpl->shell_read(GP0_RD_EP_RSP_FIFO_CTR)); - zpl->shell_write(GP0_WR_EP_RSP_FIFO_DATA, pkt_data[i], 0xf); - } + uint32_t *pkt_data = reinterpret_cast(packet); + for (int i = 0; i < axil_len; i++) { + while (!zpl->shell_read(GP0_RD_EP_RSP_FIFO_CTR)) + ; + zpl->shell_write(GP0_WR_EP_RSP_FIFO_DATA, pkt_data[i], 0xf); + } } -inline void send_mc_write(bsg_zynq_pl *zpl, uint8_t x, uint8_t y, uint32_t epa, int32_t data) { - hb_mc_request_packet_t req_pkt; - - req_pkt.op_v2 = 2; // SW - req_pkt.reg_id = 0xff; // unused - req_pkt.payload = data; - req_pkt.x_src = BSG_MANYCORE_MACHINE_LOADER_COORD_X; - req_pkt.y_src = BSG_MANYCORE_MACHINE_LOADER_COORD_Y; - req_pkt.x_dst = x; - req_pkt.y_dst = y; - req_pkt.addr = epa >> 2; - - bsg_pr_dbg_ps("Writing: [%x]<-%x\n", req_pkt.addr, req_pkt.payload); - send_mc_request_packet(zpl, &req_pkt); +inline void send_mc_write(bsg_zynq_pl *zpl, uint8_t x, uint8_t y, uint32_t epa, + int32_t data) { + hb_mc_request_packet_t req_pkt; + + req_pkt.op_v2 = 2; // SW + req_pkt.reg_id = 0xff; // unused + req_pkt.payload = data; + req_pkt.x_src = BSG_MANYCORE_MACHINE_LOADER_COORD_X; + req_pkt.y_src = BSG_MANYCORE_MACHINE_LOADER_COORD_Y; + req_pkt.x_dst = x; + req_pkt.y_dst = y; + req_pkt.addr = epa >> 2; + + bsg_pr_dbg_ps("Writing: [%x]<-%x\n", req_pkt.addr, req_pkt.payload); + send_mc_request_packet(zpl, &req_pkt); } -inline int32_t send_mc_read(bsg_zynq_pl *zpl, uint8_t x, uint8_t y, uint32_t epa) { - hb_mc_request_packet_t req_pkt; +inline int32_t send_mc_read(bsg_zynq_pl *zpl, uint8_t x, uint8_t y, + uint32_t epa) { + hb_mc_request_packet_t req_pkt; - req_pkt.op_v2 = 0; // LD - req_pkt.reg_id = 0xff; // unused - req_pkt.payload = 0; // Ignore payload - req_pkt.x_src = BSG_MANYCORE_MACHINE_LOADER_COORD_X; - req_pkt.y_src = BSG_MANYCORE_MACHINE_LOADER_COORD_Y; - req_pkt.x_dst = x; - req_pkt.y_dst = y; - req_pkt.addr = epa >> 2; + req_pkt.op_v2 = 0; // LD + req_pkt.reg_id = 0xff; // unused + req_pkt.payload = 0; // Ignore payload + req_pkt.x_src = BSG_MANYCORE_MACHINE_LOADER_COORD_X; + req_pkt.y_src = BSG_MANYCORE_MACHINE_LOADER_COORD_Y; + req_pkt.x_dst = x; + req_pkt.y_dst = y; + req_pkt.addr = epa >> 2; - send_mc_request_packet(zpl, &req_pkt); + send_mc_request_packet(zpl, &req_pkt); - hb_mc_response_packet_t resp_pkt; - recv_mc_response_packet(zpl, &resp_pkt); + hb_mc_response_packet_t resp_pkt; + recv_mc_response_packet(zpl, &resp_pkt); - return resp_pkt.data; + return resp_pkt.data; } std::queue getchar_queue; void *monitor(void *vargp) { - char c; - while(1) { - c = getchar(); - if(c != -1) - getchar_queue.push(c); - } - bsg_pr_info("Exiting from pthread\n"); - - return NULL; + char c; + while (1) { + c = getchar(); + if (c != -1) + getchar_queue.push(c); + } + bsg_pr_info("Exiting from pthread\n"); + + return NULL; } int ps_main(int argc, char **argv) { - bsg_zynq_pl *zpl = new bsg_zynq_pl(argc, argv); - - pthread_t thread_id; - - bsg_pr_info("ps.cpp: reading three base registers\n"); - bsg_pr_info("ps.cpp: dram_base=%lx\n", zpl->shell_read(0x00 + gp0_addr_base)); - - long val; - zpl->shell_write(GP0_WR_CSR_DRAM_BASE, 0xDEADBEEF, 0xf); - assert((zpl->shell_read(GP0_RD_CSR_DRAM_BASE) == (0xDEADBEEF))); - zpl->shell_write(GP0_WR_CSR_DRAM_BASE, val, 0xf); - assert((zpl->shell_read(GP0_RD_CSR_DRAM_BASE) == val)); - - bsg_tag_bitbang *btb = new bsg_tag_bitbang(zpl, GP0_WR_CSR_TAG_BITBANG, TAG_NUM_CLIENTS, TAG_MAX_LEN); - bsg_tag_client *mc_reset_client = new bsg_tag_client(TAG_CLIENT_MC_RESET_ID, TAG_CLIENT_MC_RESET_WIDTH); - - // Reset the bsg tag master - btb->reset_master(); - // Reset bsg client0 - btb->reset_client(mc_reset_client); - // Set bsg client0 to 1 (assert BP reset) - btb->set_client(mc_reset_client, 0x1); - // Set bsg client0 to 0 (deassert BP reset) - btb->set_client(mc_reset_client, 0x0); - - // We need some additional toggles for data to propagate through - btb->idle(50); - // Deassert the active-low system reset as we finish initializing the whole system - zpl->shell_write(GP0_WR_CSR_SYS_RESETN, 0x1, 0xF); - - unsigned long phys_ptr; - volatile int32_t *buf; - long allocated_dram = DRAM_ALLOCATE_SIZE; - bsg_pr_info("ps.cpp: calling allocate dram with size %ld\n", allocated_dram); - buf = (volatile int32_t *)zpl->allocate_dram(allocated_dram, &phys_ptr); - bsg_pr_info("ps.cpp: received %p (phys = %lx)\n", buf, phys_ptr); - zpl->shell_write(GP0_WR_CSR_DRAM_BASE, phys_ptr, 0xf); - assert((zpl->shell_read(GP0_RD_CSR_DRAM_BASE) == (int32_t)phys_ptr)); - bsg_pr_info("ps.cpp: wrote and verified base register\n"); - - if (argc == 1) { - bsg_pr_warn( - "No nbf file specified, sleeping for 2^31 seconds (this will hold " - "onto allocated DRAM)\n"); - sleep(1U << 31); - delete zpl; - return -1; - } - - nbf_load(zpl, argv[1]); - - bsg_pr_info("ps.cpp: Starting scan thread\n"); - pthread_create(&thread_id, NULL, monitor, NULL); - - bsg_pr_info("ps.cpp: Starting MC i/o polling thread\n"); - int mc_finished = 0; - while (mc_finished != NUM_MC_FINISH) { - bsg_pr_dbg_ps("Waiting for incoming request packet\n"); - hb_mc_request_packet_t mc_pkt; - recv_mc_request_packet(zpl, &mc_pkt); - bsg_pr_dbg_ps("Request packet signaled\n"); - int mc_epa = (mc_pkt.addr << 2) & 0xffff; // Trim to 16b EPA - int mc_data = mc_pkt.payload; - bsg_pr_dbg_ps("Request packet [%x] = %x\n", mc_epa, mc_data); - if (mc_epa == 0xeadc || mc_epa == 0xeee0) { - printf("%c", mc_data & 0xff); - fflush(stdout); - } else if (mc_epa == 0xead0) { - bsg_pr_info("MC finish packet received %d\n", ++mc_finished); - } else { - bsg_pr_info("Errant request packet: %x %x\n", mc_epa, mc_data); + bsg_zynq_pl *zpl = new bsg_zynq_pl(argc, argv); + + pthread_t thread_id; + + bsg_pr_info("ps.cpp: reading three base registers\n"); + bsg_pr_info("ps.cpp: dram_base=%lx\n", + zpl->shell_read(0x00 + gp0_addr_base)); + + long val; + zpl->shell_write(GP0_WR_CSR_DRAM_BASE, 0xDEADBEEF, 0xf); + assert((zpl->shell_read(GP0_RD_CSR_DRAM_BASE) == (0xDEADBEEF))); + zpl->shell_write(GP0_WR_CSR_DRAM_BASE, val, 0xf); + assert((zpl->shell_read(GP0_RD_CSR_DRAM_BASE) == val)); + + bsg_tag_bitbang *btb = new bsg_tag_bitbang(zpl, GP0_WR_CSR_TAG_BITBANG, + TAG_NUM_CLIENTS, TAG_MAX_LEN); + bsg_tag_client *mc_reset_client = + new bsg_tag_client(TAG_CLIENT_MC_RESET_ID, TAG_CLIENT_MC_RESET_WIDTH); + + // Reset the bsg tag master + btb->reset_master(); + // Reset bsg client0 + btb->reset_client(mc_reset_client); + // Set bsg client0 to 1 (assert BP reset) + btb->set_client(mc_reset_client, 0x1); + // Set bsg client0 to 0 (deassert BP reset) + btb->set_client(mc_reset_client, 0x0); + + // We need some additional toggles for data to propagate through + btb->idle(50); + // Deassert the active-low system reset as we finish initializing the whole + // system + zpl->shell_write(GP0_WR_CSR_SYS_RESETN, 0x1, 0xF); + + unsigned long phys_ptr; + volatile int32_t *buf; + long allocated_dram = DRAM_ALLOCATE_SIZE; + bsg_pr_info("ps.cpp: calling allocate dram with size %ld\n", + allocated_dram); + buf = (volatile int32_t *)zpl->allocate_dram(allocated_dram, &phys_ptr); + bsg_pr_info("ps.cpp: received %p (phys = %lx)\n", buf, phys_ptr); + zpl->shell_write(GP0_WR_CSR_DRAM_BASE, phys_ptr, 0xf); + assert((zpl->shell_read(GP0_RD_CSR_DRAM_BASE) == (int32_t)phys_ptr)); + bsg_pr_info("ps.cpp: wrote and verified base register\n"); + + if (argc == 1) { + bsg_pr_warn( + "No nbf file specified, sleeping for 2^31 seconds (this will hold " + "onto allocated DRAM)\n"); + sleep(1U << 31); + delete zpl; + return -1; + } + + nbf_load(zpl, argv[1]); + + bsg_pr_info("ps.cpp: Starting scan thread\n"); + pthread_create(&thread_id, NULL, monitor, NULL); + + bsg_pr_info("ps.cpp: Starting MC i/o polling thread\n"); + int mc_finished = 0; + while (mc_finished != NUM_MC_FINISH) { + bsg_pr_dbg_ps("Waiting for incoming request packet\n"); + hb_mc_request_packet_t mc_pkt; + recv_mc_request_packet(zpl, &mc_pkt); + bsg_pr_dbg_ps("Request packet signaled\n"); + int mc_epa = (mc_pkt.addr << 2) & 0xffff; // Trim to 16b EPA + int mc_data = mc_pkt.payload; + bsg_pr_dbg_ps("Request packet [%x] = %x\n", mc_epa, mc_data); + if (mc_epa == 0xeadc || mc_epa == 0xeee0) { + printf("%c", mc_data & 0xff); + fflush(stdout); + } else if (mc_epa == 0xead0) { + bsg_pr_info("MC finish packet received %d\n", ++mc_finished); + } else { + bsg_pr_info("Errant request packet: %x %x\n", mc_epa, mc_data); + } } - } - - configure_blackparrot(zpl); - - bsg_pr_info("ps.cpp: Starting BP i/o polling thread\n"); - int bp_finished = 0; - while (bp_finished != NUM_BP_FINISH) { - bsg_pr_dbg_ps("Waiting for incoming request packet\n"); - hb_mc_request_packet_t mc_pkt; - hb_mc_response_packet_t ep_rsp; - recv_mc_request_packet(zpl, &mc_pkt); - bsg_pr_dbg_ps("Request packet signaled\n"); - int mc_epa = (mc_pkt.addr << 2) & 0xffff; // Trim to 16b EPA - int mc_data = mc_pkt.payload; - bsg_pr_dbg_ps("Request packet [%x] = %x\n", mc_epa, mc_data); - if (mc_epa == 0x0000) { - hb_mc_response_packet_fill(&ep_rsp, &mc_pkt); - if (getchar_queue.empty()) { - ep_rsp.data = -1; - } else { - ep_rsp.data = getchar_queue.front(); - getchar_queue.pop(); - } - send_mc_response_packet(zpl, &ep_rsp); - } else if (mc_epa == 0x1000) { - printf("%c", mc_data & 0xff); - fflush(stdout); - } else if (mc_epa == 0x2000) { - bsg_pr_info("BP finish packet received %d\n", ++bp_finished); - } else if (mc_epa >= 0x3000 && mc_epa < 0x4000) { - int rom_idx = (mc_epa & 0xff) >> 2; - zpl->shell_write(GP0_WR_CSR_ROM_ADDR, rom_idx, 0xf); - hb_mc_response_packet_fill(&ep_rsp, &mc_pkt); - ep_rsp.data = zpl->shell_read(GP0_RD_ROM_DATA); - send_mc_response_packet(zpl, &ep_rsp); - } else { - bsg_pr_info("Errant request packet: %x %x\n", mc_epa, mc_data); + + configure_blackparrot(zpl); + + bsg_pr_info("ps.cpp: Starting BP i/o polling thread\n"); + int bp_finished = 0; + while (bp_finished != NUM_BP_FINISH) { + bsg_pr_dbg_ps("Waiting for incoming request packet\n"); + hb_mc_request_packet_t mc_pkt; + hb_mc_response_packet_t ep_rsp; + recv_mc_request_packet(zpl, &mc_pkt); + bsg_pr_dbg_ps("Request packet signaled\n"); + int mc_epa = (mc_pkt.addr << 2) & 0xffff; // Trim to 16b EPA + int mc_data = mc_pkt.payload; + bsg_pr_dbg_ps("Request packet [%x] = %x\n", mc_epa, mc_data); + if (mc_epa == 0x0000) { + hb_mc_response_packet_fill(&ep_rsp, &mc_pkt); + if (getchar_queue.empty()) { + ep_rsp.data = -1; + } else { + ep_rsp.data = getchar_queue.front(); + getchar_queue.pop(); + } + send_mc_response_packet(zpl, &ep_rsp); + } else if (mc_epa == 0x1000) { + printf("%c", mc_data & 0xff); + fflush(stdout); + } else if (mc_epa == 0x2000) { + bsg_pr_info("BP finish packet received %d\n", ++bp_finished); + } else if (mc_epa >= 0x3000 && mc_epa < 0x4000) { + int rom_idx = (mc_epa & 0xff) >> 2; + zpl->shell_write(GP0_WR_CSR_ROM_ADDR, rom_idx, 0xf); + hb_mc_response_packet_fill(&ep_rsp, &mc_pkt); + ep_rsp.data = zpl->shell_read(GP0_RD_ROM_DATA); + send_mc_response_packet(zpl, &ep_rsp); + } else { + bsg_pr_info("Errant request packet: %x %x\n", mc_epa, mc_data); + } } - } - zpl->done(); - delete zpl; - return 0; + zpl->done(); + delete zpl; + return 0; } // Configure BlackParrot // void configure_blackparrot(bsg_zynq_pl *zpl) { - // From Makefile - int num_tiles_x = BSG_MANYCORE_POD_TILES_X; - int num_tiles_y = BSG_MANYCORE_POD_TILES_Y; - int x_cord_width = BSG_MANYCORE_NOC_COORD_X_WIDTH; - int y_cord_width = BSG_MANYCORE_NOC_COORD_Y_WIDTH; - int x_subcord_width = (int) std::log2(num_tiles_x); - int y_subcord_width = (int) std::log2(num_tiles_y); - int pod_x_cord_width = x_cord_width - x_subcord_width; - int pod_y_cord_width = y_cord_width - y_subcord_width; - int bp_y_tile = (3 << y_subcord_width) | 0; - int bp_x_tile = (1 << x_subcord_width) | 0; - int bp_dram_pod_cord = (1 << pod_y_cord_width) | 1; - int bp_host_cord = (1 << (y_cord_width+x_subcord_width)); - int bp_cfg_base_epa = 0x2000; - int bp_cfg_reg_unused = bp_cfg_base_epa | 0x0000; - int bp_cfg_reg_freeze = bp_cfg_base_epa | 0x0008; - int bp_cfg_reg_npc = bp_cfg_base_epa | 0x0010; - int bp_cfg_reg_hio_mask = bp_cfg_base_epa | 0x0038; - int bp_cfg_reg_icache_id = bp_cfg_base_epa | 0x0200; - int bp_cfg_reg_icache_mode = bp_cfg_base_epa | 0x0208; - int bp_cfg_reg_dcache_id = bp_cfg_base_epa | 0x0400; - int bp_cfg_reg_dcache_mode = bp_cfg_base_epa | 0x0408; - - int bp_bridge_base_epa = 0x4000; - int bp_bridge_reg_dram_offset = bp_bridge_base_epa | 0x0000; - int bp_bridge_reg_dram_pod = bp_bridge_base_epa | 0x0008; - int bp_bridge_reg_my_cord = bp_bridge_base_epa | 0x0010; - int bp_bridge_reg_host_cord = bp_bridge_base_epa | 0x0018; - int bp_bridge_reg_scratchpad = bp_bridge_base_epa | 0x1000; - - send_mc_write(zpl, bp_x_tile, bp_y_tile, bp_cfg_reg_freeze, 1); - send_mc_write(zpl, bp_x_tile, bp_y_tile, bp_cfg_reg_npc, 0x80000000); - send_mc_write(zpl, bp_x_tile, bp_y_tile, bp_cfg_reg_hio_mask, 0xffff); - send_mc_write(zpl, bp_x_tile, bp_y_tile, bp_cfg_reg_icache_mode, 1); - send_mc_write(zpl, bp_x_tile, bp_y_tile, bp_cfg_reg_dcache_mode, 1); - send_mc_write(zpl, bp_x_tile, bp_y_tile, bp_bridge_reg_dram_offset, 0x2000000); - send_mc_write(zpl, bp_x_tile, bp_y_tile, bp_bridge_reg_dram_pod, bp_dram_pod_cord); - send_mc_write(zpl, bp_x_tile, bp_y_tile, bp_bridge_reg_my_cord, 0); - send_mc_write(zpl, bp_x_tile, bp_y_tile, bp_bridge_reg_host_cord, bp_host_cord); - send_mc_write(zpl, bp_x_tile, bp_y_tile, bp_cfg_reg_freeze, 0); + // From Makefile + int num_tiles_x = BSG_MANYCORE_POD_TILES_X; + int num_tiles_y = BSG_MANYCORE_POD_TILES_Y; + int x_cord_width = BSG_MANYCORE_NOC_COORD_X_WIDTH; + int y_cord_width = BSG_MANYCORE_NOC_COORD_Y_WIDTH; + int x_subcord_width = (int)std::log2(num_tiles_x); + int y_subcord_width = (int)std::log2(num_tiles_y); + int pod_x_cord_width = x_cord_width - x_subcord_width; + int pod_y_cord_width = y_cord_width - y_subcord_width; + int bp_y_tile = (3 << y_subcord_width) | 0; + int bp_x_tile = (1 << x_subcord_width) | 0; + int bp_dram_pod_cord = (1 << pod_y_cord_width) | 1; + int bp_host_cord = (1 << (y_cord_width + x_subcord_width)); + int bp_cfg_base_epa = 0x2000; + int bp_cfg_reg_unused = bp_cfg_base_epa | 0x0000; + int bp_cfg_reg_freeze = bp_cfg_base_epa | 0x0008; + int bp_cfg_reg_npc = bp_cfg_base_epa | 0x0010; + int bp_cfg_reg_hio_mask = bp_cfg_base_epa | 0x0038; + int bp_cfg_reg_icache_id = bp_cfg_base_epa | 0x0200; + int bp_cfg_reg_icache_mode = bp_cfg_base_epa | 0x0208; + int bp_cfg_reg_dcache_id = bp_cfg_base_epa | 0x0400; + int bp_cfg_reg_dcache_mode = bp_cfg_base_epa | 0x0408; + + int bp_bridge_base_epa = 0x4000; + int bp_bridge_reg_dram_offset = bp_bridge_base_epa | 0x0000; + int bp_bridge_reg_dram_pod = bp_bridge_base_epa | 0x0008; + int bp_bridge_reg_my_cord = bp_bridge_base_epa | 0x0010; + int bp_bridge_reg_host_cord = bp_bridge_base_epa | 0x0018; + int bp_bridge_reg_scratchpad = bp_bridge_base_epa | 0x1000; + + send_mc_write(zpl, bp_x_tile, bp_y_tile, bp_cfg_reg_freeze, 1); + send_mc_write(zpl, bp_x_tile, bp_y_tile, bp_cfg_reg_npc, 0x80000000); + send_mc_write(zpl, bp_x_tile, bp_y_tile, bp_cfg_reg_hio_mask, 0xffff); + send_mc_write(zpl, bp_x_tile, bp_y_tile, bp_cfg_reg_icache_mode, 1); + send_mc_write(zpl, bp_x_tile, bp_y_tile, bp_cfg_reg_dcache_mode, 1); + send_mc_write(zpl, bp_x_tile, bp_y_tile, bp_bridge_reg_dram_offset, + 0x2000000); + send_mc_write(zpl, bp_x_tile, bp_y_tile, bp_bridge_reg_dram_pod, + bp_dram_pod_cord); + send_mc_write(zpl, bp_x_tile, bp_y_tile, bp_bridge_reg_my_cord, 0); + send_mc_write(zpl, bp_x_tile, bp_y_tile, bp_bridge_reg_host_cord, + bp_host_cord); + send_mc_write(zpl, bp_x_tile, bp_y_tile, bp_cfg_reg_freeze, 0); } void nbf_load(bsg_zynq_pl *zpl, char *nbf_filename) { - string nbf_command; - string tmp; - string delimiter = "_"; - - long long int nbf[4]; - int pos = 0; - long unsigned int base_addr; - int data; - ifstream nbf_file(nbf_filename); - - if (!nbf_file.is_open()) { - bsg_pr_err("ps.cpp: error opening nbf file.\n"); - delete zpl; - return; - } - - bsg_pr_info("Starting NBF load\n"); - int line_count = 0; - while (getline(nbf_file, nbf_command)) { - int i = 0; - while ((pos = nbf_command.find(delimiter)) != std::string::npos) { - tmp = nbf_command.substr(0, pos); - nbf[i] = std::stoull(tmp, nullptr, 16); - nbf_command.erase(0, pos + 1); - i++; + string nbf_command; + string tmp; + string delimiter = "_"; + + long long int nbf[4]; + int pos = 0; + long unsigned int base_addr; + int data; + ifstream nbf_file(nbf_filename); + + if (!nbf_file.is_open()) { + bsg_pr_err("ps.cpp: error opening nbf file.\n"); + delete zpl; + return; } - nbf[i] = std::stoull(nbf_command, nullptr, 16); - - int x_tile = nbf[0]; - int y_tile = nbf[1]; - int epa = nbf[2]; // word addr - int nbf_data = nbf[3]; - - bool finish = (x_tile == 0xff) && (y_tile == 0xff) && (epa == 0x00000000) && (nbf_data == 0x00000000); - bool fence = (x_tile == 0xff) && (y_tile == 0xff) && (epa == 0xffffffff) && (nbf_data == 0xffffffff); - - if (finish) { - bsg_pr_info("ps.cpp: nbf finish command, line %d\n", line_count); - continue; - } else if (fence) { - bsg_pr_dbg_ps("ps.cpp: nbf fence command (ignoring), line %d\n", line_count); - bsg_pr_info("Waiting for credit drain\n"); - while(zpl->shell_read(GP0_RD_CREDIT_COUNT) > 0); - bsg_pr_info("Credits drained\n"); - continue; - } else { - send_mc_write(zpl, x_tile, y_tile, epa << 2, nbf_data); + + bsg_pr_info("Starting NBF load\n"); + int line_count = 0; + while (getline(nbf_file, nbf_command)) { + int i = 0; + while ((pos = nbf_command.find(delimiter)) != std::string::npos) { + tmp = nbf_command.substr(0, pos); + nbf[i] = std::stoull(tmp, nullptr, 16); + nbf_command.erase(0, pos + 1); + i++; + } + nbf[i] = std::stoull(nbf_command, nullptr, 16); + + int x_tile = nbf[0]; + int y_tile = nbf[1]; + int epa = nbf[2]; // word addr + int nbf_data = nbf[3]; + + bool finish = (x_tile == 0xff) && (y_tile == 0xff) && + (epa == 0x00000000) && (nbf_data == 0x00000000); + bool fence = (x_tile == 0xff) && (y_tile == 0xff) && + (epa == 0xffffffff) && (nbf_data == 0xffffffff); + + if (finish) { + bsg_pr_info("ps.cpp: nbf finish command, line %d\n", line_count); + continue; + } else if (fence) { + bsg_pr_dbg_ps("ps.cpp: nbf fence command (ignoring), line %d\n", + line_count); + bsg_pr_info("Waiting for credit drain\n"); + while (zpl->shell_read(GP0_RD_CREDIT_COUNT) > 0) + ; + bsg_pr_info("Credits drained\n"); + continue; + } else { + send_mc_write(zpl, x_tile, y_tile, epa << 2, nbf_data); #ifdef VERIFY_NBF - int32_t verif_data; - - bsg_pr_dbg_ps("Querying: %x\n", epa << 2); - verif_data = send_mc_read(zpl, x_tile, y_tile, epa << 2); - - // Some verification reads are expected to fail e.g. CSRs - if (req_pkt.payload == resp_pkt.data) { - bsg_pr_info("Received verification: %x==%x\n", req_pkt.payload, resp_pkt.data); - } else { - bsg_pr_info("Failed verification: %x!=%x\n", req_pkt.payload, resp_pkt.data); - } + int32_t verif_data; + + bsg_pr_dbg_ps("Querying: %x\n", epa << 2); + verif_data = send_mc_read(zpl, x_tile, y_tile, epa << 2); + + // Some verification reads are expected to fail e.g. CSRs + if (req_pkt.payload == resp_pkt.data) { + bsg_pr_info("Received verification: %x==%x\n", req_pkt.payload, + resp_pkt.data); + } else { + bsg_pr_info("Failed verification: %x!=%x\n", req_pkt.payload, + resp_pkt.data); + } #endif + } } - } } - diff --git a/cosim/include/bridge/bsg_zynq_pl.h b/cosim/include/bridge/bsg_zynq_pl.h index a953e2f2..1ed492cb 100644 --- a/cosim/include/bridge/bsg_zynq_pl.h +++ b/cosim/include/bridge/bsg_zynq_pl.h @@ -2,20 +2,20 @@ #ifndef BSG_ZYNQ_PL_H #define BSG_ZYNQ_PL_H -#include -#include -#include -#include -#include -#include -#include #include -#include -#include -#include #include +#include +#include +#include #include +#include #include +#include +#include +#include +#include +#include +#include #include "bsg_zynq_pl_hardware.h" @@ -25,41 +25,36 @@ using namespace std; class bsg_zynq_pl : public bsg_zynq_pl_hardware { - public: - bsg_zynq_pl(int argc, char *argv[]) { - printf("// bsg_zynq_pl: be sure to run as root\n"); - init(); - } + public: + bsg_zynq_pl(int argc, char *argv[]) { + printf("// bsg_zynq_pl: be sure to run as root\n"); + init(); + } - ~bsg_zynq_pl(void) { - deinit(); - } + ~bsg_zynq_pl(void) { deinit(); } - void tick(void) override { - /* Does nothing on PS */ - } + void tick(void) override { /* Does nothing on PS */ + } - void done(void) override { - printf("bsg_zynq_pl: done() called, exiting\n"); - } + void done(void) override { + printf("bsg_zynq_pl: done() called, exiting\n"); + } - void *allocate_dram(unsigned long len_in_bytes, unsigned long *physical_ptr) override { - bsg_pr_info(" bsg_zynq_pl: Allocated dummy DRAM\n"); - return (void *)(physical_ptr = (unsigned long *)0xdeadbeef); - } + void *allocate_dram(unsigned long len_in_bytes, + unsigned long *physical_ptr) override { + bsg_pr_info(" bsg_zynq_pl: Allocated dummy DRAM\n"); + return (void *)(physical_ptr = (unsigned long *)0xdeadbeef); + } - void free_dram(void *virtual_ptr) override { - printf("bsg_zynq_pl: Freeing dummy DRAM\n"); - } + void free_dram(void *virtual_ptr) override { + printf("bsg_zynq_pl: Freeing dummy DRAM\n"); + } - int32_t shell_read(uintptr_t addr) override { - return uart_read(addr); - } + int32_t shell_read(uintptr_t addr) override { return uart_read(addr); } - void shell_write(uintptr_t addr, int32_t data, uint8_t wmask) override { - uart_write(addr, data, wmask); - } + void shell_write(uintptr_t addr, int32_t data, uint8_t wmask) override { + uart_write(addr, data, wmask); + } }; #endif - diff --git a/cosim/include/common/bsg_argparse.h b/cosim/include/common/bsg_argparse.h index 406c859f..293eb208 100644 --- a/cosim/include/common/bsg_argparse.h +++ b/cosim/include/common/bsg_argparse.h @@ -26,7 +26,7 @@ static void get_argv(char *args, int argc, char **argv) { // VCS doesn't provide it to us. Instead, we "hack" around it by reading // the path from 'proc/self/exe'. The maximum path-name length is 1024, // with an extra null character for safety - static char path[1025] = { '\0' }; + static char path[1025] = {'\0'}; readlink("/proc/self/exe", path, sizeof(path) - 1); argv[0] = path; @@ -52,4 +52,3 @@ static void get_argv(char *args, int argc, char **argv) { } #endif - diff --git a/cosim/include/common/bsg_axil.h b/cosim/include/common/bsg_axil.h index 0933109e..956cfc88 100644 --- a/cosim/include/common/bsg_axil.h +++ b/cosim/include/common/bsg_axil.h @@ -3,29 +3,30 @@ #define BSG_AXIL_H #include -#include -#include -#include -#include #include +#include #include #include #include +#include +#include +#include #include #include -#include #include #include "bsg_nonsynth_dpi_gpio.hpp" -#include "bsg_printing.h" #include "bsg_pin.h" +#include "bsg_printing.h" #ifndef ZYNQ_AXI_TIMEOUT #define ZYNQ_AXI_TIMEOUT 1000 #endif -extern "C" { int bsg_dpi_time(); } +extern "C" { +int bsg_dpi_time(); +} using namespace std; using namespace bsg_nonsynth_dpi; using namespace boost::coroutines2; @@ -35,7 +36,7 @@ typedef coroutine::pull_type coro_t; typedef coroutine::push_type yield_t; class s_axil_device { -public: + public: virtual bool is_read(uintptr_t address) = 0; virtual bool can_read(uintptr_t address) = 0; virtual int32_t read(uintptr_t address) = 0; @@ -46,7 +47,7 @@ class s_axil_device { }; class m_axil_device { -public: + public: virtual bool pending_read(uintptr_t *address) = 0; virtual void return_read(int32_t data) = 0; @@ -57,9 +58,8 @@ class m_axil_device { // A = axil address width // D = axil data width -template -class maxil { -private: +template class maxil { + private: string base; pin<1> p_aclk; @@ -101,10 +101,9 @@ class maxil { mutex = 0; } -public: + public: maxil(const string &base) - : base(base), - p_aclk(string(base) + string(".aclk_gpio")), + : base(base), p_aclk(string(base) + string(".aclk_gpio")), p_aresetn(string(base) + string(".aresetn_gpio")), p_awaddr(string(base) + string(".awaddr_gpio")), p_awprot(string(base) + string(".awprot_gpio")), @@ -124,8 +123,7 @@ class maxil { p_rdata(string(base) + string(".rdata_gpio")), p_rresp(string(base) + string(".rresp_gpio")), p_rvalid(string(base) + string(".rvalid_gpio")), - p_rready(string(base) + string(".rready_gpio")), - mutex(0) { + p_rready(string(base) + string(".rready_gpio")), mutex(0) { std::cout << "Instantiating AXIL at " << base << std::endl; } @@ -265,9 +263,8 @@ class maxil { // A = axil address width // D = axil data width -template -class saxil { -private: +template class saxil { + private: pin<1> p_aclk; pin<1> p_aresetn; @@ -307,7 +304,7 @@ class saxil { mutex = 0; } -public: + public: saxil(const string &base) : p_aclk(string(base) + string(".aclk_gpio")), p_aresetn(string(base) + string(".aresetn_gpio")), @@ -486,4 +483,3 @@ class saxil { }; #endif - diff --git a/cosim/include/common/bsg_axis.h b/cosim/include/common/bsg_axis.h index 6452475a..257b7e7f 100644 --- a/cosim/include/common/bsg_axis.h +++ b/cosim/include/common/bsg_axis.h @@ -2,29 +2,30 @@ #define BSG_AXIS_H #include -#include -#include -#include -#include #include +#include #include #include #include +#include +#include +#include #include #include -#include #include #include "bsg_nonsynth_dpi_gpio.hpp" -#include "bsg_printing.h" #include "bsg_pin.h" +#include "bsg_printing.h" #ifndef ZYNQ_AXI_TIMEOUT #define ZYNQ_AXI_TIMEOUT 1000 #endif -extern "C" { int bsg_dpi_time(); } +extern "C" { +int bsg_dpi_time(); +} using namespace std; using namespace bsg_nonsynth_dpi; using namespace boost::coroutines2; @@ -34,191 +35,188 @@ typedef coroutine::pull_type coro_t; typedef coroutine::push_type yield_t; class s_axis_device { - public: - virtual bool can_write(uint8_t last) = 0; - virtual void write(int32_t data, uint8_t last) = 0; + public: + virtual bool can_write(uint8_t last) = 0; + virtual void write(int32_t data, uint8_t last) = 0; }; class m_axis_device { - public: - virtual bool pending_write(int32_t *data, uint8_t *last) = 0; + public: + virtual bool pending_write(int32_t *data, uint8_t *last) = 0; }; // D = axis data width -template -class saxis { - private: - pin<1> p_aclk; - pin<1> p_aresetn; - - pin<1> p_tready; - pin<1> p_tvalid; - pin p_tdata; - pin p_tkeep; - pin<1> p_tlast; - - // We use a boolean instead of true mutex so that we can check it - bool mutex = 0; - - void lock(yield_t &yield) { - do { - yield(); - } while (mutex); - mutex = 1; - } - - void unlock(yield_t &yield) { +template class saxis { + private: + pin<1> p_aclk; + pin<1> p_aresetn; + + pin<1> p_tready; + pin<1> p_tvalid; + pin p_tdata; + pin p_tkeep; + pin<1> p_tlast; + + // We use a boolean instead of true mutex so that we can check it + bool mutex = 0; + + void lock(yield_t &yield) { + do { + yield(); + } while (mutex); + mutex = 1; + } + + void unlock(yield_t &yield) { + yield(); + mutex = 0; + } + + public: + saxis(const string &base) + : p_aclk(string(base) + string(".aclk_gpio")), + p_aresetn(string(base) + string(".aresetn_gpio")), + p_tready(string(base) + string(".tready_gpio")), + p_tvalid(string(base) + string(".tvalid_gpio")), + p_tdata(string(base) + string(".tdata_gpio")), + p_tkeep(string(base) + string(".tkeep_gpio")), + p_tlast(string(base) + string(".tlast_gpio")) { + std::cout << "Instantiating SAXIS at " << base << std::endl; + } + + // Wait for (low true) reset to be asserted by the testbench + void reset(yield_t &yield) { + printf("bsg_zynq_pl: Entering reset\n"); + lock(yield); + while (this->p_aresetn == 0) { yield(); - mutex = 0; } - - public: - saxis(const string &base) - : p_aclk(string(base) + string(".aclk_gpio")), - p_aresetn(string(base) + string(".aresetn_gpio")), - p_tready(string(base) + string(".tready_gpio")), - p_tvalid(string(base) + string(".tvalid_gpio")), - p_tdata(string(base) + string(".tdata_gpio")), - p_tkeep(string(base) + string(".tkeep_gpio")), - p_tlast(string(base) + string(".tlast_gpio")) { - std::cout << "Instantiating SAXIS at " << base << std::endl; + unlock(yield); + printf("bsg_zynq_pl: Exiting reset\n"); + } + + bool axis_has_write(uint8_t *last) { + bool tv = this->p_tvalid; + *last = this->p_tlast; + + return tv && !mutex; + } + + void axis_write_helper(s_axis_device *p, yield_t &yield) { + lock(yield); + int timeout_counter = 0; + + bool t_done = false; + int tdata; + bool tlast; + + this->p_tready = 1; + do { + if (timeout_counter++ == ZYNQ_AXI_TIMEOUT) { + bsg_pr_err("bsg_zynq_pl: SAXIS read request timeout\n"); + timeout_counter = 0; } - // Wait for (low true) reset to be asserted by the testbench - void reset(yield_t &yield) { - printf("bsg_zynq_pl: Entering reset\n"); - lock(yield); - while (this->p_aresetn == 0) { - yield(); + if (this->p_tvalid == 1) { + tdata = this->p_tdata; + tlast = this->p_tlast; + p->write(tdata, tlast); + t_done = true; } - unlock(yield); - printf("bsg_zynq_pl: Exiting reset\n"); - } - bool axis_has_write(uint8_t *last) { - bool tv = this->p_tvalid; - *last = this->p_tlast; + // Tick the clock one cycle + yield(); - return tv && !mutex; - } + if (t_done) { + this->p_tready = 0; + } + } while (!t_done); - void axis_write_helper(s_axis_device *p, yield_t &yield) { - lock(yield); - int timeout_counter = 0; - - bool t_done = false; - int tdata; - bool tlast; - - this->p_tready = 1; - do { - if (timeout_counter++ == ZYNQ_AXI_TIMEOUT) { - bsg_pr_err("bsg_zynq_pl: SAXIS read request timeout\n"); - timeout_counter = 0; - } - - if (this->p_tvalid == 1) { - tdata = this->p_tdata; - tlast = this->p_tlast; - p->write(tdata, tlast); - t_done = true; - } - - // Tick the clock one cycle - yield(); - - if (t_done) { - this->p_tready = 0; - } - } while (!t_done); - - unlock(yield); - return; - } + unlock(yield); + return; + } }; // D = axis data width -template -class maxis { - private: - pin<1> p_aclk; - pin<1> p_aresetn; - - pin<1> p_tready; - pin<1> p_tvalid; - pin p_tdata; - pin p_tkeep; - pin<1> p_tlast; - - // We use a boolean instead of true mutex so that we can check it - bool mutex = 0; - - void lock(yield_t &yield) { - do { - yield(); - } while (mutex); - mutex = 1; - } - - void unlock(yield_t &yield) { +template class maxis { + private: + pin<1> p_aclk; + pin<1> p_aresetn; + + pin<1> p_tready; + pin<1> p_tvalid; + pin p_tdata; + pin p_tkeep; + pin<1> p_tlast; + + // We use a boolean instead of true mutex so that we can check it + bool mutex = 0; + + void lock(yield_t &yield) { + do { + yield(); + } while (mutex); + mutex = 1; + } + + void unlock(yield_t &yield) { + yield(); + mutex = 0; + } + + public: + maxis(const string &base) + : p_aclk(string(base) + string(".aclk_gpio")), + p_aresetn(string(base) + string(".aresetn_gpio")), + p_tready(string(base) + string(".tready_gpio")), + p_tvalid(string(base) + string(".tvalid_gpio")), + p_tdata(string(base) + string(".tdata_gpio")), + p_tkeep(string(base) + string(".tkeep_gpio")), + p_tlast(string(base) + string(".tlast_gpio")) { + std::cout << "Instantiating MAXIS at " << base << std::endl; + } + + // Wait for (low true) reset to be asserted by the testbench + void reset(yield_t &yield) { + printf("bsg_zynq_pl: Entering reset\n"); + lock(yield); + while (this->p_aresetn == 0) { yield(); - mutex = 0; } + unlock(yield); + printf("bsg_zynq_pl: Exiting reset\n"); + } + + void axis_write_helper(int32_t tdata, int32_t tlast, yield_t &yield) { + lock(yield); + int timeout_counter = 0; + + bool t_done = false; + this->p_tvalid = 1; + this->p_tdata = tdata; + this->p_tlast = tlast; + this->p_tkeep = 0xf; // Do not support tkeep for now + do { + // check timeout + if (timeout_counter++ == ZYNQ_AXI_TIMEOUT) { + bsg_pr_err("bsg_zynq_pl: MAXIS write timeout\n"); + } - public: - maxis(const string &base) - : p_aclk(string(base) + string(".aclk_gpio")), - p_aresetn(string(base) + string(".aresetn_gpio")), - p_tready(string(base) + string(".tready_gpio")), - p_tvalid(string(base) + string(".tvalid_gpio")), - p_tdata(string(base) + string(".tdata_gpio")), - p_tkeep(string(base) + string(".tkeep_gpio")), - p_tlast(string(base) + string(".tlast_gpio")) { - std::cout << "Instantiating MAXIS at " << base << std::endl; + if (this->p_tready) { + t_done = true; } - // Wait for (low true) reset to be asserted by the testbench - void reset(yield_t &yield) { - printf("bsg_zynq_pl: Entering reset\n"); - lock(yield); - while (this->p_aresetn == 0) { - yield(); + // tick the clock one cycle + yield(); + + if (t_done) { + this->p_tvalid = 0; } - unlock(yield); - printf("bsg_zynq_pl: Exiting reset\n"); - } + } while (!t_done); - void axis_write_helper(int32_t tdata, int32_t tlast, yield_t &yield) { - lock(yield); - int timeout_counter = 0; - - bool t_done = false; - this->p_tvalid = 1; - this->p_tdata = tdata; - this->p_tlast = tlast; - this->p_tkeep = 0xf; // Do not support tkeep for now - do { - // check timeout - if (timeout_counter++ == ZYNQ_AXI_TIMEOUT) { - bsg_pr_err("bsg_zynq_pl: MAXIS write timeout\n"); - } - - if (this->p_tready) { - t_done = true; - } - - // tick the clock one cycle - yield(); - - if (t_done) { - this->p_tvalid = 0; - } - } while (!t_done); - - unlock(yield); - return; - } + unlock(yield); + return; + } }; #endif - diff --git a/cosim/include/common/bsg_peripherals.h b/cosim/include/common/bsg_peripherals.h index fd9d7eec..4fdd7646 100644 --- a/cosim/include/common/bsg_peripherals.h +++ b/cosim/include/common/bsg_peripherals.h @@ -2,19 +2,19 @@ #ifndef BSG_PERIPHERALS_H #define BSG_PERIPHERALS_H -#include #include +#include #include "bsg_axil.h" #include "bsg_axis.h" // Scratchpad #define SCRATCHPAD_BASE 0x1000000 -#define SCRATCHPAD_SIZE 0x100000 +#define SCRATCHPAD_SIZE 0x0100000 class zynq_scratchpad : public s_axil_device { std::vector mem; -public: + public: zynq_scratchpad() { mem.resize(SCRATCHPAD_SIZE, 0); } bool is_read(uintptr_t address) override { @@ -27,13 +27,9 @@ class zynq_scratchpad : public s_axil_device { (address < SCRATCHPAD_BASE + SCRATCHPAD_SIZE); } - bool can_read(uintptr_t address) override { - return true; - } + bool can_read(uintptr_t address) override { return true; } - bool can_write(uintptr_t address) override { - return true; - } + bool can_write(uintptr_t address) override { return true; } int32_t read(uintptr_t address) override { uintptr_t final_addr = @@ -54,7 +50,7 @@ class zynq_scratchpad : public s_axil_device { // UART (loosely modelled off 16550) #define UART_BASE 0x1100000 -#define UART_SIZE 0x1000 +#define UART_SIZE 0x0001000 #define UART_REG_RX_FIFO 0x000 #define UART_REG_TX_FIFO 0x004 #define UART_REG_STAT 0x008 @@ -63,7 +59,7 @@ class zynq_uart : public s_axil_device { std::queue rx_fifo; std::queue tx_fifo; -public: + public: zynq_uart() {} bool is_read(uintptr_t address) override { @@ -75,13 +71,9 @@ class zynq_uart : public s_axil_device { return (address >= UART_BASE) && (address < UART_BASE + UART_SIZE); } - bool can_read(uintptr_t address) override { - return true; - } + bool can_read(uintptr_t address) override { return true; } - bool can_write(uintptr_t address) override { - return true; - } + bool can_write(uintptr_t address) override { return true; } int32_t read(uintptr_t address) override { uintptr_t final_addr = ((address - UART_BASE) + UART_SIZE) % UART_SIZE; @@ -92,8 +84,8 @@ class zynq_uart : public s_axil_device { rx_fifo.pop(); } } else if (final_addr == UART_REG_STAT) { - retval = ((1 & tx_fifo.empty()) << 2) // TX empty - | ((1 & !rx_fifo.empty()) << 0); // RX valid + retval = ((1 & tx_fifo.empty()) << 2) // TX empty + | ((1 & !rx_fifo.empty()) << 0); // RX valid } else { bsg_pr_info(" bsg_zynq_pl: errant uart read: %x\n", final_addr); } @@ -108,11 +100,11 @@ class zynq_uart : public s_axil_device { if (final_addr == UART_REG_TX_FIFO) { tx_fifo.push(data); } else if (final_addr == UART_REG_CTRL) { - if (data & 0b00001) { // reset TX FIFO + if (data & 0b00001) { // reset TX FIFO while (!tx_fifo.empty()) { tx_fifo.pop(); } - } else if (data & 0xb00010) { // reset RX FIFO + } else if (data & 0xb00010) { // reset RX FIFO while (!rx_fifo.empty()) { rx_fifo.pop(); } @@ -154,12 +146,12 @@ class zynq_uart : public s_axil_device { class zynq_watchdog : public m_axil_device { int count = 0; -public: + public: bool pending_write(uintptr_t *address, int32_t *data, uint8_t *wmask) { // Every time we check for pending, we increment the count if (count++ % WATCHDOG_PERIOD == 0) { *address = WATCHDOG_ADDRESS; - *data = 'W'; // For 'woof' + *data = 'W'; // For 'woof' *wmask = 0xf; bsg_pr_dbg_pl(" bsg_zynq_pl: watchdog send\n"); return true; @@ -170,11 +162,10 @@ class zynq_watchdog : public m_axil_device { bool pending_read(uintptr_t *address) { return 0; } - void return_write() { - bsg_pr_dbg_pl(" bsg_zynq_pl: watchdog return\n"); - } + void return_write() { bsg_pr_dbg_pl(" bsg_zynq_pl: watchdog return\n"); } - void return_read(int32_t data) { /* Unimp */ } + void return_read(int32_t data) { /* Unimp */ + } }; // Buffer @@ -182,12 +173,10 @@ class zynq_buffer : public s_axis_device, m_axis_device { bool buffer_full = false; std::queue buffer; -public: + public: zynq_buffer() {} - bool can_write(uint8_t last) { - return !buffer_full; - } + bool can_write(uint8_t last) { return !buffer_full; } void write(int32_t data, uint8_t last) { bsg_pr_dbg_pl(" bsg_zynq_pl: fifo write <- %x\n", data); @@ -219,5 +208,168 @@ class zynq_buffer : public s_axis_device, m_axis_device { // USER Functions }; +// Debug +// TODO: +#define DEBUG_BASE 0x0000000 +#define DEBUG_SIZE 0x0100000 +// Compile with DMI DPI +class zynq_debug : public s_axil_device, m_axil_device { + public: + zynq_debug() { + bsg_pr_err("Debug unit co-simulation not yet implemented!"); + } + + // S AXI + bool is_read(uintptr_t address) override { + return (address >= DEBUG_BASE) && (address < DEBUG_BASE + DEBUG_SIZE); + } + + bool is_write(uintptr_t address) override { + return (address >= DEBUG_BASE) && (address < DEBUG_BASE + DEBUG_SIZE); + } + + bool can_read(uintptr_t address) override { return true; } + + bool can_write(uintptr_t address) override { return true; } + + int32_t read(uintptr_t address) override { + return 0; // Unimplemented + } + + void write(uintptr_t address, int32_t data) override { + return; // Unimplemented + } + + // M AXI + bool pending_write(uintptr_t *address, int32_t *data, uint8_t *wmask) { + return false; + } + + bool pending_read(uintptr_t *address) { return 0; } + + void return_write() { bsg_pr_dbg_pl(" bsg_zynq_pl: debug return\n"); } + + void return_read(int32_t data) { /* Unimp */ + } + + // USER Functions +}; + +// PLIC +// TODO: +#define PLIC_BASE 0x0000000 +#define PLIC_SIZE 0x0100000 +#define PLIC_INTERRUPT_ADDRESS 0x000000 +// This is a super pared down PLIC that doesn't have a sense of priorities, just +// interrupts +class zynq_plic : public s_axil_device, m_axil_device { + bool level = false; + bool raised = false; + + public: + zynq_plic() { bsg_pr_err("plic co-simulation not yet implemented!"); } + + // S AXI + bool is_read(uintptr_t address) override { + return (address >= PLIC_BASE) && (address < PLIC_BASE + PLIC_SIZE); + } + + bool is_write(uintptr_t address) override { + return (address >= PLIC_BASE) && (address < PLIC_BASE + PLIC_SIZE); + } + + bool can_read(uintptr_t address) override { return true; } + + bool can_write(uintptr_t address) override { return true; } + + int32_t read(uintptr_t address) override { + return 0; // Unimplemented + } + + void write(uintptr_t address, int32_t data) override { + return; // Unimplemented + } + + // M AXI + bool pending_write(uintptr_t *address, int32_t *data, uint8_t *wmask) { + if (raised) { + *address = PLIC_INTERRUPT_ADDRESS; + *data = 1; + *wmask = 0xf; + bsg_pr_dbg_pl(" bsg_zynq_pl: plic_irq send\n"); + + raised = false; + + return true; + } + + return true; + } + + bool pending_read(uintptr_t *address) { return 0; } + + void return_write() { bsg_pr_dbg_pl(" bsg_zynq_pl: plic return\n"); } + + void return_read(int32_t data) { /* Unimp */ + } + + // USER Functions + + // Returns previous irq state + bool set_irq(bool val) { + bool temp = level; + + level = val; + raised = level ^ temp; + + return temp; + } +}; + +// DMA +// TODO: +#define DMA_BASE 0x0000000 +#define DMA_SIZE 0x0100000 +#define DMA_INTERRUPT_ADDRESS 0x000000 +// Compile with DMI DPI +class zynq_dma : public s_axil_device, m_axil_device { + public: + zynq_dma() { bsg_pr_err("dma co-simulation not yet implemented!"); } + + // S AXI + bool is_read(uintptr_t address) override { + return (address >= DMA_BASE) && (address < DMA_BASE + DMA_SIZE); + } + + bool is_write(uintptr_t address) override { + return (address >= DMA_BASE) && (address < DMA_BASE + DMA_SIZE); + } + + bool can_read(uintptr_t address) override { return true; } + + bool can_write(uintptr_t address) override { return true; } + + int32_t read(uintptr_t address) override { + return 0; // Unimplemented + } + + void write(uintptr_t address, int32_t data) override { + return; // Unimplemented + } + + // M AXI + bool pending_write(uintptr_t *address, int32_t *data, uint8_t *wmask) { + return false; + } + + bool pending_read(uintptr_t *address) { return 0; } + + void return_write() { bsg_pr_dbg_pl(" bsg_zynq_pl: debug return\n"); } + + void return_read(int32_t data) { /* Unimp */ + } + + // USER Functions +}; #endif diff --git a/cosim/include/common/bsg_pin.h b/cosim/include/common/bsg_pin.h index 678977df..c17f5be0 100644 --- a/cosim/include/common/bsg_pin.h +++ b/cosim/include/common/bsg_pin.h @@ -3,25 +3,26 @@ #define BSG_PIN_H #include "bsg_nonsynth_dpi_gpio.hpp" -#include "bsg_printing.h" #include "bsg_pin.h" +#include "bsg_printing.h" #ifndef ZYNQ_AXI_TIMEOUT #define ZYNQ_AXI_TIMEOUT 1000 #endif -extern "C" { int bsg_dpi_time(); } +extern "C" { +int bsg_dpi_time(); +} using namespace std; using namespace bsg_nonsynth_dpi; using namespace boost::coroutines2; using namespace std::placeholders; // W = width of pin -template -class pin { +template class pin { std::unique_ptr> gpio; -public: + public: pin(const string &hierarchy) { gpio = std::make_unique>(hierarchy); } @@ -49,4 +50,3 @@ class pin { }; #endif - diff --git a/cosim/include/common/bsg_printing.h b/cosim/include/common/bsg_printing.h index fb496283..4ae69937 100644 --- a/cosim/include/common/bsg_printing.h +++ b/cosim/include/common/bsg_printing.h @@ -37,11 +37,11 @@ #define _BSG_PRINTING_H #ifdef __cplusplus -#include #include +#include #else -#include #include +#include #endif #include @@ -57,8 +57,8 @@ extern "C" { #if defined(ZYNQ_PL_DEBUG) #define bsg_pr_dbg_pl(fmt, ...) \ - do { \ - bsg_pr_info(BSG_PRINT_PREFIX_DEBUG_PL fmt, ##__VA_ARGS__); \ + do { \ + bsg_pr_info(BSG_PRINT_PREFIX_DEBUG_PL fmt, ##__VA_ARGS__); \ } while (0) #else #define bsg_pr_dbg_pl(...) @@ -66,26 +66,26 @@ extern "C" { #if defined(ZYNQ_PS_DEBUG) #define bsg_pr_dbg_ps(fmt, ...) \ - do { \ - printf(BSG_PRINT_PREFIX_DEBUG_PS fmt, ##__VA_ARGS__); \ + do { \ + printf(BSG_PRINT_PREFIX_DEBUG_PS fmt, ##__VA_ARGS__); \ } while (0) #else #define bsg_pr_dbg_ps(...) #endif #define bsg_pr_err(fmt, ...) \ - do { \ - printf(BSG_PRINT_PREFIX_ERROR fmt, ##__VA_ARGS__); \ + do { \ + printf(BSG_PRINT_PREFIX_ERROR fmt, ##__VA_ARGS__); \ } while (0) #define bsg_pr_warn(fmt, ...) \ - do { \ - printf(BSG_PRINT_PREFIX_WARN fmt, ##__VA_ARGS__); \ + do { \ + printf(BSG_PRINT_PREFIX_WARN fmt, ##__VA_ARGS__); \ } while (0) #define bsg_pr_info(fmt, ...) \ - do { \ - printf(BSG_PRINT_PREFIX_INFO fmt, ##__VA_ARGS__); \ + do { \ + printf(BSG_PRINT_PREFIX_INFO fmt, ##__VA_ARGS__); \ } while (0) #if defined(__cplusplus) diff --git a/cosim/include/common/bsg_tag_bitbang.h b/cosim/include/common/bsg_tag_bitbang.h index 00674515..89bb2f0c 100644 --- a/cosim/include/common/bsg_tag_bitbang.h +++ b/cosim/include/common/bsg_tag_bitbang.h @@ -8,14 +8,15 @@ struct bsg_tag_client { int nodeID; int width; - bsg_tag_client(int nodeID, int width) : nodeID(nodeID), width(width) { } + bsg_tag_client(int nodeID, int width) : nodeID(nodeID), width(width) {} }; class bsg_tag_bitbang { static int safe_clog2(int x) { int ret = 1; - int val = x*2-1; - while ((val >>= 1) > 1) ret++; + int val = x * 2 - 1; + while ((val >>= 1) > 1) + ret++; return ret; } @@ -25,38 +26,39 @@ class bsg_tag_bitbang { uintptr_t shell_addr; // low-level bit manipulation function - void write_bit(int bit) { - zpl->shell_write(shell_addr, (bit&1), 0xf); - } + void write_bit(int bit) { zpl->shell_write(shell_addr, (bit & 1), 0xf); } // low-level tag interaction void write_client(int nodeID, int data_not_reset, int width, int payload) { // start bit write_bit(1); // payload len - for(int i = 0; i < max_len; i++) { + for (int i = 0; i < max_len; i++) { write_bit((width >> i) & 0x1); } // data_not_reset write_bit(data_not_reset); // nodeID - for(int i = 0; i < id_len; i++) { + for (int i = 0; i < id_len; i++) { write_bit((nodeID >> i) & 0x1); } // payload - for(int i = 0; i < width; i++) { + for (int i = 0; i < width; i++) { write_bit((payload >> i) & 0x1); } // end bit write_bit(0x0); } - public: + public: // Construct a bitbang tag client - bsg_tag_bitbang(bsg_zynq_pl *zpl, uintptr_t shell_addr, int num_clients, int max_len) - : zpl(zpl), shell_addr(shell_addr), id_len(safe_clog2(num_clients)), max_len(max_len) { - bsg_pr_info("Creating Bitbang Driver: %p %" PRIxPTR " %d %d\n", zpl, shell_addr, num_clients, max_len); - } + bsg_tag_bitbang(bsg_zynq_pl *zpl, uintptr_t shell_addr, int num_clients, + int max_len) + : zpl(zpl), shell_addr(shell_addr), id_len(safe_clog2(num_clients)), + max_len(max_len) { + bsg_pr_info("Creating Bitbang Driver: %p %" PRIxPTR " %d %d\n", zpl, + shell_addr, num_clients, max_len); + } // Set a specific bsg tag client void set_client(bsg_tag_client *client, int payload) { @@ -75,7 +77,7 @@ class bsg_tag_bitbang { bsg_pr_info("Reset Tag Master\n"); write_bit(1); // Make sure we get enough cycles for tag master to initialize itself - for(int i = 0; i < 100; i++) { + for (int i = 0; i < 100; i++) { write_bit(0); } } diff --git a/cosim/include/common/bsg_zynq_pl_hardware.h b/cosim/include/common/bsg_zynq_pl_hardware.h index ecc11747..bed1b149 100644 --- a/cosim/include/common/bsg_zynq_pl_hardware.h +++ b/cosim/include/common/bsg_zynq_pl_hardware.h @@ -2,20 +2,20 @@ #ifndef BSG_ZYNQ_PL_HARDWARE_H #define BSG_ZYNQ_PL_HARDWARE_H -#include -#include -#include -#include -#include -#include -#include #include -#include -#include -#include #include +#include +#include +#include #include +#include #include +#include +#include +#include +#include +#include +#include #ifdef UART_ENABLE #define termios asmtermios #include @@ -29,7 +29,7 @@ using namespace std; class bsg_zynq_pl_hardware { -public: + public: virtual void start(void) = 0; virtual void stop(void) = 0; virtual void tick(void) = 0; @@ -38,7 +38,7 @@ class bsg_zynq_pl_hardware { unsigned long *physical_ptr) = 0; virtual void free_dram(void *virtual_ptr) = 0; -protected: + protected: int serial_port; uintptr_t gp0_base_offset = 0; uintptr_t gp1_base_offset = 0; @@ -142,15 +142,15 @@ class bsg_zynq_pl_hardware { volatile int32_t *ptr32 = axil_get_ptr32(address); int32_t data = *ptr32; bsg_pr_dbg_pl(" bsg_zynq_pl: AXI reading [%" PRIxPTR "]->%8.8x\n", - address, data); + address, data); return data; } inline void axil_write(uintptr_t address, int32_t data, uint8_t wstrb) { bsg_pr_dbg_pl(" bsg_zynq_pl: AXI writing [%" PRIxPTR - "]=%8.8x mask %" PRIu8 "\n", - address, data, wstrb); + "]=%8.8x mask %" PRIu8 "\n", + address, data, wstrb); // for now we don't support alternate write strobes assert(wstrb == 0XF || wstrb == 0x3 || wstrb == 0x1); @@ -165,7 +165,7 @@ class bsg_zynq_pl_hardware { volatile int8_t *ptr8 = axil_get_ptr8(address); *ptr8 = data; } else { - assert(false); // Illegal write strobe + assert(false); // Illegal write strobe } } #endif @@ -222,7 +222,7 @@ class bsg_zynq_pl_hardware { } #endif -public: + public: virtual int32_t shell_read(uintptr_t addr) = 0; virtual void shell_write(uintptr_t addr, int32_t data, uint8_t wmask) = 0; @@ -244,4 +244,3 @@ class bsg_zynq_pl_hardware { }; #endif - diff --git a/cosim/include/common/bsg_zynq_pl_simulation.h b/cosim/include/common/bsg_zynq_pl_simulation.h index 724097db..fa682a06 100644 --- a/cosim/include/common/bsg_zynq_pl_simulation.h +++ b/cosim/include/common/bsg_zynq_pl_simulation.h @@ -6,15 +6,14 @@ #define BSG_ZYNQ_PL_SIMULATION_H #include -#include -#include -#include #include #include #include +#include +#include +#include #include #include -#include #include #include @@ -22,9 +21,9 @@ #include "bsg_argparse.h" #include "bsg_axil.h" #include "bsg_axis.h" -#include "bsg_printing.h" #include "bsg_nonsynth_dpi_gpio.hpp" #include "bsg_peripherals.h" +#include "bsg_printing.h" #include "zynq_headers.h" using namespace std; @@ -32,20 +31,18 @@ using namespace bsg_nonsynth_dpi; using namespace boost::coroutines2; using namespace std::placeholders; - // Copy this to C++14 so we don't have to upgrade // https://stackoverflow.com/questions/3424962/where-is-erase-if // for std::vector namespace std { - template - void erase_if(vector& c, Predicate pred) { - c.erase(remove_if(c.begin(), c.end(), pred), c.end()); - } +template +void erase_if(vector &c, Predicate pred) { + c.erase(remove_if(c.begin(), c.end(), pred), c.end()); } - +} // namespace std class bsg_zynq_pl_simulation { -public: + public: virtual void start(void) { create_peripherals(); } virtual void stop(void) { destroy_peripherals(); } virtual void tick(void) = 0; @@ -54,7 +51,7 @@ class bsg_zynq_pl_simulation { unsigned long *physical_ptr) = 0; virtual void free_dram(void *virtual_ptr) = 0; -protected: + protected: std::unique_ptr> axi_gp0; std::unique_ptr> axi_gp1; std::unique_ptr> axi_gp2; @@ -72,102 +69,106 @@ class bsg_zynq_pl_simulation { std::unique_ptr scratchpad; std::unique_ptr watchdog; std::unique_ptr buffer; + std::unique_ptr debug; + std::unique_ptr plic; + std::unique_ptr dma; + std::function shell_read_fn; + std::function shell_write_fn; std::vector> co_list; void init() { #ifdef GP0_ENABLE axi_gp0 = std::make_unique>( STRINGIFY(GP0_HIER_BASE)); - co_list.push_back(std::make_unique([=](yield_t &yield) { - axi_gp0->reset(yield); - })); + co_list.push_back(std::make_unique( + [=](yield_t &yield) { axi_gp0->reset(yield); })); #endif #ifdef GP1_ENABLE axi_gp1 = std::make_unique>( STRINGIFY(GP1_HIER_BASE)); - co_list.push_back(std::make_unique([=](yield_t &yield) { - axi_gp1->reset(yield); - })); + co_list.push_back(std::make_unique( + [=](yield_t &yield) { axi_gp1->reset(yield); })); #endif #ifdef GP2_ENABLE axi_gp2 = std::make_unique>( STRINGIFY(GP2_HIER_BASE)); - co_list.push_back(std::make_unique([=](yield_t &yield) { - axi_gp2->reset(yield); - })); + co_list.push_back(std::make_unique( + [=](yield_t &yield) { axi_gp2->reset(yield); })); #endif #ifdef HP0_ENABLE +// TODO: Make AXI MEM a peripheral #ifndef AXI_MEM_ENABLE axi_hp0 = std::make_unique>( STRINGIFY(HP0_HIER_BASE)); - co_list.push_back(std::make_unique([=](yield_t &yield) { - axi_hp0->reset(yield); - })); + co_list.push_back(std::make_unique( + [=](yield_t &yield) { axi_hp0->reset(yield); })); #endif #endif #ifdef HP1_ENABLE axi_hp1 = std::make_unique>( STRINGIFY(HP1_HIER_BASE)); - co_list.push_back(std::make_unique([=](yield_t &yield) { - axi_hp1->reset(yield); - })); + co_list.push_back(std::make_unique( + [=](yield_t &yield) { axi_hp1->reset(yield); })); #endif #ifdef HP2_ENABLE axi_hp2 = std::make_unique>( STRINGIFY(HP2_HIER_BASE)); - co_list.push_back(std::make_unique([=](yield_t &yield) { - axi_hp2->reset(yield); - })); + co_list.push_back(std::make_unique( + [=](yield_t &yield) { axi_hp2->reset(yield); })); #endif #ifdef SP0_ENABLE - axi_sp0 = std::make_unique>( - STRINGIFY(SP0_HIER_BASE)); - co_list.push_back(std::make_unique([=](yield_t &yield) { - axi_sp0->reset(yield); - })); + axi_sp0 = + std::make_unique>(STRINGIFY(SP0_HIER_BASE)); + co_list.push_back(std::make_unique( + [=](yield_t &yield) { axi_sp0->reset(yield); })); #endif #ifdef SP1_ENABLE - axi_sp1 = std::make_unique>( - STRINGIFY(SP1_HIER_BASE)); - co_list.push_back(std::make_unique([=](yield_t &yield) { - axi_sp1->reset(yield); - })); + axi_sp1 = + std::make_unique>(STRINGIFY(SP1_HIER_BASE)); + co_list.push_back(std::make_unique( + [=](yield_t &yield) { axi_sp1->reset(yield); })); #endif #ifdef SP2_ENABLE - axi_sp2 = std::make_unique>( - STRINGIFY(SP2_HIER_BASE)); - co_list.push_back(std::make_unique([=](yield_t &yield) { - axi_sp2->reset(yield); - })); + axi_sp2 = + std::make_unique>(STRINGIFY(SP2_HIER_BASE)); + co_list.push_back(std::make_unique( + [=](yield_t &yield) { axi_sp2->reset(yield); })); #endif #ifdef MP0_ENABLE - axi_mp0 = std::make_unique>( - STRINGIFY(MP0_HIER_BASE)); - co_list.push_back(std::make_unique([=](yield_t &yield) { - axi_mp0->reset(yield); - })); + axi_mp0 = + std::make_unique>(STRINGIFY(MP0_HIER_BASE)); + co_list.push_back(std::make_unique( + [=](yield_t &yield) { axi_mp0->reset(yield); })); #endif #ifdef MP1_ENABLE - axi_mp1 = std::make_unique>( - STRINGIFY(MP1_HIER_BASE)); - co_list.push_back(std::make_unique([=](yield_t &yield) { - axi_mp1->reset(yield); - })); + axi_mp1 = + std::make_unique>(STRINGIFY(MP1_HIER_BASE)); + co_list.push_back(std::make_unique( + [=](yield_t &yield) { axi_mp1->reset(yield); })); #endif #ifdef MP2_ENABLE - axi_mp2 = std::make_unique>( - STRINGIFY(MP2_HIER_BASE)); - co_list.push_back(std::make_unique([=](yield_t &yield) { - axi_mp2->reset(yield); - })); + axi_mp2 = + std::make_unique>(STRINGIFY(MP2_HIER_BASE)); + co_list.push_back(std::make_unique( + [=](yield_t &yield) { axi_mp2->reset(yield); })); +#endif +#ifdef HOST_ZYNQ + shell_read_fn = std::bind(&bsg_zynq_pl_simulation::shell_read_axil, + this, std::placeholders::_1); + shell_write_fn = + std::bind(&bsg_zynq_pl_simulation::shell_write_axil, this, + std::placeholders::_1, std::placeholders::_2, + std::placeholders::_3); +#else +#error #endif // Do the reset while (co_list.size() > 0) { next(); } } - + void create_peripherals() { #ifdef SCRATCHPAD_ENABLE scratchpad = std::make_unique(); @@ -180,6 +181,15 @@ class bsg_zynq_pl_simulation { #endif #ifdef BUFFER_ENABLE buffer = std::make_unique(); +#endif +#ifdef DEBUG_ENABLE + debug = std::make_unique(); +#endif +#ifdef PLIC_ENABLE + plic = std::make_unique(); +#endif +#ifdef DMA_ENABLE + dma = std::make_unique(); #endif } @@ -188,6 +198,9 @@ class bsg_zynq_pl_simulation { watchdog.reset(); uart.reset(); buffer.reset(); + debug.reset(); + plic.reset(); + dma.reset(); } void next() { @@ -206,8 +219,9 @@ class bsg_zynq_pl_simulation { int32_t data; uint8_t wstrb; uint8_t last; -#ifdef HP1_ENABLE - if (!axi_hp1->axil_has_read(&addr)) { + + if (!axi_hp1.get() || !axi_hp1->axil_has_read(&addr)) { + } else if (scratchpad.get() && scratchpad->is_read(addr)) { if (scratchpad->can_read(addr)) { co_list.push_back(std::make_unique([=](yield_t &yield) { @@ -218,7 +232,29 @@ class bsg_zynq_pl_simulation { } else if (uart.get() && uart->is_read(addr)) { if (uart->can_read(addr)) { co_list.push_back(std::make_unique([=](yield_t &yield) { - axi_hp1->axil_read_helper((s_axil_device *)uart.get(), yield); + axi_hp1->axil_read_helper((s_axil_device *)uart.get(), + yield); + })); + } + } else if (debug.get() && debug->is_read(addr)) { + if (debug->can_read(addr)) { + co_list.push_back(std::make_unique([=](yield_t &yield) { + axi_hp1->axil_read_helper((s_axil_device *)debug.get(), + yield); + })); + } + } else if (plic.get() && plic->is_read(addr)) { + if (plic->can_read(addr)) { + co_list.push_back(std::make_unique([=](yield_t &yield) { + axi_hp1->axil_read_helper((s_axil_device *)plic.get(), + yield); + })); + } + } else if (dma.get() && dma->is_read(addr)) { + if (dma->can_read(addr)) { + co_list.push_back(std::make_unique([=](yield_t &yield) { + axi_hp1->axil_read_helper((s_axil_device *)dma.get(), + yield); })); } } else { @@ -226,39 +262,61 @@ class bsg_zynq_pl_simulation { addr); } - if (!axi_hp1->axil_has_write(&addr)) { + if (!axi_hp1.get() || !axi_hp1->axil_has_write(&addr)) { + } else if (scratchpad && scratchpad->is_write(addr)) { if (scratchpad->can_write(addr)) { co_list.push_back(std::make_unique([=](yield_t &yield) { - axi_hp1->axil_write_helper((s_axil_device *)scratchpad.get(), - yield); + axi_hp1->axil_write_helper( + (s_axil_device *)scratchpad.get(), yield); })); } } else if (uart.get() && uart->is_write(addr)) { if (uart->can_write(addr)) { co_list.push_back(std::make_unique([=](yield_t &yield) { - axi_hp1->axil_write_helper((s_axil_device *)uart.get(), yield); + axi_hp1->axil_write_helper((s_axil_device *)uart.get(), + yield); + })); + } + } else if (debug.get() && debug->is_write(addr)) { + if (debug->can_write(addr)) { + co_list.push_back(std::make_unique([=](yield_t &yield) { + axi_hp1->axil_write_helper((s_axil_device *)debug.get(), + yield); + })); + } + } else if (plic.get() && plic->is_write(addr)) { + if (plic->can_write(addr)) { + co_list.push_back(std::make_unique([=](yield_t &yield) { + axi_hp1->axil_write_helper((s_axil_device *)plic.get(), + yield); + })); + } + } else if (dma.get() && dma->is_write(addr)) { + if (dma->can_write(addr)) { + co_list.push_back(std::make_unique([=](yield_t &yield) { + axi_hp1->axil_write_helper((s_axil_device *)dma.get(), + yield); })); } } else { bsg_pr_err(" bsg_zynq_pl: Unsupported AXI device write at [%x]\n", addr); } -#endif -#ifdef MP0_ENABLE - if (!axi_mp0->axis_has_write(&last)) { + + if (!axi_mp0.get() || !axi_mp0->axis_has_write(&last)) { + } else if (buffer.get()) { if (buffer->can_write(last)) { co_list.push_back(std::make_unique([=](yield_t &yield) { axi_mp0->axis_write_helper((s_axis_device *)buffer.get(), - yield); + yield); })); } } else { bsg_pr_err(" bsg_zynq_pl: Unsupported AXI device write at [%x]\n", addr); } -#endif } void pollm_helper() { @@ -266,24 +324,32 @@ class bsg_zynq_pl_simulation { int32_t data; uint8_t wstrb; uint8_t last; -#if GP2_ENABLE - if (watchdog.get() && watchdog->pending_write(&addr, &data, &wstrb)) { + if (!axi_gp2.get()) { + } else if (watchdog.get() && + watchdog->pending_write(&addr, &data, &wstrb)) { axil_write(2, addr, data, wstrb, [=]() { watchdog->return_write(); }); } else if (watchdog.get() && watchdog->pending_read(&addr)) { axil_read(2, addr, [=](int32_t rdata) { watchdog->return_read(rdata); }); + } else if (debug.get() && debug->pending_write(&addr, &data, &wstrb)) { + axil_write(2, addr, data, wstrb, [=]() { debug->return_write(); }); + } else if (debug.get() && debug->pending_read(&addr)) { + axil_read(2, addr, + [=](int32_t rdata) { debug->return_read(rdata); }); + } else if (debug.get() && debug->pending_write(&addr, &data, &wstrb)) { + axil_write(2, addr, data, wstrb, [=]() { debug->return_write(); }); + } else if (debug.get() && debug->pending_read(&addr)) { + axil_read(2, addr, + [=](int32_t rdata) { debug->return_read(rdata); }); } -#endif -#ifdef MP0_ENABLE - if (buffer.get() && buffer->pending_write(&data, &last)) { - axis_write(0, data, last, [=]() { }); + if (!axi_sp0.get()) { + } else if (buffer.get() && buffer->pending_write(&data, &last)) { + axis_write(0, data, last, [=]() {}); } -#endif } -#ifdef AXIL_ENABLE void axil_read(int port, uintptr_t addr, std::function callback) { if (port == 2) { @@ -323,9 +389,9 @@ class bsg_zynq_pl_simulation { })); } } -#endif -#ifdef AXIS_ENABLE - void axis_write(int port, int32_t data, uint8_t last, std::function callback) { + + void axis_write(int port, int32_t data, uint8_t last, + std::function callback) { if (port == 2) { co_list.push_back(std::make_unique([=](yield_t &yield) { axi_sp2->axis_write_helper(data, last, yield); @@ -343,9 +409,7 @@ class bsg_zynq_pl_simulation { })); } } -#endif -#ifdef UART_ENABLE // Must sync to verilog // typedef struct packed // { @@ -407,102 +471,127 @@ class bsg_zynq_pl_simulation { callback(data); })); } -#endif -public: - virtual void shell_write(uintptr_t addr, int32_t data, uint8_t wstrb) { - int port; + + int32_t shell_read_axil(uintptr_t addr) { + int port = 0; + bool done = false; + int32_t rdata; + auto f_call = [&](int32_t x) { + rdata = x; + done = true; + }; // we subtract the bases to make it consistent with the Zynq AXI IPI // implementation - if (0) { -#ifdef GP0_ENABLE - } else if (addr >= GP0_ADDR_BASE && + if (axi_gp0.get() && addr >= GP0_ADDR_BASE && addr <= GP0_ADDR_BASE + GP0_ADDR_SIZE_BYTES) { port = 0; addr = addr - GP0_ADDR_BASE; -#endif -#ifdef GP1_ENABLE - } else if (addr >= GP1_ADDR_BASE && + } else if (axi_gp1.get() && addr >= GP1_ADDR_BASE && addr <= GP1_ADDR_BASE + GP1_ADDR_SIZE_BYTES) { port = 1; addr = addr - GP1_ADDR_BASE; -#endif } else { bsg_pr_err(" bsg_zynq_pl: unsupported AXIL address: %x\n", addr); - return; + return -1; } + axil_read(port, addr, f_call); + + do { + next(); + } while (!done); + + bsg_pr_dbg_pl(" bsg_zynq_pl: AXI reading port %d [%x] -> %8.8x\n", + port, addr, rdata); + + return rdata; + } + + int32_t shell_read_uart(uintptr_t addr) { + int port = 0; bool done = false; - auto f_call = [&]() { done = true; }; -#ifdef HOST_ZYNQ - axil_write(port, addr, data, wstrb, f_call); -#else - uart_write(port, addr, data, wstrb, f_call); -#endif + int32_t rdata; + auto f_call = [&](int32_t x) { + rdata = x; + done = true; + }; + + uart_read(port, addr, f_call); do { next(); } while (!done); - bsg_pr_dbg_pl(" bsg_zynq_pl: AXI writing port %d, [%x]<-%8.8x\n", port, - addr, data); + bsg_pr_dbg_pl(" bsg_zynq_pl: UART reading port %d [%x] -> %8.8x\n", + port, addr, rdata); - return; + return rdata; } - virtual int32_t shell_read(uintptr_t addr) { - int port; + void shell_write_axil(uintptr_t addr, int32_t data, uint8_t wstrb) { + int port = 0; + bool done = false; + auto f_call = [&]() { done = true; }; // we subtract the bases to make it consistent with the Zynq AXI IPI // implementation - if (0) { -#ifdef GP0_ENABLE - } else if (addr >= GP0_ADDR_BASE && + if (axi_gp0.get() && addr >= GP0_ADDR_BASE && addr <= GP0_ADDR_BASE + GP0_ADDR_SIZE_BYTES) { port = 0; addr = addr - GP0_ADDR_BASE; -#endif -#ifdef GP1_ENABLE - } else if (addr >= GP1_ADDR_BASE && + } else if (axi_gp1.get() && addr >= GP1_ADDR_BASE && addr <= GP1_ADDR_BASE + GP1_ADDR_SIZE_BYTES) { port = 1; addr = addr - GP1_ADDR_BASE; -#endif } else { bsg_pr_err(" bsg_zynq_pl: unsupported AXIL address: %x\n", addr); - return -1; + return; } + axil_write(port, addr, data, wstrb, f_call); + do { + next(); + } while (!done); + + bsg_pr_dbg_pl(" bsg_zynq_pl: AXI writing port %d, [%x]<-%8.8x\n", port, + addr, data); + + return; + } + + void shell_write_uart(uintptr_t addr, int32_t data, uint8_t wstrb) { + int port = 0; bool done = false; - int32_t rdata; - auto f_call = [&](int32_t x) { - rdata = x; - done = true; - }; -#ifdef HOST_ZYNQ - axil_read(port, addr, f_call); -#else - uart_read(port, addr, f_call); -#endif + auto f_call = [&]() { done = true; }; + + uart_write(port, addr, data, wstrb, f_call); do { next(); } while (!done); - bsg_pr_dbg_pl(" bsg_zynq_pl: AXI reading port %d [%x] -> %8.8x\n", - port, addr, rdata); + bsg_pr_dbg_pl(" bsg_zynq_pl: AXI writing port %d, [%x]<-%8.8x\n", port, + addr, data); - return rdata; + return; + } + + public: + void shell_write(uintptr_t addr, int32_t data, uint8_t wstrb) { + shell_write_fn(addr, data, wstrb); } - virtual void shell_read4(uintptr_t addr, int32_t *data0, int32_t *data1, - int32_t *data2, int32_t *data3) { + int32_t shell_read(uintptr_t addr) { return shell_read_fn(addr); } + + void shell_read4(uintptr_t addr, int32_t *data0, int32_t *data1, + int32_t *data2, int32_t *data3) { *data0 = shell_read(addr + 0); *data1 = shell_read(addr + 4); *data2 = shell_read(addr + 8); *data3 = shell_read(addr + 12); } - virtual void shell_write4(uintptr_t addr, int32_t data0, int32_t data1, - int32_t data2, int32_t data3) { + void shell_write4(uintptr_t addr, int32_t data0, int32_t data1, + int32_t data2, int32_t data3) { shell_write(addr + 0, data0, 0xf); shell_write(addr + 4, data1, 0xf); shell_write(addr + 8, data2, 0xf); @@ -511,4 +600,3 @@ class bsg_zynq_pl_simulation { }; #endif - diff --git a/cosim/include/common/zynq_headers.h b/cosim/include/common/zynq_headers.h index de8b072a..d9e69b7c 100644 --- a/cosim/include/common/zynq_headers.h +++ b/cosim/include/common/zynq_headers.h @@ -46,12 +46,12 @@ typedef uint32_t uint32x4_t[4]; #endif #define GP0_ADDR_SIZE_BYTES (1ULL << GP0_ADDR_WIDTH) #endif -static uintptr_t gp0_addr_size_bytes = (uintptr_t) GP0_ADDR_SIZE_BYTES; +static uintptr_t gp0_addr_size_bytes = (uintptr_t)GP0_ADDR_SIZE_BYTES; #ifndef GP0_ADDR_BASE #error GP0_ADDR_BASE must be defined #endif -static uintptr_t gp0_addr_base = (uintptr_t) GP0_ADDR_BASE; +static uintptr_t gp0_addr_base = (uintptr_t)GP0_ADDR_BASE; #ifndef GP0_DATA_WIDTH #error GP0_DATA_WIDTH must be defined @@ -76,12 +76,12 @@ static uintptr_t gp0_addr_base = (uintptr_t) GP0_ADDR_BASE; #endif #define GP1_ADDR_SIZE_BYTES (1ULL << GP1_ADDR_WIDTH) #endif -static uintptr_t gp1_addr_size_bytes = (uintptr_t) GP1_ADDR_SIZE_BYTES; +static uintptr_t gp1_addr_size_bytes = (uintptr_t)GP1_ADDR_SIZE_BYTES; #ifndef GP1_ADDR_BASE #error GP1_ADDR_BASE must be defined #endif -static uintptr_t gp1_addr_base = (uintptr_t) GP1_ADDR_BASE; +static uintptr_t gp1_addr_base = (uintptr_t)GP1_ADDR_BASE; #ifndef GP1_DATA_WIDTH #error GP1_DATA_WIDTH must be defined @@ -118,12 +118,12 @@ static uintptr_t gp1_addr_base = (uintptr_t) GP1_ADDR_BASE; #endif #define GP2_ADDR_SIZE_BYTES (1ULL << GP2_ADDR_WIDTH) #endif -static uintptr_t gp2_addr_size_bytes = (uintptr_t) GP2_ADDR_SIZE_BYTES; +static uintptr_t gp2_addr_size_bytes = (uintptr_t)GP2_ADDR_SIZE_BYTES; #ifndef GP2_ADDR_BASE #error GP2_ADDR_BASE must be defined #endif -static uintptr_t gp2_addr_base = (uintptr_t) GP2_ADDR_BASE; +static uintptr_t gp2_addr_base = (uintptr_t)GP2_ADDR_BASE; #ifndef GP2_DATA_WIDTH #error GP2_DATA_WIDTH must be defined @@ -136,7 +136,7 @@ static uintptr_t gp2_addr_base = (uintptr_t) GP2_ADDR_BASE; #endif #ifndef HP0_ENABLE -#define HP0_ADDR_WIDTH 0 +#define HP0_ADDR_WIDTH 0 #define HP0_DATA_WIDTH 0 #define HP0_ADDR_BASE 0 #define HP0_HIER_BASE "" @@ -148,12 +148,12 @@ static uintptr_t gp2_addr_base = (uintptr_t) GP2_ADDR_BASE; #endif #define HP0_ADDR_SIZE_BYTES (1ULL << HP0_ADDR_WIDTH) #endif -static uintptr_t hp0_addr_size_bytes = (uintptr_t) HP0_ADDR_SIZE_BYTES; +static uintptr_t hp0_addr_size_bytes = (uintptr_t)HP0_ADDR_SIZE_BYTES; #ifndef HP0_ADDR_BASE #error HP0_ADDR_BASE must be defined #endif -static uintptr_t hp0_addr_base = (uintptr_t) HP0_ADDR_BASE; +static uintptr_t hp0_addr_base = (uintptr_t)HP0_ADDR_BASE; #ifndef HP0_DATA_WIDTH #error HP0_DATA_WIDTH must be defined @@ -180,12 +180,12 @@ static uintptr_t hp0_addr_base = (uintptr_t) HP0_ADDR_BASE; #endif #define HP1_ADDR_SIZE_BYTES (1ULL << HP1_ADDR_WIDTH) #endif -static uintptr_t hp1_addr_size_bytes = (uintptr_t) HP1_ADDR_SIZE_BYTES; +static uintptr_t hp1_addr_size_bytes = (uintptr_t)HP1_ADDR_SIZE_BYTES; #ifndef HP1_ADDR_BASE #error HP1_ADDR_BASE must be defined #endif -static uintptr_t hp1_addr_base = (uintptr_t) HP1_ADDR_BASE; +static uintptr_t hp1_addr_base = (uintptr_t)HP1_ADDR_BASE; #ifndef HP1_DATA_WIDTH #error HP1_DATA_WIDTH must be defined @@ -210,12 +210,12 @@ static uintptr_t hp1_addr_base = (uintptr_t) HP1_ADDR_BASE; #endif #define HP2_ADDR_SIZE_BYTES (1ULL << HP2_ADDR_WIDTH) #endif -static uintptr_t hp2_addr_size_bytes = (uintptr_t) HP2_ADDR_SIZE_BYTES; +static uintptr_t hp2_addr_size_bytes = (uintptr_t)HP2_ADDR_SIZE_BYTES; #ifndef HP2_ADDR_BASE #error HP2_ADDR_BASE must be defined #endif -static uintptr_t hp2_addr_base = (uintptr_t) HP2_ADDR_BASE; +static uintptr_t hp2_addr_base = (uintptr_t)HP2_ADDR_BASE; #ifndef HP2_DATA_WIDTH #error HP2_DATA_WIDTH must be defined @@ -330,7 +330,7 @@ static uintptr_t hp2_addr_base = (uintptr_t) HP2_ADDR_BASE; #endif #ifndef UART_ENABLE -#define UART_DEV /dev/null +#define UART_DEV / dev / null #define UART_DEV_STR "" #define UART_BAUD 0 #else @@ -347,11 +347,10 @@ static uintptr_t hp2_addr_base = (uintptr_t) HP2_ADDR_BASE; #endif #else #define UART_DEV_STR STRINGIFY(UART_DEV) -#define PPCAT_NX(A, B) A ## B +#define PPCAT_NX(A, B) A##B #define PPCAT(A, B) PPCAT_NX(A, B) #define UART_BAUD_ENUM PPCAT(B, UART_BAUD) #endif #endif #endif - diff --git a/cosim/include/vcs/bsg_zynq_pl.h b/cosim/include/vcs/bsg_zynq_pl.h index 490dd4ed..8c7997c4 100644 --- a/cosim/include/vcs/bsg_zynq_pl.h +++ b/cosim/include/vcs/bsg_zynq_pl.h @@ -6,51 +6,52 @@ #define BSG_ZYNQ_PL_H #include -#include -#include -#include #include #include #include +#include +#include +#include #include #include -#include #include #include "bsg_zynq_pl_simulation.h" -extern "C" { void bsg_dpi_next(); } -extern "C" { int bsg_dpi_time(); } +extern "C" { +void bsg_dpi_next(); +} +extern "C" { +int bsg_dpi_time(); +} class bsg_zynq_pl : public bsg_zynq_pl_simulation { - public: - bsg_zynq_pl(int argc, char *argv[]) { - tick(); - init(); - } + public: + bsg_zynq_pl(int argc, char *argv[]) { + tick(); + init(); + } - ~bsg_zynq_pl(void) { } + ~bsg_zynq_pl(void) {} - void tick(void) override { - bsg_dpi_next(); - } + void tick(void) override { bsg_dpi_next(); } - void done(void) override { - bsg_pr_info(" bsg_zynq_pl: done() called, exiting\n"); - } + void done(void) override { + bsg_pr_info(" bsg_zynq_pl: done() called, exiting\n"); + } - void *allocate_dram(unsigned long len_in_bytes, unsigned long *physical_ptr) override { - bsg_pr_info(" bsg_zynq_pl: Allocated dummy DRAM\n"); - void *virtual_ptr = (unsigned long *)malloc(len_in_bytes); - *physical_ptr = (unsigned long)virtual_ptr; + void *allocate_dram(unsigned long len_in_bytes, + unsigned long *physical_ptr) override { + bsg_pr_info(" bsg_zynq_pl: Allocated dummy DRAM\n"); + void *virtual_ptr = (unsigned long *)malloc(len_in_bytes); + *physical_ptr = (unsigned long)virtual_ptr; - return virtual_ptr; - } + return virtual_ptr; + } - void free_dram(void *virtual_ptr) override { - printf("bsg_zynq_pl: Freeing dummy DRAM\n"); - } + void free_dram(void *virtual_ptr) override { + printf("bsg_zynq_pl: Freeing dummy DRAM\n"); + } }; #endif - diff --git a/cosim/include/verilator/bsg_zynq_pl.h b/cosim/include/verilator/bsg_zynq_pl.h index 3b6ae047..1644b6a2 100644 --- a/cosim/include/verilator/bsg_zynq_pl.h +++ b/cosim/include/verilator/bsg_zynq_pl.h @@ -5,31 +5,33 @@ #ifndef BSG_ZYNQ_PL_H #define BSG_ZYNQ_PL_H -#include -#include -#include -#include -#include #include "bsg_axil.h" -#include "bsg_printing.h" #include "bsg_nonsynth_dpi_clock_gen.hpp" #include "bsg_nonsynth_dpi_gpio.hpp" #include "bsg_peripherals.h" -#include "zynq_headers.h" -#include "verilated_fst_c.h" +#include "bsg_printing.h" #include "verilated_cov.h" +#include "verilated_fst_c.h" +#include "zynq_headers.h" +#include +#include +#include +#include +#include -#include "bsg_zynq_pl_simulation.h" #include "Vbsg_nonsynth_zynq_testbench.h" +#include "bsg_zynq_pl_simulation.h" #include "verilated.h" -extern "C" { int bsg_dpi_time(); } +extern "C" { +int bsg_dpi_time(); +} class bsg_zynq_pl : public bsg_zynq_pl_simulation { Vbsg_nonsynth_zynq_testbench *tb; VerilatedFstC *wf; - public: + public: bsg_zynq_pl(int argc, char *argv[]) { // Initialize Verilators variables Verilated::commandArgs(argc, argv); @@ -47,7 +49,7 @@ class bsg_zynq_pl : public bsg_zynq_pl_simulation { init(); } - ~bsg_zynq_pl(void) { } + ~bsg_zynq_pl(void) {} // Each bsg_timekeeper::next() moves to the next clock edge // so we need 2 to perform one full clock cycle. @@ -68,7 +70,8 @@ class bsg_zynq_pl : public bsg_zynq_pl_simulation { wf->close(); } - void *allocate_dram(unsigned long len_in_bytes, unsigned long *physical_ptr) { + void *allocate_dram(unsigned long len_in_bytes, + unsigned long *physical_ptr) { bsg_pr_info(" bsg_zynq_pl: Allocated dummy DRAM\n"); void *virtual_ptr = (unsigned long *)malloc(len_in_bytes); *physical_ptr = (unsigned long)virtual_ptr; @@ -82,4 +85,3 @@ class bsg_zynq_pl : public bsg_zynq_pl_simulation { }; #endif - diff --git a/cosim/include/xcelium/bsg_zynq_pl.h b/cosim/include/xcelium/bsg_zynq_pl.h index 7f9bef41..21392d50 100644 --- a/cosim/include/xcelium/bsg_zynq_pl.h +++ b/cosim/include/xcelium/bsg_zynq_pl.h @@ -6,51 +6,52 @@ #define BSG_ZYNQ_PL_H #include -#include -#include -#include #include #include #include +#include +#include +#include #include #include -#include #include #include "bsg_zynq_pl_simulation.h" -extern "C" { void bsg_dpi_next(); } -extern "C" { int bsg_dpi_time(); } +extern "C" { +void bsg_dpi_next(); +} +extern "C" { +int bsg_dpi_time(); +} class bsg_zynq_pl : public bsg_zynq_pl_simulation { - public: - bsg_zynq_pl(int argc, char *argv[]) { - tick(); - init(); - } + public: + bsg_zynq_pl(int argc, char *argv[]) { + tick(); + init(); + } - ~bsg_zynq_pl(void) { } + ~bsg_zynq_pl(void) {} - void tick(void) override { - bsg_dpi_next(); - } + void tick(void) override { bsg_dpi_next(); } - void done(void) override { - bsg_pr_info(" bsg_zynq_pl: done() called, exiting\n"); - } + void done(void) override { + bsg_pr_info(" bsg_zynq_pl: done() called, exiting\n"); + } - void *allocate_dram(unsigned long len_in_bytes, unsigned long *physical_ptr) { - bsg_pr_info(" bsg_zynq_pl: Allocated dummy DRAM\n"); - void *virtual_ptr = (unsigned long *)malloc(len_in_bytes); - *physical_ptr = (unsigned long)virtual_ptr; + void *allocate_dram(unsigned long len_in_bytes, + unsigned long *physical_ptr) { + bsg_pr_info(" bsg_zynq_pl: Allocated dummy DRAM\n"); + void *virtual_ptr = (unsigned long *)malloc(len_in_bytes); + *physical_ptr = (unsigned long)virtual_ptr; - return virtual_ptr; - } + return virtual_ptr; + } - void free_dram(void *virtual_ptr) { - printf("bsg_zynq_pl: Freeing dummy DRAM\n"); - } + void free_dram(void *virtual_ptr) { + printf("bsg_zynq_pl: Freeing dummy DRAM\n"); + } }; #endif - diff --git a/cosim/include/zynq/bsg_zynq_pl.h b/cosim/include/zynq/bsg_zynq_pl.h index 7e2e42a8..c9bb0a2c 100644 --- a/cosim/include/zynq/bsg_zynq_pl.h +++ b/cosim/include/zynq/bsg_zynq_pl.h @@ -26,113 +26,108 @@ extern "C" { #include "/usr/include/libxlnk_cma.h" - void _xlnk_reset(); +void _xlnk_reset(); }; -#include -#include -#include -#include -#include -#include -#include +#include "bsg_argparse.h" +#include "bsg_printing.h" +#include "zynq_headers.h" #include -#include -#include -#include #include +#include +#include +#include #include +#include #include -#include "bsg_argparse.h" -#include "bsg_printing.h" -#include "zynq_headers.h" +#include +#include +#include +#include +#include +#include #include "bsg_zynq_pl_hardware.h" using namespace std; class bsg_zynq_pl : public bsg_zynq_pl_hardware { - public: - bsg_zynq_pl(int argc, char *argv[]) { - printf("// bsg_zynq_pl: be sure to run as root\n"); - init(); - } - - ~bsg_zynq_pl(void) { - deinit(); - } - - void tick(void) override { - /* Does nothing on PS */ - } - - void start(void) override { - printf("bsg_zynq_pl: start() called\n"); - } - - void stop(void) override { - printf("bsg_zynq_pl: stop() called\n"); - } - - void done(void) override { - printf("bsg_zynq_pl: done() called, exiting\n"); - } - - // returns virtual pointer, writes physical parameter into arguments - void *allocate_dram(unsigned long len_in_bytes, unsigned long *physical_ptr) override { - - // resets all CMA buffers across system (eek!) - _xlnk_reset(); - - // for now, we do uncacheable to keep things simple, memory accesses go - // straight to DRAM and - // thus would be coherent with the PL - - void *virtual_ptr = - cma_alloc(len_in_bytes, 0); // 1 = cacheable, 0 = uncacheable - assert(virtual_ptr != NULL); - *physical_ptr = cma_get_phy_addr(virtual_ptr); - printf("bsg_zynq_pl: allocate_dram() called with size %ld bytes --> virtual " - "ptr=%p, physical ptr=0x%8.8lx\n", - len_in_bytes, virtual_ptr, *physical_ptr); - return virtual_ptr; - } - - void free_dram(void *virtual_ptr) override { - printf("bsg_zynq_pl: free_dram() called on virtual ptr=%p\n", virtual_ptr); - cma_free(virtual_ptr); - } - - int32_t shell_read(uintptr_t addr) override { - return axil_read(addr); - } - - void shell_write(uintptr_t addr, int32_t data, uint8_t wmask) override { - axil_write(addr, data, wmask); - } + public: + bsg_zynq_pl(int argc, char *argv[]) { + printf("// bsg_zynq_pl: be sure to run as root\n"); + init(); + } + + ~bsg_zynq_pl(void) { deinit(); } + + void tick(void) override { /* Does nothing on PS */ + } + + void start(void) override { printf("bsg_zynq_pl: start() called\n"); } + + void stop(void) override { printf("bsg_zynq_pl: stop() called\n"); } + + void done(void) override { + printf("bsg_zynq_pl: done() called, exiting\n"); + } + + // returns virtual pointer, writes physical parameter into arguments + void *allocate_dram(unsigned long len_in_bytes, + unsigned long *physical_ptr) override { + + // resets all CMA buffers across system (eek!) + _xlnk_reset(); + + // for now, we do uncacheable to keep things simple, memory accesses go + // straight to DRAM and + // thus would be coherent with the PL + + void *virtual_ptr = + cma_alloc(len_in_bytes, 0); // 1 = cacheable, 0 = uncacheable + assert(virtual_ptr != NULL); + *physical_ptr = cma_get_phy_addr(virtual_ptr); + printf("bsg_zynq_pl: allocate_dram() called with size %ld bytes --> " + "virtual " + "ptr=%p, physical ptr=0x%8.8lx\n", + len_in_bytes, virtual_ptr, *physical_ptr); + return virtual_ptr; + } + + void free_dram(void *virtual_ptr) override { + printf("bsg_zynq_pl: free_dram() called on virtual ptr=%p\n", + virtual_ptr); + cma_free(virtual_ptr); + } + + int32_t shell_read(uintptr_t addr) override { return axil_read(addr); } + + void shell_write(uintptr_t addr, int32_t data, uint8_t wmask) override { + axil_write(addr, data, wmask); + } #ifdef NEON - //typedef uint32_t uint32x4_t[4]; - void shell_write4(uintptr_t addr, int32_t data0, int32_t data1, int32_t data2, int32_t data3) override { - volatile uint32x4_t *ptr = (volatile uint32x4_t *) addr; - int32_t sarray[4] = {data0, data1, data2, data3}; - uint32_t *array{reinterpret_cast(sarray)}; - uint32x4_t val = vld1q_u32(array); - - *ptr = val; - } - - void shell_read4(uintptr_t addr, int32_t *data0, int32_t *data1, int32_t *data2, int32_t *data3) override { - volatile uint32x4_t *ptr = (volatile uint32x4_t *) addr; - uint32x4_t val = *ptr; - - *data0 = val[0]; - *data1 = val[1]; - *data2 = val[2]; - *data3 = val[3]; - } + // typedef uint32_t uint32x4_t[4]; + void shell_write4(uintptr_t addr, int32_t data0, int32_t data1, + int32_t data2, int32_t data3) override { + volatile uint32x4_t *ptr = (volatile uint32x4_t *)addr; + int32_t sarray[4] = {data0, data1, data2, data3}; + uint32_t *array{reinterpret_cast(sarray)}; + uint32x4_t val = vld1q_u32(array); + + *ptr = val; + } + + void shell_read4(uintptr_t addr, int32_t *data0, int32_t *data1, + int32_t *data2, int32_t *data3) override { + volatile uint32x4_t *ptr = (volatile uint32x4_t *)addr; + uint32x4_t val = *ptr; + + *data0 = val[0]; + *data1 = val[1]; + *data2 = val[2]; + *data3 = val[3]; + } #endif }; #endif - diff --git a/cosim/manycore-example/ps.cpp b/cosim/manycore-example/ps.cpp index 46a6bfe3..7a71be84 100644 --- a/cosim/manycore-example/ps.cpp +++ b/cosim/manycore-example/ps.cpp @@ -4,21 +4,21 @@ // the API we provide abstracts away the // communication plumbing differences. -#include -#include +#include #include #include -#include #include +#include +#include +#include #include -#include #include "ps.hpp" -#include "bsg_zynq_pl.h" +#include "bsg_argparse.h" #include "bsg_printing.h" #include "bsg_tag_bitbang.h" -#include "bsg_argparse.h" +#include "bsg_zynq_pl.h" #include "bsg_manycore_machine.h" #include "bsg_manycore_packet.h" @@ -34,210 +34,228 @@ void nbf_load(bsg_zynq_pl *zpl, char *filename); -inline void send_mc_request_packet(bsg_zynq_pl *zpl, hb_mc_request_packet_t *packet) { - int axil_len = sizeof(hb_mc_request_packet_t) / 4; +inline void send_mc_request_packet(bsg_zynq_pl *zpl, + hb_mc_request_packet_t *packet) { + int axil_len = sizeof(hb_mc_request_packet_t) / 4; - uint32_t *pkt_data = reinterpret_cast(packet); - for (int i = 0; i < axil_len; i++) { - while (!zpl->shell_read(GP0_RD_EP_REQ_FIFO_CTR)); - zpl->shell_write(GP0_WR_EP_REQ_FIFO_DATA, pkt_data[i], 0xf); - } + uint32_t *pkt_data = reinterpret_cast(packet); + for (int i = 0; i < axil_len; i++) { + while (!zpl->shell_read(GP0_RD_EP_REQ_FIFO_CTR)) + ; + zpl->shell_write(GP0_WR_EP_REQ_FIFO_DATA, pkt_data[i], 0xf); + } } -inline void recv_mc_response_packet(bsg_zynq_pl *zpl, hb_mc_response_packet_t *packet) { - int axil_len = sizeof(hb_mc_response_packet_t) / 4; +inline void recv_mc_response_packet(bsg_zynq_pl *zpl, + hb_mc_response_packet_t *packet) { + int axil_len = sizeof(hb_mc_response_packet_t) / 4; - uint32_t *pkt_data = reinterpret_cast(packet); - for (int i = 0; i < axil_len; i++) { - while (!zpl->shell_read(GP0_RD_MC_RSP_FIFO_CTR)); - pkt_data[i] = zpl->shell_read(GP0_RD_MC_RSP_FIFO_DATA); - } + uint32_t *pkt_data = reinterpret_cast(packet); + for (int i = 0; i < axil_len; i++) { + while (!zpl->shell_read(GP0_RD_MC_RSP_FIFO_CTR)) + ; + pkt_data[i] = zpl->shell_read(GP0_RD_MC_RSP_FIFO_DATA); + } } -inline void recv_mc_request_packet(bsg_zynq_pl *zpl, hb_mc_request_packet_t *packet) { - int axil_len = sizeof(hb_mc_request_packet_t) / 4; +inline void recv_mc_request_packet(bsg_zynq_pl *zpl, + hb_mc_request_packet_t *packet) { + int axil_len = sizeof(hb_mc_request_packet_t) / 4; - uint32_t *pkt_data = reinterpret_cast(packet); - for (int i = 0; i < axil_len; i++) { - while (!zpl->shell_read(GP0_RD_MC_REQ_FIFO_CTR)); - pkt_data[i] = zpl->shell_read(GP0_RD_MC_REQ_FIFO_DATA); - } + uint32_t *pkt_data = reinterpret_cast(packet); + for (int i = 0; i < axil_len; i++) { + while (!zpl->shell_read(GP0_RD_MC_REQ_FIFO_CTR)) + ; + pkt_data[i] = zpl->shell_read(GP0_RD_MC_REQ_FIFO_DATA); + } } -inline void send_mc_write(bsg_zynq_pl *zpl, uint8_t x, uint8_t y, uint32_t epa, int32_t data) { - bsg_pr_dbg_ps("Writing: (%x %x) [%x]<-%x\n", x, y, epa, data); - hb_mc_request_packet_t req_pkt; - - req_pkt.op_v2 = 2; // SW - req_pkt.reg_id = 0xff; // unused - req_pkt.payload = data; - req_pkt.x_src = BSG_MANYCORE_MACHINE_HOST_COORD_X; - req_pkt.y_src = BSG_MANYCORE_MACHINE_HOST_COORD_Y; - req_pkt.x_dst = x; - req_pkt.y_dst = y; - req_pkt.addr = epa >> 2; - - send_mc_request_packet(zpl, &req_pkt); +inline void send_mc_write(bsg_zynq_pl *zpl, uint8_t x, uint8_t y, uint32_t epa, + int32_t data) { + bsg_pr_dbg_ps("Writing: (%x %x) [%x]<-%x\n", x, y, epa, data); + hb_mc_request_packet_t req_pkt; + + req_pkt.op_v2 = 2; // SW + req_pkt.reg_id = 0xff; // unused + req_pkt.payload = data; + req_pkt.x_src = BSG_MANYCORE_MACHINE_HOST_COORD_X; + req_pkt.y_src = BSG_MANYCORE_MACHINE_HOST_COORD_Y; + req_pkt.x_dst = x; + req_pkt.y_dst = y; + req_pkt.addr = epa >> 2; + + send_mc_request_packet(zpl, &req_pkt); } -inline int32_t send_mc_read(bsg_zynq_pl *zpl, uint8_t x, uint8_t y, uint32_t epa) { - hb_mc_request_packet_t req_pkt; +inline int32_t send_mc_read(bsg_zynq_pl *zpl, uint8_t x, uint8_t y, + uint32_t epa) { + hb_mc_request_packet_t req_pkt; - req_pkt.op_v2 = 0; // LD - req_pkt.reg_id = 0xff; // unused - req_pkt.payload = 0; // Ignore payload - req_pkt.x_src = BSG_MANYCORE_MACHINE_HOST_COORD_X; - req_pkt.y_src = BSG_MANYCORE_MACHINE_HOST_COORD_Y; - req_pkt.x_dst = x; - req_pkt.y_dst = y; - req_pkt.addr = epa >> 2; + req_pkt.op_v2 = 0; // LD + req_pkt.reg_id = 0xff; // unused + req_pkt.payload = 0; // Ignore payload + req_pkt.x_src = BSG_MANYCORE_MACHINE_HOST_COORD_X; + req_pkt.y_src = BSG_MANYCORE_MACHINE_HOST_COORD_Y; + req_pkt.x_dst = x; + req_pkt.y_dst = y; + req_pkt.addr = epa >> 2; - send_mc_request_packet(zpl, &req_pkt); + send_mc_request_packet(zpl, &req_pkt); - hb_mc_response_packet_t resp_pkt; - recv_mc_response_packet(zpl, &resp_pkt); - bsg_pr_dbg_ps("Querying: [%x] == %x\n", epa, resp_pkt.data); + hb_mc_response_packet_t resp_pkt; + recv_mc_response_packet(zpl, &resp_pkt); + bsg_pr_dbg_ps("Querying: [%x] == %x\n", epa, resp_pkt.data); - return resp_pkt.data; + return resp_pkt.data; } int ps_main(int argc, char **argv) { - bsg_zynq_pl *zpl = new bsg_zynq_pl(argc, argv); - - bsg_pr_info("ps.cpp: reading three base registers\n"); - bsg_pr_info("ps.cpp: dram_base=%lx\n", zpl->shell_read(0x00 + gp0_addr_base)); - - uint32_t val; - zpl->shell_write(GP0_WR_CSR_DRAM_BASE, 0xDEADBEEF, 0xf); - assert((zpl->shell_read(GP0_RD_CSR_DRAM_BASE) == (0xDEADBEEF))); - zpl->shell_write(GP0_WR_CSR_DRAM_BASE, val, 0xf); - assert((zpl->shell_read(GP0_RD_CSR_DRAM_BASE) == val)); - - bsg_tag_bitbang *btb = new bsg_tag_bitbang(zpl, GP0_WR_CSR_TAG_BITBANG, TAG_NUM_CLIENTS, TAG_MAX_LEN); - bsg_tag_client *mc_reset_client = new bsg_tag_client(TAG_CLIENT_MC_RESET_ID, TAG_CLIENT_MC_RESET_WIDTH); - - // Reset the bsg tag master - btb->reset_master(); - // Reset bsg client0 - btb->reset_client(mc_reset_client); - // Set bsg client0 to 1 (assert BP reset) - btb->set_client(mc_reset_client, 0x1); - // Set bsg client0 to 0 (deassert BP reset) - btb->set_client(mc_reset_client, 0x0); - - // We need some additional toggles for data to propagate through - btb->idle(50); - // Deassert the active-low system reset as we finish initializing the whole system - zpl->shell_write(GP0_WR_CSR_SYS_RESETN, 0x1, 0xF); - - unsigned long phys_ptr; - volatile int32_t *buf; - long allocated_dram = DRAM_ALLOCATE_SIZE; - bsg_pr_info("ps.cpp: calling allocate dram with size %ld\n", allocated_dram); - buf = (volatile int32_t *)zpl->allocate_dram(allocated_dram, &phys_ptr); - bsg_pr_info("ps.cpp: received %p (phys = %lx)\n", buf, phys_ptr); - zpl->shell_write(GP0_WR_CSR_DRAM_BASE, phys_ptr, 0xf); - assert((zpl->shell_read(GP0_RD_CSR_DRAM_BASE) == (int32_t)phys_ptr)); - bsg_pr_info("ps.cpp: wrote and verified base register\n"); - - if (argc == 1) { - bsg_pr_warn( - "No nbf file specified, sleeping for 2^31 seconds (this will hold " - "onto allocated DRAM)\n"); - sleep(1U << 31); - delete zpl; - return -1; - } - - nbf_load(zpl, argv[1]); - - int finished = 0; - while (finished != NUM_FINISH) { - bsg_pr_dbg_ps("Waiting for incoming request packet\n"); - hb_mc_request_packet_t mc_pkt; - recv_mc_request_packet(zpl, &mc_pkt); - bsg_pr_dbg_ps("Request packet signaled\n"); - int mc_epa = (mc_pkt.addr << 2) & 0xffff; // Trim to 16b EPA - int mc_data = mc_pkt.payload; - bsg_pr_dbg_ps("Request packet [%x] = %x\n", mc_epa, mc_data); - if (mc_epa == 0xeadc || mc_epa == 0xeee0) { - printf("%c", mc_data & 0xff); - fflush(stdout); - } else if (mc_epa == 0xead0) { - bsg_pr_info("Finish packet received %d\n", ++finished); - } else { - bsg_pr_info("Errant request packet: %x %x\n", mc_epa, mc_data); + bsg_zynq_pl *zpl = new bsg_zynq_pl(argc, argv); + + bsg_pr_info("ps.cpp: reading three base registers\n"); + bsg_pr_info("ps.cpp: dram_base=%lx\n", + zpl->shell_read(0x00 + gp0_addr_base)); + + uint32_t val; + zpl->shell_write(GP0_WR_CSR_DRAM_BASE, 0xDEADBEEF, 0xf); + assert((zpl->shell_read(GP0_RD_CSR_DRAM_BASE) == (0xDEADBEEF))); + zpl->shell_write(GP0_WR_CSR_DRAM_BASE, val, 0xf); + assert((zpl->shell_read(GP0_RD_CSR_DRAM_BASE) == val)); + + bsg_tag_bitbang *btb = new bsg_tag_bitbang(zpl, GP0_WR_CSR_TAG_BITBANG, + TAG_NUM_CLIENTS, TAG_MAX_LEN); + bsg_tag_client *mc_reset_client = + new bsg_tag_client(TAG_CLIENT_MC_RESET_ID, TAG_CLIENT_MC_RESET_WIDTH); + + // Reset the bsg tag master + btb->reset_master(); + // Reset bsg client0 + btb->reset_client(mc_reset_client); + // Set bsg client0 to 1 (assert BP reset) + btb->set_client(mc_reset_client, 0x1); + // Set bsg client0 to 0 (deassert BP reset) + btb->set_client(mc_reset_client, 0x0); + + // We need some additional toggles for data to propagate through + btb->idle(50); + // Deassert the active-low system reset as we finish initializing the whole + // system + zpl->shell_write(GP0_WR_CSR_SYS_RESETN, 0x1, 0xF); + + unsigned long phys_ptr; + volatile int32_t *buf; + long allocated_dram = DRAM_ALLOCATE_SIZE; + bsg_pr_info("ps.cpp: calling allocate dram with size %ld\n", + allocated_dram); + buf = (volatile int32_t *)zpl->allocate_dram(allocated_dram, &phys_ptr); + bsg_pr_info("ps.cpp: received %p (phys = %lx)\n", buf, phys_ptr); + zpl->shell_write(GP0_WR_CSR_DRAM_BASE, phys_ptr, 0xf); + assert((zpl->shell_read(GP0_RD_CSR_DRAM_BASE) == (int32_t)phys_ptr)); + bsg_pr_info("ps.cpp: wrote and verified base register\n"); + + if (argc == 1) { + bsg_pr_warn( + "No nbf file specified, sleeping for 2^31 seconds (this will hold " + "onto allocated DRAM)\n"); + sleep(1U << 31); + delete zpl; + return -1; + } + + nbf_load(zpl, argv[1]); + + int finished = 0; + while (finished != NUM_FINISH) { + bsg_pr_dbg_ps("Waiting for incoming request packet\n"); + hb_mc_request_packet_t mc_pkt; + recv_mc_request_packet(zpl, &mc_pkt); + bsg_pr_dbg_ps("Request packet signaled\n"); + int mc_epa = (mc_pkt.addr << 2) & 0xffff; // Trim to 16b EPA + int mc_data = mc_pkt.payload; + bsg_pr_dbg_ps("Request packet [%x] = %x\n", mc_epa, mc_data); + if (mc_epa == 0xeadc || mc_epa == 0xeee0) { + printf("%c", mc_data & 0xff); + fflush(stdout); + } else if (mc_epa == 0xead0) { + bsg_pr_info("Finish packet received %d\n", ++finished); + } else { + bsg_pr_info("Errant request packet: %x %x\n", mc_epa, mc_data); + } } - } - zpl->done(); - delete zpl; - return 0; + zpl->done(); + delete zpl; + return 0; } void nbf_load(bsg_zynq_pl *zpl, char *nbf_filename) { - string nbf_command; - string tmp; - string delimiter = "_"; - - long long int nbf[4]; - int pos = 0; - long unsigned int base_addr; - int data; - ifstream nbf_file(nbf_filename); - - if (!nbf_file.is_open()) { - bsg_pr_err("ps.cpp: error opening nbf file.\n"); - delete zpl; - return; - } - - int line_count = 0; - while (getline(nbf_file, nbf_command)) { - line_count++; - int i = 0; - while ((pos = nbf_command.find(delimiter)) != std::string::npos) { - tmp = nbf_command.substr(0, pos); - nbf[i] = std::stoull(tmp, nullptr, 16); - nbf_command.erase(0, pos + 1); - i++; + string nbf_command; + string tmp; + string delimiter = "_"; + + long long int nbf[4]; + int pos = 0; + long unsigned int base_addr; + int data; + ifstream nbf_file(nbf_filename); + + if (!nbf_file.is_open()) { + bsg_pr_err("ps.cpp: error opening nbf file.\n"); + delete zpl; + return; } - nbf[i] = std::stoull(nbf_command, nullptr, 16); - - int x_tile = nbf[0]; - int y_tile = nbf[1]; - int epa = nbf[2]; // word addr - int nbf_data = nbf[3]; - - bool finish = (x_tile == 0xff) && (y_tile == 0xff) && (epa == 0x00000000) && (nbf_data == 0x00000000); - bool fence = (x_tile == 0xff) && (y_tile == 0xff) && (epa == 0xffffffff) && (nbf_data == 0xffffffff); - - if (finish) { - bsg_pr_dbg_ps("ps.cpp: nbf finish command, line %d\n", line_count); - continue; - } else if (fence) { - bsg_pr_dbg_ps("ps.cpp: nbf fence command (ignoring), line %d\n", line_count); - bsg_pr_info("Waiting for credit drain\n"); - while(zpl->shell_read(GP0_RD_CREDIT_COUNT) > 0); - bsg_pr_info("Credits drained\n"); - continue; - } else { - send_mc_write(zpl, x_tile, y_tile, epa << 2, nbf_data); + + int line_count = 0; + while (getline(nbf_file, nbf_command)) { + line_count++; + int i = 0; + while ((pos = nbf_command.find(delimiter)) != std::string::npos) { + tmp = nbf_command.substr(0, pos); + nbf[i] = std::stoull(tmp, nullptr, 16); + nbf_command.erase(0, pos + 1); + i++; + } + nbf[i] = std::stoull(nbf_command, nullptr, 16); + + int x_tile = nbf[0]; + int y_tile = nbf[1]; + int epa = nbf[2]; // word addr + int nbf_data = nbf[3]; + + bool finish = (x_tile == 0xff) && (y_tile == 0xff) && + (epa == 0x00000000) && (nbf_data == 0x00000000); + bool fence = (x_tile == 0xff) && (y_tile == 0xff) && + (epa == 0xffffffff) && (nbf_data == 0xffffffff); + + if (finish) { + bsg_pr_dbg_ps("ps.cpp: nbf finish command, line %d\n", line_count); + continue; + } else if (fence) { + bsg_pr_dbg_ps("ps.cpp: nbf fence command (ignoring), line %d\n", + line_count); + bsg_pr_info("Waiting for credit drain\n"); + while (zpl->shell_read(GP0_RD_CREDIT_COUNT) > 0) + ; + bsg_pr_info("Credits drained\n"); + continue; + } else { + send_mc_write(zpl, x_tile, y_tile, epa << 2, nbf_data); #ifdef VERIFY_NBF - int32_t verif_data; - - verif_data = send_mc_read(zpl, x_tile, y_tile, epa << 2); + int32_t verif_data; + + verif_data = send_mc_read(zpl, x_tile, y_tile, epa << 2); - // Some verification reads are expected to fail e.g. CSRs - if (req_pkt.payload == resp_pkt.data) { - bsg_pr_info("Received verification: %x==%x\n", req_pkt.payload, resp_pkt.data); - } else { - bsg_pr_info("Failed verification: %x!=%x\n", req_pkt.payload, resp_pkt.data); - } + // Some verification reads are expected to fail e.g. CSRs + if (req_pkt.payload == resp_pkt.data) { + bsg_pr_info("Received verification: %x==%x\n", req_pkt.payload, + resp_pkt.data); + } else { + bsg_pr_info("Failed verification: %x!=%x\n", req_pkt.payload, + resp_pkt.data); + } #endif + } } - } } - diff --git a/cosim/shell-example/ps.cpp b/cosim/shell-example/ps.cpp index f24d762d..2cf6581b 100644 --- a/cosim/shell-example/ps.cpp +++ b/cosim/shell-example/ps.cpp @@ -4,126 +4,126 @@ // the API we provide abstracts away the // communication plumbing differences. -#include -#include -#include "bsg_zynq_pl.h" -#include "bsg_printing.h" #include "bsg_argparse.h" +#include "bsg_printing.h" +#include "bsg_zynq_pl.h" +#include +#include #include #define DRAM_ALLOC_SIZE_BYTES 16384 int ps_main(int argc, char **argv) { - bsg_zynq_pl *zpl = new bsg_zynq_pl(argc, argv); - - // the read memory map is essentially - // - // 0,4,8,C: registers - // 10, 14: output fifo heads - // 18, 1C: output fifo counts - // 20,24,28,2C: input fifo counts - // 30: last address of write - - // the write memory map is essentially - // - // 0,4,8,C: registers - // 10,14,18,1C: input fifo - - int val1 = 0xDEADBEEF; - int val2 = 0xCAFEBABE; - int val3 = 0x0000CADE; - int val4 = 0xC0DE0000; - int mask1 = 0xf; - int mask2 = 0xf; - - // write to two registers, checking our address snoop to see - // actual address that was received over the AXI bus - zpl->shell_write(0x0 + GP0_ADDR_BASE, val1, mask1); - assert(zpl->shell_read(0x30 + GP0_ADDR_BASE) == 0x0); - zpl->shell_write(0x4 + GP0_ADDR_BASE, val2, mask2); - assert(zpl->shell_read(0x30 + GP0_ADDR_BASE) == 0x4); - // 8,12 - - // check output fifo counters - assert((zpl->shell_read(0x18 + GP0_ADDR_BASE) == 0)); - assert((zpl->shell_read(0x1C + GP0_ADDR_BASE) == 0)); - - // check input fifo counters - bsg_pr_dbg_ps("%d\n", zpl->shell_read(0x20 + GP0_ADDR_BASE)); - assert((zpl->shell_read(0x20 + GP0_ADDR_BASE) == 4)); - assert((zpl->shell_read(0x24 + GP0_ADDR_BASE) == 4)); - assert((zpl->shell_read(0x28 + GP0_ADDR_BASE) == 4)); - assert((zpl->shell_read(0x2C + GP0_ADDR_BASE) == 4)); - - // write to fifos - zpl->shell_write(0x10 + GP0_ADDR_BASE, val3, mask1); - - // checker counters - assert((zpl->shell_read(0x20 + GP0_ADDR_BASE) == (3))); - assert((zpl->shell_read(0x24 + GP0_ADDR_BASE) == (4))); - - // write to fifo - zpl->shell_write(0x10 + GP0_ADDR_BASE, val1, mask1); - // checker counters - assert((zpl->shell_read(0x20 + GP0_ADDR_BASE) == (2))); - assert((zpl->shell_read(0x24 + GP0_ADDR_BASE) == (4))); - - zpl->shell_write(0x14 + GP0_ADDR_BASE, val4, mask2); - zpl->shell_write(0x14 + GP0_ADDR_BASE, val2, mask2); - - // checker counters - assert((zpl->shell_read(0x20 + GP0_ADDR_BASE) == (4))); - assert((zpl->shell_read(0x24 + GP0_ADDR_BASE) == (4))); - - // check register writes - assert((zpl->shell_read(0x0 + GP0_ADDR_BASE) == (val1))); - assert((zpl->shell_read(0x4 + GP0_ADDR_BASE) == (val2))); - - // checker output counters - assert((zpl->shell_read(0x18 + GP0_ADDR_BASE) == (2))); - assert((zpl->shell_read(0x1C + GP0_ADDR_BASE) == (0))); - - // check that the output fifo has the sum of the input fifos - assert((zpl->shell_read(0x10 + GP0_ADDR_BASE) == (val3 + val4))); - assert((zpl->shell_read(0x10 + GP0_ADDR_BASE) == (val1 + val2))); - - // checker output counters - assert((zpl->shell_read(0x18 + GP0_ADDR_BASE) == (0))); - assert((zpl->shell_read(0x1C + GP0_ADDR_BASE) == (0))); - - // try a different set of input and output fifos - zpl->shell_write(0x18 + GP0_ADDR_BASE, val1, mask1); - zpl->shell_write(0x1C + GP0_ADDR_BASE, val2, mask2); - - // checker output counters - assert((zpl->shell_read(0x18 + GP0_ADDR_BASE) == (0))); - assert((zpl->shell_read(0x1C + GP0_ADDR_BASE) == (1))); - - // read value out of fifo - assert((zpl->shell_read(0x14 + GP0_ADDR_BASE) == (val1 + val2))); - - // checker output counters - assert((zpl->shell_read(0x18 + GP0_ADDR_BASE) == (0))); - assert((zpl->shell_read(0x1C + GP0_ADDR_BASE) == (0))); - - // DRAM test - unsigned long phys_ptr; - volatile int *buf; - - buf = (volatile int *)zpl->allocate_dram(DRAM_ALLOC_SIZE_BYTES, &phys_ptr); - - // write all of the dram - for (int i = 0; i < DRAM_ALLOC_SIZE_BYTES / 4; i++) - buf[i] = i; - - // read all of the dram - for (int i = 0; i < DRAM_ALLOC_SIZE_BYTES / 4; i++) - assert(buf[i] == i); - - printf("## everything passed; at end of test\n"); - zpl->done(); - - delete zpl; - return 0; + bsg_zynq_pl *zpl = new bsg_zynq_pl(argc, argv); + + // the read memory map is essentially + // + // 0,4,8,C: registers + // 10, 14: output fifo heads + // 18, 1C: output fifo counts + // 20,24,28,2C: input fifo counts + // 30: last address of write + + // the write memory map is essentially + // + // 0,4,8,C: registers + // 10,14,18,1C: input fifo + + int val1 = 0xDEADBEEF; + int val2 = 0xCAFEBABE; + int val3 = 0x0000CADE; + int val4 = 0xC0DE0000; + int mask1 = 0xf; + int mask2 = 0xf; + + // write to two registers, checking our address snoop to see + // actual address that was received over the AXI bus + zpl->shell_write(0x0 + GP0_ADDR_BASE, val1, mask1); + assert(zpl->shell_read(0x30 + GP0_ADDR_BASE) == 0x0); + zpl->shell_write(0x4 + GP0_ADDR_BASE, val2, mask2); + assert(zpl->shell_read(0x30 + GP0_ADDR_BASE) == 0x4); + // 8,12 + + // check output fifo counters + assert((zpl->shell_read(0x18 + GP0_ADDR_BASE) == 0)); + assert((zpl->shell_read(0x1C + GP0_ADDR_BASE) == 0)); + + // check input fifo counters + bsg_pr_dbg_ps("%d\n", zpl->shell_read(0x20 + GP0_ADDR_BASE)); + assert((zpl->shell_read(0x20 + GP0_ADDR_BASE) == 4)); + assert((zpl->shell_read(0x24 + GP0_ADDR_BASE) == 4)); + assert((zpl->shell_read(0x28 + GP0_ADDR_BASE) == 4)); + assert((zpl->shell_read(0x2C + GP0_ADDR_BASE) == 4)); + + // write to fifos + zpl->shell_write(0x10 + GP0_ADDR_BASE, val3, mask1); + + // checker counters + assert((zpl->shell_read(0x20 + GP0_ADDR_BASE) == (3))); + assert((zpl->shell_read(0x24 + GP0_ADDR_BASE) == (4))); + + // write to fifo + zpl->shell_write(0x10 + GP0_ADDR_BASE, val1, mask1); + // checker counters + assert((zpl->shell_read(0x20 + GP0_ADDR_BASE) == (2))); + assert((zpl->shell_read(0x24 + GP0_ADDR_BASE) == (4))); + + zpl->shell_write(0x14 + GP0_ADDR_BASE, val4, mask2); + zpl->shell_write(0x14 + GP0_ADDR_BASE, val2, mask2); + + // checker counters + assert((zpl->shell_read(0x20 + GP0_ADDR_BASE) == (4))); + assert((zpl->shell_read(0x24 + GP0_ADDR_BASE) == (4))); + + // check register writes + assert((zpl->shell_read(0x0 + GP0_ADDR_BASE) == (val1))); + assert((zpl->shell_read(0x4 + GP0_ADDR_BASE) == (val2))); + + // checker output counters + assert((zpl->shell_read(0x18 + GP0_ADDR_BASE) == (2))); + assert((zpl->shell_read(0x1C + GP0_ADDR_BASE) == (0))); + + // check that the output fifo has the sum of the input fifos + assert((zpl->shell_read(0x10 + GP0_ADDR_BASE) == (val3 + val4))); + assert((zpl->shell_read(0x10 + GP0_ADDR_BASE) == (val1 + val2))); + + // checker output counters + assert((zpl->shell_read(0x18 + GP0_ADDR_BASE) == (0))); + assert((zpl->shell_read(0x1C + GP0_ADDR_BASE) == (0))); + + // try a different set of input and output fifos + zpl->shell_write(0x18 + GP0_ADDR_BASE, val1, mask1); + zpl->shell_write(0x1C + GP0_ADDR_BASE, val2, mask2); + + // checker output counters + assert((zpl->shell_read(0x18 + GP0_ADDR_BASE) == (0))); + assert((zpl->shell_read(0x1C + GP0_ADDR_BASE) == (1))); + + // read value out of fifo + assert((zpl->shell_read(0x14 + GP0_ADDR_BASE) == (val1 + val2))); + + // checker output counters + assert((zpl->shell_read(0x18 + GP0_ADDR_BASE) == (0))); + assert((zpl->shell_read(0x1C + GP0_ADDR_BASE) == (0))); + + // DRAM test + unsigned long phys_ptr; + volatile int *buf; + + buf = (volatile int *)zpl->allocate_dram(DRAM_ALLOC_SIZE_BYTES, &phys_ptr); + + // write all of the dram + for (int i = 0; i < DRAM_ALLOC_SIZE_BYTES / 4; i++) + buf[i] = i; + + // read all of the dram + for (int i = 0; i < DRAM_ALLOC_SIZE_BYTES / 4; i++) + assert(buf[i] == i); + + printf("## everything passed; at end of test\n"); + zpl->done(); + + delete zpl; + return 0; } diff --git a/cosim/simple-example/ps.cpp b/cosim/simple-example/ps.cpp index 5fae9a40..c360afc0 100644 --- a/cosim/simple-example/ps.cpp +++ b/cosim/simple-example/ps.cpp @@ -4,31 +4,30 @@ // the API we provide abstracts away the // communication plumbing differences. +#include "bsg_zynq_pl.h" #include -#include #include -#include "bsg_zynq_pl.h" +#include int ps_main(int argc, char **argv) { - bsg_zynq_pl *zpl = new bsg_zynq_pl(argc, argv); + bsg_zynq_pl *zpl = new bsg_zynq_pl(argc, argv); - // this program just communicates with a "loopback accelerator" - // that has 4 control registers that you can read and write + // this program just communicates with a "loopback accelerator" + // that has 4 control registers that you can read and write - int val1 = 0xDEADBEEF; - int val2 = 0xCAFEBABE; - int mask1 = 0xf; - int mask2 = 0xf; + int val1 = 0xDEADBEEF; + int val2 = 0xCAFEBABE; + int mask1 = 0xf; + int mask2 = 0xf; - zpl->shell_write(0x0 + GP0_ADDR_BASE, val1, mask1); - zpl->shell_write(0x4 + GP0_ADDR_BASE, val2, mask2); + zpl->shell_write(0x0 + GP0_ADDR_BASE, val1, mask1); + zpl->shell_write(0x4 + GP0_ADDR_BASE, val2, mask2); - assert((zpl->shell_read(0x0 + GP0_ADDR_BASE) == (val1))); - assert((zpl->shell_read(0x4 + GP0_ADDR_BASE) == (val2))); + assert((zpl->shell_read(0x0 + GP0_ADDR_BASE) == (val1))); + assert((zpl->shell_read(0x4 + GP0_ADDR_BASE) == (val2))); - zpl->done(); + zpl->done(); - delete zpl; - return 0; + delete zpl; + return 0; } - diff --git a/cosim/src/main.cpp b/cosim/src/main.cpp index 41ea5da7..22b0ba25 100644 --- a/cosim/src/main.cpp +++ b/cosim/src/main.cpp @@ -12,12 +12,11 @@ extern "C" int cosim_main(char *argstr) { char *argv[argc]; get_argv(argstr, argc, argv); #else - int main(int argc, char **argv) { +int main(int argc, char **argv) { #endif - // this ensures that even with tee, the output is line buffered - // so that we can see what is happening in real time - setvbuf(stdout, NULL, _IOLBF, 0); - - return ps_main(argc, argv); - } + // this ensures that even with tee, the output is line buffered + // so that we can see what is happening in real time + setvbuf(stdout, NULL, _IOLBF, 0); + return ps_main(argc, argv); +}