diff --git a/hw/system/spatz_cluster/src/spatz_cluster_peripheral/spatz_cluster_peripheral_reg.hjson b/hw/system/spatz_cluster/src/spatz_cluster_peripheral/spatz_cluster_peripheral_reg.hjson index c80b34c..5c3537d 100644 --- a/hw/system/spatz_cluster/src/spatz_cluster_peripheral/spatz_cluster_peripheral_reg.hjson +++ b/hw/system/spatz_cluster/src/spatz_cluster_peripheral/spatz_cluster_peripheral_reg.hjson @@ -415,6 +415,18 @@ desc: "Indicates the cluster is computing a kernel." }] }, + { + name: "SPATZ_CYCLE", + desc: '''Store cycle counts of kernels''' + swaccess: "rw", + hwaccess: "hrw", + resval: "0", + fields: [{ + bits: "31:0", + name: "SPATZ_CYC", + desc: "Store cycle counts of kernels." + }] + }, { name: "CLUSTER_BOOT_CONTROL", desc: '''Controls the cluster boot process.''' diff --git a/hw/system/spatz_cluster/src/spatz_cluster_peripheral/spatz_cluster_peripheral_reg_pkg.sv b/hw/system/spatz_cluster/src/spatz_cluster_peripheral/spatz_cluster_peripheral_reg_pkg.sv index c39af85..bc9db5f 100644 --- a/hw/system/spatz_cluster/src/spatz_cluster_peripheral/spatz_cluster_peripheral_reg_pkg.sv +++ b/hw/system/spatz_cluster/src/spatz_cluster_peripheral/spatz_cluster_peripheral_reg_pkg.sv @@ -143,6 +143,10 @@ package spatz_cluster_peripheral_reg_pkg; logic q; } spatz_cluster_peripheral_reg2hw_spatz_status_reg_t; + typedef struct packed { + logic [31:0] q; + } spatz_cluster_peripheral_reg2hw_spatz_cycle_reg_t; + typedef struct packed { logic [31:0] q; } spatz_cluster_peripheral_reg2hw_cluster_boot_control_reg_t; @@ -175,6 +179,11 @@ package spatz_cluster_peripheral_reg_pkg; logic [31:0] d; } spatz_cluster_peripheral_hw2reg_hw_barrier_reg_t; + typedef struct packed { + logic [31:0] d; + logic de; + } spatz_cluster_peripheral_hw2reg_spatz_cycle_reg_t; + typedef struct packed { logic d; logic de; @@ -191,14 +200,15 @@ package spatz_cluster_peripheral_reg_pkg; // Register -> HW type typedef struct packed { - spatz_cluster_peripheral_reg2hw_perf_counter_enable_mreg_t [1:0] 
perf_counter_enable; // [326:265] - spatz_cluster_peripheral_reg2hw_hart_select_mreg_t [1:0] hart_select; // [264:245] - spatz_cluster_peripheral_reg2hw_perf_counter_mreg_t [1:0] perf_counter; // [244:147] - spatz_cluster_peripheral_reg2hw_cl_clint_set_reg_t cl_clint_set; // [146:114] - spatz_cluster_peripheral_reg2hw_cl_clint_clear_reg_t cl_clint_clear; // [113:81] - spatz_cluster_peripheral_reg2hw_hw_barrier_reg_t hw_barrier; // [80:49] - spatz_cluster_peripheral_reg2hw_icache_prefetch_enable_reg_t icache_prefetch_enable; // [48:48] - spatz_cluster_peripheral_reg2hw_spatz_status_reg_t spatz_status; // [47:47] + spatz_cluster_peripheral_reg2hw_perf_counter_enable_mreg_t [1:0] perf_counter_enable; // [358:297] + spatz_cluster_peripheral_reg2hw_hart_select_mreg_t [1:0] hart_select; // [296:277] + spatz_cluster_peripheral_reg2hw_perf_counter_mreg_t [1:0] perf_counter; // [276:179] + spatz_cluster_peripheral_reg2hw_cl_clint_set_reg_t cl_clint_set; // [178:146] + spatz_cluster_peripheral_reg2hw_cl_clint_clear_reg_t cl_clint_clear; // [145:113] + spatz_cluster_peripheral_reg2hw_hw_barrier_reg_t hw_barrier; // [112:81] + spatz_cluster_peripheral_reg2hw_icache_prefetch_enable_reg_t icache_prefetch_enable; // [80:80] + spatz_cluster_peripheral_reg2hw_spatz_status_reg_t spatz_status; // [79:79] + spatz_cluster_peripheral_reg2hw_spatz_cycle_reg_t spatz_cycle; // [78:47] spatz_cluster_peripheral_reg2hw_cluster_boot_control_reg_t cluster_boot_control; // [46:15] spatz_cluster_peripheral_reg2hw_cluster_eoc_exit_reg_t cluster_eoc_exit; // [14:14] spatz_cluster_peripheral_reg2hw_cfg_l1d_spm_reg_t cfg_l1d_spm; // [13:4] @@ -209,8 +219,9 @@ package spatz_cluster_peripheral_reg_pkg; // HW -> register type typedef struct packed { - spatz_cluster_peripheral_hw2reg_perf_counter_mreg_t [1:0] perf_counter; // [132:37] - spatz_cluster_peripheral_hw2reg_hw_barrier_reg_t hw_barrier; // [36:5] + spatz_cluster_peripheral_hw2reg_perf_counter_mreg_t [1:0] perf_counter; // [165:70] + 
spatz_cluster_peripheral_hw2reg_hw_barrier_reg_t hw_barrier; // [69:38] + spatz_cluster_peripheral_hw2reg_spatz_cycle_reg_t spatz_cycle; // [37:5] spatz_cluster_peripheral_hw2reg_l1d_spm_commit_reg_t l1d_spm_commit; // [4:3] spatz_cluster_peripheral_hw2reg_l1d_insn_commit_reg_t l1d_insn_commit; // [2:1] spatz_cluster_peripheral_hw2reg_l1d_flush_status_reg_t l1d_flush_status; // [0:0] @@ -228,13 +239,14 @@ package spatz_cluster_peripheral_reg_pkg; parameter logic [BlockAw-1:0] SPATZ_CLUSTER_PERIPHERAL_HW_BARRIER_OFFSET = 8'h 40; parameter logic [BlockAw-1:0] SPATZ_CLUSTER_PERIPHERAL_ICACHE_PREFETCH_ENABLE_OFFSET = 8'h 48; parameter logic [BlockAw-1:0] SPATZ_CLUSTER_PERIPHERAL_SPATZ_STATUS_OFFSET = 8'h 50; - parameter logic [BlockAw-1:0] SPATZ_CLUSTER_PERIPHERAL_CLUSTER_BOOT_CONTROL_OFFSET = 8'h 58; - parameter logic [BlockAw-1:0] SPATZ_CLUSTER_PERIPHERAL_CLUSTER_EOC_EXIT_OFFSET = 8'h 60; - parameter logic [BlockAw-1:0] SPATZ_CLUSTER_PERIPHERAL_CFG_L1D_SPM_OFFSET = 8'h 68; - parameter logic [BlockAw-1:0] SPATZ_CLUSTER_PERIPHERAL_CFG_L1D_INSN_OFFSET = 8'h 70; - parameter logic [BlockAw-1:0] SPATZ_CLUSTER_PERIPHERAL_L1D_SPM_COMMIT_OFFSET = 8'h 78; - parameter logic [BlockAw-1:0] SPATZ_CLUSTER_PERIPHERAL_L1D_INSN_COMMIT_OFFSET = 8'h 80; - parameter logic [BlockAw-1:0] SPATZ_CLUSTER_PERIPHERAL_L1D_FLUSH_STATUS_OFFSET = 8'h 88; + parameter logic [BlockAw-1:0] SPATZ_CLUSTER_PERIPHERAL_SPATZ_CYCLE_OFFSET = 8'h 58; + parameter logic [BlockAw-1:0] SPATZ_CLUSTER_PERIPHERAL_CLUSTER_BOOT_CONTROL_OFFSET = 8'h 60; + parameter logic [BlockAw-1:0] SPATZ_CLUSTER_PERIPHERAL_CLUSTER_EOC_EXIT_OFFSET = 8'h 68; + parameter logic [BlockAw-1:0] SPATZ_CLUSTER_PERIPHERAL_CFG_L1D_SPM_OFFSET = 8'h 70; + parameter logic [BlockAw-1:0] SPATZ_CLUSTER_PERIPHERAL_CFG_L1D_INSN_OFFSET = 8'h 78; + parameter logic [BlockAw-1:0] SPATZ_CLUSTER_PERIPHERAL_L1D_SPM_COMMIT_OFFSET = 8'h 80; + parameter logic [BlockAw-1:0] SPATZ_CLUSTER_PERIPHERAL_L1D_INSN_COMMIT_OFFSET = 8'h 88; + parameter logic [BlockAw-1:0] 
SPATZ_CLUSTER_PERIPHERAL_L1D_FLUSH_STATUS_OFFSET = 8'h 90; // Reset values for hwext registers and their fields parameter logic [47:0] SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_0_RESVAL = 48'h 0; @@ -258,6 +270,7 @@ package spatz_cluster_peripheral_reg_pkg; SPATZ_CLUSTER_PERIPHERAL_HW_BARRIER, SPATZ_CLUSTER_PERIPHERAL_ICACHE_PREFETCH_ENABLE, SPATZ_CLUSTER_PERIPHERAL_SPATZ_STATUS, + SPATZ_CLUSTER_PERIPHERAL_SPATZ_CYCLE, SPATZ_CLUSTER_PERIPHERAL_CLUSTER_BOOT_CONTROL, SPATZ_CLUSTER_PERIPHERAL_CLUSTER_EOC_EXIT, SPATZ_CLUSTER_PERIPHERAL_CFG_L1D_SPM, @@ -268,7 +281,7 @@ package spatz_cluster_peripheral_reg_pkg; } spatz_cluster_peripheral_id_e; // Register width information to check illegal writes - parameter logic [3:0] SPATZ_CLUSTER_PERIPHERAL_PERMIT [18] = '{ + parameter logic [3:0] SPATZ_CLUSTER_PERIPHERAL_PERMIT [19] = '{ 4'b 1111, // index[ 0] SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_0 4'b 1111, // index[ 1] SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_1 4'b 0011, // index[ 2] SPATZ_CLUSTER_PERIPHERAL_HART_SELECT_0 @@ -280,13 +293,14 @@ package spatz_cluster_peripheral_reg_pkg; 4'b 1111, // index[ 8] SPATZ_CLUSTER_PERIPHERAL_HW_BARRIER 4'b 0001, // index[ 9] SPATZ_CLUSTER_PERIPHERAL_ICACHE_PREFETCH_ENABLE 4'b 0001, // index[10] SPATZ_CLUSTER_PERIPHERAL_SPATZ_STATUS - 4'b 1111, // index[11] SPATZ_CLUSTER_PERIPHERAL_CLUSTER_BOOT_CONTROL - 4'b 0001, // index[12] SPATZ_CLUSTER_PERIPHERAL_CLUSTER_EOC_EXIT - 4'b 0011, // index[13] SPATZ_CLUSTER_PERIPHERAL_CFG_L1D_SPM - 4'b 0001, // index[14] SPATZ_CLUSTER_PERIPHERAL_CFG_L1D_INSN - 4'b 0001, // index[15] SPATZ_CLUSTER_PERIPHERAL_L1D_SPM_COMMIT - 4'b 0001, // index[16] SPATZ_CLUSTER_PERIPHERAL_L1D_INSN_COMMIT - 4'b 0001 // index[17] SPATZ_CLUSTER_PERIPHERAL_L1D_FLUSH_STATUS + 4'b 1111, // index[11] SPATZ_CLUSTER_PERIPHERAL_SPATZ_CYCLE + 4'b 1111, // index[12] SPATZ_CLUSTER_PERIPHERAL_CLUSTER_BOOT_CONTROL + 4'b 0001, // index[13] SPATZ_CLUSTER_PERIPHERAL_CLUSTER_EOC_EXIT + 4'b 0011, // index[14] 
SPATZ_CLUSTER_PERIPHERAL_CFG_L1D_SPM + 4'b 0001, // index[15] SPATZ_CLUSTER_PERIPHERAL_CFG_L1D_INSN + 4'b 0001, // index[16] SPATZ_CLUSTER_PERIPHERAL_L1D_SPM_COMMIT + 4'b 0001, // index[17] SPATZ_CLUSTER_PERIPHERAL_L1D_INSN_COMMIT + 4'b 0001 // index[18] SPATZ_CLUSTER_PERIPHERAL_L1D_FLUSH_STATUS }; endpackage diff --git a/hw/system/spatz_cluster/src/spatz_cluster_peripheral/spatz_cluster_peripheral_reg_top.sv b/hw/system/spatz_cluster/src/spatz_cluster_peripheral/spatz_cluster_peripheral_reg_top.sv index d019907..3ec047a 100644 --- a/hw/system/spatz_cluster/src/spatz_cluster_peripheral/spatz_cluster_peripheral_reg_top.sv +++ b/hw/system/spatz_cluster/src/spatz_cluster_peripheral/spatz_cluster_peripheral_reg_top.sv @@ -278,6 +278,9 @@ module spatz_cluster_peripheral_reg_top #( logic icache_prefetch_enable_we; logic spatz_status_wd; logic spatz_status_we; + logic [31:0] spatz_cycle_qs; + logic [31:0] spatz_cycle_wd; + logic spatz_cycle_we; logic [31:0] cluster_boot_control_qs; logic [31:0] cluster_boot_control_wd; logic cluster_boot_control_we; @@ -2110,6 +2113,33 @@ module spatz_cluster_peripheral_reg_top #( ); + // R[spatz_cycle]: V(False) + + prim_subreg #( + .DW (32), + .SWACCESS("RW"), + .RESVAL (32'h0) + ) u_spatz_cycle ( + .clk_i (clk_i ), + .rst_ni (rst_ni ), + + // from register interface + .we (spatz_cycle_we), + .wd (spatz_cycle_wd), + + // from internal hardware + .de (hw2reg.spatz_cycle.de), + .d (hw2reg.spatz_cycle.d ), + + // to internal hardware + .qe (), + .q (reg2hw.spatz_cycle.q ), + + // to register interface (read) + .qs (spatz_cycle_qs) + ); + + // R[cluster_boot_control]: V(False) prim_subreg #( @@ -2290,7 +2320,7 @@ module spatz_cluster_peripheral_reg_top #( - logic [17:0] addr_hit; + logic [18:0] addr_hit; always_comb begin addr_hit = '0; addr_hit[ 0] = (reg_addr == SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_ENABLE_0_OFFSET); @@ -2304,13 +2334,14 @@ module spatz_cluster_peripheral_reg_top #( addr_hit[ 8] = (reg_addr == 
SPATZ_CLUSTER_PERIPHERAL_HW_BARRIER_OFFSET); addr_hit[ 9] = (reg_addr == SPATZ_CLUSTER_PERIPHERAL_ICACHE_PREFETCH_ENABLE_OFFSET); addr_hit[10] = (reg_addr == SPATZ_CLUSTER_PERIPHERAL_SPATZ_STATUS_OFFSET); - addr_hit[11] = (reg_addr == SPATZ_CLUSTER_PERIPHERAL_CLUSTER_BOOT_CONTROL_OFFSET); - addr_hit[12] = (reg_addr == SPATZ_CLUSTER_PERIPHERAL_CLUSTER_EOC_EXIT_OFFSET); - addr_hit[13] = (reg_addr == SPATZ_CLUSTER_PERIPHERAL_CFG_L1D_SPM_OFFSET); - addr_hit[14] = (reg_addr == SPATZ_CLUSTER_PERIPHERAL_CFG_L1D_INSN_OFFSET); - addr_hit[15] = (reg_addr == SPATZ_CLUSTER_PERIPHERAL_L1D_SPM_COMMIT_OFFSET); - addr_hit[16] = (reg_addr == SPATZ_CLUSTER_PERIPHERAL_L1D_INSN_COMMIT_OFFSET); - addr_hit[17] = (reg_addr == SPATZ_CLUSTER_PERIPHERAL_L1D_FLUSH_STATUS_OFFSET); + addr_hit[11] = (reg_addr == SPATZ_CLUSTER_PERIPHERAL_SPATZ_CYCLE_OFFSET); + addr_hit[12] = (reg_addr == SPATZ_CLUSTER_PERIPHERAL_CLUSTER_BOOT_CONTROL_OFFSET); + addr_hit[13] = (reg_addr == SPATZ_CLUSTER_PERIPHERAL_CLUSTER_EOC_EXIT_OFFSET); + addr_hit[14] = (reg_addr == SPATZ_CLUSTER_PERIPHERAL_CFG_L1D_SPM_OFFSET); + addr_hit[15] = (reg_addr == SPATZ_CLUSTER_PERIPHERAL_CFG_L1D_INSN_OFFSET); + addr_hit[16] = (reg_addr == SPATZ_CLUSTER_PERIPHERAL_L1D_SPM_COMMIT_OFFSET); + addr_hit[17] = (reg_addr == SPATZ_CLUSTER_PERIPHERAL_L1D_INSN_COMMIT_OFFSET); + addr_hit[18] = (reg_addr == SPATZ_CLUSTER_PERIPHERAL_L1D_FLUSH_STATUS_OFFSET); end assign addrmiss = (reg_re || reg_we) ? 
~|addr_hit : 1'b0 ; @@ -2335,7 +2366,8 @@ module spatz_cluster_peripheral_reg_top #( (addr_hit[14] & (|(SPATZ_CLUSTER_PERIPHERAL_PERMIT[14] & ~reg_be))) | (addr_hit[15] & (|(SPATZ_CLUSTER_PERIPHERAL_PERMIT[15] & ~reg_be))) | (addr_hit[16] & (|(SPATZ_CLUSTER_PERIPHERAL_PERMIT[16] & ~reg_be))) | - (addr_hit[17] & (|(SPATZ_CLUSTER_PERIPHERAL_PERMIT[17] & ~reg_be))))); + (addr_hit[17] & (|(SPATZ_CLUSTER_PERIPHERAL_PERMIT[17] & ~reg_be))) | + (addr_hit[18] & (|(SPATZ_CLUSTER_PERIPHERAL_PERMIT[18] & ~reg_be))))); end assign perf_counter_enable_0_cycle_0_we = addr_hit[0] & reg_we & !reg_error; @@ -2552,25 +2584,28 @@ module spatz_cluster_peripheral_reg_top #( assign spatz_status_we = addr_hit[10] & reg_we & !reg_error; assign spatz_status_wd = reg_wdata[0]; - assign cluster_boot_control_we = addr_hit[11] & reg_we & !reg_error; + assign spatz_cycle_we = addr_hit[11] & reg_we & !reg_error; + assign spatz_cycle_wd = reg_wdata[31:0]; + + assign cluster_boot_control_we = addr_hit[12] & reg_we & !reg_error; assign cluster_boot_control_wd = reg_wdata[31:0]; - assign cluster_eoc_exit_we = addr_hit[12] & reg_we & !reg_error; + assign cluster_eoc_exit_we = addr_hit[13] & reg_we & !reg_error; assign cluster_eoc_exit_wd = reg_wdata[0]; - assign cfg_l1d_spm_we = addr_hit[13] & reg_we & !reg_error; + assign cfg_l1d_spm_we = addr_hit[14] & reg_we & !reg_error; assign cfg_l1d_spm_wd = reg_wdata[9:0]; - assign cfg_l1d_insn_we = addr_hit[14] & reg_we & !reg_error; + assign cfg_l1d_insn_we = addr_hit[15] & reg_we & !reg_error; assign cfg_l1d_insn_wd = reg_wdata[1:0]; - assign l1d_spm_commit_we = addr_hit[15] & reg_we & !reg_error; + assign l1d_spm_commit_we = addr_hit[16] & reg_we & !reg_error; assign l1d_spm_commit_wd = reg_wdata[0]; - assign l1d_insn_commit_we = addr_hit[16] & reg_we & !reg_error; + assign l1d_insn_commit_we = addr_hit[17] & reg_we & !reg_error; assign l1d_insn_commit_wd = reg_wdata[0]; - assign l1d_flush_status_re = addr_hit[17] & reg_re & !reg_error; + assign 
l1d_flush_status_re = addr_hit[18] & reg_re & !reg_error; // Read data return always_comb begin @@ -2681,30 +2716,34 @@ module spatz_cluster_peripheral_reg_top #( end addr_hit[11]: begin - reg_rdata_next[31:0] = cluster_boot_control_qs; + reg_rdata_next[31:0] = spatz_cycle_qs; end addr_hit[12]: begin - reg_rdata_next[0] = cluster_eoc_exit_qs; + reg_rdata_next[31:0] = cluster_boot_control_qs; end addr_hit[13]: begin - reg_rdata_next[9:0] = cfg_l1d_spm_qs; + reg_rdata_next[0] = cluster_eoc_exit_qs; end addr_hit[14]: begin - reg_rdata_next[1:0] = cfg_l1d_insn_qs; + reg_rdata_next[9:0] = cfg_l1d_spm_qs; end addr_hit[15]: begin - reg_rdata_next[0] = l1d_spm_commit_qs; + reg_rdata_next[1:0] = cfg_l1d_insn_qs; end addr_hit[16]: begin - reg_rdata_next[0] = l1d_insn_commit_qs; + reg_rdata_next[0] = l1d_spm_commit_qs; end addr_hit[17]: begin + reg_rdata_next[0] = l1d_insn_commit_qs; + end + + addr_hit[18]: begin reg_rdata_next[0] = l1d_flush_status_qs; end diff --git a/hw/system/spatz_cluster/test/bootrom.bin b/hw/system/spatz_cluster/test/bootrom.bin index 25b664e..94fc89b 100755 Binary files a/hw/system/spatz_cluster/test/bootrom.bin and b/hw/system/spatz_cluster/test/bootrom.bin differ diff --git a/hw/system/spatz_cluster/test/bootrom.dump b/hw/system/spatz_cluster/test/bootrom.dump index 311a50d..6cda4b2 100644 --- a/hw/system/spatz_cluster/test/bootrom.dump +++ b/hw/system/spatz_cluster/test/bootrom.dump @@ -6,64 +6,68 @@ Disassembly of section .text: 00001000 <_start>: 1000: 00000317 auipc t1,0x0 - 1004: 07032303 lw t1,112(t1) # 1070 <_GLOBAL_OFFSET_TABLE_+0x4> + 1004: 07832303 lw t1,120(t1) # 1078 <_GLOBAL_OFFSET_TABLE_+0x4> 1008: 30531073 csrw mtvec,t1 100c: f1402573 csrr a0,mhartid 1010: 00000597 auipc a1,0x0 - 1014: 0645a583 lw a1,100(a1) # 1074 <_GLOBAL_OFFSET_TABLE_+0x8> - 1018: 10500073 wfi - 101c: 00c5a383 lw t2,12(a1) - 1020: 0105ae03 lw t3,16(a1) - 1024: 01c383b3 add t2,t2,t3 - 1028: 05838393 addi t2,t2,88 - 102c: 00038393 mv t2,t2 - 1030: 0003a383 lw 
t2,0(t2) - 1034: 00038067 jr t2 - -00001038 : - 1038: 10500073 wfi - 103c: ffdff06f j 1038 + 1014: 06c5a583 lw a1,108(a1) # 107c <_GLOBAL_OFFSET_TABLE_+0x8> + 1018: 3047d073 csrwi mie,15 + 101c: 10500073 wfi + 1020: 00c5a383 lw t2,12(a1) + 1024: 0105ae03 lw t3,16(a1) + 1028: 01c383b3 add t2,t2,t3 + 102c: 06038393 addi t2,t2,96 + 1030: 00038393 mv t2,t2 + 1034: 0003a383 lw t2,0(t2) + 1038: 00038067 jr t2 + +0000103c : + 103c: 10500073 wfi + 1040: ffdff06f j 103c Disassembly of section .rodata: -00001040 : - 1040: 1000 .2byte 0x1000 - 1042: 0000 .2byte 0x0 - 1044: 0002 .2byte 0x2 - 1046: 0000 .2byte 0x0 - 1048: 0010 .2byte 0x10 +00001048 : + 1048: 1000 .2byte 0x1000 104a: 0000 .2byte 0x0 - 104c: 0000 .2byte 0x0 - 104e: 5100 .2byte 0x5100 - 1050: 8000 .2byte 0x8000 + 104c: 0002 .2byte 0x2 + 104e: 0000 .2byte 0x0 + 1050: 0010 .2byte 0x10 + 1052: 0000 .2byte 0x0 + 1054: 0000 .2byte 0x0 + 1056: 5100 .2byte 0x5100 + 1058: 0000 .2byte 0x0 + 105a: 0002 .2byte 0x2 + 105c: 0000 .2byte 0x0 + 105e: 0000 .2byte 0x0 + 1060: 0000 .2byte 0x0 + 1062: 8000 .2byte 0x8000 ... - 105a: 8000 .2byte 0x8000 - ... - 1064: 0001 .2byte 0x1 + 106c: 0001 .2byte 0x1 ... Disassembly of section .boot_section: -00001068 : - 1068: 1038 .2byte 0x1038 +00001070 : + 1070: 103c .2byte 0x103c ... Disassembly of section .got: -0000106c <_GLOBAL_OFFSET_TABLE_>: - 106c: 0000 .2byte 0x0 - 106e: 0000 .2byte 0x0 - 1070: 1038 .2byte 0x1038 - 1072: 0000 .2byte 0x0 - 1074: 1040 .2byte 0x1040 +00001074 <_GLOBAL_OFFSET_TABLE_>: + 1074: 0000 .2byte 0x0 + 1076: 0000 .2byte 0x0 + 1078: 103c .2byte 0x103c + 107a: 0000 .2byte 0x0 + 107c: 1048 .2byte 0x1048 ... Disassembly of section .got.plt: -00001078 <.got.plt>: - 1078: ffff .2byte 0xffff - 107a: ffff .2byte 0xffff - 107c: 0000 .2byte 0x0 +00001080 <.got.plt>: + 1080: ffff .2byte 0xffff + 1082: ffff .2byte 0xffff + 1084: 0000 .2byte 0x0 ... 
Disassembly of section .riscv.attributes: diff --git a/hw/system/spatz_cluster/test/bootrom.elf b/hw/system/spatz_cluster/test/bootrom.elf index 51a9616..7075551 100755 Binary files a/hw/system/spatz_cluster/test/bootrom.elf and b/hw/system/spatz_cluster/test/bootrom.elf differ diff --git a/sw/snRuntime/include/spatz_cluster_peripheral.h b/sw/snRuntime/include/spatz_cluster_peripheral.h index 508f7ca..8738c32 100644 --- a/sw/snRuntime/include/spatz_cluster_peripheral.h +++ b/sw/snRuntime/include/spatz_cluster_peripheral.h @@ -181,8 +181,17 @@ extern "C" { #define SPATZ_CLUSTER_PERIPHERAL_SPATZ_STATUS_REG_OFFSET 0x50 #define SPATZ_CLUSTER_PERIPHERAL_SPATZ_STATUS_SPATZ_CLUSTER_PROBE_BIT 0 +// Store cycle counts of kernels +#define SPATZ_CLUSTER_PERIPHERAL_SPATZ_CYCLE_REG_OFFSET 0x58 +#define SPATZ_CLUSTER_PERIPHERAL_SPATZ_CYCLE_SPATZ_CYC_MASK 0xffffffff +#define SPATZ_CLUSTER_PERIPHERAL_SPATZ_CYCLE_SPATZ_CYC_OFFSET 0 +#define SPATZ_CLUSTER_PERIPHERAL_SPATZ_CYCLE_SPATZ_CYC_FIELD \ + ((bitfield_field32_t){ \ + .mask = SPATZ_CLUSTER_PERIPHERAL_SPATZ_CYCLE_SPATZ_CYC_MASK, \ + .index = SPATZ_CLUSTER_PERIPHERAL_SPATZ_CYCLE_SPATZ_CYC_OFFSET}) + // Controls the cluster boot process. -#define SPATZ_CLUSTER_PERIPHERAL_CLUSTER_BOOT_CONTROL_REG_OFFSET 0x58 +#define SPATZ_CLUSTER_PERIPHERAL_CLUSTER_BOOT_CONTROL_REG_OFFSET 0x60 #define SPATZ_CLUSTER_PERIPHERAL_CLUSTER_BOOT_CONTROL_ENTRY_POINT_MASK \ 0xffffffff #define SPATZ_CLUSTER_PERIPHERAL_CLUSTER_BOOT_CONTROL_ENTRY_POINT_OFFSET 0 @@ -193,11 +202,11 @@ extern "C" { SPATZ_CLUSTER_PERIPHERAL_CLUSTER_BOOT_CONTROL_ENTRY_POINT_OFFSET}) // End of computation and exit status register -#define SPATZ_CLUSTER_PERIPHERAL_CLUSTER_EOC_EXIT_REG_OFFSET 0x60 +#define SPATZ_CLUSTER_PERIPHERAL_CLUSTER_EOC_EXIT_REG_OFFSET 0x68 #define SPATZ_CLUSTER_PERIPHERAL_CLUSTER_EOC_EXIT_EOC_EXIT_BIT 0 // Controls the configurations of L1 DCache SPM size. 
-#define SPATZ_CLUSTER_PERIPHERAL_CFG_L1D_SPM_REG_OFFSET 0x68 +#define SPATZ_CLUSTER_PERIPHERAL_CFG_L1D_SPM_REG_OFFSET 0x70 #define SPATZ_CLUSTER_PERIPHERAL_CFG_L1D_SPM_SPM_SIZE_MASK 0x3ff #define SPATZ_CLUSTER_PERIPHERAL_CFG_L1D_SPM_SPM_SIZE_OFFSET 0 #define SPATZ_CLUSTER_PERIPHERAL_CFG_L1D_SPM_SPM_SIZE_FIELD \ @@ -206,7 +215,7 @@ extern "C" { .index = SPATZ_CLUSTER_PERIPHERAL_CFG_L1D_SPM_SPM_SIZE_OFFSET}) // Controls the L1 DCache flushing and invalidation. -#define SPATZ_CLUSTER_PERIPHERAL_CFG_L1D_INSN_REG_OFFSET 0x70 +#define SPATZ_CLUSTER_PERIPHERAL_CFG_L1D_INSN_REG_OFFSET 0x78 #define SPATZ_CLUSTER_PERIPHERAL_CFG_L1D_INSN_INSN_MASK 0x3 #define SPATZ_CLUSTER_PERIPHERAL_CFG_L1D_INSN_INSN_OFFSET 0 #define SPATZ_CLUSTER_PERIPHERAL_CFG_L1D_INSN_INSN_FIELD \ @@ -215,15 +224,15 @@ extern "C" { .index = SPATZ_CLUSTER_PERIPHERAL_CFG_L1D_INSN_INSN_OFFSET}) // Controls the L1 DCache flushing and invalidation. -#define SPATZ_CLUSTER_PERIPHERAL_L1D_SPM_COMMIT_REG_OFFSET 0x78 +#define SPATZ_CLUSTER_PERIPHERAL_L1D_SPM_COMMIT_REG_OFFSET 0x80 #define SPATZ_CLUSTER_PERIPHERAL_L1D_SPM_COMMIT_COMMIT_BIT 0 // Controls the L1 DCache flushing and invalidation. 
-#define SPATZ_CLUSTER_PERIPHERAL_L1D_INSN_COMMIT_REG_OFFSET 0x80 +#define SPATZ_CLUSTER_PERIPHERAL_L1D_INSN_COMMIT_REG_OFFSET 0x88 #define SPATZ_CLUSTER_PERIPHERAL_L1D_INSN_COMMIT_COMMIT_BIT 0 // Indicate the status of flushing -#define SPATZ_CLUSTER_PERIPHERAL_L1D_FLUSH_STATUS_REG_OFFSET 0x88 +#define SPATZ_CLUSTER_PERIPHERAL_L1D_FLUSH_STATUS_REG_OFFSET 0x90 #define SPATZ_CLUSTER_PERIPHERAL_L1D_FLUSH_STATUS_STATUS_BIT 0 #ifdef __cplusplus diff --git a/sw/spatzBenchmarks/benchmark/benchmark.c b/sw/spatzBenchmarks/benchmark/benchmark.c index 5094acd..cbb136e 100644 --- a/sw/spatzBenchmarks/benchmark/benchmark.c +++ b/sw/spatzBenchmarks/benchmark/benchmark.c @@ -17,11 +17,11 @@ void start_kernel() { (uint32_t *)(_snrt_team_current->root->cluster_mem.end + SPATZ_CLUSTER_PERIPHERAL_SPATZ_STATUS_REG_OFFSET); *bench = 1; - snrt_start_perf_counter(SNRT_PERF_CNT0, SNRT_PERF_CNT_CYCLES, 0); + // snrt_start_perf_counter(SNRT_PERF_CNT0, SNRT_PERF_CNT_CYCLES, 0); } void stop_kernel() { - snrt_stop_perf_counter(SNRT_PERF_CNT0); + // snrt_stop_perf_counter(SNRT_PERF_CNT0); uint32_t *bench = (uint32_t *)(_snrt_team_current->root->cluster_mem.end + SPATZ_CLUSTER_PERIPHERAL_SPATZ_STATUS_REG_OFFSET); @@ -32,8 +32,17 @@ void stop_kernel() { size_t get_perf() { volatile uint32_t *perf = (uint32_t *)(_snrt_team_current->root->cluster_mem.end + - SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_0_REG_OFFSET); + SPATZ_CLUSTER_PERIPHERAL_SPATZ_CYCLE_REG_OFFSET); // There is a constant delay of using performance counter for cycle recording // substract the constant delay - return (*perf-60); + return (*perf); +} + +void write_cyc(uint32_t cyc) { + volatile uint32_t *perf = + (uint32_t *)(_snrt_team_current->root->cluster_mem.end + + SPATZ_CLUSTER_PERIPHERAL_SPATZ_CYCLE_REG_OFFSET); + // Store the given cycle count in the SPATZ_CYCLE peripheral register, the + // same address that get_perf() reads back. + *perf = cyc; } diff --git a/sw/spatzBenchmarks/dp-faxpy/main.c b/sw/spatzBenchmarks/dp-faxpy/main.c index 
eb80aac..0cfb04c 100644 --- a/sw/spatzBenchmarks/dp-faxpy/main.c +++ b/sw/spatzBenchmarks/dp-faxpy/main.c @@ -61,6 +61,7 @@ int main() { // Reset timer unsigned int timer = (unsigned int)-1; + uint32_t timer_start, timer_end; const unsigned int dim = axpy_l.M; const unsigned int dim_core = dim / num_cores; @@ -99,19 +100,24 @@ int main() { if (cid == 0) start_kernel(); + timer_start = benchmark_get_cycle(); + // Call AXPY faxpy_v64b(*a, x_int, y_int, dim_core); // Wait for all cores to finish snrt_cluster_hw_barrier(); + timer_end = benchmark_get_cycle(); + // End dump if (cid == 0) stop_kernel(); // Check and display results if (cid == 0) { - timer = get_perf(); + timer = timer_end - timer_start; + write_cyc(timer); long unsigned int performance = 1000 * 2 * dim / timer; long unsigned int utilization = performance / (2 * num_cores * 4); #ifdef PRINT_CHECK diff --git a/sw/spatzBenchmarks/dp-fdotp/main.c b/sw/spatzBenchmarks/dp-fdotp/main.c index e59e1da..015490a 100644 --- a/sw/spatzBenchmarks/dp-fdotp/main.c +++ b/sw/spatzBenchmarks/dp-fdotp/main.c @@ -23,7 +23,8 @@ #include DATAHEADER #include "kernel/fdotp.c" -#define USE_CACHE +// #define USE_CACHE +// #define ENABLE_PRINT double *a; double *b; @@ -136,6 +137,7 @@ int main() { if (cid == 0) { timer_tmp = benchmark_get_cycle() - timer_tmp; timer = (timer < timer_tmp) ? 
timer : timer_tmp; + write_cyc(timer); } snrt_cluster_hw_barrier(); @@ -145,16 +147,19 @@ int main() { if (cid == 0) { long unsigned int performance = 1000 * 2 * dotp_l.M / timer; long unsigned int utilization = performance / (2 * num_cores * 4); - + #ifdef ENABLE_PRINT printf("\n----- (%d) dp fdotp -----\n", dotp_l.M); printf("The execution took %u cycles.\n", timer); printf("The performance is %ld OP/1000cycle (%ld%%o utilization).\n", performance, utilization); + #endif } if (cid == 0) if (fp_check(result[0], dotp_result*measure_iter)) { + #ifdef ENABLE_PRINT printf("Error: Result = %f, Golden = %f\n", result[0], dotp_result*measure_iter); + #endif return -1; } diff --git a/sw/spatzBenchmarks/dp-fmatmul-4x4vl/main.c b/sw/spatzBenchmarks/dp-fmatmul-4x4vl/main.c index 2666e41..dc193be 100644 --- a/sw/spatzBenchmarks/dp-fmatmul-4x4vl/main.c +++ b/sw/spatzBenchmarks/dp-fmatmul-4x4vl/main.c @@ -23,7 +23,8 @@ #include DATAHEADER #include "kernel/dp-fmatmul.c" -#define USE_CACHE +// #define USE_CACHE +// #define ENABLE_PRINT #ifndef KERNEL_SIZE #define KERNEL_SIZE 4 @@ -162,11 +163,13 @@ int main() { long unsigned int performance = 1000 * 2 * gemm_l.M * gemm_l.N * gemm_l.K / timer; long unsigned int utilization = performance / (2 * num_cores * 4); - + write_cyc(timer); + #ifdef ENABLE_PRINT printf("\n----- (%dx%d) dp fmatmul -----\n", gemm_l.M, gemm_l.N); printf("The execution took %u cycles.\n", timer); printf("The performance is %ld OP/1000cycle (%ld%%o utilization).\n", performance, utilization); + #endif } if (cid == 0) { @@ -174,8 +177,11 @@ int main() { verify_matrix(c, (const double *)gemm_checksum, gemm_l.M, gemm_l.N); if (error != 0) { + #ifdef ENABLE_PRINT printf("Error core %d: c[%d]=%u\n", cid, error, (int)c[error]); + #endif + // Only the diagnostic print is optional; the error code is always returned. return error; } } diff --git a/sw/spatzBenchmarks/dp-fmatmul-8x2vl/main.c b/sw/spatzBenchmarks/dp-fmatmul-8x2vl/main.c index 5e6b339..57ba387 100644 --- a/sw/spatzBenchmarks/dp-fmatmul-8x2vl/main.c +++ 
b/sw/spatzBenchmarks/dp-fmatmul-8x2vl/main.c @@ -23,7 +23,7 @@ #include DATAHEADER #include "kernel/dp-fmatmul.c" -// #define USE_CACHE +#define USE_CACHE #ifndef KERNEL_SIZE #define KERNEL_SIZE 8 @@ -149,6 +149,7 @@ int main() { if (cid == 0) { if (timer_temp < timer) { timer = timer_temp; + write_cyc(timer); } } @@ -161,9 +162,9 @@ int main() { long unsigned int performance = 1000 * 2 * gemm_l.M * gemm_l.N * gemm_l.K / timer; long unsigned int utilization = performance / (2 * num_cores * 4); - + uint32_t cyc = get_perf(); printf("\n----- (%dx%d) dp fmatmul -----\n", gemm_l.M, gemm_l.N); - printf("The execution took %u cycles.\n", timer); + printf("The execution took %u/%u cycles.\n", timer, cyc); printf("The performance is %ld OP/1000cycle (%ld%%o utilization).\n", performance, utilization); } diff --git a/sw/spatzBenchmarks/include/benchmark.h b/sw/spatzBenchmarks/include/benchmark.h index 888b71b..cf3af09 100644 --- a/sw/spatzBenchmarks/include/benchmark.h +++ b/sw/spatzBenchmarks/include/benchmark.h @@ -12,3 +12,4 @@ size_t benchmark_get_cycle(); void start_kernel(); void stop_kernel(); size_t get_perf(); +void write_cyc(uint32_t cyc);