Skip to content

Commit

Permalink
[SW] Merge cache-based and spm-based axpy kernel into a single file. …
Browse files Browse the repository at this point in the history
…Update method of getting cycle counter to support Flamingo.
  • Loading branch information
DiyouS committed Dec 12, 2024
1 parent 967b1c4 commit f7f6c42
Show file tree
Hide file tree
Showing 12 changed files with 55 additions and 2,539 deletions.
5 changes: 0 additions & 5 deletions sw/spatzBenchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,6 @@ add_library(sdotp-bp-fmatmul sdotp-bp-fmatmul/kernel/sdotp-fmatmul.c)

add_library(dp-faxpy dp-faxpy/kernel/faxpy.c)

add_library(dp-faxpy-cache dp-faxpy-cache/kernel/faxpy.c)

add_library(dp-fdotp dp-fdotp/kernel/fdotp.c)

add_library(dp-fconv2d dp-fconv2d/kernel/fconv2d.c)
Expand Down Expand Up @@ -136,9 +134,6 @@ add_spatz_test_threeParam(sdotp-bp-fmatmul sdotp-bp-fmatmul/main.c 128 256 128)
add_spatz_test_oneParam(dp-faxpy dp-faxpy/main.c 256)
add_spatz_test_oneParam(dp-faxpy dp-faxpy/main.c 1024)

add_spatz_test_oneParam(dp-faxpy-cache dp-faxpy-cache/main.c 256)
add_spatz_test_oneParam(dp-faxpy-cache dp-faxpy-cache/main.c 1024)

add_spatz_test_oneParam(dp-fdotp dp-fdotp/main.c 128)
add_spatz_test_oneParam(dp-fdotp dp-fdotp/main.c 4096)

Expand Down
13 changes: 13 additions & 0 deletions sw/spatzBenchmarks/benchmark/benchmark.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include "encoding.h"
#include "spatz_cluster_peripheral.h"
#include "team.h"
#include "perf_cnt.h"

extern __thread struct snrt_team *_snrt_team_current;

Expand All @@ -16,11 +17,23 @@ void start_kernel() {
(uint32_t *)(_snrt_team_current->root->cluster_mem.end +
SPATZ_CLUSTER_PERIPHERAL_SPATZ_STATUS_REG_OFFSET);
*bench = 1;
snrt_start_perf_counter(SNRT_PERF_CNT0, SNRT_PERF_CNT_CYCLES, 0);
}

/*
 * Stop the cycle measurement and clear the Spatz status register.
 *
 * Stops performance counter SNRT_PERF_CNT0 first, then writes 0 to the
 * cluster peripheral register located at
 * cluster_mem.end + SPATZ_CLUSTER_PERIPHERAL_SPATZ_STATUS_REG_OFFSET
 * (start_kernel() writes 1 to the same register).
 * NOTE(review): presumably the 1/0 values mark the benchmarked region for
 * the hardware/testbench -- confirm against the peripheral documentation.
 */
void stop_kernel() {
  snrt_stop_perf_counter(SNRT_PERF_CNT0);

  /* The target is a memory-mapped register: access it through a
   * volatile-qualified pointer so the store is neither elided nor reordered
   * with respect to other volatile accesses. */
  volatile uint32_t *bench =
      (volatile uint32_t *)(_snrt_team_current->root->cluster_mem.end +
                            SPATZ_CLUSTER_PERIPHERAL_SPATZ_STATUS_REG_OFFSET);
  *bench = 0;
}


/*
 * Read performance counter 0 and return its value with the constant
 * measurement overhead removed.
 *
 * The raw value is read from the cluster peripheral register located at
 * cluster_mem.end + SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_0_REG_OFFSET.
 *
 * Returns the raw counter value minus the fixed overhead, or 0 when the raw
 * value does not exceed the overhead (the unsigned subtraction would
 * otherwise wrap around to a huge bogus cycle count).
 */
size_t get_perf() {
  /* Using the performance counter for cycle recording adds a constant delay;
   * subtract it from the reading. */
  enum { PERF_CNT_OVERHEAD_CYCLES = 60 };

  volatile uint32_t *perf =
      (volatile uint32_t *)(_snrt_team_current->root->cluster_mem.end +
                            SPATZ_CLUSTER_PERIPHERAL_PERF_COUNTER_0_REG_OFFSET);

  /* Read the register exactly once so the check and the subtraction operate
   * on the same sample. */
  const uint32_t raw = *perf;

  if (raw < (uint32_t)PERF_CNT_OVERHEAD_CYCLES) {
    return 0;
  }
  return (size_t)(raw - (uint32_t)PERF_CNT_OVERHEAD_CYCLES);
}
1,047 changes: 0 additions & 1,047 deletions sw/spatzBenchmarks/dp-faxpy-cache/data/data_1024.h

This file was deleted.

279 changes: 0 additions & 279 deletions sw/spatzBenchmarks/dp-faxpy-cache/data/data_256.h

This file was deleted.

145 changes: 0 additions & 145 deletions sw/spatzBenchmarks/dp-faxpy-cache/data/layer.h

This file was deleted.

100 changes: 0 additions & 100 deletions sw/spatzBenchmarks/dp-faxpy-cache/kernel/faxpy.c

This file was deleted.

29 changes: 0 additions & 29 deletions sw/spatzBenchmarks/dp-faxpy-cache/kernel/faxpy.h

This file was deleted.

Loading

0 comments on commit f7f6c42

Please sign in to comment.