add simba and simba_small architectures

KULeuven-MICAS · Nov 8, 2024 · 56b1175 · 56b1175
1 parent b8ba3ea
commit 56b1175
Show file tree

Hide file tree

Showing 7 changed files with 445 additions and 0 deletions.
diff --git a/stream/inputs/examples/hardware/cores/simba_chiplet.yaml b/stream/inputs/examples/hardware/cores/simba_chiplet.yaml
@@ -0,0 +1,114 @@
+name: simba_chiplet
+
+memories:
+
+  weight_registers:  # smallest of the memories holding I2 (weights) in this core
+    size: 512  # 8 word-bits * 64 cluster_size
+    r_bw: 8  # equals the 8-bit word width used in the size computation
+    w_bw: 8  # same as r_bw
+    r_cost: 0.08  # TODO
+    w_cost: 0.08  # TODO
+    area: 0
+    r_port: 1  # one read port and one write port, no shared read/write port
+    w_port: 1
+    rw_port: 0
+    latency: 1
+    operands: [I2]  # Weights
+    ports:  # presumably fh = "from higher level", tl = "to lower level" - confirm against the parser
+      - fh: w_port_1
+        tl: r_port_1
+    served_dimensions: []  # no array dimension listed; presumably one instance per array unit - confirm
+
+  weight_buffer:  # I2 (weight) buffer, larger than weight_registers (32768 vs 512)
+    size: 32768  # 4096 depth * 8 width; NOTE(review): r_bw assumes 8 banks - if depth is per bank this would be 4096 * 64 = 262144, confirm
+    r_bw: 64  # 8 bits/bank * 8 banks
+    w_bw: 64  # same as r_bw
+    r_cost: 0.5  # TODO
+    w_cost: 0.5  # same value as r_cost, which is still marked TODO
+    area: 0
+    r_port: 1  # one read port and one write port, no shared read/write port
+    w_port: 1
+    rw_port: 0
+    latency: 1
+    operands: [I2]  # Weights
+    ports:  # presumably fh = "from higher level", tl = "to lower level" - confirm against the parser
+      - fh: w_port_1
+        tl: r_port_1
+    served_dimensions: [D3, D4]  # D3/D4 are the 8x8 dimensions inside one PE (see operational_array)
+
+  accumulation_buffer:  # smallest of the memories holding O (partial sums) in this core
+    size: 3072   # 128 depth * 24 width; NOTE(review): r_bw assumes 8 parallel units - if depth is per unit this would be 128 * 192 = 24576, confirm
+    r_bw: 192  # partial sums are 24 bits * 8 units reading in parallel
+    w_bw: 192  # same as r_bw
+    r_cost: 0.1  # TODO
+    w_cost: 0.1  # same value as r_cost, which is still marked TODO
+    area: 0
+    r_port: 1  # one read port and one write port, no shared read/write port
+    w_port: 1
+    rw_port: 0
+    latency: 1
+    operands: [O]  # Partial sums
+    ports:  # the O entry lists four directions (fh, tl, fl, th); the I1/I2-only memories list just fh and tl
+      - fh: w_port_1
+        tl: r_port_1
+        fl: w_port_1  # fl/th presumably cover data travelling back up the hierarchy - confirm
+        th: r_port_1
+    served_dimensions: [D3, D4]  # D3/D4 are the 8x8 dimensions inside one PE (see operational_array)
+
+  input_buffer:  # smallest of the memories holding I1 (input activations) in this core
+    size: 524288   # 8192 depth * 64 width
+    r_bw: 64  # equals the 64-bit width used in the size computation
+    w_bw: 64  # same as r_bw
+    r_cost: 7  # TODO
+    w_cost: 7  # TODO
+    area: 0
+    r_port: 1  # one read port and one write port, no shared read/write port
+    w_port: 1
+    rw_port: 0
+    latency: 1
+    operands: [I1]  # Input activations
+    ports:  # presumably fh = "from higher level", tl = "to lower level" - confirm against the parser
+      - fh: w_port_1
+        tl: r_port_1
+    served_dimensions: [D3, D4]  # D3/D4 are the 8x8 dimensions inside one PE (see operational_array)
+
+  global_buffer:  # largest memory defined for this core; the only one holding all three operands
+    size: 2097152  # 2048 depth * 256 width * 4 banks
+    r_bw: 1024  # 256 bits width * 4 banks
+    w_bw: 1024  # same as r_bw
+    r_cost: 10  # Example cost, refine with more details
+    w_cost: 10  # same example value as r_cost
+    area: 0
+    r_port: 1  # one read port and one write port, no shared read/write port
+    w_port: 1
+    rw_port: 0
+    latency: 1
+    operands: [I1, I2, O]  # Input activations, weights, partial sums
+    ports:  # three entries for three operands; presumably matched by position to I1, I2, O - confirm
+      - fh: w_port_1
+        tl: r_port_1
+      - fh: w_port_1
+        tl: r_port_1
+      - fh: w_port_1  # third entry is the only one with fl/th, mirroring the O entry of accumulation_buffer
+        tl: r_port_1
+        fl: w_port_1
+        th: r_port_1
+    served_dimensions: [D1, D2, D3, D4]  # lists every dimension declared in operational_array
+
+
+operational_array:
+  multiplier_energy: 0.04  # Refine with more accurate data if available
+  multiplier_area: 1  # unit
+  # D1/D2 = 4x4 PE array. Each PE has 8 vector MACs (D3) that process 8 elements (D4) in parallel
+  dimensions: [D1, D2, D3, D4]
+  sizes: [4, 4, 8, 8]  # one size per dimension, in order; 4 * 4 * 8 * 8 = 1024 array units in total
+
+dataflows:  # each factor below equals the size of its dimension in operational_array (4, 4, 8, 8)
+  D1:
+    - K, 4  # K is presumably the output-channel loop - confirm against the workload definition
+  D2:
+    - C, 4  # C is presumably the input-channel loop - confirm against the workload definition
+  D3:
+    - K, 8  # K appears on both D1 and D3: 4 * 8 = 32 in total
+  D4:
+    - C, 8  # C appears on both D2 and D4: 4 * 8 = 32 in total
diff --git a/stream/inputs/examples/hardware/cores/simba_offchip.yaml b/stream/inputs/examples/hardware/cores/simba_offchip.yaml
@@ -0,0 +1,31 @@
+name: simba_offchip
+
+memories:
+  dram:  # the only memory of this off-chip core; holds all three operands
+    size: 10000000000
+    r_bw: 64
+    w_bw: 64  # same as r_bw
+    r_cost: 100
+    w_cost: 100  # same as r_cost
+    area: 0
+    r_port: 0  # no dedicated read or write port ...
+    w_port: 0
+    rw_port: 1  # ... a single shared read/write port is used instead
+    latency: 1
+    operands: [I1, I2, O]
+    ports:  # every direction of every entry uses the one shared port rw_port_1
+      - fh: rw_port_1
+        tl: rw_port_1
+      - fh: rw_port_1
+        tl: rw_port_1
+      - fh: rw_port_1  # third entry is the only one with fl/th; presumably the O operand - confirm
+        tl: rw_port_1
+        fl: rw_port_1
+        th: rw_port_1
+    served_dimensions: [D1, D2]  # both dimensions declared in this file's operational_array
+
+operational_array:  # all values are zero; presumably this core performs no computation - confirm
+  multiplier_energy: 0
+  multiplier_area: 0
+  dimensions: [D1, D2]
+  sizes: [0, 0]  # zero units along both dimensions
diff --git a/stream/inputs/examples/hardware/simba.yaml b/stream/inputs/examples/hardware/simba.yaml
@@ -0,0 +1,107 @@
+name: simba_package
+
+cores:  # core id -> core definition file; every id uses the same chiplet file
+  # 36 simba chiplets (ids 0-35)
+  0: simba_chiplet.yaml
+  1: simba_chiplet.yaml
+  2: simba_chiplet.yaml
+  3: simba_chiplet.yaml
+  4: simba_chiplet.yaml
+  5: simba_chiplet.yaml
+  6: simba_chiplet.yaml
+  7: simba_chiplet.yaml
+  8: simba_chiplet.yaml
+  9: simba_chiplet.yaml
+  10: simba_chiplet.yaml
+  11: simba_chiplet.yaml
+  12: simba_chiplet.yaml
+  13: simba_chiplet.yaml
+  14: simba_chiplet.yaml
+  15: simba_chiplet.yaml
+  16: simba_chiplet.yaml
+  17: simba_chiplet.yaml
+  18: simba_chiplet.yaml
+  19: simba_chiplet.yaml
+  20: simba_chiplet.yaml
+  21: simba_chiplet.yaml
+  22: simba_chiplet.yaml
+  23: simba_chiplet.yaml
+  24: simba_chiplet.yaml
+  25: simba_chiplet.yaml
+  26: simba_chiplet.yaml
+  27: simba_chiplet.yaml
+  28: simba_chiplet.yaml
+  29: simba_chiplet.yaml
+  30: simba_chiplet.yaml
+  31: simba_chiplet.yaml
+  32: simba_chiplet.yaml
+  33: simba_chiplet.yaml
+  34: simba_chiplet.yaml
+  35: simba_chiplet.yaml
+offchip_core: simba_offchip.yaml  # separate core definition, not part of the numbered cores above
+
+core_connectivity:  # one "a, b" entry per pair of connected core ids
+  # 2D mesh: 6x6 grid in row-major id order; each core links to its right neighbour (id+1) and the core below (id+6)
+  - 0, 1  # row 0 (cores 0-5)
+  - 0, 6
+  - 1, 2
+  - 1, 7
+  - 2, 3
+  - 2, 8
+  - 3, 4
+  - 3, 9
+  - 4, 5
+  - 4, 10
+  - 5, 11  # last core of a row has no right neighbour, only the link to the core below
+  - 6, 7  # row 1 (cores 6-11)
+  - 6, 12
+  - 7, 8
+  - 7, 13
+  - 8, 9
+  - 8, 14
+  - 9, 10
+  - 9, 15
+  - 10, 11
+  - 10, 16
+  - 11, 17
+  - 12, 13  # row 2 (cores 12-17)
+  - 12, 18
+  - 13, 14
+  - 13, 19
+  - 14, 15
+  - 14, 20
+  - 15, 16
+  - 15, 21
+  - 16, 17
+  - 16, 22
+  - 17, 23
+  - 18, 19  # row 3 (cores 18-23)
+  - 18, 24
+  - 19, 20
+  - 19, 25
+  - 20, 21
+  - 20, 26
+  - 21, 22
+  - 21, 27
+  - 22, 23
+  - 22, 28
+  - 23, 29
+  - 24, 25  # row 4 (cores 24-29)
+  - 24, 30
+  - 25, 26
+  - 25, 31
+  - 26, 27
+  - 26, 32
+  - 27, 28
+  - 27, 33
+  - 28, 29
+  - 28, 34
+  - 29, 35
+  - 30, 31  # row 5 (cores 30-35): bottom row, horizontal links only
+  - 31, 32
+  - 32, 33
+  - 33, 34
+  - 34, 35
+
+bandwidth: 32  # presumably applies to the links listed in core_connectivity; unit not stated here - confirm
+unit_energy_cost: 0  # zero; presumably means core-to-core transfers are not charged any energy - confirm
diff --git a/stream/inputs/examples/hardware/simba_small.yaml b/stream/inputs/examples/hardware/simba_small.yaml
@@ -0,0 +1,19 @@
+name: simba_package_small
+
+cores:  # core id -> core definition file; every id uses the same chiplet file
+  # 4 simba chiplets (ids 0-3)
+  0: simba_chiplet.yaml
+  1: simba_chiplet.yaml
+  2: simba_chiplet.yaml
+  3: simba_chiplet.yaml
+offchip_core: simba_offchip.yaml  # separate core definition, not part of the numbered cores above
+
+core_connectivity:  # one "a, b" entry per pair of connected core ids
+  # 2D mesh: 2x2 grid in row-major id order (0 1 / 2 3)
+  - 0, 1  # top row
+  - 0, 2  # left column
+  - 1, 3  # right column
+  - 2, 3  # bottom row
+
+bandwidth: 32  # presumably applies to the links listed in core_connectivity; unit not stated here - confirm
+unit_energy_cost: 0  # zero; presumably means core-to-core transfers are not charged any energy - confirm
diff --git a/stream/inputs/examples/mapping/simba.yaml b/stream/inputs/examples/mapping/simba.yaml
@@ -0,0 +1,55 @@
+- name: default  # presumably the fallback for layer types without their own entry - confirm; every entry in this list allocates all 36 cores (0-35)
+  core_allocation: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35]
+  intra_core_tiling:
+    - D, all  # "all" presumably tiles the full extent of the dimension - confirm against the mapping parser
+  inter_core_tiling:
+    - K, *  # "*" presumably leaves the split factor across cores to the tool - confirm
+
+- name: Conv  # OY within a core, K across cores
+  core_allocation: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35]
+  intra_core_tiling:
+    - OY, all
+  inter_core_tiling:
+    - K, *
+
+- name: Gemm  # D within a core, H across cores
+  core_allocation: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35]
+  intra_core_tiling:
+    - D, all
+  inter_core_tiling:
+    - H, *
+
+- name: Pool  # same tiling as Conv
+  core_allocation: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35]
+  intra_core_tiling:
+    - OY, all
+  inter_core_tiling:
+    - K, *
+
+- name: MaxPool  # same tiling as Conv
+  core_allocation: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35]
+  intra_core_tiling:
+    - OY, all
+  inter_core_tiling:
+    - K, *
+
+- name: AveragePool  # same tiling as Conv
+  core_allocation: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35]
+  intra_core_tiling:
+    - OY, all
+  inter_core_tiling:
+    - K, *
+
+- name: GlobalAveragePool  # same tiling as Conv
+  core_allocation: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35]
+  intra_core_tiling:
+    - OY, all
+  inter_core_tiling:
+    - K, *
+
+- name: Add  # same tiling as Gemm
+  core_allocation: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35]
+  intra_core_tiling:
+    - D, all
+  inter_core_tiling:
+    - H, *
diff --git a/stream/inputs/examples/mapping/simba_small.yaml b/stream/inputs/examples/mapping/simba_small.yaml
@@ -0,0 +1,56 @@
+- name: default  # presumably the fallback for layer types without their own entry - confirm; every entry in this list allocates all 4 cores (0-3)
+  core_allocation: [0, 1, 2, 3]
+  intra_core_tiling:
+    - D, all  # "all" presumably tiles the full extent of the dimension - confirm against the mapping parser
+  inter_core_tiling:
+    - K, *  # "*" presumably leaves the split factor across cores to the tool - confirm
+
+- name: Conv  # OY within a core, K across cores
+  core_allocation: [0, 1, 2, 3]
+  intra_core_tiling:
+    - OY, all
+  inter_core_tiling:
+    - K, *
+
+- name: Gemm  # D within a core, H across cores
+  core_allocation: [0, 1, 2, 3]
+  intra_core_tiling:
+    - D, all
+  inter_core_tiling:
+    - H, *
+
+- name: Pool  # same tiling as Conv
+  core_allocation: [0, 1, 2, 3]
+  intra_core_tiling:
+    - OY, all
+  inter_core_tiling:
+    - K, *
+
+- name: MaxPool  # same tiling as Conv
+  core_allocation: [0, 1, 2, 3]
+  intra_core_tiling:
+    - OY, all
+  inter_core_tiling:
+    - K, *
+
+- name: AveragePool  # same tiling as Conv
+  core_allocation: [0, 1, 2, 3]
+  intra_core_tiling:
+    - OY, all
+  inter_core_tiling:
+    - K, *
+
+- name: GlobalAveragePool  # same tiling as Conv
+  core_allocation: [0, 1, 2, 3]
+  intra_core_tiling:
+    - OY, all
+  inter_core_tiling:
+    - K, *
+
+- name: Add  # same tiling as Gemm
+  core_allocation: [0, 1, 2, 3]
+
+  intra_core_tiling:
+    - D, all
+  inter_core_tiling:
+    - H, *