Merge pull request #115 from PAICookers/feat-ann

Feature: support ANN deployment
PAICookers · Jul 22, 2024 · 504aaf1 · 504aaf1
2 parents 064beb0 + 36210ac
commit 504aaf1
Show file tree

Hide file tree

Showing 39 changed files with 1,722 additions and 1,250 deletions.
diff --git a/.github/workflows/poetry-publish.yml b/.github/workflows/poetry-publish.yml
@@ -27,6 +27,5 @@ jobs:
       - name: Publish python poetry package
         uses: JRubics/poetry-publish@v2.0
         with:
-          python_version: "3.11"
+          poetry_install_options: "--sync"
           pypi_token: ${{ secrets.PYPI_API_TOKEN }}
-          ignore_dev_requirements: "yes"
diff --git a/.github/workflows/pytest-ci.yml b/.github/workflows/pytest-ci.yml
@@ -31,7 +31,7 @@ jobs:
 
       - name: Install test dependencies
         run: |
-          poetry install --with test
+          poetry install --with test --sync
 
       - name: Run pytest
         uses: pavelzw/pytest-action@v2

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -46,7 +46,7 @@ repos:
         args: [--pytest-test-first]
       - id: requirements-txt-fixer
       - id: pretty-format-json
-        args: [--autofix]
+        args: [--autofix, --indent 2]
       - id: no-commit-to-branch
 
   - repo: https://github.com/python-poetry/poetry

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -21,7 +21,7 @@
 
 ## v1.0.0a6
 
-- 新增 `Always1Neuron` 神经元，该神经元将在工作期间持续输出1，不得单独存在，需存在前向突触与其连接。
+- 新增 `Always1Neuron` 神经元，该神经元将在工作期间持续输出1，不得单独存在，需存在前向突触与其连接
 
 ## v1.0.0a7
 
@@ -61,13 +61,18 @@
 
 - 支持神经元的随机突触整合、随机阈值、随机泄露配置的设置，但不支持仿真
 - 支持多芯片部署
-- 重构路由算法，现在的算法不会出现路由死锁
+- 重构路由算法，现在路由不会出现死锁
 - 行为变更：
 
   1. 子网络现在直接在主网络内部 `self.subnet=...` 例化即可
   2. 编译选项现在直接通过 `paibox.Mapper.compile(...)` 传入，默认配置不变
-  3. 在 `paibox.Mapper.export()` 中使用 `split_by_chip` 指定配置帧文件是否以芯片分割，默认不分割。原 `split_by_coord` 弃用。
+  3. 在 `paibox.Mapper.export()` 中使用 `split_by_chip` 指定配置帧文件是否以芯片分割，默认不分割。原 `split_by_coord` 弃用
 
 ## v1.1.1
 
 - 修复对权重RAM错误的配置
+
+## v1.2.0a1
+
+- 提高 `paicorelib` 依赖版本至 `~1.3`
+- 支持 ANN 网络的构建与部署
diff --git a/docs/Guide-of-PAIBox.md b/docs/Guide-of-PAIBox.md
@@ -1,3 +1,13 @@
+<style>
+.center
+{
+    width: auto;
+    display: table;
+    margin-left: auto;
+    margin-right: auto;
+}
+</style>
+
 <div align="center">
 # PAIBox使用指南
 
@@ -9,7 +19,7 @@
 python = "^3.9"
 pydantic = "^2.0.3"
 numpy = "^1.26.0"
-paicorelib = "^1.1.6"
+paicorelib = "~1.3"
 ```
 
 可选依赖：
@@ -67,11 +77,49 @@ n1 = pb.IF(shape=10, threshold=127, reset_v=0, neg_threshold=-100, keep_shape=Fa
 - `delay`：设定神经元输出的延迟。默认为1，即本时间步的计算结果，**下一时间步**传递至后继节点。
 - `tick_wait_start`：设定神经元启动时间。神经元将在第 `T` 个时间步时启动。0表示不启动。默认为1。
 - `tick_wait_end`：设定神经元持续工作时长。神经元将持续工作 `T` 个时间步。0表示**持续工作**。默认为0。
-- `unrolling_factor`：该参数与后端流程相关。展开因子表示神经元将被展开，部署至更多的物理核上，以降低延迟并提高吞吐率。
+- `unrolling_factor`：展开因子表示神经元将被展开，部署至更多的物理核上，以降低延迟并提高吞吐率。该参数仅与后端流程相关。默认为1。
 - `overflow_strict`：溢出严格模式。用于设置是否严格检查运算过程中神经元膜电位出现溢出的情况。若启用，遇到溢出将报错，否则将遵循硬件行为进行处理。默认为 `False`。
 - `keep_shape`：是否在仿真记录数据时保持尺寸信息，默认为 `True`。实际进行运算的尺寸仍视为一维。
 - `name`：神经元的名称。可选参数。
 
+神经元的部分行为由芯片计算核的某些配置项决定：输入数据位数、输出数据位数、SNN使能。芯片计算核的工作模式即由这些参数决定。例如，SNN模式则是输入数据、输出数据位数均为1bit，SNN使能为1。对应关系如下表所列：
+
+<p align="center">计算核配置项与工作模式对应表</p>
+<div class="center">
+
+|            模式             | `input_width` | `spike_width` | `snn_en` |
+| :-------------------------: | :-----------: | :-----------: | :------: |
+|            BANN             |       0       |       0       |    0     |
+|             SNN             |       0       |       0       |    1     |
+|       BANN/SNN to ANN       |       0       |       1       |    0     |
+| BANN/SNN to SNN with values |       0       |       1       |    1     |
+|       ANN to BANN/SNN       |       1       |       0       |    0     |
+|             ANN             |       1       |       1       |    0     |
+|          Undefined          |       1       |      0/1      |    1     |
+
+</div>
+
+- `input_width`：处理核输入数据位数，1或8。为1表示该处理核的输入数据为脉冲，反之为 8bit 无符号数。默认为1。
+- `spike_width`：神经元输出数据位数，1或8。为1表示该处理核输出数据（从神经元输出）为脉冲，反之为 8bit 无符号数。默认为1。
+- `snn_en`：SNN 模式使能。当开启时，神经元内的计算保留上一时刻膜电位信息，反之不保留（ANN 计算模式不需要上一时刻膜电位信息）。默认为 `True`。
+- `bit_truncation`：神经元输出的 8bit 无符号数的截断位置。默认为8，该参数仅在 `spike_width=8` 时生效。由于膜电位为 30bit 有符号数，因此需要截取 8bit 作为神经元最终的输出。若膜电位的最高有效位高于所截取的位置，则输出255。该截断操作类似于有上限的斜率可调的 ReLU 操作。`bit_truncation` 与截取位置的对应关系如下表所列：
+
+<p align="center">截取位置对应表</p>
+<div class="center">
+
+| `bit_truncation` |   截取位置    |
+| :--------------: | :-----------: |
+|        0         |     8'h0      |
+|        1         |  {[0], 7'h0}  |
+|        2         | {[1:0], 6'h0} |
+|        ……        |      ……       |
+|        8         |     [7:0]     |
+|        9         |     [8:1]     |
+|        ……        |      ……       |
+|        29        |    [28:21]    |
+
+</div>
+
 #### LIF
 
 LIF 神经元实现了“泄露-积分-发射”神经元模型，其调用方式及参数如下：
@@ -912,7 +960,7 @@ mapper.clear()
 
 - `input`：输入节点信息字典。
 - `output`：输出目的地信息字典。
-- `memebers`：中间层所在物理核的配置项字典。
+- `members`：中间层所在物理核的配置项字典。
 - `inherent_timestep`：网络的最长时间步。
 - `n_core_required`：网络**需要**的物理核数目。
 - `n_core_occupied`：网络**实际占用**的物理核数目。

diff --git a/paibox/backend/checker.py b/paibox/backend/checker.py
@@ -1,5 +1,5 @@
 from paicorelib import LCN_EX
-from paicorelib import WeightPrecision as WP
+from paicorelib import WeightWidth as WW
 
 __all__ = ["ConfigChecker"]
 
@@ -10,7 +10,7 @@ class _Checker:
 
 class ConfigChecker(_Checker):
     @staticmethod
-    def n_config_estimate(n_neuron: int, wp: WP, lcn_ex: LCN_EX) -> int:
+    def n_config_estimate(n_neuron: int, wp: WW, lcn_ex: LCN_EX) -> int:
         _base = n_neuron * (1 << wp) * (1 << lcn_ex)
 
         n_total = 3 + 3 + (1 + 4 * _base) + (1 + 18 * _base)

diff --git a/paibox/backend/conf_template.py b/paibox/backend/conf_template.py
@@ -1,9 +1,10 @@
 import sys
 from collections import defaultdict
+from collections.abc import Sequence
 from dataclasses import asdict, dataclass
 from enum import Enum
 from pathlib import Path
-from typing import Any, ClassVar, NamedTuple, TypedDict, Union
+from typing import Any, NamedTuple, TypedDict, Union
 
 import numpy as np
 from numpy.typing import NDArray
@@ -16,6 +17,7 @@
     InputWidthFormat,
     MaxPoolingEnable,
     NeuronAttrs,
+    NeuronConf,
     NeuronDestInfo,
     ParamsReg,
 )
@@ -24,25 +26,23 @@
     RoutingCoord,
     SNNModeEnable,
     SpikeWidthFormat,
-    WeightPrecision,
+    WeightWidth,
     get_replication_id,
 )
+from paicorelib.framelib import OfflineFrameGen
 from paicorelib.framelib import types as flib_types
-from paicorelib.framelib.frame_gen import OfflineFrameGen
 from paicorelib.framelib.utils import _mask, np2bin, np2npy, np2txt
 
 if sys.version_info >= (3, 10):
     from typing import TypeAlias
 else:
     from typing_extensions import TypeAlias
 
-from typing_extensions import NotRequired
-
 from paibox.components import Neuron
-from paibox.utils import bit_reversal
+from paibox.utils import reverse_8bit
 
 from .context import _BACKEND_CONTEXT
-from .types import AxonCoord, NeuSegment, NodeName
+from .types import AxonCoord, NeuSegment, NodeName, WRAMPackedType
 
 try:
     import orjson
@@ -99,7 +99,7 @@ class CoreConfig(NamedTuple):
     """Extra parameters for debugging."""
 
     name: str
-    weight_precision: WeightPrecision
+    weight_width: WeightWidth
     lcn_extension: LCN_EX
     input_width_format: InputWidthFormat
     spike_width_format: SpikeWidthFormat
@@ -176,16 +176,6 @@ class OutputNeuronDest(NamedTuple):
     end: AxonCoord
 
 
-try:
-    from paicorelib.ram_model import NeuronConf as _NeuronConf
-except ImportError:
-    from pydantic import BaseModel
-
-    class _NeuronConf(BaseModel):
-        attrs: NeuronAttrs
-        dest_info: NeuronDestInfo
-
-
 class NeuronConfig(NamedTuple):
     _extra_params = (
         "n_neuron",
@@ -240,8 +230,8 @@ def encapsulate(
             neu_seg.n_neuron, neu_seg.addr_ram, neu_seg.offset, attrs, neuron_dest_info
         )
 
-    def export(self) -> _NeuronConf:
-        return _NeuronConf(attrs=self.neuron_attrs, dest_info=self.neuron_dest_info)
+    def export(self) -> NeuronConf:
+        return NeuronConf(attrs=self.neuron_attrs, dest_info=self.neuron_dest_info)
 
     def to_json(self) -> Union[str, bytes]:
         """Dump the configs into json for debugging."""
@@ -261,23 +251,23 @@ class CorePlmConfig(NamedTuple):
     """Extra parameters for debugging."""
 
     random_seed: int
-    weight_ram: NDArray[np.uint64]
+    weight_ram: WRAMPackedType
     params_reg: ParamsReg
     neuron_configs: dict[Neuron, NeuronConfig]
 
     @classmethod
     def encapsulate(
         cls,
         random_seed: int,
-        weight_ram: NDArray[np.uint64],
-        core_config: CoreConfig,
-        neuron_configs: dict[Neuron, NeuronConfig],
+        weight_ram: WRAMPackedType,
+        core_cfg: CoreConfig,
+        neuron_cfg: dict[Neuron, NeuronConfig],
     ):
         return cls(
             random_seed,
             weight_ram,
-            ParamsReg.model_validate(core_config._asdict(), strict=True),
-            neuron_configs,
+            ParamsReg.model_validate(core_cfg._asdict(), strict=True),
+            neuron_cfg,
         )
 
     def export(self) -> dict[str, Any]:
@@ -288,11 +278,11 @@ def export(self) -> dict[str, Any]:
             **self.params_reg.model_dump(by_alias=True),
         }
 
-        for neu, neu_config in self.neuron_configs.items():
+        for neu, neu_cfg in self.neuron_configs.items():
             if _USE_ORJSON:
-                dict_["neuron_rams"][neu.name] = orjson.loads(neu_config.to_json())
+                dict_["neuron_rams"][neu.name] = orjson.loads(neu_cfg.to_json())
             else:
-                dict_["neuron_rams"][neu.name] = json.loads(neu_config.to_json())
+                dict_["neuron_rams"][neu.name] = json.loads(neu_cfg.to_json())
 
         return dict_
 
@@ -306,23 +296,6 @@ def to_json(self) -> dict[str, Any]:
         return dict_
 
 
-class EmptyCorePlmConfig(CorePlmConfig):
-    _default_seed: ClassVar[int] = 0
-    _default_zero_wram: ClassVar[NDArray[np.uint64]] = np.zeros(
-        (HwConfig.ADDR_RAM_MAX, 18), dtype=np.uint64
-    )
-    _default_neuron_conf = {}  # don't care
-
-    @classmethod
-    def encapsulate(cls, core_config: CoreConfig):
-        return cls(
-            cls._default_seed,
-            cls._default_zero_wram,
-            ParamsReg.model_validate(core_config._asdict(), strict=True),
-            cls._default_neuron_conf,
-        )
-
-
 InputNodeConf: TypeAlias = dict[NodeName, InputNeuronDest]
 OutputDestConf: TypeAlias = dict[NodeName, dict[CoordAddr, NeuronDestInfo]]
 CorePlmConfInChip: TypeAlias = dict[Coord, CorePlmConfig]
@@ -357,7 +330,7 @@ def gen_config_frames_by_coreconf(
     write_to_file: bool,
     fp: Path,
     split_by_chip: bool,
-    formats: list[str],
+    formats: Sequence[str],
 ) -> dict[ChipCoord, list[FrameArrayType]]:
     """Generate configuration frames from the given `CorePlmConfig`."""
 
@@ -386,19 +359,32 @@ def _write_to_f(name: str, array: FrameArrayType) -> None:
             )
 
             # 3. Iterate all the neuron segments inside the physical core.
+            # FIXME Unfortunately, at present, only the corresponding NRAM can be written based on
+            # the neuron configurations, and the case where the NRAM address is >= 512 cannot be handled,
+            # that is, when some neurons need to occupy the WRAM, which is inconsistent with the current logic.
+            # The additional neuron configurations have already been written to the WRAM within the CorePlacement.
+            # NOTE The meaning of 'n_neuron' in function 'gen_config_frame3' is the number of neurons in
+            # the NRAM. See notes of function '_weight_ram_mapping' of `CorePlacement` in file
+            # backend/placement.py for details.
             config_frame_type3 = []
             for neu_conf in v.neuron_configs.values():
+                # The actual number of neurons placed in NRAM.
+                _n_neuron_nram = (
+                    HwConfig.ADDR_RAM_MAX + 1
+                    if neu_conf.n_neuron > HwConfig.ADDR_RAM_MAX + 1
+                    else neu_conf.n_neuron
+                )
+
                 config_frame_type3.append(
                     OfflineFrameGen.gen_config_frame3(
                         chip_coord,
                         core_coord,
                         _RID_UNSET,
                         neu_conf.addr_offset,
-                        neu_conf.n_neuron,
+                        _n_neuron_nram,
                         neu_conf.neuron_attrs,
                         neu_conf.neuron_dest_info,
-                        lcn_ex=v.params_reg.lcn_extension,
-                        weight_precision=v.params_reg.weight_precision,
+                        v.params_reg.n_repeat_nram,
                     )
                 )
 
@@ -412,17 +398,18 @@ def _write_to_f(name: str, array: FrameArrayType) -> None:
                 frame3 = np.array([], dtype=FRAME_DTYPE)
 
             # 4. Only one config frame type IV for each physical core.
-            n_addr_write = v.params_reg.num_dendrite  # The number of address to write
-            if n_addr_write > 0:
+            # NOTE To avoid logical complications, write the entire WRAM rather than just the valid part
+            # of the weights, because some neuron configurations are also stored in the WRAM.
+            if v.params_reg.num_dendrite > 0:
                 config_frame_type4 = OfflineFrameGen.gen_config_frame4(
                     chip_coord,
                     core_coord,
                     _RID_UNSET,
                     0,
-                    18 * n_addr_write,
-                    v.weight_ram[:n_addr_write],
+                    18 * (HwConfig.ADDR_RAM_MAX + 1),
+                    v.weight_ram[: HwConfig.ADDR_RAM_MAX + 1],
                 )
-            else:
+            else:  # empty core placement
                 config_frame_type4 = None
 
             if config_frame_type4:
@@ -603,7 +590,7 @@ def to_clk_en_L2_u8(L2_inchip: list[RoutingCoord]) -> list[int]:
         for _ in range(8):
             u8 = bitmap & _mask(8)
             bitmap >>= 8
-            clk_en.append(bit_reversal(u8))
+            clk_en.append(reverse_8bit(u8))
 
         return clk_en