Merge pull request #1 from JiacongSun/master

Update: restructure the zigzag stages and add api for zigzag-imc. Fix several issues regarding D3.
KULeuven-MICAS · Nov 13, 2023 · 5d71984 · 5d71984
2 parents dd0bea5 + 012ad37
commit 5d71984
Show file tree

Hide file tree

Showing 142 changed files with 2,136 additions and 1,330 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,6 +1,7 @@
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
+*.pyc
 *$py.class
 .idea
 
@@ -153,3 +154,6 @@ html/
 
 # cacti cache files
 zigzag/classes/cacti/cacti_master/self_gen/
+
+# debug file
+debug.py
diff --git a/.idea/workspace.xml b/.idea/workspace.xml
diff --git a/README.md b/README.md
@@ -22,6 +22,8 @@ The repository includes three examples provided in the `run.sh` script, which ar
 
 The output will be saved under `outputs/` folder.
 
+An API is also provided in `zigzag/api.py` and can be called directly from external code.
+
 **Note**: Since CACTI is not supported on Windows, please run these examples on Linux; otherwise, user-defined memory cost values should be provided in the input hardware file.
 
 ## New features

diff --git a/main_onnx.py b/main_onnx.py
@@ -31,11 +31,11 @@
     AcceleratorParserStage,  # Parses the accelerator
     # PickleSaveStage, # Save CMEs to a pickle file
     CompleteSaveStage,  # Saves all received CMEs information to a json
-    SearchNoUseMemStage, # Detect unnecessary memory instances
+    SearchUnusedMemoryStage, # Detect unnecessary memory instances
     WorkloadStage,  # Iterates through the different layers in the workload
-    RemoveNoUseMemStage, # Remove unnecessary memory instances
+    RemoveUnusedMemoryStage, # Remove unnecessary memory instances
     MinimalEDPStage, # Reduces all CMEs, returning minimal EDP one
-    SpatialMappingAutoGeneratorStage,  # Generates multiple spatial mappings (SM)
+    SpatialMappingGeneratorStage,  # Generates multiple spatial mappings (SM)
     LomaStage,  # Generates multiple temporal mappings (TM)
     CostModelStage  # Evaluates generated SM and TM through cost model
 ],
@@ -46,9 +46,9 @@
     pickle_filename=f"outputs/{experiment_id}-layer_?.pkl",  # output file save pattern
     loma_lpf_limit=6,  # required by LomaStage
     loma_show_progress_bar=True,  # shows a progress bar while iterating over temporal mappings
-    enable_mix_sm=True,  # True: enable generating mix spatial mapping. False: single layer dim mapping during the autogeneration
-    enable_speedup=False,  # True: only keep 3 sm with the highest hardware utilization to speedup simulation time
-    enable_ox_unroll=True,  # True: enable OX/OY unrolling when automatically generating sm
+    enable_mix_spatial_mapping_generation=True,  # True: enable generating mix spatial mapping. False: single layer dim mapping during the autogeneration
+    maximize_hardware_utilization=False,  # True: only keep 2 sm with the highest hardware utilization to speedup simulation time
+    enable_weight_diagonal_mapping=True,  # True: enable OX/OY unrolling when automatically generating sm
 )
 
 # Launch the MainStage

diff --git a/zigzag/__pycache__/__init__.cpython-310.pyc b/zigzag/__pycache__/__init__.cpython-310.pyc
diff --git a/zigzag/__pycache__/__init__.cpython-311.pyc b/zigzag/__pycache__/__init__.cpython-311.pyc
diff --git a/zigzag/__pycache__/utils.cpython-310.pyc b/zigzag/__pycache__/utils.cpython-310.pyc
diff --git a/zigzag/__pycache__/utils.cpython-311.pyc b/zigzag/__pycache__/utils.cpython-311.pyc
diff --git a/zigzag/api.py b/zigzag/api.py
@@ -47,7 +47,9 @@ def get_hardware_performance_zigzag(
             SimpleSaveStage,  # Save the summed CME energy and latency to a json
             PickleSaveStage,  # Save all received CMEs in a list to a pickle file
             SumStage,  # Sum up the received best CME across all layers of the workload
+            SearchUnusedMemoryStage,  # Detect unnecessary memory instances
             WorkloadStage,  # Iterate through the different layers in the workload
+            RemoveUnusedMemoryStage,  # Remove unnecessary memory instances
             CompleteSaveStage,  # Save each processed layer to a json
             opt_stage,  # Reduce all CMEs, returning minimal energy/latency one
             SpatialMappingGeneratorStage,  # Generate multiple spatial mappings (SM)
@@ -62,6 +64,9 @@ def get_hardware_performance_zigzag(
         dump_filename_pattern=dump_filename_pattern,  # output file save pattern
         pickle_filename=pickle_filename,  # filename for pickled list of cmes
         loma_lpf_limit=6,  # required by LomaStage
+        enable_mix_spatial_mapping_generation=True,  # enable auto-generation of mix spatial mapping
+        maximize_hardware_utilization=True, # only evaluate spatial mapping with top2 utilization (fast simulation)
+        enable_weight_diagonal_mapping=True,  # required by SpatialMappingGeneratorStage
         loma_show_progress_bar=True,
         # If we need to access the same input data multiple times from the innermost memory level and the data size is smaller than the memory read bw,
         # take into account only one-time access cost (assume the data can stay at the output pins of the memory as long as it is needed).
@@ -76,85 +81,6 @@ def get_hardware_performance_zigzag(
 
     return cmes[0][0].energy_total, cmes[0][0].latency_total2, cmes
 
-
-def get_hardware_performance_zigzag_pe_array_scaling(
-    workload,
-    accelerator,
-    mapping,
-    pe_array_scaling,
-    opt="latency",
-    dump_filename_pattern="outputs/{datetime}.json",
-    pickle_filename="outputs/list_of_cmes.pickle",
-):
-    # Initialize the logger
-    import logging as _logging
-
-    _logging_level = _logging.INFO
-    _logging_format = (
-        "%(asctime)s - %(funcName)s +%(lineno)s - %(levelname)s - %(message)s"
-    )
-    _logging.basicConfig(level=_logging_level, format=_logging_format)
-
-    # Sanity check on the optimization criterion
-    if opt == "energy":
-        opt_stage = MinimalEnergyStage
-    elif opt == "latency":
-        opt_stage = MinimalLatencyStage
-    elif opt == "EDP":
-        opt_stage = MinimalEDPStage
-    else:
-        raise NotImplementedError(
-            "Optimization criterion 'opt' should be either 'energy' or 'latency' or 'EDP'."
-        )
-
-    # Check workload format and based on it select the correct workload parser stage
-    try:
-        if workload.split(".")[-1] == "onnx":
-            workload_parser_stage = ONNXModelParserStage
-        else:
-            workload_parser_stage = WorkloadParserStage
-    except:
-        workload_parser_stage = WorkloadParserStage
-
-    mainstage = MainStage(
-        [  # Initialize the MainStage as entry point
-            workload_parser_stage,  # Parse the ONNX Model into the workload
-            AcceleratorParserStage,  # Parse the accelerator module/passthrough given accelerator
-            PEArrayScalingStage,  # Scale the PE array of the given accelerator
-            SimpleSaveStage,  # Save the summed CME energy and latency to a json
-            PickleSaveStage,  # Save all received CMEs in a list to a pickle file
-            SumStage,  # Sum up the received best CME across all layers of the workload
-            WorkloadStage,  # Iterate through the different layers in the workload
-            CompleteSaveStage,  # Save each processed layer to a json
-            opt_stage,  # Reduce all CMEs, returning minimal energy/latency one
-            SpatialMappingGeneratorStage,  # Generate multiple spatial mappings (SM)
-            opt_stage,  # Reduce all CMEs, returning minimal energy/latency one
-            LomaStage,  # Generate multiple temporal mappings (TM)
-            # TemporalOrderingConversionStage,  # Based on the fixed temporal mapping order, generate one temporal mapping (TM)
-            CostModelStage,  # Evaluate generated SM and TM through cost model
-        ],
-        accelerator=accelerator,  # required by AcceleratorParserStage
-        workload=workload,  # required by workload_parser_stage
-        mapping=mapping,  # required by workload_parser_stage
-        dump_filename_pattern=dump_filename_pattern,  # output file save pattern
-        pickle_filename=pickle_filename,  # filename for pickled list of cmes
-        loma_lpf_limit=6,  # required by LomaStage
-        loma_show_progress_bar=True,
-        # If we need access the same input data multiple times from the innermost memory level and the data size is smaller than the memory read bw,
-        # take into account only one-time access cost (assume the data can stay at the output pins of the memory as long as it is needed).
-        # By default, if the parameter is not defined, it will be set as False internally.
-        access_same_data_considered_as_no_access=True,
-        pe_array_scaling=pe_array_scaling,
-    )
-
-    # Launch the MainStage
-    answers = mainstage.run()
-    # Get CME from answer
-    cmes = answers
-
-    return cmes[0][0].energy_total, cmes[0][0].latency_total2, cmes
-
-
 if __name__ == "__main__":
     workload = "zigzag/inputs/examples/workload/mobilenetv2.onnx"
     # workload = 'inputs.examples.workload.resnet18'
@@ -168,11 +94,10 @@ def get_hardware_performance_zigzag_pe_array_scaling(
     experiment_id = f"{hw_name}-{wl_name}"
     pkl_name = f"{experiment_id}-saved_list_of_cmes"
 
-    answer = get_hardware_performance_zigzag_pe_array_scaling(
+    answer = get_hardware_performance_zigzag(
         workload,
         accelerator,
         mapping,
-        pe_array_scaling=2,
         opt="EDP",
         dump_filename_pattern=f"outputs/{experiment_id}-layer_?.json",
         pickle_filename=f"outputs/{pkl_name}.pickle",

diff --git a/zigzag/classes/__pycache__/__init__.cpython-310.pyc b/zigzag/classes/__pycache__/__init__.cpython-310.pyc
diff --git a/zigzag/classes/__pycache__/__init__.cpython-311.pyc b/zigzag/classes/__pycache__/__init__.cpython-311.pyc
diff --git a/zigzag/classes/cacti/__pycache__/cacti_parser.cpython-310.pyc b/zigzag/classes/cacti/__pycache__/cacti_parser.cpython-310.pyc
diff --git a/zigzag/classes/cacti/__pycache__/cacti_parser.cpython-311.pyc b/zigzag/classes/cacti/__pycache__/cacti_parser.cpython-311.pyc
diff --git a/zigzag/classes/cost_model/__pycache__/cost_model.cpython-310.pyc b/zigzag/classes/cost_model/__pycache__/cost_model.cpython-310.pyc
diff --git a/zigzag/classes/cost_model/__pycache__/cost_model.cpython-311.pyc b/zigzag/classes/cost_model/__pycache__/cost_model.cpython-311.pyc
diff --git a/zigzag/classes/cost_model/__pycache__/cost_model_in_sram_computing.cpython-310.pyc b/zigzag/classes/cost_model/__pycache__/cost_model_in_sram_computing.cpython-310.pyc
diff --git a/zigzag/classes/cost_model/__pycache__/cost_model_in_sram_computing.cpython-311.pyc b/zigzag/classes/cost_model/__pycache__/cost_model_in_sram_computing.cpython-311.pyc
diff --git a/zigzag/classes/cost_model/cost_model.py b/zigzag/classes/cost_model/cost_model.py
@@ -211,12 +211,14 @@ def __init__(
         accelerator,
         layer,
         spatial_mapping,
+        spatial_mapping_int,
         temporal_mapping,
         access_same_data_considered_as_no_access=True,
     ):
         self.accelerator = accelerator
         self.layer = layer
         self.spatial_mapping = spatial_mapping
+        self.spatial_mapping_int = spatial_mapping_int  # the original spatial mapping without decimal
         self.temporal_mapping = temporal_mapping
         self.access_same_data_considered_as_no_access = (
             access_same_data_considered_as_no_access
@@ -246,9 +248,10 @@ def __init__(
 
         """ generate the integer spatial mapping from fractional spatial mapping (due to greedy mapping support).
         Later the fractional one is used for calculating energy, and the integer one is used for calculating latency"""
-        self.spatial_mapping_dict_int = spatial_mapping_fractional_to_int(
-            self.spatial_mapping.mapping_dict_origin
-        )
+        # self.spatial_mapping_dict_int = spatial_mapping_fractional_to_int(
+        #     self.spatial_mapping.mapping_dict_origin
+        # )
+        self.spatial_mapping_dict_int = self.spatial_mapping_int
 
         # For constructing Mapping object,  the last parameter "self.access_same_data_considered_as_no_access" is optional
         self.mapping = Mapping(