From 0227f5499f121e98392dfce190bc374a672d9f4b Mon Sep 17 00:00:00 2001 From: RobinGeens Date: Tue, 3 Sep 2024 14:33:45 +0200 Subject: [PATCH 1/7] cleanup model parser code --- .../classes/hardware/architecture/noc/bus.py | 16 +- .../hardware/architecture/noc/mesh_2d.py | 2 +- stream/classes/io/accelerator_factory.py | 13 +- stream/classes/io/accelerator_validator.py | 3 +- stream/classes/io/onnx/concat.py | 4 +- stream/classes/io/onnx/conv.py | 25 +-- stream/classes/io/onnx/default.py | 5 +- stream/classes/io/onnx/elementwise.py | 5 +- stream/classes/io/onnx/flatten.py | 4 +- stream/classes/io/onnx/gather.py | 4 +- stream/classes/io/onnx/gemm.py | 14 +- stream/classes/io/onnx/lpnormalization.py | 5 +- stream/classes/io/onnx/matmul.py | 49 +---- stream/classes/io/onnx/model.py | 193 ++++++------------ stream/classes/io/onnx/operator_parser.py | 29 +++ stream/classes/io/onnx/pooling.py | 28 +-- stream/classes/io/onnx/reshape.py | 6 +- stream/classes/io/onnx/simd.py | 68 ++---- stream/classes/io/onnx/softmax.py | 103 ++++++++++ stream/classes/io/onnx/transpose.py | 5 +- stream/classes/workload/elementwise_node.py | 1 - stream/utils.py | 22 +- 22 files changed, 280 insertions(+), 324 deletions(-) create mode 100644 stream/classes/io/onnx/operator_parser.py create mode 100644 stream/classes/io/onnx/softmax.py diff --git a/stream/classes/hardware/architecture/noc/bus.py b/stream/classes/hardware/architecture/noc/bus.py index 7f9fbfa..874d53c 100644 --- a/stream/classes/hardware/architecture/noc/bus.py +++ b/stream/classes/hardware/architecture/noc/bus.py @@ -5,7 +5,7 @@ from stream.classes.hardware.architecture.noc.communication_link import CommunicationLink -def have_shared_memory(a, b): +def have_shared_memory(a: Core, b: Core): """Returns True if core a and core b have a shared top level memory Args: @@ -25,12 +25,12 @@ def have_shared_memory(a, b): def get_bus( - cores, - bandwidth, - unit_energy_cost, - pooling_core=None, - simd_core=None, - offchip_core=None, + cores: list[Core], + bandwidth: int, + unit_energy_cost: float, + pooling_core: Core | None = None, + simd_core: Core | None = None, + offchip_core: Core | None = None, ): """Return a graph of the cores where each core is connected to a single bus. @@ -46,7 +46,7 @@ def get_bus( """ bus = CommunicationLink("Any", "Any", bandwidth, unit_energy_cost) - edges = [] + edges: list[tuple[Core, Core, dict[str, CommunicationLink]]] = [] pairs = [(a, b) for idx, a in enumerate(cores) for b in cores[idx + 1 :]] for pair in pairs: (sender, receiver) = pair diff --git a/stream/classes/hardware/architecture/noc/mesh_2d.py b/stream/classes/hardware/architecture/noc/mesh_2d.py index f3731e2..e518255 100644 --- a/stream/classes/hardware/architecture/noc/mesh_2d.py +++ b/stream/classes/hardware/architecture/noc/mesh_2d.py @@ -35,7 +35,7 @@ def get_2d_mesh( simd_core: Core | None = None, offchip_core: Core | None = None, ): - """Return a 2D mesh graph of the cores where each core is connected to its N, E, S, W neighbour. + """Return a 2D mesh graph of the cores where each core is connected to its N, E, S, W neighbor. We build the mesh by iterating through the row and then moving to the next column. Each connection between two cores includes two links, one in each direction, each with specified bandwidth. Thus there are a total of ((nb_cols-1)*2*nb_rows + (nb_rows-1)*2*nb_cols) links in the noc. 
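
A quick sanity check of the link-count formula in the docstring above: each of the (nb_cols-1) horizontal neighbor pairs per row and each of the (nb_rows-1) vertical pairs per column contributes one link in each direction. A minimal standalone sketch (the helper is illustrative only, not part of this patch):

    def mesh_2d_link_count(nb_rows: int, nb_cols: int) -> int:
        horizontal = (nb_cols - 1) * 2 * nb_rows  # two directed links per neighbor pair, per row
        vertical = (nb_rows - 1) * 2 * nb_cols  # two directed links per neighbor pair, per column
        return horizontal + vertical

    assert mesh_2d_link_count(2, 2) == 8  # 2x2 mesh: 4 neighbor pairs, one link each way
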
diff --git a/stream/classes/io/accelerator_factory.py b/stream/classes/io/accelerator_factory.py index f3e9aea..0fd6a71 100644 --- a/stream/classes/io/accelerator_factory.py +++ b/stream/classes/io/accelerator_factory.py @@ -23,12 +23,13 @@ def create(self) -> Accelerator: core = core_factory.create(core_id) cores.append(core) - if self.data["graph"]["type"] == "2d_mesh": - cores_graph = self.create_2d_mesh(cores) - elif self.data["graph"]["type"] == "bus": - cores_graph = self.create_bus(cores) - else: - raise ValueError(f"Invalid graph type {self.data['graph']['type']}.") + match self.data["graph"]["type"]: + case "2d_mesh": + cores_graph = self.create_2d_mesh(cores) + case "bus": + cores_graph = self.create_bus(cores) + case _: + raise ValueError(f"Invalid graph type {self.data['graph']['type']}.") offchip_core_id: int | None = self.data["graph"]["offchip_core_id"] return Accelerator(name=self.data["name"], cores=cores_graph, offchip_core_id=offchip_core_id) diff --git a/stream/classes/io/accelerator_validator.py b/stream/classes/io/accelerator_validator.py index cba7dc8..37d9639 100644 --- a/stream/classes/io/accelerator_validator.py +++ b/stream/classes/io/accelerator_validator.py @@ -11,6 +11,7 @@ class AcceleratorValidator: INPUT_DIR_LOCATION = "stream/inputs/" + GRAPH_TYPES = ["2d_mesh", "bus"] SCHEMA = { "name": {"type": "string", "required": True}, @@ -26,7 +27,7 @@ class AcceleratorValidator: "type": "dict", "required": True, "schema": { - "type": {"type": "string", "required": True}, + "type": {"type": "string", "required": True, "allowed": GRAPH_TYPES}, "nb_rows": {"type": "integer", "required": False}, "nb_cols": {"type": "integer", "required": False}, "bandwidth": {"type": "integer", "required": True}, diff --git a/stream/classes/io/onnx/concat.py b/stream/classes/io/onnx/concat.py index 71851c8..d5bf2bd 100644 --- a/stream/classes/io/onnx/concat.py +++ b/stream/classes/io/onnx/concat.py @@ -1,10 +1,10 @@ -from zigzag.parser.onnx.ONNXOperatorParser import ONNXOperatorParser from zigzag.parser.onnx.utils import OnnxTensorCategory, get_onnx_tensor_type +from stream.classes.io.onnx.operator_parser import OnnxOperatorParser from stream.classes.workload.concat_node import ConcatNode -class ConcatParser(ONNXOperatorParser): +class ConcatParser(OnnxOperatorParser): """Parses an onnx gather operator into a ConcatNode.""" def run(self): diff --git a/stream/classes/io/onnx/conv.py b/stream/classes/io/onnx/conv.py index 8c03018..9f55e43 100644 --- a/stream/classes/io/onnx/conv.py +++ b/stream/classes/io/onnx/conv.py @@ -2,37 +2,22 @@ from math import ceil from typing import Any -from onnx import ModelProto, NodeProto -from zigzag.parser.onnx.ONNXOperatorParser import ONNXOperatorParser from zigzag.parser.onnx.utils import ( get_attribute_ints_with_name, get_node_input_output_dimension_shapes, ) from zigzag.parser.workload_factory import LayerNodeFactory -from stream.classes.hardware.architecture.accelerator import Accelerator +from stream.classes.io.onnx.operator_parser import OnnxOperatorParser from stream.classes.workload.computation_node import ComputationNode logger = logging.getLogger(__name__) -class ConvParser(ONNXOperatorParser): +class ConvParser(OnnxOperatorParser): """Parser for ONNX Conv and QLinearConv nodes into LayerNode.""" - def __init__( - self, - node_id: int, - node: NodeProto, - nodes_outputs: dict[int, Any], - mapping_data: list[dict[str, Any]], - onnx_model: ModelProto, - accelerator: Accelerator, - ) -> None: - super().__init__(node_id, node, nodes_outputs, 
onnx_model) - self.onnx_model = onnx_model - self.mapping_data = mapping_data - self.accelerator = accelerator - self.op_type = "conv" + OP_TYPE = "conv" def run(self) -> ComputationNode: """Run the parser and return the created LayerNode object.""" @@ -57,7 +42,7 @@ def get_layer_node_input_format( data: dict[str, Any] = {} data["id"] = self.node_id data["name"] = f"Layer{self.node_id}" - data["operator_type"] = self.op_type + data["operator_type"] = ConvParser.OP_TYPE # IMPORTANT: If any of the input loops require padding, they should be defined as the rightmost dimensions in # the equation. This is because we construct the dimensionality order and then add the padding to those last # dimensions in the order @@ -168,6 +153,6 @@ def generate_layer_node_for_conv(self): node_attr=node_attrs, input_names=node_input_names, output_names=node_output_names, - op_type=self.op_type, + op_type=ConvParser.OP_TYPE, operand_tensor_reshape=None, ) diff --git a/stream/classes/io/onnx/default.py b/stream/classes/io/onnx/default.py index cf47fac..31ba679 100644 --- a/stream/classes/io/onnx/default.py +++ b/stream/classes/io/onnx/default.py @@ -1,9 +1,8 @@ -from zigzag.parser.onnx.ONNXOperatorParser import ONNXOperatorParser - +from stream.classes.io.onnx.operator_parser import OnnxOperatorParser from stream.classes.workload.dummy_node import DummyNode -class DefaultNodeParser(ONNXOperatorParser): +class DefaultNodeParser(OnnxOperatorParser): """Parse an ONNX node into a DummyNode.""" def run(self): diff --git a/stream/classes/io/onnx/elementwise.py b/stream/classes/io/onnx/elementwise.py index 01d9e99..70c8065 100644 --- a/stream/classes/io/onnx/elementwise.py +++ b/stream/classes/io/onnx/elementwise.py @@ -1,9 +1,8 @@ -from zigzag.parser.onnx.ONNXOperatorParser import ONNXOperatorParser - +from stream.classes.io.onnx.operator_parser import OnnxOperatorParser from stream.classes.workload.elementwise_node import ElementwiseNode -class ElementwiseParser(ONNXOperatorParser): +class ElementwiseParser(OnnxOperatorParser): """Parser for onnx operators that perform an elementwise operation on two input tensors into a single output tensor. For example, an Add operator adds two tensors together in every position into one output tensor. 
""" diff --git a/stream/classes/io/onnx/flatten.py b/stream/classes/io/onnx/flatten.py index 71f3dae..a864d72 100644 --- a/stream/classes/io/onnx/flatten.py +++ b/stream/classes/io/onnx/flatten.py @@ -1,10 +1,10 @@ -from zigzag.parser.onnx.ONNXOperatorParser import ONNXOperatorParser from zigzag.parser.onnx.utils import get_attribute_ints_with_name +from stream.classes.io.onnx.operator_parser import OnnxOperatorParser from stream.classes.workload.flatten_node import FlattenNode -class FlattenParser(ONNXOperatorParser): +class FlattenParser(OnnxOperatorParser): """Parses an onnx flatten operator into a FlattenNode.""" def run(self): diff --git a/stream/classes/io/onnx/gather.py b/stream/classes/io/onnx/gather.py index 91b2217..83cf057 100644 --- a/stream/classes/io/onnx/gather.py +++ b/stream/classes/io/onnx/gather.py @@ -1,10 +1,10 @@ from onnx import numpy_helper -from zigzag.parser.onnx.ONNXOperatorParser import ONNXOperatorParser +from stream.classes.io.onnx.operator_parser import OnnxOperatorParser from stream.classes.workload.gather_node import GatherNode -class GatherParser(ONNXOperatorParser): +class GatherParser(OnnxOperatorParser): """Parses an onnx gather operator into a GatherNode.""" def run(self): diff --git a/stream/classes/io/onnx/gemm.py b/stream/classes/io/onnx/gemm.py index 04c6536..aadae59 100644 --- a/stream/classes/io/onnx/gemm.py +++ b/stream/classes/io/onnx/gemm.py @@ -5,24 +5,30 @@ from zigzag.parser.onnx.GemmParser import GemmParser as GemmParserZigZag from stream.classes.hardware.architecture.accelerator import Accelerator +from stream.classes.io.onnx.operator_parser import OnnxOperatorParser from stream.classes.workload.computation_node import ComputationNode logger = logging.getLogger(__name__) -class GemmParser(GemmParserZigZag): +class GemmParser(GemmParserZigZag, OnnxOperatorParser): """Parses an ONNX Gemm operator into a ComputationNode""" def __init__( self, node_id: int, node: NodeProto, - nodes_outputs: dict[int, list[str]], - mapping_data: list[dict[str, Any]], + nodes_outputs: dict[int, Any], onnx_model: ModelProto, + *, + mapping_data: list[dict[str, Any]], accelerator: Accelerator, ) -> None: - super().__init__(node_id, node, nodes_outputs, mapping_data, onnx_model) + self.node_id = node_id + self.node = node + self.nodes_outputs = nodes_outputs + self.onnx_model = onnx_model + self.mapping_data = mapping_data self.accelerator = accelerator def run(self): diff --git a/stream/classes/io/onnx/lpnormalization.py b/stream/classes/io/onnx/lpnormalization.py index ca1e27c..79fc46e 100644 --- a/stream/classes/io/onnx/lpnormalization.py +++ b/stream/classes/io/onnx/lpnormalization.py @@ -1,9 +1,8 @@ -from zigzag.parser.onnx.ONNXOperatorParser import ONNXOperatorParser - +from stream.classes.io.onnx.operator_parser import OnnxOperatorParser from stream.classes.workload.lpnormalization_node import LpNormalizationNode -class LpNormalizationParser(ONNXOperatorParser): +class LpNormalizationParser(OnnxOperatorParser): """Parses an onnx reshape operator into a LpNormalizationNode.""" def __init__(self, node_id, node, nodes_outputs, mapping, onnx_model) -> None: diff --git a/stream/classes/io/onnx/matmul.py b/stream/classes/io/onnx/matmul.py index 0d34d23..ce25d89 100644 --- a/stream/classes/io/onnx/matmul.py +++ b/stream/classes/io/onnx/matmul.py @@ -1,52 +1,9 @@ import logging -from typing import Any -from onnx import ModelProto, NodeProto -from zigzag.parser.onnx.MatMulParser import MatMulParser as MatMulParserZigZag - -from 
stream.classes.hardware.architecture.accelerator import Accelerator -from stream.classes.workload.computation_node import ComputationNode +from stream.classes.io.onnx.gemm import GemmParser logger = logging.getLogger(__name__) -class MatMulParser(MatMulParserZigZag): - """Parses an ONNX MatMul operator into a ComputationNode""" - - def __init__( - self, - node_id: int, - node: NodeProto, - nodes_outputs: dict[int, Any], - mapping_data: list[dict[str, Any]], - onnx_model: ModelProto, - accelerator: Accelerator, - ) -> None: - super().__init__(node_id, node, nodes_outputs, mapping_data, onnx_model) - self.accelerator = accelerator - - def run(self): - """Run the parser""" - return self.generate_node() - - def generate_node(self): - layer_node = self.generate_layer_node() - node_attrs = layer_node.extract_node_attr() - - # Override spatial mapping by the one defined in the core's dataflows - core_allocation = node_attrs.core_allocation - spatial_mapping = self.accelerator.get_spatial_mapping_from_core(core_allocation) - node_attrs.spatial_mapping = spatial_mapping - - # Get the node's input(s) and output(s) tensor names - node_input_names = list(self.node.input) - node_output_names = list(self.node.output) - return ComputationNode( - node_id=self.node_id, - node_name=self.node.name, - node_attr=node_attrs, - input_names=node_input_names, - output_names=node_output_names, - op_type=node_attrs.layer_type, - operand_tensor_reshape=None, - ) +class MatMulParser(GemmParser): + """! Parses an ONNX MatMul operator into a ComputationNode. Exactly the same as Gemm Parser""" diff --git a/stream/classes/io/onnx/model.py b/stream/classes/io/onnx/model.py index 09ba6fe..56aa6e9 100644 --- a/stream/classes/io/onnx/model.py +++ b/stream/classes/io/onnx/model.py @@ -1,5 +1,5 @@ import logging -from typing import Any +from typing import Any, Type from onnx import ModelProto, NodeProto from zigzag.parser.onnx.utils import get_onnx_tensor_type, parse_onnx_model_from_path @@ -14,10 +14,12 @@ from stream.classes.io.onnx.gemm import GemmParser from stream.classes.io.onnx.lpnormalization import LpNormalizationParser from stream.classes.io.onnx.matmul import MatMulParser +from stream.classes.io.onnx.operator_parser import OnnxOperatorParser from stream.classes.io.onnx.pooling import PoolingParser from stream.classes.io.onnx.reshape import ReshapeParser from stream.classes.io.onnx.simd import SimdParser from stream.classes.io.onnx.transpose import TransposeParser +from stream.classes.workload.node import Node from stream.classes.workload.onnx_workload import ONNXWorkload logger = logging.getLogger(__name__) @@ -26,15 +28,31 @@ class ONNXModelParser: """Parse the ONNX model into a workload.""" + # Map the node's op_type to the corresponding Parser class + PARSER_MAPPING: dict[str, Type[OnnxOperatorParser]] = { + "QLinearConv": ConvParser, + "Conv": ConvParser, + "MatMul": MatMulParser, + "Gemm": GemmParser, + "MaxPool": PoolingParser, + "AveragePool": PoolingParser, + "GlobalMaxPool": PoolingParser, + "GlobalAveragePool": PoolingParser, + "Reshape": ReshapeParser, + "Flatten": FlattenParser, + "Gather": GatherParser, + "Add": SimdParser, + "Mul": SimdParser, + "Transpose": TransposeParser, + "LpNormalization": LpNormalizationParser, + "Concat": ConcatParser, + } + def __init__(self, onnx_model_path: str, mapping_yaml_path: str, accelerator: Accelerator) -> None: self.onnx_model_path = onnx_model_path self.mapping_yaml_path_data = mapping_yaml_path self.accelerator = accelerator - self.onnx_model = None - self.workload 
= None
-        self.mapping_data = None
-
     def run(self):
         """Run the parser:
         - parse the onnx_model_path into an onnx model
@@ -45,26 +63,38 @@ def run(self):
         self.mapping_data = WorkloadParserStage.parse_mapping_data(self.mapping_yaml_path_data)
         self.workload = self.parse_workload_from_onnx_model_and_mapping()
 
+    def get_parser_class(self, node: NodeProto):
+        # A temporary fix for an element-wise Add or Mul which has asymmetric input data -> treat it as a DummyNode.
+        # TODO support node with asymmetric input data.
+        if node.op_type in ["Add", "Mul"] and has_asymmetric_input_data(node, self.onnx_model):
+            return DefaultNodeParser
+
+        parser_class = ONNXModelParser.PARSER_MAPPING.get(node.op_type)
+        if not parser_class:
+            return DefaultNodeParser
+        return parser_class
+
     def parse_workload_from_onnx_model_and_mapping(self):
         """
        Converts an onnx model into a workload object.
        We scan the model for all convolutional layers, and setup a Layer object for each of those using the mapping.
        Then we combine the layers into a workload graph.
+
+        If the model isn't in the format with external data, it will be slow to manipulate, so it is better to work
+        with raw models that store their tensor data externally. The line below accomplishes this:
+        onnx.save_model(model, 'model_external.onnx', save_as_external_data=True, all_tensors_to_one_file=True,
+        location='model_external_raw_data', size_threshold=1024, convert_attribute=False)
+
+        In the future, assuming the model is saved with external data, the code below must be executed if the model
+        has not been shape-inferred yet. This approach is faster for large models because the raw model is used
+        (without the external data):
+        if model is not inferred:
+        onnx.shape_inference.infer_shapes_path('path/to/the/model.onnx')  # saves the inferred model to the same file
+        model = onnx.load('path/to/the/model.onnx')  # reload the inferred model
         """
         assert self.mapping_data is not None
         assert self.onnx_model is not None
 
-        # If the model isn't in the format with external data, it will be slow to manipulate it, so better to work with
-        # raw models with external data # The line below accomplishes this. 
- # onnx.save_model(model, 'model_external.onnx', save_as_external_data=True, all_tensors_to_one_file=True, - # location='model_external_raw_data', size_threshold=1024, convert_attribute=False) - - # In the future, assume we will have a model saved with external data, then we have to execute the code below - # if the model isn't inferred yet - # This approach is faster for large models because the raw model is used (w/o the external data) - # if model is not inferred: - # onnx.shape_inference.infer_shapes_path('path/to/the/model.onnx') # This will save the inferred model to the - # same file - # model = onnx.load('path/to/the/model.onnx') # reload the inferred model # Saves for each node_id the inputs and outputs tensor names nodes_inputs: dict[int, Any] = {} @@ -80,124 +110,24 @@ def parse_workload_from_onnx_model_and_mapping(self): nodes_inputs[node_id] = node.input nodes_outputs[node_id] = node.output - if node.op_type in ["QLinearConv", "Conv"]: - parser = ConvParser( - node_id=node_id, - node=node, - nodes_outputs=nodes_outputs, - mapping_data=self.mapping_data, - onnx_model=self.onnx_model, - accelerator=self.accelerator, - ) - logger.info("Parsed Conv node %s.", node.name) - elif node.op_type in ["MatMul"]: - parser = MatMulParser( - node_id=node_id, - node=node, - nodes_outputs=nodes_outputs, - mapping_data=self.mapping_data, - onnx_model=self.onnx_model, - accelerator=self.accelerator, - ) - logger.info("Parsed MatMul node %s.", node.name) - elif node.op_type in ["Gemm"]: - parser = GemmParser( - node_id=node_id, - node=node, - nodes_outputs=nodes_outputs, - mapping_data=self.mapping_data, - onnx_model=self.onnx_model, - accelerator=self.accelerator, - ) - logger.info("Parsed Gemm node %s.", node.name) - elif node.op_type in ["MaxPool", "AveragePool", "GlobalMaxPool", "GlobalAveragePool"]: - parser = PoolingParser( - node_id=node_id, - node=node, - nodes_outputs=nodes_outputs, - mapping_data=self.mapping_data, - onnx_model=self.onnx_model, - accelerator=self.accelerator, - ) - logger.info("Parsed Pooling node %s.", node.name) - elif node.op_type in ["Reshape"]: - parser = ReshapeParser( - node_id=node_id, - node=node, - nodes_outputs=nodes_outputs, - onnx_model=self.onnx_model, - ) - elif node.op_type in ["Flatten"]: - parser = FlattenParser( - node_id=node_id, - node=node, - nodes_outputs=nodes_outputs, - onnx_model=self.onnx_model, - ) - logger.info("Parsed Flatten node %s.", node.name) - elif node.op_type in ["Gather"]: - parser = GatherParser( - node_id=node_id, - node=node, - nodes_outputs=nodes_outputs, - onnx_model=self.onnx_model, - ) - logger.info("Parsed Gather node %s.", node.name) - elif node.op_type in ["Add", "Mul"]: - # TODO: a temporary fix an element-wise Add or Mul which has asymmetric input data - # TODO: -> treat it as a DummyNode. - # Future to support node with asymmetric input data. 
- if has_asymmetric_input_data(node, self.onnx_model): - parser = DefaultNodeParser( - node_id=node_id, - node=node, - nodes_outputs=nodes_outputs, - onnx_model=self.onnx_model, - ) - logger.info( - "Parsed asymmetric %s node %s as a DummyNode", - node.op_type, - node.name, - ) - else: - parser = SimdParser( - node_id=node_id, - node=node, - nodes_outputs=nodes_outputs, - mapping_data=self.mapping_data, - onnx_model=self.onnx_model, - accelerator=self.accelerator, - ) - logger.info( - "Parsed %s node %s.", - node.op_type, - node.name, - ) - elif node.op_type in ["Transpose"]: - parser = TransposeParser(node_id, node, nodes_outputs, self.onnx_model) - logger.info("Parsed Transpose node %s.", node.name) - elif node.op_type in ["LpNormalization"]: - parser = LpNormalizationParser(node_id, node, nodes_outputs, self.mapping_data, self.onnx_model) - logger.info("Parsed LpNormalization node %s.", node.name) - elif node.op_type in ["Concat"]: - parser = ConcatParser(node_id, node, nodes_outputs, self.onnx_model) - logger.info("Parsed LpNormalization node %s.", node.name) - # it is not any of the above, so create a DummyNode - else: - parser = DefaultNodeParser(node_id, node, nodes_outputs, self.onnx_model) - logger.info( - "Parsed %s node %s as a DummyNode", - node.op_type, - node.name, - ) - node_obj = parser.run() - # Add the node_obj to the ONNXWorkload + parser_class = self.get_parser_class(node) + parser = parser_class( + node_id=node_id, + node=node, + nodes_outputs=nodes_outputs, + onnx_model=self.onnx_model, + mapping_data=self.mapping_data, + accelerator=self.accelerator, + ) + + logger.info("Parsed %s node %s.", node.op_type, node.name) + node_obj: Node = parser.run() workload.add(node_id, node_obj) logger.info( "Created ONNXWorkload graph with %i nodes and %i edges.", workload.number_of_nodes(), - workload.number_of_edges(), + workload.number_of_edges(), # type: ignore ) return workload @@ -213,6 +143,9 @@ def get_workload(self): def has_asymmetric_input_data(node: NodeProto, onnx_model: ModelProto): + """Return true iff the node has two inputs and the input nodes have a different shape""" + if len(node.input) != 2: + return False input_name1 = node.input[0] input_name2 = node.input[1] input_shape1 = get_onnx_tensor_type(input_name1, onnx_model).shape diff --git a/stream/classes/io/onnx/operator_parser.py b/stream/classes/io/onnx/operator_parser.py new file mode 100644 index 0000000..20dc810 --- /dev/null +++ b/stream/classes/io/onnx/operator_parser.py @@ -0,0 +1,29 @@ +from typing import Any, Iterator + +from onnx import ModelProto, NodeProto +from zigzag.parser.onnx.ONNXOperatorParser import ONNXOperatorParser as ONNXOperatorParserZigZag + +from stream.classes.hardware.architecture.accelerator import Accelerator +from stream.classes.workload.computation_node import ComputationNode +from stream.classes.workload.node import Node + + +class OnnxOperatorParser(ONNXOperatorParserZigZag): + def __init__( + self, + node_id: int, + node: NodeProto, + nodes_outputs: dict[int, Any], + onnx_model: ModelProto, + mapping_data: list[dict[str, Any]], + accelerator: Accelerator, + ) -> None: + """'overloads' the ONNXOperatorParserZigZag init method with the correct `accelerator` type""" + self.node_id = node_id + self.node = node + self.nodes_outputs = nodes_outputs + self.onnx_model = onnx_model + self.mapping_data = mapping_data + self.accelerator = accelerator + + def run(self) -> Node | Iterator[ComputationNode]: ... 
# type: ignore diff --git a/stream/classes/io/onnx/pooling.py b/stream/classes/io/onnx/pooling.py index 1c8b4f9..e94a6a9 100644 --- a/stream/classes/io/onnx/pooling.py +++ b/stream/classes/io/onnx/pooling.py @@ -1,38 +1,20 @@ from typing import Any -from onnx import ModelProto, NodeProto -from zigzag.parser.onnx.ONNXOperatorParser import ONNXOperatorParser from zigzag.parser.onnx.utils import ( get_attribute_ints_with_name, get_node_input_output_dimension_shapes, ) from zigzag.parser.workload_factory import LayerNodeFactory -from stream.classes.hardware.architecture.accelerator import Accelerator +from stream.classes.io.onnx.operator_parser import OnnxOperatorParser from stream.classes.workload.pooling_node import PoolingNode -class PoolingParser(ONNXOperatorParser): +class PoolingParser(OnnxOperatorParser): """Parses an onnx pooling operator into a PoolingNode. e.g. MaxPool, AveragePool, etc. """ - def __init__( - self, - node_id: int, - node: NodeProto, - nodes_outputs: dict[int, Any], - mapping_data: list[dict[str, Any]], - onnx_model: ModelProto, - accelerator: Accelerator, - ) -> None: - super().__init__(node_id, node, nodes_outputs, onnx_model) - self.onnx_model = onnx_model - self.mapping_data = mapping_data - self.accelerator = accelerator - self.op_type = self.node.op_type - self.node_name = f"Layer{self.node_id}" - def run(self): return self.generate_layer_node_for_pooling() @@ -71,8 +53,8 @@ def get_layer_node_input_format( data: dict[str, Any] = {} data["id"] = self.node_id - data["name"] = self.node_name - data["operator_type"] = self.op_type + data["name"] = self.node.name + data["operator_type"] = self.node.op_type data["equation"] = "O[b][k][oy][ox]+=W[fy][fx]*I[b][k][iy][ix]" # Get dimension sizes from input parameters assert ia_shape[0] == oa_shape[0], "Batch size is different for input and output activations." 
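
As a reference for the hunk above: get_layer_node_input_format now takes the layer name and operator type straight from the ONNX node. A hypothetical sketch of the dict it assembles for a pooling node (all values illustrative, not part of this patch):

    data = {
        "id": 7,  # node_id assigned by the model parser
        "name": "maxpool_7",  # self.node.name, replacing the old f"Layer{node_id}"
        "operator_type": "MaxPool",  # self.node.op_type
        "equation": "O[b][k][oy][ox]+=W[fy][fx]*I[b][k][iy][ix]",
    }
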
@@ -149,7 +131,7 @@ def generate_layer_node_for_pooling(self): return PoolingNode( node_id=self.node_id, - node_name=self.node_name, + node_name=self.node.name, node_attr=node_attrs, input_names=node_input_names, output_names=node_output_names, diff --git a/stream/classes/io/onnx/reshape.py b/stream/classes/io/onnx/reshape.py index 2153980..64de6fc 100644 --- a/stream/classes/io/onnx/reshape.py +++ b/stream/classes/io/onnx/reshape.py @@ -1,10 +1,10 @@ -from zigzag.parser.onnx.ONNXOperatorParser import ONNXOperatorParser from zigzag.parser.onnx.utils import get_node_input_output_dimension_shapes +from stream.classes.io.onnx.operator_parser import OnnxOperatorParser from stream.classes.workload.reshape_node import ReshapeNode -class ReshapeParser(ONNXOperatorParser): +class ReshapeParser(OnnxOperatorParser): """Parses an onnx reshape operator into a ReshapeNode.""" def run(self): @@ -16,7 +16,7 @@ def generate_node(self): predecessor = predecessors.pop() # The operator shape is saved as the second input, so we need to get the input's dimension shape - shape = get_node_input_output_dimension_shapes(self.node, self.onnx_model)[1] + shape = tuple(get_node_input_output_dimension_shapes(self.node, self.onnx_model)[1]) input_names = [self.node.input[0]] output_names = [self.node.output[0]] diff --git a/stream/classes/io/onnx/simd.py b/stream/classes/io/onnx/simd.py index e3bcc59..36465d1 100644 --- a/stream/classes/io/onnx/simd.py +++ b/stream/classes/io/onnx/simd.py @@ -1,89 +1,59 @@ from typing import Any -from onnx import ModelProto, NodeProto -from zigzag.parser.onnx.ONNXOperatorParser import ONNXOperatorParser from zigzag.parser.onnx.utils import ( get_node_input_output_dimension_shapes, ) from zigzag.parser.workload_factory import LayerNodeFactory -from stream.classes.hardware.architecture.accelerator import Accelerator +from stream.classes.io.onnx.operator_parser import OnnxOperatorParser from stream.classes.workload.simd_node import SimdNode -class SimdParser(ONNXOperatorParser): +class SimdParser(OnnxOperatorParser): """Parses an ONNXOperatorParser operator representing an elementwise operation (simd) into a SimdNode. e.g. Add, etc. """ - def __init__( - self, - node_id: int, - node: NodeProto, - nodes_outputs: dict[int, Any], - mapping_data: list[dict[str, Any]], - onnx_model: ModelProto, - accelerator: Accelerator, - ) -> None: - super().__init__(node_id, node, nodes_outputs, onnx_model) - self.onnx_model = onnx_model - self.mapping_data = mapping_data - self.accelerator = accelerator - self.op_type = self.node.op_type # .lower() - self.node_name = f"Layer{self.node_id}" - def run(self): return self.generate_node() def get_layer_node_input_format(self, ia_shape: list[int], oa_shape: list[int]): """ Generate the necessary dictionary items required for the LayerNode creation. - For the pooling node, we pick K as the "channel" dimension. It should be equal to C anyways. """ assert ia_shape == oa_shape, "Input and output of simd operation should be identical." + predecessors = self.get_node_predecessors() + assert len(predecessors) > 0, "Undefined behavior for Simd node with no inputs" + # Nodes with only 1 input (e.g. 
Relu, Max, add/mul with constant, etc) have an empty `W` part in equation
+        has_single_input = len(predecessors) == 1
 
         data: dict[str, Any] = {}
         data["id"] = self.node_id
-        data["name"] = self.node_name
-        data["operator_type"] = self.op_type
+        data["name"] = self.node.name
+        data["operator_type"] = self.node.op_type
+        data["loop_sizes"] = oa_shape
+        data["dimension_relations"] = []
 
         match len(oa_shape):
             case 1:
-                data["equation"] = "O[ox]+=I[ox]*W[ox]"
-                data["loop_dims"] = ["OX"]
-                data["loop_sizes"] = oa_shape
+                data["equation"] = f"O[k]+=I[k]*W{'[]' if has_single_input else '[k]'}"
+                data["loop_dims"] = ["K"]
             case 2:
-                data["equation"] = "O[oy][ox]+=I[oy][ox]*W[oy][ox]"
-                data["loop_dims"] = ["OX", "OY"]
-                data["loop_sizes"] = oa_shape
+                data["equation"] = f"O[d][k]+=I[d][k]*W{'[]' if has_single_input else '[d][k]'}"
+                data["loop_dims"] = ["D", "K"]
             case 3:
-                data["equation"] = "O[b][oy][ox]+=I[b][oy][ox]*W[b][oy][ox]"
-                data["loop_dims"] = ["B", "OX", "OY"]
-                data["loop_sizes"] = oa_shape
+                data["equation"] = f"O[b][d][k]+=I[b][d][k]*W{'[]' if has_single_input else '[b][d][k]'}"
+                data["loop_dims"] = ["B", "D", "K"]
             case 4:
-                data["equation"] = "O[b][k][oy][ox]+=I[b][k][oy][ox]*W[b][k][oy][ox]"
-                data["loop_dims"] = ["B", "K", "OX", "OY"]
-                data["loop_sizes"] = oa_shape
+                data["equation"] = f"O[b][h][d][k]+=I[b][h][d][k]*W{'[]' if has_single_input else '[b][h][d][k]'}"
+                data["loop_dims"] = ["B", "H", "D", "K"]
             case _:
                 raise NotImplementedError
 
-        data["dimension_relations"] = []
-
-        predecessors = self.get_node_predecessors()
         act_precision = self.get_activation_precision()
         weight_precision = self.get_weight_precision()
         intermediate_output_precision = self.get_intermediate_output_precision()
         match len(predecessors):
-            case 0:
-                # No source operands -> assume one is constant
-                # TODO should this be 2?
-                data["operand_source"] = {"W": self.node_id}
-                data["operand_precision"] = {
-                    "W": weight_precision,
-                    "I": act_precision,
-                    "O_final": act_precision,
-                    "O": intermediate_output_precision,
-                }
             case 1:
                 # One source operand, one constant
                 data["operand_source"] = {"W": self.node_id, "I": predecessors[0]}
@@ -130,5 +100,5 @@ def generate_node(self):
             node_attr=node_attrs,
             input_names=node_input_names,
             output_names=node_output_names,
-            op_type=self.op_type,
+            op_type=self.node.op_type,
         )
diff --git a/stream/classes/io/onnx/softmax.py b/stream/classes/io/onnx/softmax.py
new file mode 100644
index 0000000..b19cfdf
--- /dev/null
+++ b/stream/classes/io/onnx/softmax.py
@@ -0,0 +1,103 @@
+from typing import Any, Iterator
+
+from zigzag.parser.onnx.utils import (
+    get_node_input_output_dimension_shapes,
+)
+from zigzag.parser.workload_factory import LayerNodeFactory
+
+from stream.classes.io.onnx.operator_parser import OnnxOperatorParser
+from stream.classes.workload.computation_node import ComputationNode
+from stream.classes.workload.simd_node import SimdNode
+
+
+class SoftmaxParser(OnnxOperatorParser):
+    """Parses the Softmax operator"""
+
+    def run(self) -> Iterator[ComputationNode]:
+        return self.generate_node()
+
+    def get_layer_node_input_format(self, ia_shape: list[int], oa_shape: list[int]):
+        """
+        Generate the necessary dictionary items required for the LayerNode creation.
+        """
+        assert ia_shape == oa_shape, "Input and output of simd operation should be identical."
+        predecessors = self.get_node_predecessors()
+        assert len(predecessors) > 0, "Undefined behavior for Simd node with no inputs"
+        # Nodes with only 1 input (e.g. 
Relu, Max, add/mul with constant, etc) have an empty `W` part in equation + has_single_input = len(predecessors) == 1 + + data: dict[str, Any] = {} + data["id"] = self.node_id + data["name"] = self.node.name + data["operator_type"] = self.node.op_type + data["loop_sizes"] = oa_shape + data["dimension_relations"] = [] + + match len(oa_shape): + case 1: + data["equation"] = f"O[k]+=I[k]*W{'[]' if has_single_input else '[k]'}" + data["loop_dims"] = ["K"] + case 2: + data["equation"] = f"O[d][k]+=I[d][k]*W{'[]' if has_single_input else '[d][k]'}" + data["loop_dims"] = ["D", "K"] + case 3: + data["equation"] = f"O[b][d][k]+=I[b][d][k]*W{'[]' if has_single_input else '[b][d][k]'}" + data["loop_dims"] = ["B", "D", "k"] + case 4: + data["equation"] = f"O[b][h][d][k]+=I[b][h][d][k]*W{'[]' if has_single_input else '[b][h][d][k]'}" + data["loop_dims"] = ["B", "H", "D", "k"] + case _: + raise NotImplementedError + + act_precision = self.get_activation_precision() + weight_precision = self.get_weight_precision() + intermediate_output_precision = self.get_intermediate_output_precision() + match len(predecessors): + case 1: + # One source operand, one constant + data["operand_source"] = {"W": self.node_id, "I": predecessors[0]} + data["operand_precision"] = { + "W": weight_precision, + "I": act_precision, + "O_final": act_precision, + "O": intermediate_output_precision, + } + case 2: + # Two source operands, none are constant (W and I can be swapped) + data["operand_source"] = {"W": predecessors[0], "I": predecessors[1]} + data["operand_precision"] = { + "W": act_precision, + "I": act_precision, + "O_final": act_precision, + "O": intermediate_output_precision, + } + + case _: + raise ValueError("No more than 2 layer predecessors expected") + + return data + + def generate_node(self): + # Get the input and output activation shapes + ia_dimension_shape, oa_dimension_shape = get_node_input_output_dimension_shapes(self.node, self.onnx_model) + + node_data = self.get_layer_node_input_format(ia_dimension_shape, oa_dimension_shape) + node_factory = LayerNodeFactory(node_data, self.mapping_data) + node_attrs = node_factory.create_node_attr() + + # Override spatial mapping by the one defined in the core's dataflows + core_allocation = node_attrs.core_allocation + spatial_mapping = self.accelerator.get_spatial_mapping_from_core(core_allocation) + node_attrs.spatial_mapping = spatial_mapping + + node_input_names = list(self.node.input) + node_output_names = list(self.node.output) + + return SimdNode( + node_id=self.node_id, + node_name=self.node.name, + node_attr=node_attrs, + input_names=node_input_names, + output_names=node_output_names, + op_type=self.node.op_type, + ) diff --git a/stream/classes/io/onnx/transpose.py b/stream/classes/io/onnx/transpose.py index 2d5119e..6e19c4f 100644 --- a/stream/classes/io/onnx/transpose.py +++ b/stream/classes/io/onnx/transpose.py @@ -1,9 +1,8 @@ -from zigzag.parser.onnx.ONNXOperatorParser import ONNXOperatorParser - +from stream.classes.io.onnx.operator_parser import OnnxOperatorParser from stream.classes.workload.transpose_node import TransposeNode -class TransposeParser(ONNXOperatorParser): +class TransposeParser(OnnxOperatorParser): """Parses an onnx reshape operator into a TransposeNode.""" def run(self): diff --git a/stream/classes/workload/elementwise_node.py b/stream/classes/workload/elementwise_node.py index 27d63a7..61ba312 100644 --- a/stream/classes/workload/elementwise_node.py +++ b/stream/classes/workload/elementwise_node.py @@ -4,7 +4,6 @@ class 
ElementwiseNode(Node): - """Class that represents an onnx Reshape node.""" def __init__( self, node_id: int, node_name: str, predecessor: int, input_names: list[str], output_names: list[str] diff --git a/stream/utils.py b/stream/utils.py index c28c235..400d5e5 100644 --- a/stream/utils.py +++ b/stream/utils.py @@ -218,8 +218,8 @@ def gather(self, gather_indices: int | list[int], axis: int) -> "NodeTensor": return (np.take(self.as_ndarray(), gather_indices, axis=axis)).view(NodeTensor) def concat_with_empty(self, shape: tuple[int, ...], axis: int, variable_input_first: bool): - emtpy_shape = self.convert_to_full_shape(shape) - empty_tensor = np.zeros(emtpy_shape, dtype=object) + empty_shape = self.convert_to_full_shape(shape) + empty_tensor = np.zeros(empty_shape, dtype=object) axis = axis - 1 if axis < 0 else axis if variable_input_first: return np.concat((empty_tensor, self.as_ndarray()), axis=axis).view(NodeTensor) @@ -240,16 +240,13 @@ class DiGraphWrapper(Generic[T], DiGraph): """Wraps the DiGraph class with type annotations for the nodes""" @overload - def in_edges(self, node: T, data: Literal[False]) -> list[tuple[T, T]]: - ... # type: ignore + def in_edges(self, node: T, data: Literal[False]) -> list[tuple[T, T]]: ... # type: ignore @overload - def in_edges(self, node: T, data: Literal[True]) -> list[tuple[T, T, dict[str, Any]]]: - ... # type: ignore + def in_edges(self, node: T, data: Literal[True]) -> list[tuple[T, T, dict[str, Any]]]: ... # type: ignore @overload - def in_edges(self, node: T) -> list[tuple[T, T]]: - ... # type: ignore + def in_edges(self, node: T) -> list[tuple[T, T]]: ... # type: ignore def in_edges( # type: ignore self, @@ -259,16 +256,13 @@ def in_edges( # type: ignore return super().in_edges(node, data) # type: ignore @overload - def out_edges(self, node: T, data: Literal[True]) -> list[tuple[T, T, dict[str, Any]]]: - ... # type: ignore + def out_edges(self, node: T, data: Literal[True]) -> list[tuple[T, T, dict[str, Any]]]: ... # type: ignore @overload - def out_edges(self, node: T, data: Literal[False]) -> list[tuple[T, T]]: - ... # type: ignore + def out_edges(self, node: T, data: Literal[False]) -> list[tuple[T, T]]: ... # type: ignore @overload - def out_edges(self, node: T) -> list[tuple[T, T]]: - ... # type: ignore + def out_edges(self, node: T) -> list[tuple[T, T]]: ... 
# type: ignore def out_edges( # type: ignore self, From 5a5634989caff5d500b6d15b52cb4ac31d25f9ae Mon Sep 17 00:00:00 2001 From: RobinGeens Date: Wed, 4 Sep 2024 13:12:18 +0200 Subject: [PATCH 2/7] parsers can return multiple nodes --- .pre-commit-config.yaml | 2 +- stream/classes/io/onnx/concat.py | 3 - stream/classes/io/onnx/conv.py | 14 +- stream/classes/io/onnx/default.py | 6 +- stream/classes/io/onnx/elementwise.py | 5 +- stream/classes/io/onnx/flatten.py | 18 +-- stream/classes/io/onnx/gather.py | 4 - stream/classes/io/onnx/gemm.py | 52 +------ stream/classes/io/onnx/lpnormalization.py | 5 +- stream/classes/io/onnx/model.py | 16 ++- stream/classes/io/onnx/operator_parser.py | 87 +++++++++++- stream/classes/io/onnx/pooling.py | 13 +- stream/classes/io/onnx/reduce_1d.py | 40 ++++++ stream/classes/io/onnx/reshape.py | 3 - stream/classes/io/onnx/simd.py | 77 ++-------- stream/classes/io/onnx/softmax.py | 164 +++++++++++----------- stream/classes/io/onnx/transpose.py | 3 - stream/classes/workload/node.py | 3 + stream/classes/workload/simd_node.py | 40 +++--- 19 files changed, 268 insertions(+), 287 deletions(-) create mode 100644 stream/classes/io/onnx/reduce_1d.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4357c25..f572152 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -10,7 +10,7 @@ repos: types_or: [python, pyi] args: [--extend-select, I, --fix, --exit-non-zero-on-fix, --line-length, "120"] - repo: https://github.com/psf/black - rev: 23.3.0 + rev: 24.8.0 hooks: - id: black args: [--line-length, "120"] diff --git a/stream/classes/io/onnx/concat.py b/stream/classes/io/onnx/concat.py index d5bf2bd..26c6b78 100644 --- a/stream/classes/io/onnx/concat.py +++ b/stream/classes/io/onnx/concat.py @@ -7,9 +7,6 @@ class ConcatParser(OnnxOperatorParser): """Parses an onnx gather operator into a ConcatNode.""" - def run(self): - return self.generate_node() - def generate_node(self): predecessors = self.get_node_predecessors() diff --git a/stream/classes/io/onnx/conv.py b/stream/classes/io/onnx/conv.py index 9f55e43..0015a0a 100644 --- a/stream/classes/io/onnx/conv.py +++ b/stream/classes/io/onnx/conv.py @@ -8,22 +8,18 @@ ) from zigzag.parser.workload_factory import LayerNodeFactory -from stream.classes.io.onnx.operator_parser import OnnxOperatorParser +from stream.classes.io.onnx.operator_parser import OnnxComputeOperatorParser from stream.classes.workload.computation_node import ComputationNode logger = logging.getLogger(__name__) -class ConvParser(OnnxOperatorParser): +class ConvParser(OnnxComputeOperatorParser): """Parser for ONNX Conv and QLinearConv nodes into LayerNode.""" OP_TYPE = "conv" - def run(self) -> ComputationNode: - """Run the parser and return the created LayerNode object.""" - return self.generate_layer_node_for_conv() - - def get_layer_node_input_format( + def get_layer_node_user_format( # type: ignore self, kernel_shape: list[int], strides: list[int], @@ -111,7 +107,7 @@ def get_layer_node_input_format( return data - def generate_layer_node_for_conv(self): + def generate_node(self): attrs = self.node.attribute kernel_shape: list[int] = get_attribute_ints_with_name("kernel_shape", attrs, default=None) # type:ignore strides: list[int] = get_attribute_ints_with_name("strides", attrs, default=[1, 1]) # type:ignore @@ -125,7 +121,7 @@ def generate_layer_node_for_conv(self): # Get the input and output activation and weight data type (precision) # TODO not used? 
# ia_data_type, oa_data_type, w_data_type = get_input_output_weight_data_type(self.node, self.onnx_model) - node_data: dict[str, Any] = self.get_layer_node_input_format( + node_data: dict[str, Any] = self.get_layer_node_user_format( kernel_shape, strides, dilations, diff --git a/stream/classes/io/onnx/default.py b/stream/classes/io/onnx/default.py index 31ba679..99b85a0 100644 --- a/stream/classes/io/onnx/default.py +++ b/stream/classes/io/onnx/default.py @@ -5,11 +5,7 @@ class DefaultNodeParser(OnnxOperatorParser): """Parse an ONNX node into a DummyNode.""" - def run(self): - """Run the parser""" - return self.generate_dummy_node() - - def generate_dummy_node(self): + def generate_node(self): predecessors = self.get_node_predecessors() input_names = list(self.node.input) output_names = list(self.node.output) diff --git a/stream/classes/io/onnx/elementwise.py b/stream/classes/io/onnx/elementwise.py index 70c8065..47f0b91 100644 --- a/stream/classes/io/onnx/elementwise.py +++ b/stream/classes/io/onnx/elementwise.py @@ -13,10 +13,7 @@ def __init__(self, node_id, node, nodes_outputs, mapping, onnx_model) -> None: self.type = node.op_type.lower() self.name = node.name - def run(self): - return self.generate_elementwise_node() - - def generate_elementwise_node(self): + def generate_node(self): # Get the predecessors of this node predecessors = [] for node_input in self.node.input: diff --git a/stream/classes/io/onnx/flatten.py b/stream/classes/io/onnx/flatten.py index a864d72..0eee715 100644 --- a/stream/classes/io/onnx/flatten.py +++ b/stream/classes/io/onnx/flatten.py @@ -7,18 +7,10 @@ class FlattenParser(OnnxOperatorParser): """Parses an onnx flatten operator into a FlattenNode.""" - def run(self): - return self.generate_flatten_node() - - def generate_flatten_node(self): - # Get the predecessors of this node - predecessors: list[int] = [] - for node_input in self.node.input: - for n in self.nodes_outputs: - if node_input in self.nodes_outputs[n]: - predecessors.append(n) - assert len(predecessors) <= 1 - predecessor = predecessors[0] if len(predecessors) == 1 else None + def generate_node(self): + predecessors = self.get_node_predecessors() + assert len(predecessors) == 1 + predecessor = predecessors[0] attrs = self.node.attribute # Get the axis which indicates how to flatten the input tensor @@ -27,7 +19,7 @@ def generate_flatten_node(self): output_names = [self.node.output[0]] return FlattenNode( node_id=self.node_id, - node_name="", + node_name=self.node.name, predecessor=predecessor, axis=axis, input_names=input_names, diff --git a/stream/classes/io/onnx/gather.py b/stream/classes/io/onnx/gather.py index 83cf057..a2e6f89 100644 --- a/stream/classes/io/onnx/gather.py +++ b/stream/classes/io/onnx/gather.py @@ -7,12 +7,8 @@ class GatherParser(OnnxOperatorParser): """Parses an onnx gather operator into a GatherNode.""" - def run(self): - return self.generate_node() - def generate_node(self): predecessors = self.get_node_predecessors() - axis = self.get_axis_value() indices = self.get_indices_value() diff --git a/stream/classes/io/onnx/gemm.py b/stream/classes/io/onnx/gemm.py index aadae59..84bfd15 100644 --- a/stream/classes/io/onnx/gemm.py +++ b/stream/classes/io/onnx/gemm.py @@ -1,58 +1,16 @@ import logging -from typing import Any +from typing import Generator -from onnx import ModelProto, NodeProto from zigzag.parser.onnx.GemmParser import GemmParser as GemmParserZigZag -from stream.classes.hardware.architecture.accelerator import Accelerator -from stream.classes.io.onnx.operator_parser 
import OnnxOperatorParser +from stream.classes.io.onnx.operator_parser import OnnxComputeOperatorParser from stream.classes.workload.computation_node import ComputationNode logger = logging.getLogger(__name__) -class GemmParser(GemmParserZigZag, OnnxOperatorParser): +class GemmParser(GemmParserZigZag, OnnxComputeOperatorParser): """Parses an ONNX Gemm operator into a ComputationNode""" - def __init__( - self, - node_id: int, - node: NodeProto, - nodes_outputs: dict[int, Any], - onnx_model: ModelProto, - *, - mapping_data: list[dict[str, Any]], - accelerator: Accelerator, - ) -> None: - self.node_id = node_id - self.node = node - self.nodes_outputs = nodes_outputs - self.onnx_model = onnx_model - self.mapping_data = mapping_data - self.accelerator = accelerator - - def run(self): - """Run the parser""" - return self.generate_node() - - def generate_node(self): - layer_node = self.generate_layer_node() - node_attrs = layer_node.extract_node_attr() - - # Override spatial mapping by the one defined in the core's dataflows - core_allocation = node_attrs.core_allocation - spatial_mapping = self.accelerator.get_spatial_mapping_from_core(core_allocation) - node_attrs.spatial_mapping = spatial_mapping - - # Get the node's input(s) and output(s) tensor names - node_input_names = list(self.node.input) - node_output_names = list(self.node.output) - return ComputationNode( - node_id=self.node_id, - node_name=self.node.name, - node_attr=node_attrs, - input_names=node_input_names, - output_names=node_output_names, - op_type=node_attrs.layer_type, - operand_tensor_reshape=None, - ) + def run(self) -> Generator[ComputationNode, None, None]: # type: ignore + yield self.generate_node() diff --git a/stream/classes/io/onnx/lpnormalization.py b/stream/classes/io/onnx/lpnormalization.py index 79fc46e..24a9863 100644 --- a/stream/classes/io/onnx/lpnormalization.py +++ b/stream/classes/io/onnx/lpnormalization.py @@ -10,10 +10,7 @@ def __init__(self, node_id, node, nodes_outputs, mapping, onnx_model) -> None: super().__init__(node_id, node, nodes_outputs, mapping, onnx_model) - def run(self): - return self.generate_lpnormalization_node() - - def generate_lpnormalization_node(self): + def generate_node(self): # Get the predecessors of this node predecessors = [] for node_input in self.node.input: diff --git a/stream/classes/io/onnx/model.py b/stream/classes/io/onnx/model.py index 56aa6e9..390f847 100644 --- a/stream/classes/io/onnx/model.py +++ b/stream/classes/io/onnx/model.py @@ -19,7 +19,6 @@ from stream.classes.io.onnx.reshape import ReshapeParser from stream.classes.io.onnx.simd import SimdParser from stream.classes.io.onnx.transpose import TransposeParser -from stream.classes.workload.node import Node from stream.classes.workload.onnx_workload import ONNXWorkload logger = logging.getLogger(__name__) @@ -102,14 +101,13 @@ def parse_workload_from_onnx_model_and_mapping(self): # Workload Graph workload = ONNXWorkload() - - for node_id, node in enumerate(self.onnx_model.graph.node): + node_id = 0 + for node in self.onnx_model.graph.node: # If this node has no inputs, don't take it into consideration (e.g. 
Constant operator has no inputs) if not node.input: continue - nodes_inputs[node_id] = node.input - nodes_outputs[node_id] = node.output + nodes_inputs[node_id] = node.input parser_class = self.get_parser_class(node) parser = parser_class( node_id=node_id, @@ -121,8 +119,12 @@ def parse_workload_from_onnx_model_and_mapping(self): ) logger.info("Parsed %s node %s.", node.op_type, node.name) - node_obj: Node = parser.run() - workload.add(node_id, node_obj) + for node_obj in parser.run(): + # Parsers that yield multiple nodes increment the node id internally, so we must keep count here. + workload.add(node_id, node_obj) + node_id += 1 + + nodes_outputs[node_id - 1] = node.output logger.info( "Created ONNXWorkload graph with %i nodes and %i edges.", diff --git a/stream/classes/io/onnx/operator_parser.py b/stream/classes/io/onnx/operator_parser.py index 20dc810..179876d 100644 --- a/stream/classes/io/onnx/operator_parser.py +++ b/stream/classes/io/onnx/operator_parser.py @@ -1,14 +1,19 @@ -from typing import Any, Iterator +from abc import ABCMeta, abstractmethod +from typing import Any, Generator from onnx import ModelProto, NodeProto from zigzag.parser.onnx.ONNXOperatorParser import ONNXOperatorParser as ONNXOperatorParserZigZag +from zigzag.parser.onnx.utils import ( + get_node_input_output_dimension_shapes, +) +from zigzag.parser.workload_factory import LayerNodeFactory from stream.classes.hardware.architecture.accelerator import Accelerator from stream.classes.workload.computation_node import ComputationNode from stream.classes.workload.node import Node -class OnnxOperatorParser(ONNXOperatorParserZigZag): +class OnnxOperatorParser(ONNXOperatorParserZigZag, metaclass=ABCMeta): def __init__( self, node_id: int, @@ -26,4 +31,80 @@ def __init__( self.mapping_data = mapping_data self.accelerator = accelerator - def run(self) -> Node | Iterator[ComputationNode]: ... # type: ignore + def run(self) -> Generator[Node, None, None]: # type: ignore + yield self.generate_node() + + @abstractmethod + def generate_node(self) -> Node: ... + + def get_operand_source_input_format(self): + predecessors = self.get_node_predecessors() + match len(predecessors): + case 1: + # One source operand, one constant + return {"W": self.node_id, "I": predecessors[0]} + + case 2: + # Two source operands, none are constant (W and I can be swapped) + return {"W": predecessors[0], "I": predecessors[1]} + case _: + raise ValueError("No more than 2 layer predecessors expected") + + +class OnnxComputeOperatorParser(OnnxOperatorParser, metaclass=ABCMeta): + + def run(self) -> Generator[ComputationNode, None, None]: + yield self.generate_node() + + @abstractmethod + def get_layer_node_user_format(self, input_shape: list[int], output_shape: list[int]) -> dict[str, Any]: ... 
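+    # Hypothetical sketch of the dict a concrete subclass (e.g. SimdParser)
+    # returns from this method; the name and sizes below are illustrative only:
+    #   {"id": 3, "name": "add_3", "operator_type": "Add",
+    #    "equation": "O[d][k]+=I[d][k]*W[d][k]",
+    #    "loop_dims": ["D", "K"], "loop_sizes": [128, 768], ...}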
+ + def get_operand_precision_input_format(self): + act_precision = self.get_activation_precision() + weight_precision = self.get_weight_precision() + intermediate_output_precision = self.get_intermediate_output_precision() + predecessors = self.get_node_predecessors() + match len(predecessors): + case 1: + # One source operand, one constant + return { + "W": weight_precision, + "I": act_precision, + "O_final": act_precision, + "O": intermediate_output_precision, + } + case 2: + # Two source operands, none are constant (W and I can be swapped) + return { + "W": act_precision, + "I": act_precision, + "O_final": act_precision, + "O": intermediate_output_precision, + } + case _: + raise ValueError("No more than 2 layer predecessors expected") + + def generate_node(self): + # Get the input and output activation shapes + input_shape, output_shape = get_node_input_output_dimension_shapes(self.node, self.onnx_model) + + node_data = self.get_layer_node_user_format(input_shape, output_shape) + node_factory = LayerNodeFactory(node_data, self.mapping_data) + node_attrs = node_factory.create_node_attr() + + # Override spatial mapping by the one defined in the core's dataflows + core_allocation = node_attrs.core_allocation + spatial_mapping = self.accelerator.get_spatial_mapping_from_core(core_allocation) + node_attrs.spatial_mapping = spatial_mapping + + node_input_names = list(self.node.input) + node_output_names = list(self.node.output) + + return ComputationNode( + node_id=self.node_id, + node_name=self.node.name, + node_attr=node_attrs, + input_names=node_input_names, + output_names=node_output_names, + op_type=self.node.op_type, + ) diff --git a/stream/classes/io/onnx/pooling.py b/stream/classes/io/onnx/pooling.py index e94a6a9..3bec703 100644 --- a/stream/classes/io/onnx/pooling.py +++ b/stream/classes/io/onnx/pooling.py @@ -6,18 +6,15 @@ ) from zigzag.parser.workload_factory import LayerNodeFactory -from stream.classes.io.onnx.operator_parser import OnnxOperatorParser +from stream.classes.io.onnx.operator_parser import OnnxComputeOperatorParser from stream.classes.workload.pooling_node import PoolingNode -class PoolingParser(OnnxOperatorParser): +class PoolingParser(OnnxComputeOperatorParser): """Parses an onnx pooling operator into a PoolingNode. e.g. MaxPool, AveragePool, etc. 
""" - def run(self): - return self.generate_layer_node_for_pooling() - def get_kernel_shape(self, attrs, ia_dimension_shape) -> list[int]: """Return the kernel shape of the pooling operator depending on the type of node @@ -36,7 +33,7 @@ def get_kernel_shape(self, attrs, ia_dimension_shape) -> list[int]: ) return kernel_shape - def get_layer_node_input_format( + def get_layer_node_user_format( self, kernel_shape: list[int], strides: list[int], @@ -99,7 +96,7 @@ def get_layer_node_input_format( return data - def generate_layer_node_for_pooling(self): + def generate_node(self): # Get the input and output activation shapes ia_dimension_shape, oa_dimension_shape = get_node_input_output_dimension_shapes(self.node, self.onnx_model) @@ -109,7 +106,7 @@ def generate_layer_node_for_pooling(self): dilations: list[int] = get_attribute_ints_with_name("dilations", attrs, default=[1, 1]) # type: ignore padding: list[int] = get_attribute_ints_with_name("pads", attrs, default=[0, 0, 0, 0]) # type: ignore - node_data: dict[str, Any] = self.get_layer_node_input_format( + node_data: dict[str, Any] = self.get_layer_node_user_format( kernel_shape, strides, dilations, diff --git a/stream/classes/io/onnx/reduce_1d.py b/stream/classes/io/onnx/reduce_1d.py new file mode 100644 index 0000000..6573ed4 --- /dev/null +++ b/stream/classes/io/onnx/reduce_1d.py @@ -0,0 +1,40 @@ +from typing import Any + +from stream.classes.io.onnx.operator_parser import OnnxComputeOperatorParser + + +class Reduce1DParser(OnnxComputeOperatorParser): + """Parses an operator that reduces the data in a single dimension. + e.g. sum over one row or max of a single row + """ + + def get_layer_node_user_format(self, input_shape: list[int], output_shape: list[int]): + """ + Generate the necessary dictionary items required for the LayerNode creation. + """ + # TODO check the output shape as well? 
+ assert len(self.get_node_predecessors()) == 1 + + data: dict[str, Any] = {} + data["id"] = self.node_id + data["name"] = self.node.name + data["operator_type"] = self.node.op_type + data["operand_source"] = self.get_operand_source_input_format() + data["operand_precision"] = self.get_operand_precision_input_format() + data["dimension_relations"] = [] + data["loop_sizes"] = input_shape + + match len(input_shape): + case 2: + data["equation"] = "O[k]+=I[k][c]*W[]" + data["loop_dims"] = ["K", "C"] + case 3: + data["equation"] = "O[b][k]+=I[b][k][c]*W[]" + data["loop_dims"] = ["B", "K", "C"] + case 4: + data["equation"] = "O[b][h][k]+=I[b][h][k][c]*W[]" + data["loop_dims"] = ["B", "H", "K", "C"] + case _: + raise NotImplementedError + + return data diff --git a/stream/classes/io/onnx/reshape.py b/stream/classes/io/onnx/reshape.py index 64de6fc..0f6e67b 100644 --- a/stream/classes/io/onnx/reshape.py +++ b/stream/classes/io/onnx/reshape.py @@ -7,9 +7,6 @@ class ReshapeParser(OnnxOperatorParser): """Parses an onnx reshape operator into a ReshapeNode.""" - def run(self): - return self.generate_node() - def generate_node(self): predecessors = self.get_node_predecessors() assert len(predecessors) == 1, "An ONNX reshape node with multiple input nodes is not supported" diff --git a/stream/classes/io/onnx/simd.py b/stream/classes/io/onnx/simd.py index 36465d1..7929544 100644 --- a/stream/classes/io/onnx/simd.py +++ b/stream/classes/io/onnx/simd.py @@ -1,29 +1,19 @@ from typing import Any -from zigzag.parser.onnx.utils import ( - get_node_input_output_dimension_shapes, -) -from zigzag.parser.workload_factory import LayerNodeFactory +from stream.classes.io.onnx.operator_parser import OnnxComputeOperatorParser -from stream.classes.io.onnx.operator_parser import OnnxOperatorParser -from stream.classes.workload.simd_node import SimdNode - -class SimdParser(OnnxOperatorParser): - """Parses an ONNXOperatorParser operator representing an elementwise operation (simd) into a SimdNode. +class SimdParser(OnnxComputeOperatorParser): + """Parses an ONNX operator representing an elementwise operation (simd) into a ComputationNode. e.g. Add, etc. """ - def run(self): - return self.generate_node() - - def get_layer_node_input_format(self, ia_shape: list[int], oa_shape: list[int]): + def get_layer_node_user_format(self, input_shape: list[int], output_shape: list[int]): """ Generate the necessary dictionary items required for the LayerNode creation. """ - assert ia_shape == oa_shape, "Input and output of simd operation should be identical." + assert input_shape == output_shape, "Input and output of simd operation should be identical." predecessors = self.get_node_predecessors() - assert len(predecessors) > 0, "Undefined behavior for Simd node with no inputs" # Nodes with only 1 input (e.g. 
Relu, Max, add/mul with constant, etc) have an empty `W` part in equation has_single_input = len(predecessors) == 1 @@ -31,10 +21,12 @@ def get_layer_node_input_format(self, ia_shape: list[int], oa_shape: list[int]): data["id"] = self.node_id data["name"] = self.node.name data["operator_type"] = self.node.op_type - data["loop_sizes"] = oa_shape + data["operand_source"] = self.get_operand_source_input_format() + data["operand_precision"] = self.get_operand_precision_input_format() data["dimension_relations"] = [] + data["loop_sizes"] = output_shape - match len(oa_shape): + match len(output_shape): case 1: data["equation"] = f"O[k]+=I[k]*W{'[]' if has_single_input else '[k]'}" data["loop_dims"] = ["K"] @@ -50,55 +42,4 @@ def get_layer_node_input_format(self, ia_shape: list[int], oa_shape: list[int]): case _: raise NotImplementedError - act_precision = self.get_activation_precision() - weight_precision = self.get_weight_precision() - intermediate_output_precision = self.get_intermediate_output_precision() - match len(predecessors): - case 1: - # One source operand, one constant - data["operand_source"] = {"W": self.node_id, "I": predecessors[0]} - data["operand_precision"] = { - "W": weight_precision, - "I": act_precision, - "O_final": act_precision, - "O": intermediate_output_precision, - } - case 2: - # Two source operands, none are constant (W and I can be swapped) - data["operand_source"] = {"W": predecessors[0], "I": predecessors[1]} - data["operand_precision"] = { - "W": act_precision, - "I": act_precision, - "O_final": act_precision, - "O": intermediate_output_precision, - } - - case _: - raise ValueError("No more than 2 layer predecessors expected") - return data - - def generate_node(self): - # Get the input and output activation shapes - ia_dimension_shape, oa_dimension_shape = get_node_input_output_dimension_shapes(self.node, self.onnx_model) - - node_data = self.get_layer_node_input_format(ia_dimension_shape, oa_dimension_shape) - node_factory = LayerNodeFactory(node_data, self.mapping_data) - node_attrs = node_factory.create_node_attr() - - # Override spatial mapping by the one defined in the core's dataflows - core_allocation = node_attrs.core_allocation - spatial_mapping = self.accelerator.get_spatial_mapping_from_core(core_allocation) - node_attrs.spatial_mapping = spatial_mapping - - node_input_names = list(self.node.input) - node_output_names = list(self.node.output) - - return SimdNode( - node_id=self.node_id, - node_name=self.node.name, - node_attr=node_attrs, - input_names=node_input_names, - output_names=node_output_names, - op_type=self.node.op_type, - ) diff --git a/stream/classes/io/onnx/softmax.py b/stream/classes/io/onnx/softmax.py index b19cfdf..b8bcfb4 100644 --- a/stream/classes/io/onnx/softmax.py +++ b/stream/classes/io/onnx/softmax.py @@ -1,103 +1,97 @@ -from typing import Any, Iterator - -from zigzag.parser.onnx.utils import ( - get_node_input_output_dimension_shapes, -) -from zigzag.parser.workload_factory import LayerNodeFactory - -from stream.classes.io.onnx.operator_parser import OnnxOperatorParser -from stream.classes.workload.computation_node import ComputationNode -from stream.classes.workload.simd_node import SimdNode - - -class SoftmaxParser(OnnxOperatorParser): - """Parses the Softmax operator""" - - def run(self) -> Iterator[ComputationNode]: - return self.generate_node() - - def get_layer_node_input_format(self, ia_shape: list[int], oa_shape: list[int]): +from typing import Any + +from zigzag.datatypes import Constants + +from 
stream.classes.io.onnx.operator_parser import OnnxComputeOperatorParser +from stream.classes.io.onnx.reduce_1d import Reduce1DParser + + +class SoftmaxParser(OnnxComputeOperatorParser): + """Parses the Softmax operator. Softmax works on full rows and can be computed as follows: + (1) m <- max(row[0:L]) + (2) e[0:L] <- exp(row[0:L] - m) + (3) s <- sum(e[0:L]) + (4) r[0:L] <- e[0:L] / s + It is split up in four distinct computation nodes. + """ + + def run(self): + for node in self.get_nodes(): + yield node + + def get_nodes(self): + node_types = ["max", "exp", "sum", "div"] + parser_classes = [Reduce1DParser, SoftmaxExpParser, Reduce1DParser, SoftmaxDivParser] + + node_ids = [self.node_id + i for i in range(4)] + parsers = [ + parser( + node_id=node_id, + node=self.node, + nodes_outputs=self.nodes_outputs, # TODO now, the node_outputs does not contain the current id + onnx_model=self.onnx_model, + mapping_data=self.mapping_data, + accelerator=self.accelerator, + ) + for parser, node_id in zip(parser_classes, node_ids) + ] + nodes = [next(parser.run()) for parser in parsers] + + # Give correct op type and name + for node, node_type in zip(nodes, node_types): + node.type = node_type + node.name += f"-{node_type}" + + # Override dependencies + op_I = Constants.LAYER_OP_I + op_W = Constants.LAYER_OP_W + id_max, id_exp, id_sum, _ = node_ids + prev_node_id = nodes[0].input_operand_source[op_I] # Node before max + nodes[1].input_operand_source = {op_I: prev_node_id, op_W: id_max} # Exp + nodes[2].input_operand_source = {op_I: id_exp, op_W: id_sum} # Sum + nodes[3].input_operand_source = {op_I: id_exp, op_W: id_sum} # Div + + return nodes + + +class SoftmaxExpParser(OnnxComputeOperatorParser): + """Parses a softmax node into a ComputationNode for the element-wise operation exp(row-m) where m is the max value + of the row. + """ + + def get_layer_node_user_format(self, input_shape: list[int], output_shape: list[int]): """ Generate the necessary dictionary items required for the LayerNode creation. """ - assert ia_shape == oa_shape, "Input and output of simd operation should be identical." - predecessors = self.get_node_predecessors() - assert len(predecessors) > 0, "Undefined behavior for Simd node with no inputs" - # Nodes with only 1 input (e.g. 
Relu, Max, add/mul with constant, etc) have an empty `W` part in equation
-        has_single_input = len(predecessors) == 1
 
         data: dict[str, Any] = {}
         data["id"] = self.node_id
         data["name"] = self.node.name
         data["operator_type"] = self.node.op_type
-        data["loop_sizes"] = oa_shape
+        data["operand_source"] = self.get_operand_source_input_format()
+        data["operand_precision"] = self.get_operand_precision_input_format()
         data["dimension_relations"] = []
+        data["loop_sizes"] = input_shape
 
-        match len(oa_shape):
-            case 1:
-                data["equation"] = f"O[k]+=I[k]*W{'[]' if has_single_input else '[k]'}"
-                data["loop_dims"] = ["K"]
+        # C is the row dimension; W must not include it, since there is only one max value per row
+        match len(input_shape):
             case 2:
-                data["equation"] = f"O[d][k]+=I[d][k]*W{'[]' if has_single_input else '[d][k]'}"
-                data["loop_dims"] = ["D", "K"]
+                data["equation"] = "O[k][c]+=I[k][c]+W[k]"
+                data["loop_dims"] = ["K", "C"]
             case 3:
-                data["equation"] = f"O[b][d][k]+=I[b][d][k]*W{'[]' if has_single_input else '[b][d][k]'}"
-                data["loop_dims"] = ["B", "D", "k"]
+                data["equation"] = "O[b][k][c]+=I[b][k][c]+W[b][k]"
+                data["loop_dims"] = ["B", "K", "C"]
             case 4:
-                data["equation"] = f"O[b][h][d][k]+=I[b][h][d][k]*W{'[]' if has_single_input else '[b][h][d][k]'}"
-                data["loop_dims"] = ["B", "H", "D", "k"]
+                data["equation"] = "O[b][h][k][c]+=I[b][h][k][c]+W[b][h][k]"
+                data["loop_dims"] = ["B", "H", "K", "C"]
             case _:
                 raise NotImplementedError
 
-        act_precision = self.get_activation_precision()
-        weight_precision = self.get_weight_precision()
-        intermediate_output_precision = self.get_intermediate_output_precision()
-        match len(predecessors):
-            case 1:
-                # One source operand, one constant
-                data["operand_source"] = {"W": self.node_id, "I": predecessors[0]}
-                data["operand_precision"] = {
-                    "W": weight_precision,
-                    "I": act_precision,
-                    "O_final": act_precision,
-                    "O": intermediate_output_precision,
-                }
-            case 2:
-                # Two source operands, none are constant (W and I can be swapped)
-                data["operand_source"] = {"W": predecessors[0], "I": predecessors[1]}
-                data["operand_precision"] = {
-                    "W": act_precision,
-                    "I": act_precision,
-                    "O_final": act_precision,
-                    "O": intermediate_output_precision,
-                }
-
-            case _:
-                raise ValueError("No more than 2 layer predecessors expected")
-
         return data
 
-    def generate_node(self):
-        # Get the input and output activation shapes
-        ia_dimension_shape, oa_dimension_shape = get_node_input_output_dimension_shapes(self.node, self.onnx_model)
-
-        node_data = self.get_layer_node_input_format(ia_dimension_shape, oa_dimension_shape)
-        node_factory = LayerNodeFactory(node_data, self.mapping_data)
-        node_attrs = node_factory.create_node_attr()
-
-        # Override spatial mapping by the one defined in the core's dataflows
-        core_allocation = node_attrs.core_allocation
-        spatial_mapping = self.accelerator.get_spatial_mapping_from_core(core_allocation)
-        node_attrs.spatial_mapping = spatial_mapping
-
-        node_input_names = list(self.node.input)
-        node_output_names = list(self.node.output)
-
-        return SimdNode(
-            node_id=self.node_id,
-            node_name=self.node.name,
-            node_attr=node_attrs,
-            input_names=node_input_names,
-            output_names=node_output_names,
-            op_type=self.node.op_type,
-        )
+
+class SoftmaxDivParser(SoftmaxExpParser):
+    """Parses a softmax node into a ComputationNode for the element-wise operation div(row, s) where s is the sum value
+ The equation is identical to the one from SoftmaxExpParser + """ diff --git a/stream/classes/io/onnx/transpose.py b/stream/classes/io/onnx/transpose.py index 6e19c4f..f1d2d15 100644 --- a/stream/classes/io/onnx/transpose.py +++ b/stream/classes/io/onnx/transpose.py @@ -5,9 +5,6 @@ class TransposeParser(OnnxOperatorParser): """Parses an onnx reshape operator into a TransposeNode.""" - def run(self): - return self.generate_layer_node_for_transpose() - def generate_layer_node_for_transpose(self): predecessors = self.get_node_predecessors() assert len(predecessors) == 1, "An ONNX transpose node with multiple input nodes is not supported" diff --git a/stream/classes/workload/node.py b/stream/classes/workload/node.py index 524448f..38a3ce3 100644 --- a/stream/classes/workload/node.py +++ b/stream/classes/workload/node.py @@ -139,3 +139,6 @@ def has_end(self) -> bool: def set_offchip_bandwidth(self, offchip_bw: float): self.offchip_bw = offchip_bw + + def __repr__(self): + return self.name diff --git a/stream/classes/workload/simd_node.py b/stream/classes/workload/simd_node.py index 4422395..8943694 100644 --- a/stream/classes/workload/simd_node.py +++ b/stream/classes/workload/simd_node.py @@ -1,23 +1,23 @@ -from zigzag.workload.layer_node import LayerNodeAttributes +# from zigzag.workload.layer_node import LayerNodeAttributes -from stream.classes.workload.computation_node import ComputationNode +# from stream.classes.workload.computation_node import ComputationNode -class SimdNode(ComputationNode): - def __init__( - self, - node_id: int, - node_name: str, - node_attr: LayerNodeAttributes, - input_names: list[str], - output_names: list[str], - op_type: str, - ): - super().__init__( - node_id=node_id, - node_name=node_name, - node_attr=node_attr, - input_names=input_names, - output_names=output_names, - op_type=op_type, - ) +# class SimdNode(ComputationNode): +# def __init__( +# self, +# node_id: int, +# node_name: str, +# node_attr: LayerNodeAttributes, +# input_names: list[str], +# output_names: list[str], +# op_type: str, +# ): +# super().__init__( +# node_id=node_id, +# node_name=node_name, +# node_attr=node_attr, +# input_names=input_names, +# output_names=output_names, +# op_type=op_type, +# ) From c05564d513dadf28c3c8f85f29960b3ff1e50ee5 Mon Sep 17 00:00:00 2001 From: RobinGeens Date: Thu, 5 Sep 2024 09:08:31 +0200 Subject: [PATCH 3/7] parse softmax as 4 CNs --- main_stream.py | 1 + stream/classes/io/onnx/lpnormalization.py | 1 + stream/classes/io/onnx/model.py | 10 +-- stream/classes/io/onnx/softmax.py | 66 +++++++++++++++---- stream/classes/io/onnx/transpose.py | 2 +- .../classes/stages/DetermineHintLoopsStage.py | 5 +- .../stages/GenerateCNWorkloadHybridStage.py | 29 +++++--- .../classes/stages/IntraCoreMappingStage.py | 1 - stream/classes/workload/dummy_node.py | 4 +- stream/classes/workload/node.py | 20 +++--- stream/classes/workload/onnx_workload.py | 3 +- 11 files changed, 97 insertions(+), 45 deletions(-) diff --git a/main_stream.py b/main_stream.py index a688b0b..5365f24 100644 --- a/main_stream.py +++ b/main_stream.py @@ -25,6 +25,7 @@ ################################INPUTS################################ accelerator = "stream/inputs/examples/hardware/tpu_like_quad_core.yaml" workload_path = "stream/inputs/examples/workload/resnet18.onnx" +workload_path = "../transformer-dse/outputs/onnx/OPT-125M_B=8_SINGLELAYER_W4A16_prefill.onnx" mapping_path = "stream/inputs/examples/mapping/tpu_like_quad_core.yaml" CN_define_mode = 1 # manually define outer-CN loops hint_loops = [("OY", 
"all")] diff --git a/stream/classes/io/onnx/lpnormalization.py b/stream/classes/io/onnx/lpnormalization.py index 24a9863..5a0cf5a 100644 --- a/stream/classes/io/onnx/lpnormalization.py +++ b/stream/classes/io/onnx/lpnormalization.py @@ -12,6 +12,7 @@ def __init__(self, node_id, node, nodes_outputs, mapping, onnx_model) -> None: def generate_node(self): # Get the predecessors of this node + # TODO use superclass' `get_node_predecessors` predecessors = [] for node_input in self.node.input: for n in self.nodes_outputs: diff --git a/stream/classes/io/onnx/model.py b/stream/classes/io/onnx/model.py index 390f847..7552402 100644 --- a/stream/classes/io/onnx/model.py +++ b/stream/classes/io/onnx/model.py @@ -18,6 +18,7 @@ from stream.classes.io.onnx.pooling import PoolingParser from stream.classes.io.onnx.reshape import ReshapeParser from stream.classes.io.onnx.simd import SimdParser +from stream.classes.io.onnx.softmax import SoftmaxParser from stream.classes.io.onnx.transpose import TransposeParser from stream.classes.workload.onnx_workload import ONNXWorkload @@ -37,13 +38,14 @@ class ONNXModelParser: "AveragePool": PoolingParser, "GlobalMaxPool": PoolingParser, "GlobalAveragePool": PoolingParser, - "Reshape": ReshapeParser, - "Flatten": FlattenParser, - "Gather": GatherParser, "Add": SimdParser, "Mul": SimdParser, - "Transpose": TransposeParser, + "Softmax": SoftmaxParser, "LpNormalization": LpNormalizationParser, + "Gather": GatherParser, + "Transpose": TransposeParser, + "Reshape": ReshapeParser, + "Flatten": FlattenParser, "Concat": ConcatParser, } diff --git a/stream/classes/io/onnx/softmax.py b/stream/classes/io/onnx/softmax.py index b8bcfb4..34a8d2d 100644 --- a/stream/classes/io/onnx/softmax.py +++ b/stream/classes/io/onnx/softmax.py @@ -15,12 +15,20 @@ class SoftmaxParser(OnnxComputeOperatorParser): It is split up in four distinct computation nodes. """ + NODE_TYPES = ["max", "exp", "sum", "div"] + def run(self): for node in self.get_nodes(): yield node - def get_nodes(self): - node_types = ["max", "exp", "sum", "div"] + def get_layer_node_user_format(self, input_shape: list[int], output_shape: list[int]) -> dict[str, Any]: + """Not used for this class, but abstract base class requires instantiation anyway""" + ... + + def parse_into_subnodes(self): + """Prase the base ONNX node multiple times into the different Computation Nodes. 
+ The CNs that result from this operation have some incorrect properties regarding the graph structure + """ parser_classes = [Reduce1DParser, SoftmaxExpParser, Reduce1DParser, SoftmaxDivParser] node_ids = [self.node_id + i for i in range(4)] @@ -35,23 +43,55 @@ def get_nodes(self): ) for parser, node_id in zip(parser_classes, node_ids) ] - nodes = [next(parser.run()) for parser in parsers] + self.nodes = tuple(next(parser.run()) for parser in parsers) + def get_nodes(self): + # Parse initial CNs + self.parse_into_subnodes() # Give correct op type and name - for node, node_type in zip(nodes, node_types): + self.set_nodes_name_and_type() + # Override dependencies + self.correct_nodes_operand_source() + # self.correct_nodes_inputs_outputs() + + return self.nodes + + def set_nodes_name_and_type(self): + """Set the name and operator type of all Computation Nodes that stem from the base ONNX node""" + for node, node_type in zip(self.nodes, SoftmaxParser.NODE_TYPES): node.type = node_type - node.name += f"-{node_type}" + node.name += f"-{node_type}/" - # Override dependencies + def correct_nodes_operand_source(self): + """Correct the `input_operand_source` and `constant_operands` of all Computation Nodes that stem from the base + ONNX node""" op_I = Constants.LAYER_OP_I op_W = Constants.LAYER_OP_W - id_max, id_exp, id_sum, _ = node_ids - prev_node_id = nodes[0].input_operand_source[op_I] # Node before max - nodes[1].input_operand_source = {op_I: prev_node_id, op_W: id_max} # Exp - nodes[2].input_operand_source = {op_I: id_exp, op_W: id_sum} # Sum - nodes[3].input_operand_source = {op_I: id_exp, op_W: id_sum} # Div - - return nodes + node_max, node_exp, node_sum, node_div = self.nodes + id_max, id_exp, id_sum, _ = [node.id for node in self.nodes] + prev_node_id = node_max.input_operand_source[op_I] # Node before Softmax + + # Default after generation: input_operand_source = {op_I: prev_node_id} and constant_operands = [W] + node_exp.input_operand_source = {op_I: prev_node_id, op_W: id_max} + node_exp.constant_operands = [] + node_sum.input_operand_source = {op_I: id_exp} + node_div.input_operand_source = {op_I: id_exp, op_W: id_sum} + node_div.constant_operands = [] + + def correct_nodes_inputs_outputs(self): + """Correct the `node_inputs` and `node_outputs` of all Computation Nodes that stem from the base + ONNX node""" + node_max, node_exp, node_sum, node_div = self.nodes + prev_node_name = node_max.input_names[0] # Node before Softmax + next_node_name = node_max.output_names[0] # Node after Softmax + + node_max.output_names = [node_exp.name] + node_exp.input_names = [node_max.name, prev_node_name] + node_exp.output_names = [node_div.name, node_sum.name] + node_sum.input_names = [node_exp.name] + node_sum.output_names = [node_div.name] + node_div.input_names = [node_exp.name, node_sum.name] + node_div.output_names = [next_node_name] class SoftmaxExpParser(OnnxComputeOperatorParser): diff --git a/stream/classes/io/onnx/transpose.py b/stream/classes/io/onnx/transpose.py index f1d2d15..1f32097 100644 --- a/stream/classes/io/onnx/transpose.py +++ b/stream/classes/io/onnx/transpose.py @@ -5,7 +5,7 @@ class TransposeParser(OnnxOperatorParser): """Parses an onnx reshape operator into a TransposeNode.""" - def generate_layer_node_for_transpose(self): + def generate_node(self): predecessors = self.get_node_predecessors() assert len(predecessors) == 1, "An ONNX transpose node with multiple input nodes is not supported" predecessor = predecessors.pop() diff --git 
a/stream/classes/stages/DetermineHintLoopsStage.py b/stream/classes/stages/DetermineHintLoopsStage.py index 861806e..dc6ca3e 100644 --- a/stream/classes/stages/DetermineHintLoopsStage.py +++ b/stream/classes/stages/DetermineHintLoopsStage.py @@ -1,7 +1,7 @@ import logging import numpy as np -from onnx import helper, numpy_helper +from onnx import ModelProto, helper, numpy_helper from zigzag.stages.Stage import Stage from stream.classes.workload.computation_node import ComputationNode @@ -70,7 +70,7 @@ def get_nb_computation_nodes(self, stack): return nb_computation_nodes @staticmethod - def split_operator(model, node_name, num_splits): + def split_operator(model: ModelProto, node_name: str, num_splits: int): """ Replaces an ONNX Conv or Gemm operator in an ONNX model with a sequence of Conv operators with smaller kernel sizes @@ -117,6 +117,7 @@ def split_operator(model, node_name, num_splits): # Get the shape of the weight of the operator weight_input_shape = None + assert original_node is not None original_weight_name = original_node.input[node_weight_input_idx] for original_weight in graph.initializer: if original_weight.name == original_weight_name: diff --git a/stream/classes/stages/GenerateCNWorkloadHybridStage.py b/stream/classes/stages/GenerateCNWorkloadHybridStage.py index c1d5a4f..951c666 100644 --- a/stream/classes/stages/GenerateCNWorkloadHybridStage.py +++ b/stream/classes/stages/GenerateCNWorkloadHybridStage.py @@ -310,8 +310,8 @@ def get_finer_nodes( # Create the computation node object with the computed ranges of the loop dimensions node_name = original_node.name - node_input_names = original_node.input_names - node_output_names = original_node.output_names + node_input_names = None # original_node.input_names # TODO restore + node_output_names = None # original_node.output_names # If all the output irrelevant loops are at a max, this is producing a final output, so set a flag original_node_output_ir_dims = original_node.loop_relevancy_info.get_ir_layer_dims( Constants.OUTPUT_LAYER_OP @@ -429,10 +429,21 @@ def bounding_box_generator( bounds = self.get_bounding_box_dimensions(producer, consumer, dimensions, inclusive_ranges) yield (i, bounds, None) - def get_nb_input_dimensions(self, node: ComputationNode): - """Return the number of input dimensions this node has. We take the first non-constant input operand.""" - input_operand = list(set(node.input_operands) - set(node.constant_operands))[0] - dims = node.operand_dimensionality_order[input_operand] + def get_nb_input_dimensions(self, node: ComputationNode, operand: LayerOperand): + """Return the number of input dimensions this node has. 
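+        The dimensionality is that of the given `operand`.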
+ # We take the first non-constant input operand.""" + dims = node.operand_dimensionality_order[operand] + + # try: + # input_operand = ( + # Constants.LAYER_OP_I if Constants.LAYER_OP_I not in node.constant_operands else Constants.LAYER_OP_W + # ) + # dims = node.operand_dimensionality_order[Constants.LAYER_OP_I] + # except KeyError: + # # This is dead code since input operands can only be I or W + # input_operand = list(set(node.input_operands) - set(node.constant_operands))[0] + # dims = node.operand_dimensionality_order[input_operand] + if LayerDim("G") in dims and (LayerDim("C") in dims or LayerDim("K") in dims): # because later the generator will merge them into a single channel dim return len(dims) - 1 @@ -447,7 +458,7 @@ def build_rtree( """ props = index.Property() # We assume all nodes in 'nodes' have identical dimensions - props.dimension = self.get_nb_input_dimensions(nodes[0]) + props.dimension = self.get_nb_input_dimensions(nodes[0], operand) rtree = index.Index(self.bounding_box_generator(producer, consumer, nodes, operand), properties=props) return rtree @@ -523,10 +534,10 @@ def get_inter_edges_rtree( dependent_input_operands: list[LayerOperand] = [] for operand, parent_node_id in consumer.input_operand_source.items(): parent_node = self.workload.get_node_with_id(parent_node_id) - assert isinstance(parent_node, Node) if parent_node == producer: dependent_input_operands.append(operand) - elif parent_node: + elif not isinstance(parent_node, ComputationNode): + # Propagate to the first parent CN non_dummy_parents = self.get_non_type_predecessors(parent_node, [DummyNode]) if producer in non_dummy_parents: dependent_input_operands.append(operand) diff --git a/stream/classes/stages/IntraCoreMappingStage.py b/stream/classes/stages/IntraCoreMappingStage.py index 306495b..8efcb7d 100644 --- a/stream/classes/stages/IntraCoreMappingStage.py +++ b/stream/classes/stages/IntraCoreMappingStage.py @@ -133,7 +133,6 @@ def run(self): cme = self.node_hw_performances[equal_node][equal_core] self.node_hw_performances[node][core] = cme self.save_node_hw_performances() - # else: except StopIteration or KeyError: # Compute this (node, core) combination's optimal mapping # Set the node's core allocation to the core_id we want to extract hw performance for diff --git a/stream/classes/workload/dummy_node.py b/stream/classes/workload/dummy_node.py index 3e63111..de5afb7 100644 --- a/stream/classes/workload/dummy_node.py +++ b/stream/classes/workload/dummy_node.py @@ -33,6 +33,6 @@ def __init__( offchip_energy=0, runtime=0, possible_core_allocation=[-1], - input_names=input_names, - output_names=output_names, + input_names=None, # input_names, + output_names=None, # output_names, ) diff --git a/stream/classes/workload/node.py b/stream/classes/workload/node.py index 38a3ce3..36c601c 100644 --- a/stream/classes/workload/node.py +++ b/stream/classes/workload/node.py @@ -29,7 +29,7 @@ def __init__( runtime (int): The runtime of this Node. possible_core_allocation (int): The core id on which this Node can be mapped. inputs: (List[str]): The names of the input tensors of this node - outpus: (List[str]): The names of the output tensors of this node. + outputs: (List[str]): The names of the output tensors of this node. 
chosen_core_allocation: The final core allocation of this node """ super().__init__(node_id, node_name) @@ -49,12 +49,10 @@ def __init__( # number of data (in bits) only this node produces (not produced by any other node) self.data_produced_unique = 0 - self.input_names = input_names - self.output_names = output_names - self.offchip_bw = None # will be set together with the core allocation - - def __str__(self): - return f"{self.type.capitalize()}Node()" + # self.input_names = input_names # TODO restore + # self.output_names = output_names + # will be set together with the core allocation + self.offchip_bw = None def get_total_energy(self) -> float: """Get the total energy of running this node, including off-chip energy.""" @@ -80,9 +78,6 @@ def get_end(self): """Get the end time in cycles of this node.""" return self.end - # def get_possible_core_allocation(self): - # return self.possible_core_allocation - def set_onchip_energy(self, energy: float): """Set the on-chip energy of running this node. @@ -108,7 +103,7 @@ def set_runtime(self, runtime: int): self.runtime = runtime def set_start(self, start: int): - """Set the start time in cyles of this node. + """Set the start time in cycles of this node. Args: start (int): start time in cycles @@ -140,5 +135,8 @@ def has_end(self) -> bool: def set_offchip_bandwidth(self, offchip_bw: float): self.offchip_bw = offchip_bw + def __str__(self): + return self.name + def __repr__(self): return self.name diff --git a/stream/classes/workload/onnx_workload.py b/stream/classes/workload/onnx_workload.py index 7830bbe..83c38be 100644 --- a/stream/classes/workload/onnx_workload.py +++ b/stream/classes/workload/onnx_workload.py @@ -24,10 +24,9 @@ def add(self, node_id: int, node_obj: Node): self.add_node(node_obj) edges: list[tuple[Node, Node]] = [] - for op, parent_id in node_obj.input_operand_source.items(): + for parent_id in node_obj.input_operand_source.values(): parent_node_obj = self.node_id_to_obj[parent_id] edges.append((parent_node_obj, node_obj)) - node_obj.input_operand_source[op] = parent_id self.add_edges_from(edges) From 9935ef7f49df677c73deff88a113412c9dfb0072 Mon Sep 17 00:00:00 2001 From: RobinGeens Date: Thu, 5 Sep 2024 11:58:34 +0200 Subject: [PATCH 4/7] remove input_names and output_names attribute --- stream/classes/io/onnx/concat.py | 5 ---- stream/classes/io/onnx/conv.py | 6 ---- stream/classes/io/onnx/default.py | 4 --- stream/classes/io/onnx/elementwise.py | 8 ++++-- stream/classes/io/onnx/flatten.py | 4 --- stream/classes/io/onnx/gather.py | 5 ---- stream/classes/io/onnx/lpnormalization.py | 10 +++---- stream/classes/io/onnx/operator_parser.py | 5 ---- stream/classes/io/onnx/pooling.py | 6 ---- stream/classes/io/onnx/reshape.py | 4 --- stream/classes/io/onnx/softmax.py | 28 +++++++++---------- stream/classes/io/onnx/transpose.py | 4 --- .../stages/GenerateCNWorkloadHybridStage.py | 4 --- stream/classes/stages/LayerSplittingStage.py | 26 +++++++---------- stream/classes/workload/communication_node.py | 4 +-- stream/classes/workload/computation_node.py | 4 --- stream/classes/workload/concat_node.py | 6 ---- stream/classes/workload/dnn_workload.py | 17 +---------- stream/classes/workload/dummy_node.py | 4 --- stream/classes/workload/elementwise_node.py | 7 +++-- stream/classes/workload/flatten_node.py | 4 --- stream/classes/workload/gather_node.py | 6 ---- .../classes/workload/lpnormalization_node.py | 9 +++--- stream/classes/workload/node.py | 4 --- stream/classes/workload/pooling_node.py | 5 ---- 
stream/classes/workload/reshape_node.py | 10 ++----- stream/classes/workload/simd_node.py | 23 --------------- stream/classes/workload/transpose_node.py | 12 -------- 28 files changed, 46 insertions(+), 188 deletions(-) delete mode 100644 stream/classes/workload/simd_node.py diff --git a/stream/classes/io/onnx/concat.py b/stream/classes/io/onnx/concat.py index 26c6b78..2810b8a 100644 --- a/stream/classes/io/onnx/concat.py +++ b/stream/classes/io/onnx/concat.py @@ -11,7 +11,6 @@ def generate_node(self): predecessors = self.get_node_predecessors() axis = self.get_axis_value() - output_names = [self.node.output[0]] input_1, input_2 = self.node.input[0], self.node.input[1] @@ -22,7 +21,6 @@ def generate_node(self): constant_shape = tuple(constant_tensor.shape) variable_input_first = True - input_names = [input_2] except ValueError: # Try second one as constant input constant_tensor = get_onnx_tensor_type(input_2, self.onnx_model) if constant_tensor.category != OnnxTensorCategory.HIDDEN or "constant" not in input_2.lower(): @@ -30,7 +28,6 @@ def generate_node(self): constant_shape = tuple(constant_tensor.shape) variable_input_first = True - input_names = [input_1] return ConcatNode( node_id=self.node_id, @@ -39,8 +36,6 @@ def generate_node(self): axis=axis, constant_shape=constant_shape, variable_input_first=variable_input_first, - input_names=input_names, - output_names=output_names, ) def get_axis_value(self): diff --git a/stream/classes/io/onnx/conv.py b/stream/classes/io/onnx/conv.py index 0015a0a..d87b0b3 100644 --- a/stream/classes/io/onnx/conv.py +++ b/stream/classes/io/onnx/conv.py @@ -139,16 +139,10 @@ def generate_node(self): spatial_mapping = self.accelerator.get_spatial_mapping_from_core(core_allocation) node_attrs.spatial_mapping = spatial_mapping - # Get the node's input(s) and output(s) tensor names - node_input_names = list(self.node.input) - node_output_names = list(self.node.output) - return ComputationNode( node_id=self.node_id, node_name=self.node.name, node_attr=node_attrs, - input_names=node_input_names, - output_names=node_output_names, op_type=ConvParser.OP_TYPE, operand_tensor_reshape=None, ) diff --git a/stream/classes/io/onnx/default.py b/stream/classes/io/onnx/default.py index 99b85a0..d9cfcdc 100644 --- a/stream/classes/io/onnx/default.py +++ b/stream/classes/io/onnx/default.py @@ -7,14 +7,10 @@ class DefaultNodeParser(OnnxOperatorParser): def generate_node(self): predecessors = self.get_node_predecessors() - input_names = list(self.node.input) - output_names = list(self.node.output) return DummyNode( node_id=self.node_id, node_name=self.node.name, predecessors=predecessors, - input_names=input_names, - output_names=output_names, op_type=self.node.op_type.lower(), ) diff --git a/stream/classes/io/onnx/elementwise.py b/stream/classes/io/onnx/elementwise.py index 47f0b91..147857c 100644 --- a/stream/classes/io/onnx/elementwise.py +++ b/stream/classes/io/onnx/elementwise.py @@ -23,8 +23,10 @@ def generate_node(self): # Get the names of the two inputs assert len(self.node.input) == 2, f"Elementwise node has more than two inputs: {self.node.input}" - input_names = [self.node.input[0], self.node.input[1]] # Get the output name - output_names = [self.node.output[0]] - node_obj = ElementwiseNode(self.type, self.name, predecessors, input_names, output_names) + node_obj = ElementwiseNode( + node_id=self.node_id, + node_name=self.name, + predecessor=predecessors, + ) return node_obj diff --git a/stream/classes/io/onnx/flatten.py b/stream/classes/io/onnx/flatten.py index 
0eee715..8a6cd3a 100644
--- a/stream/classes/io/onnx/flatten.py
+++ b/stream/classes/io/onnx/flatten.py
@@ -15,13 +15,9 @@ def generate_node(self):
         attrs = self.node.attribute
         # Get the axis which indicates how to flatten the input tensor
         axis: int | None = get_attribute_ints_with_name("axis", attrs, default=None)  # type: ignore
-        input_names = [self.node.input[0]]
-        output_names = [self.node.output[0]]
         return FlattenNode(
             node_id=self.node_id,
             node_name=self.node.name,
             predecessor=predecessor,
             axis=axis,
-            input_names=input_names,
-            output_names=output_names,
         )
diff --git a/stream/classes/io/onnx/gather.py b/stream/classes/io/onnx/gather.py
index a2e6f89..ac48e76 100644
--- a/stream/classes/io/onnx/gather.py
+++ b/stream/classes/io/onnx/gather.py
@@ -12,17 +12,12 @@ def generate_node(self):
         axis = self.get_axis_value()
         indices = self.get_indices_value()
 
-        input_names = [self.node.input[0]]
-        output_names = [self.node.output[0]]
-
         return GatherNode(
             node_id=self.node_id,
             node_name=self.node.name,
             predecessors=predecessors,
             gather_axis=axis,
             gather_indices=indices,
-            input_names=input_names,
-            output_names=output_names,
         )
 
     def get_indices_value(self):
diff --git a/stream/classes/io/onnx/lpnormalization.py b/stream/classes/io/onnx/lpnormalization.py
index 5a0cf5a..4d6dbc4 100644
--- a/stream/classes/io/onnx/lpnormalization.py
+++ b/stream/classes/io/onnx/lpnormalization.py
@@ -19,9 +19,9 @@ def generate_node(self):
         if node_input in self.nodes_outputs[n]:
                 predecessors.append(n)
 
-        # Get the input names of the operator
-        input_names = [self.node.input[0]]
-        # Get the output names of the operator
-        output_names = [self.node.output[0]]
-        node_obj = LpNormalizationNode(predecessors, input_names, output_names)
+        node_obj = LpNormalizationNode(
+            node_id=self.node_id,
+            node_name=self.node.name,
+            predecessor=predecessors[0],
+        )
         return node_obj
diff --git a/stream/classes/io/onnx/operator_parser.py b/stream/classes/io/onnx/operator_parser.py
index 179876d..f601d81 100644
--- a/stream/classes/io/onnx/operator_parser.py
+++ b/stream/classes/io/onnx/operator_parser.py
@@ -97,14 +97,9 @@ def generate_node(self):
         spatial_mapping = self.accelerator.get_spatial_mapping_from_core(core_allocation)
         node_attrs.spatial_mapping = spatial_mapping
 
-        node_input_names = list(self.node.input)
-        node_output_names = list(self.node.output)
-
         return ComputationNode(
             node_id=self.node_id,
             node_name=self.node.name,
             node_attr=node_attrs,
-            input_names=node_input_names,
-            output_names=node_output_names,
             op_type=self.node.op_type,
         )
diff --git a/stream/classes/io/onnx/pooling.py b/stream/classes/io/onnx/pooling.py
index 3bec703..bbf1c4d 100644
--- a/stream/classes/io/onnx/pooling.py
+++ b/stream/classes/io/onnx/pooling.py
@@ -122,14 +122,8 @@ def generate_node(self):
         spatial_mapping = self.accelerator.get_spatial_mapping_from_core(core_allocation)
         node_attrs.spatial_mapping = spatial_mapping
 
-        # Get the node's input(s) and output(s) tensor names
-        node_input_names = list(self.node.input)
-        node_output_names = list(self.node.output)
-
         return PoolingNode(
             node_id=self.node_id,
             node_name=self.node.name,
             node_attr=node_attrs,
-            input_names=node_input_names,
-            output_names=node_output_names,
         )
diff --git a/stream/classes/io/onnx/reshape.py b/stream/classes/io/onnx/reshape.py
index 0f6e67b..88f089f 100644
--- a/stream/classes/io/onnx/reshape.py
+++ b/stream/classes/io/onnx/reshape.py
@@ -14,14 +14,10 @@ def generate_node(self):
 
         # The operator shape is saved as the second input, so we need to get the input's dimension shape
         shape =
tuple(get_node_input_output_dimension_shapes(self.node, self.onnx_model)[1]) - input_names = [self.node.input[0]] - output_names = [self.node.output[0]] return ReshapeNode( node_id=self.node_id, node_name=self.node.name, predecessor=predecessor, shape=shape, - input_names=input_names, - output_names=output_names, ) diff --git a/stream/classes/io/onnx/softmax.py b/stream/classes/io/onnx/softmax.py index 34a8d2d..1a429b7 100644 --- a/stream/classes/io/onnx/softmax.py +++ b/stream/classes/io/onnx/softmax.py @@ -78,20 +78,20 @@ def correct_nodes_operand_source(self): node_div.input_operand_source = {op_I: id_exp, op_W: id_sum} node_div.constant_operands = [] - def correct_nodes_inputs_outputs(self): - """Correct the `node_inputs` and `node_outputs` of all Computation Nodes that stem from the base - ONNX node""" - node_max, node_exp, node_sum, node_div = self.nodes - prev_node_name = node_max.input_names[0] # Node before Softmax - next_node_name = node_max.output_names[0] # Node after Softmax - - node_max.output_names = [node_exp.name] - node_exp.input_names = [node_max.name, prev_node_name] - node_exp.output_names = [node_div.name, node_sum.name] - node_sum.input_names = [node_exp.name] - node_sum.output_names = [node_div.name] - node_div.input_names = [node_exp.name, node_sum.name] - node_div.output_names = [next_node_name] + # def correct_nodes_inputs_outputs(self): + # """Correct the `node_inputs` and `node_outputs` of all Computation Nodes that stem from the base + # ONNX node""" + # node_max, node_exp, node_sum, node_div = self.nodes + # prev_node_name = node_max.input_names[0] # Node before Softmax + # next_node_name = node_max.output_names[0] # Node after Softmax + + # node_max.output_names = [node_exp.name] + # node_exp.input_names = [node_max.name, prev_node_name] + # node_exp.output_names = [node_div.name, node_sum.name] + # node_sum.input_names = [node_exp.name] + # node_sum.output_names = [node_div.name] + # node_div.input_names = [node_exp.name, node_sum.name] + # node_div.output_names = [next_node_name] class SoftmaxExpParser(OnnxComputeOperatorParser): diff --git a/stream/classes/io/onnx/transpose.py b/stream/classes/io/onnx/transpose.py index 1f32097..0012ae8 100644 --- a/stream/classes/io/onnx/transpose.py +++ b/stream/classes/io/onnx/transpose.py @@ -11,15 +11,11 @@ def generate_node(self): predecessor = predecessors.pop() permute_axes = self.get_permute_indices() - input_names = [self.node.input[0]] - output_names = [self.node.output[0]] return TransposeNode( node_id=self.node_id, node_name=self.node.name, predecessor=predecessor, - input_names=input_names, - output_names=output_names, permute_axes=permute_axes, ) diff --git a/stream/classes/stages/GenerateCNWorkloadHybridStage.py b/stream/classes/stages/GenerateCNWorkloadHybridStage.py index 951c666..635a8fd 100644 --- a/stream/classes/stages/GenerateCNWorkloadHybridStage.py +++ b/stream/classes/stages/GenerateCNWorkloadHybridStage.py @@ -310,8 +310,6 @@ def get_finer_nodes( # Create the computation node object with the computed ranges of the loop dimensions node_name = original_node.name - node_input_names = None # original_node.input_names # TODO restore - node_output_names = None # original_node.output_names # If all the output irrelevant loops are at a max, this is producing a final output, so set a flag original_node_output_ir_dims = original_node.loop_relevancy_info.get_ir_layer_dims( Constants.OUTPUT_LAYER_OP @@ -326,8 +324,6 @@ def get_finer_nodes( sub_id=n, node_name=node_name, node_attr=finer_node_attrs_copy, - 
input_names=node_input_names, - output_names=node_output_names, op_type=original_node.type, produces_final_output=produces_final_output, group_id=group_id, diff --git a/stream/classes/stages/LayerSplittingStage.py b/stream/classes/stages/LayerSplittingStage.py index efc5b83..945f3ee 100644 --- a/stream/classes/stages/LayerSplittingStage.py +++ b/stream/classes/stages/LayerSplittingStage.py @@ -94,17 +94,6 @@ def __init__( def run(self): for workload_node in self.workload.node_list: if workload_node.type == "conv" or workload_node.type == "gemm": - try: - corresponding_onnx_operator = next( - (n for n in self.onnx_model.graph.node if n.name == workload_node.name) - ) - except StopIteration: - input_names = workload_node.input_names - corresponding_onnx_operator = next( - (n for n in self.onnx_model.graph.node if n.input == input_names) - ) - operator_name = corresponding_onnx_operator.name - if not ( LayerOperand("W") in workload_node.constant_operands or LayerOperand("B") in workload_node.constant_operands @@ -118,10 +107,15 @@ def run(self): split_factor = self.split_factors[workload_node] if not split_factor > 1: continue - ( - split_node_names, - concat_name, - ) = self.split_operator(self.onnx_model, operator_name, split_factor) + + assert any(n.name == workload_node.name for n in self.onnx_model.graph.node), ( + "Some conv or gemm node has been initialized with a name other than the corresponding " + "ONNX node name" + ) + + split_node_names, concat_name = self.split_operator( + self.onnx_model, workload_node.name, split_factor + ) logger.info( f"Split {workload_node.name} into {split_factor} Conv nodes: {split_node_names} and Concat " @@ -142,7 +136,7 @@ def run(self): yield cme, extra_info @staticmethod - def split_operator(model: ModelProto, node_name: str, num_splits: int): + def split_operator(model: ModelProto, node_name: str, num_splits: int) -> tuple[tuple[str, ...], str]: """ Replaces an ONNX Conv or Gemm operator in an ONNX model with a sequence of Conv operators with smaller kernel sizes diff --git a/stream/classes/workload/communication_node.py b/stream/classes/workload/communication_node.py index 0e57fca..9a5ecf2 100644 --- a/stream/classes/workload/communication_node.py +++ b/stream/classes/workload/communication_node.py @@ -5,7 +5,7 @@ class CommunicationNode(Node): """Class that represents a communcation node which is inserted between two nodes but doesn't have any computational information.""" - def __init__(self, communication_core_id, input_names, output_names) -> None: + def __init__(self, communication_core_id: int) -> None: """Initialize the communication node. This initializes the energy and runtime to 0 and identifies on which core id the communcation bus resides. 
@@ -19,6 +19,4 @@ def __init__(self, communication_core_id, input_names, output_names) -> None: energy=0, runtime=0, core_allocation=communication_core_id, - input_names=input_names, - output_names=output_names, ) diff --git a/stream/classes/workload/computation_node.py b/stream/classes/workload/computation_node.py index 4ef50bf..49d05c6 100644 --- a/stream/classes/workload/computation_node.py +++ b/stream/classes/workload/computation_node.py @@ -30,8 +30,6 @@ def __init__( node_id: int, node_name: str, node_attr: LayerNodeAttributes, - input_names: list[str], - output_names: list[str], op_type: str = "computation", operand_tensor_reshape: OperandTensorReshape | None = None, produces_final_output: bool = False, @@ -49,8 +47,6 @@ def __init__( offchip_energy=0, runtime=0, possible_core_allocation=node_attr.core_allocation, - input_names=input_names, - output_names=output_names, ) self.sub_id = sub_id diff --git a/stream/classes/workload/concat_node.py b/stream/classes/workload/concat_node.py index 4154cee..dbf150b 100644 --- a/stream/classes/workload/concat_node.py +++ b/stream/classes/workload/concat_node.py @@ -16,8 +16,6 @@ def __init__( axis: int, constant_shape: tuple[int, ...], variable_input_first: bool, - input_names: list[str], - output_names: list[str], ) -> None: """Initialize the ConcatNode @@ -27,8 +25,6 @@ def __init__( constant_shape: the shape of the constant tensor variable_input_first: Wether the result is `concat(input, constant_tensor)` or `concat(constant_tensor, input)` - input_names The input names of this node. - output_names: The output names of this node. """ Node.__init__( self, @@ -39,8 +35,6 @@ def __init__( offchip_energy=0, runtime=0, possible_core_allocation=[-1], - input_names=input_names, - output_names=output_names, ) LayerNodeABC.__init__(self, node_id=node_id, node_name=node_name) diff --git a/stream/classes/workload/dnn_workload.py b/stream/classes/workload/dnn_workload.py index 5668c13..6a72131 100644 --- a/stream/classes/workload/dnn_workload.py +++ b/stream/classes/workload/dnn_workload.py @@ -23,19 +23,7 @@ def __init__(self, nodes: list[LayerNode], **attr: Any): self.layer_node_list = nodes for node in nodes: - # Create ComputationNode node_name = f"{node.type}_{node.id}" - node_input_names = [ - f"{other_layer_node.type}_{other_layer_node.id}" - for other_layer_node in nodes - if other_layer_node.id in node.input_operand_source.values() - ] - node_output_names = [f"{node_name}_output"] - if len(node_input_names) == 0: - node_input_names = ["the_first_input"] - - # Assume always define the final layer in the end - produces_final_output = not node_output_names op_type = node.type.lower() node_attr = node.extract_node_attr() @@ -43,14 +31,11 @@ def __init__(self, nodes: list[LayerNode], **attr: Any): node_id=node.id, node_name=node_name, node_attr=node_attr, - input_names=node_input_names, - output_names=node_output_names, op_type=op_type, - produces_final_output=produces_final_output, ) # Add to graph - logger.info("Parsed layer node %s | INPUT %s | OUTPUT %s", node_name, node_input_names, node_output_names) + logger.info("Parsed layer node %s", node_name) layer_id_to_obj[computation_node.id] = computation_node self.add_node(computation_node) diff --git a/stream/classes/workload/dummy_node.py b/stream/classes/workload/dummy_node.py index de5afb7..4bdd283 100644 --- a/stream/classes/workload/dummy_node.py +++ b/stream/classes/workload/dummy_node.py @@ -13,8 +13,6 @@ def __init__( node_id: int, node_name: str, predecessors: list[int], - input_names: 
list[str], - output_names: list[str], op_type: str = "dummy", ) -> None: DummyNodeZigZag.__init__( @@ -33,6 +31,4 @@ def __init__( offchip_energy=0, runtime=0, possible_core_allocation=[-1], - input_names=None, # input_names, - output_names=None, # output_names, ) diff --git a/stream/classes/workload/elementwise_node.py b/stream/classes/workload/elementwise_node.py index 61ba312..7dee6e6 100644 --- a/stream/classes/workload/elementwise_node.py +++ b/stream/classes/workload/elementwise_node.py @@ -6,7 +6,10 @@ class ElementwiseNode(Node): def __init__( - self, node_id: int, node_name: str, predecessor: int, input_names: list[str], output_names: list[str] + self, + node_id: int, + node_name: str, + predecessor: int, ) -> None: super().__init__( node_id=node_id, @@ -16,8 +19,6 @@ def __init__( offchip_energy=0, runtime=0, possible_core_allocation=[-1], - input_names=input_names, - output_names=output_names, ) self.input_operand_source = {LayerOperand("I"): predecessor} diff --git a/stream/classes/workload/flatten_node.py b/stream/classes/workload/flatten_node.py index 452ecda..fa9e3ca 100644 --- a/stream/classes/workload/flatten_node.py +++ b/stream/classes/workload/flatten_node.py @@ -15,8 +15,6 @@ def __init__( node_name: str, predecessor: int | None, axis: int | None, - input_names: list[str], - output_names: list[str], ) -> None: """Initialize the FlattenNode @@ -31,8 +29,6 @@ def __init__( offchip_energy=0, runtime=0, possible_core_allocation=[-1], - input_names=input_names, - output_names=output_names, ) self.axis = axis if predecessor is not None: diff --git a/stream/classes/workload/gather_node.py b/stream/classes/workload/gather_node.py index fbc345b..1ace385 100644 --- a/stream/classes/workload/gather_node.py +++ b/stream/classes/workload/gather_node.py @@ -15,8 +15,6 @@ def __init__( predecessors: list[int], gather_axis: int, gather_indices: int | list[int], - input_names: list[str], - output_names: list[str], ) -> None: """Initialize the GatherNode @@ -24,8 +22,6 @@ def __init__( predecessors: The id of this node's parent. gather_axis: Which axis to gather on. gather_indices: Indices of elements to be gathered. - input_names The input names of this node. - output_names: The output names of this node. """ Node.__init__( self, @@ -36,8 +32,6 @@ def __init__( offchip_energy=0, runtime=0, possible_core_allocation=[-1], - input_names=input_names, - output_names=output_names, ) LayerNodeABC.__init__(self, node_id=node_id, node_name=node_name) diff --git a/stream/classes/workload/lpnormalization_node.py b/stream/classes/workload/lpnormalization_node.py index 8335c9c..566b582 100644 --- a/stream/classes/workload/lpnormalization_node.py +++ b/stream/classes/workload/lpnormalization_node.py @@ -9,14 +9,15 @@ class LpNormalizationNode(Node): """Class that represents an onnx LpNormalization node.""" def __init__( - self, node_id: int, node_name: str, predecessor: int, input_names: list[str], output_names: list[str] + self, + node_id: int, + node_name: str, + predecessor: int, ) -> None: """Initialize the LpNormalization node. Args: predecessors (list): The predecessors of this node. - input_names (list) The input names of this node. - output_names (list): The output names of this node. 
""" super().__init__( node_id=node_id, @@ -26,8 +27,6 @@ def __init__( offchip_energy=0, runtime=0, possible_core_allocation=[-1], - input_names=input_names, - output_names=output_names, ) self.input_operand_source = {LayerOperand("I"): predecessor} diff --git a/stream/classes/workload/node.py b/stream/classes/workload/node.py index 36c601c..963000f 100644 --- a/stream/classes/workload/node.py +++ b/stream/classes/workload/node.py @@ -17,8 +17,6 @@ def __init__( offchip_energy: float, runtime: int, possible_core_allocation: list[int], - input_names: list[str], - output_names: list[str], chosen_core_allocation: int | None = None, ) -> None: """Initialize the Node metaclass @@ -49,8 +47,6 @@ def __init__( # number of data (in bits) only this node produces (not produced by any other node) self.data_produced_unique = 0 - # self.input_names = input_names # TODO restore - # self.output_names = output_names # will be set together with the core allocation self.offchip_bw = None diff --git a/stream/classes/workload/pooling_node.py b/stream/classes/workload/pooling_node.py index 3a4d4f0..115822b 100644 --- a/stream/classes/workload/pooling_node.py +++ b/stream/classes/workload/pooling_node.py @@ -9,15 +9,10 @@ def __init__( node_id: int, node_name: str, node_attr: LayerNodeAttributes, - input_names: list[str], - output_names: list[str], ): super().__init__( node_id=node_id, node_name=node_name, node_attr=node_attr, - input_names=input_names, - output_names=output_names, op_type="pooling", ) - self.type = "pooling" diff --git a/stream/classes/workload/reshape_node.py b/stream/classes/workload/reshape_node.py index 82eef07..3ef857b 100644 --- a/stream/classes/workload/reshape_node.py +++ b/stream/classes/workload/reshape_node.py @@ -1,4 +1,4 @@ -from zigzag.datatypes import LayerOperand +from zigzag.datatypes import Constants from zigzag.workload.LayerNodeABC import LayerNodeABC from stream.classes.workload.node import Node @@ -14,8 +14,6 @@ def __init__( node_name: str, predecessor: int, shape: tuple[int, ...], - input_names: list[str], - output_names: list[str], allow_zero: bool = False, ) -> None: """Initialize the ReshapeNode @@ -23,8 +21,6 @@ def __init__( Args: predecessors: The id of this node's parent. shape: The output tensor's shape. - input_names The input names of this node. - output_names: The output names of this node. allow_zero: wether the output shape can be 0 at some dimensions. 
Iff True, shape `[2,0,3]` becomes `[2,3]` """ Node.__init__( @@ -36,14 +32,12 @@ def __init__( offchip_energy=0, runtime=0, possible_core_allocation=[-1], - input_names=input_names, - output_names=output_names, ) LayerNodeABC.__init__(self, node_id=node_id, node_name=node_name) self.allow_zero = allow_zero self.shape = shape - self.input_operand_source = {LayerOperand("I"): predecessor} + self.input_operand_source = {Constants.LAYER_OP_I: predecessor} def reshape_operand_tensor(self, tensor: NodeTensor): """Reshape the tensor back to the representation needed for producer/consumer.""" diff --git a/stream/classes/workload/simd_node.py b/stream/classes/workload/simd_node.py deleted file mode 100644 index 8943694..0000000 --- a/stream/classes/workload/simd_node.py +++ /dev/null @@ -1,23 +0,0 @@ -# from zigzag.workload.layer_node import LayerNodeAttributes - -# from stream.classes.workload.computation_node import ComputationNode - - -# class SimdNode(ComputationNode): -# def __init__( -# self, -# node_id: int, -# node_name: str, -# node_attr: LayerNodeAttributes, -# input_names: list[str], -# output_names: list[str], -# op_type: str, -# ): -# super().__init__( -# node_id=node_id, -# node_name=node_name, -# node_attr=node_attr, -# input_names=input_names, -# output_names=output_names, -# op_type=op_type, -# ) diff --git a/stream/classes/workload/transpose_node.py b/stream/classes/workload/transpose_node.py index ef998d8..779ba97 100644 --- a/stream/classes/workload/transpose_node.py +++ b/stream/classes/workload/transpose_node.py @@ -13,18 +13,8 @@ def __init__( node_id: int, node_name: str, predecessor: int, - input_names: list[str], - output_names: list[str], permute_axes: list[int] | None = None, ) -> None: - """Initialize the TransposeNode - - Args: - predecessors: The predecessors of this node. - input_names The input names of this node. - output_names: The output names of this node. 
- """ - Node.__init__( self, node_id=node_id, @@ -34,8 +24,6 @@ def __init__( offchip_energy=0, runtime=0, possible_core_allocation=[-1], - input_names=input_names, - output_names=output_names, ) LayerNodeABC.__init__(self, node_id=node_id, node_name=node_name) From 75e18a7d078aaed53b17c203120258bc5a1d1533 Mon Sep 17 00:00:00 2001 From: RobinGeens Date: Thu, 5 Sep 2024 11:59:08 +0200 Subject: [PATCH 5/7] restore main file input --- main_stream.py | 1 - 1 file changed, 1 deletion(-) diff --git a/main_stream.py b/main_stream.py index 5365f24..a688b0b 100644 --- a/main_stream.py +++ b/main_stream.py @@ -25,7 +25,6 @@ ################################INPUTS################################ accelerator = "stream/inputs/examples/hardware/tpu_like_quad_core.yaml" workload_path = "stream/inputs/examples/workload/resnet18.onnx" -workload_path = "../transformer-dse/outputs/onnx/OPT-125M_B=8_SINGLELAYER_W4A16_prefill.onnx" mapping_path = "stream/inputs/examples/mapping/tpu_like_quad_core.yaml" CN_define_mode = 1 # manually define outer-CN loops hint_loops = [("OY", "all")] From 37b6cce92a890f31a05b6427746799204acebdc8 Mon Sep 17 00:00:00 2001 From: RobinGeens Date: Thu, 5 Sep 2024 13:00:12 +0200 Subject: [PATCH 6/7] use latest zigzag file naming --- .gitignore | 1 + main_stream.py | 2 +- main_stream_layer_splitting.py | 2 +- main_stream_mode_3.py | 2 +- main_stream_mode_4.py | 2 +- main_testing_1_core_with_testing_workload.py | 2 +- main_testing_2_cores_shared.py | 2 +- main_testing_2_cores_with_testing_workload.py | 2 +- ...sting_2_cores_with_testing_workload_3_layers.py | 2 +- main_testing_4_cores_with_testing_workload.py | 2 +- stream/api.py | 2 +- stream/classes/cost_model/communication_manager.py | 2 +- stream/classes/cost_model/memory_manager.py | 4 ++-- stream/classes/cost_model/scheduler.py | 2 +- .../classes/hardware/architecture/accelerator.py | 4 ++-- stream/classes/hardware/architecture/noc/bus.py | 2 +- .../architecture/noc/communication_link.py | 2 +- .../classes/hardware/architecture/noc/mesh_2d.py | 2 +- stream/classes/io/accelerator_factory.py | 2 +- stream/classes/io/accelerator_validator.py | 2 +- stream/classes/io/onnx/gemm.py | 2 +- stream/classes/io/onnx/model.py | 2 +- stream/classes/io/onnx/operator_parser.py | 2 +- .../genetic_algorithm/fitness_evaluator.py | 4 ++-- stream/classes/stages/AcceleratorParserStage.py | 2 +- stream/classes/stages/DetermineHintLoopsStage.py | 2 +- stream/classes/stages/DetermineLayerStacksStage.py | 2 +- .../stages/DetermineSchedulingOrderStage.py | 2 +- .../stages/GenerateCNWorkloadHybridStage.py | 2 +- stream/classes/stages/InterCoreMappingStage.py | 4 ++-- stream/classes/stages/IntraCoreMappingStage.py | 14 +++++++------- stream/classes/stages/LayerSplittingStage.py | 2 +- stream/classes/stages/ModelParserStage.py | 4 ++-- stream/classes/workload/concat_node.py | 2 +- stream/classes/workload/dummy_node.py | 2 +- stream/classes/workload/flatten_node.py | 2 +- stream/classes/workload/gather_node.py | 2 +- stream/classes/workload/node.py | 2 +- stream/classes/workload/reshape_node.py | 2 +- stream/classes/workload/tensor.py | 2 +- stream/classes/workload/transpose_node.py | 2 +- 41 files changed, 52 insertions(+), 51 deletions(-) diff --git a/.gitignore b/.gitignore index fb1961c..008cbfb 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ *.out +typings # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/main_stream.py b/main_stream.py index a688b0b..a921fff 100644 --- a/main_stream.py +++ b/main_stream.py @@ -2,7 +2,7 @@ import 
pickle import re -from zigzag.stages.MainStage import MainStage +from zigzag.stages.main import MainStage from stream.classes.stages.AcceleratorParserStage import ( AcceleratorParserStage as AcceleratorParserStage_, diff --git a/main_stream_layer_splitting.py b/main_stream_layer_splitting.py index 308a868..db4099a 100644 --- a/main_stream_layer_splitting.py +++ b/main_stream_layer_splitting.py @@ -4,7 +4,7 @@ import pickle import re -from zigzag.stages.MainStage import MainStage +from zigzag.stages.main import MainStage from stream.classes.stages.AcceleratorParserStage import AcceleratorParserStage as AcceleratorParserStage_ from stream.classes.stages.GenerateCNWorkloadHybridStage import GenerateCNWorkloadHybridStage diff --git a/main_stream_mode_3.py b/main_stream_mode_3.py index db377a5..923e0e8 100644 --- a/main_stream_mode_3.py +++ b/main_stream_mode_3.py @@ -2,7 +2,7 @@ import pickle import re -from zigzag.stages.MainStage import MainStage +from zigzag.stages.main import MainStage from stream.classes.stages.AcceleratorParserStage import AcceleratorParserStage as AcceleratorParserStage_ from stream.classes.stages.GenerateCNWorkloadHybridStage import GenerateCNWorkloadHybridStage diff --git a/main_stream_mode_4.py b/main_stream_mode_4.py index 9ad5a47..3470a95 100644 --- a/main_stream_mode_4.py +++ b/main_stream_mode_4.py @@ -2,7 +2,7 @@ import pickle import re -from zigzag.stages.MainStage import MainStage +from zigzag.stages.main import MainStage from stream.classes.stages.AcceleratorParserStage import AcceleratorParserStage as AcceleratorParserStage_ from stream.classes.stages.GenerateCNWorkloadHybridStage import GenerateCNWorkloadHybridStage diff --git a/main_testing_1_core_with_testing_workload.py b/main_testing_1_core_with_testing_workload.py index cb4494f..f056529 100644 --- a/main_testing_1_core_with_testing_workload.py +++ b/main_testing_1_core_with_testing_workload.py @@ -1,7 +1,7 @@ import logging as _logging import re -from zigzag.stages.MainStage import MainStage +from zigzag.stages.main import MainStage from stream.classes.cost_model.cost_model import StreamCostModelEvaluation from stream.classes.stages.AcceleratorParserStage import AcceleratorParserStage as AcceleratorParserStage_ diff --git a/main_testing_2_cores_shared.py b/main_testing_2_cores_shared.py index 6993c74..cf55a47 100644 --- a/main_testing_2_cores_shared.py +++ b/main_testing_2_cores_shared.py @@ -1,7 +1,7 @@ import logging as _logging import re -from zigzag.stages.MainStage import MainStage +from zigzag.stages.main import MainStage from stream.classes.stages.AcceleratorParserStage import AcceleratorParserStage as AcceleratorParserStage_ from stream.classes.stages.GenerateCNWorkloadHybridStage import GenerateCNWorkloadHybridStage diff --git a/main_testing_2_cores_with_testing_workload.py b/main_testing_2_cores_with_testing_workload.py index e117642..7b1465f 100644 --- a/main_testing_2_cores_with_testing_workload.py +++ b/main_testing_2_cores_with_testing_workload.py @@ -1,7 +1,7 @@ import logging as _logging import re -from zigzag.stages.MainStage import MainStage +from zigzag.stages.main import MainStage from stream.classes.cost_model.cost_model import StreamCostModelEvaluation from stream.classes.stages.AcceleratorParserStage import AcceleratorParserStage as AcceleratorParserStage_ diff --git a/main_testing_2_cores_with_testing_workload_3_layers.py b/main_testing_2_cores_with_testing_workload_3_layers.py index af9be98..06b63e5 100644 --- a/main_testing_2_cores_with_testing_workload_3_layers.py +++ 
diff --git a/main_testing_2_cores_with_testing_workload_3_layers.py b/main_testing_2_cores_with_testing_workload_3_layers.py
index af9be98..06b63e5 100644
--- a/main_testing_2_cores_with_testing_workload_3_layers.py
+++ b/main_testing_2_cores_with_testing_workload_3_layers.py
@@ -1,7 +1,7 @@
 import logging as _logging
 import re

-from zigzag.stages.MainStage import MainStage
+from zigzag.stages.main import MainStage

 from stream.classes.cost_model.cost_model import StreamCostModelEvaluation
 from stream.classes.stages.AcceleratorParserStage import AcceleratorParserStage as AcceleratorParserStage_

diff --git a/main_testing_4_cores_with_testing_workload.py b/main_testing_4_cores_with_testing_workload.py
index 5b75554..2a19b3f 100644
--- a/main_testing_4_cores_with_testing_workload.py
+++ b/main_testing_4_cores_with_testing_workload.py
@@ -1,7 +1,7 @@
 import logging as _logging
 import re

-from zigzag.stages.MainStage import MainStage
+from zigzag.stages.main import MainStage

 from stream.classes.cost_model.cost_model import StreamCostModelEvaluation
 from stream.classes.stages.AcceleratorParserStage import AcceleratorParserStage as AcceleratorParserStage_

diff --git a/stream/api.py b/stream/api.py
index fb20713..bde8251 100644
--- a/stream/api.py
+++ b/stream/api.py
@@ -1,4 +1,4 @@
-from zigzag.stages.MainStage import MainStage
+from zigzag.stages.main import MainStage

 from stream.classes.stages.AcceleratorParserStage import AcceleratorParserStage
 from stream.classes.stages.GenerateCNWorkloadHybridStage import GenerateCNWorkloadHybridStage

diff --git a/stream/classes/cost_model/communication_manager.py b/stream/classes/cost_model/communication_manager.py
index 96c2f4a..d8e0926 100644
--- a/stream/classes/cost_model/communication_manager.py
+++ b/stream/classes/cost_model/communication_manager.py
@@ -3,7 +3,7 @@
 from typing import TYPE_CHECKING, Any

 from zigzag.datatypes import Constants, MemoryOperand
-from zigzag.hardware.architecture.Core import Core
+from zigzag.hardware.architecture.core import Core

 from stream.classes.hardware.architecture.utils import intersections
 from stream.classes.workload.computation_node import ComputationNode

diff --git a/stream/classes/cost_model/memory_manager.py b/stream/classes/cost_model/memory_manager.py
index c2eaf38..e706f1b 100644
--- a/stream/classes/cost_model/memory_manager.py
+++ b/stream/classes/cost_model/memory_manager.py
@@ -4,9 +4,9 @@
 import numpy as np
 from zigzag.datatypes import MemoryOperand
-from zigzag.hardware.architecture.Core import Core
+from zigzag.hardware.architecture.core import Core
+from zigzag.hardware.architecture.memory_instance import MemoryInstance
 from zigzag.hardware.architecture.memory_level import MemoryLevel
-from zigzag.hardware.architecture.MemoryInstance import MemoryInstance

 from stream.classes.workload.tensor import Tensor

diff --git a/stream/classes/cost_model/scheduler.py b/stream/classes/cost_model/scheduler.py
index f4c1fac..4524638 100644
--- a/stream/classes/cost_model/scheduler.py
+++ b/stream/classes/cost_model/scheduler.py
@@ -3,7 +3,7 @@
 from typing import TYPE_CHECKING

 from zigzag.datatypes import Constants, LayerOperand, MemoryOperand
-from zigzag.hardware.architecture.Core import Core
+from zigzag.hardware.architecture.core import Core

 from stream.classes.workload.computation_node import ComputationNode
 from stream.classes.workload.onnx_workload import ComputationNodeWorkload
diff --git a/stream/classes/hardware/architecture/accelerator.py b/stream/classes/hardware/architecture/accelerator.py
index 7bf3df7..52b0cba 100644
--- a/stream/classes/hardware/architecture/accelerator.py
+++ b/stream/classes/hardware/architecture/accelerator.py
@@ -1,8 +1,8 @@
 from math import ceil

 from zigzag.datatypes import MemoryOperand
-from zigzag.hardware.architecture.Core import Core
-from zigzag.hardware.architecture.MemoryInstance import MemoryInstance
+from zigzag.hardware.architecture.core import Core
+from zigzag.hardware.architecture.memory_instance import MemoryInstance
 from zigzag.mapping.spatial_mapping import SpatialMapping

 from stream.classes.cost_model.communication_manager import CommunicationManager

diff --git a/stream/classes/hardware/architecture/noc/bus.py b/stream/classes/hardware/architecture/noc/bus.py
index 874d53c..8682503 100644
--- a/stream/classes/hardware/architecture/noc/bus.py
+++ b/stream/classes/hardware/architecture/noc/bus.py
@@ -1,5 +1,5 @@
 from zigzag.datatypes import Constants
-from zigzag.hardware.architecture.Core import Core
+from zigzag.hardware.architecture.core import Core

 from stream.classes.hardware.architecture.accelerator import CoreGraph
 from stream.classes.hardware.architecture.noc.communication_link import CommunicationLink

diff --git a/stream/classes/hardware/architecture/noc/communication_link.py b/stream/classes/hardware/architecture/noc/communication_link.py
index 2e1581d..9821bb4 100644
--- a/stream/classes/hardware/architecture/noc/communication_link.py
+++ b/stream/classes/hardware/architecture/noc/communication_link.py
@@ -5,7 +5,7 @@
 from stream.classes.cost_model.communication_manager import CommunicationLinkEvent

 if TYPE_CHECKING:
-    from zigzag.hardware.architecture.Core import Core
+    from zigzag.hardware.architecture.core import Core

     from stream.classes.workload.tensor import Tensor

diff --git a/stream/classes/hardware/architecture/noc/mesh_2d.py b/stream/classes/hardware/architecture/noc/mesh_2d.py
index e518255..4ea0af2 100644
--- a/stream/classes/hardware/architecture/noc/mesh_2d.py
+++ b/stream/classes/hardware/architecture/noc/mesh_2d.py
@@ -1,6 +1,6 @@
 import numpy as np
 from zigzag.datatypes import Constants
-from zigzag.hardware.architecture.Core import Core
+from zigzag.hardware.architecture.core import Core

 from stream.classes.hardware.architecture.accelerator import CoreGraph
 from stream.classes.hardware.architecture.noc.communication_link import CommunicationLink

diff --git a/stream/classes/io/accelerator_factory.py b/stream/classes/io/accelerator_factory.py
index 0fd6a71..fedadd0 100644
--- a/stream/classes/io/accelerator_factory.py
+++ b/stream/classes/io/accelerator_factory.py
@@ -1,6 +1,6 @@
 from typing import Any

-from zigzag.hardware.architecture.Core import Core
+from zigzag.hardware.architecture.core import Core
 from zigzag.parser.accelerator_factory import CoreFactory

 from stream.classes.hardware.architecture.accelerator import Accelerator

diff --git a/stream/classes/io/accelerator_validator.py b/stream/classes/io/accelerator_validator.py
index 37d9639..7c10d2f 100644
--- a/stream/classes/io/accelerator_validator.py
+++ b/stream/classes/io/accelerator_validator.py
@@ -3,7 +3,7 @@
 from typing import Any

 from cerberus import Validator
-from zigzag.parser.AcceleratorValidator import AcceleratorValidator as CoreValidator
+from zigzag.parser.accelerator_validator import AcceleratorValidator as CoreValidator
 from zigzag.utils import open_yaml

 logger = logging.getLogger(__name__)
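The hunks above and below are a mechanical rename sweep from zigzag's old CamelCase module files to the new snake_case ones. For out-of-tree code that must run against both zigzag layouts, a guarded import is one option; a sketch only, assuming exactly the two layouts that appear in this patch (new path first, removed path as fallback):

    # Version-tolerant import sketch: try the post-rename module paths added
    # in this patch, fall back to the pre-rename CamelCase paths it removes.
    try:
        from zigzag.hardware.architecture.core import Core
        from zigzag.hardware.architecture.memory_instance import MemoryInstance
    except ImportError:  # older zigzag with CamelCase module files
        from zigzag.hardware.architecture.Core import Core
        from zigzag.hardware.architecture.MemoryInstance import MemoryInstance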
diff --git a/stream/classes/io/onnx/gemm.py b/stream/classes/io/onnx/gemm.py
index 84bfd15..d3b8fef 100644
--- a/stream/classes/io/onnx/gemm.py
+++ b/stream/classes/io/onnx/gemm.py
@@ -1,7 +1,7 @@
 import logging
 from typing import Generator

-from zigzag.parser.onnx.GemmParser import GemmParser as GemmParserZigZag
+from zigzag.parser.onnx.gemm_parser import GemmParser as GemmParserZigZag

 from stream.classes.io.onnx.operator_parser import OnnxComputeOperatorParser
 from stream.classes.workload.computation_node import ComputationNode

diff --git a/stream/classes/io/onnx/model.py b/stream/classes/io/onnx/model.py
index 7552402..4c8969d 100644
--- a/stream/classes/io/onnx/model.py
+++ b/stream/classes/io/onnx/model.py
@@ -3,7 +3,7 @@
 from onnx import ModelProto, NodeProto
 from zigzag.parser.onnx.utils import get_onnx_tensor_type, parse_onnx_model_from_path
-from zigzag.stages.WorkloadParserStage import WorkloadParserStage
+from zigzag.stages.parser.workload_parser import WorkloadParserStage

 from stream.classes.hardware.architecture.accelerator import Accelerator
 from stream.classes.io.onnx.concat import ConcatParser

diff --git a/stream/classes/io/onnx/operator_parser.py b/stream/classes/io/onnx/operator_parser.py
index f601d81..e686a62 100644
--- a/stream/classes/io/onnx/operator_parser.py
+++ b/stream/classes/io/onnx/operator_parser.py
@@ -2,7 +2,7 @@
 from typing import Any, Generator

 from onnx import ModelProto, NodeProto
-from zigzag.parser.onnx.ONNXOperatorParser import ONNXOperatorParser as ONNXOperatorParserZigZag
+from zigzag.parser.onnx.onnx_operator_parser import ONNXOperatorParser as ONNXOperatorParserZigZag
 from zigzag.parser.onnx.utils import (
     get_node_input_output_dimension_shapes,
 )

diff --git a/stream/classes/opt/allocation/genetic_algorithm/fitness_evaluator.py b/stream/classes/opt/allocation/genetic_algorithm/fitness_evaluator.py
index b1163c0..0b16723 100644
--- a/stream/classes/opt/allocation/genetic_algorithm/fitness_evaluator.py
+++ b/stream/classes/opt/allocation/genetic_algorithm/fitness_evaluator.py
@@ -1,7 +1,7 @@
 from zigzag.cost_model.cost_model import CostModelEvaluation
-from zigzag.hardware.architecture.Core import Core
+from zigzag.hardware.architecture.core import Core
 from zigzag.utils import pickle_deepcopy
-from zigzag.workload.ONNXWorkload import ONNXWorkload as Workload
+from zigzag.workload.onnx_workload import ONNXWorkload as Workload

 from stream.classes.cost_model.cost_model import StreamCostModelEvaluation
 from stream.classes.hardware.architecture.accelerator import Accelerator

diff --git a/stream/classes/stages/AcceleratorParserStage.py b/stream/classes/stages/AcceleratorParserStage.py
index fad13d3..b1121b4 100644
--- a/stream/classes/stages/AcceleratorParserStage.py
+++ b/stream/classes/stages/AcceleratorParserStage.py
@@ -1,7 +1,7 @@
 import logging
 from typing import Any

-from zigzag.stages.Stage import Stage, StageCallable
+from zigzag.stages.stage import Stage, StageCallable
 from zigzag.utils import open_yaml

 from stream.classes.hardware.architecture.accelerator import Accelerator

diff --git a/stream/classes/stages/DetermineHintLoopsStage.py b/stream/classes/stages/DetermineHintLoopsStage.py
index dc6ca3e..af2f889 100644
--- a/stream/classes/stages/DetermineHintLoopsStage.py
+++ b/stream/classes/stages/DetermineHintLoopsStage.py
@@ -2,7 +2,7 @@
 import numpy as np
 from onnx import ModelProto, helper, numpy_helper
-from zigzag.stages.Stage import Stage
+from zigzag.stages.stage import Stage

 from stream.classes.workload.computation_node import ComputationNode
diff --git a/stream/classes/stages/DetermineLayerStacksStage.py b/stream/classes/stages/DetermineLayerStacksStage.py
index 03b77ee..332e973 100644
--- a/stream/classes/stages/DetermineLayerStacksStage.py
+++ b/stream/classes/stages/DetermineLayerStacksStage.py
@@ -2,7 +2,7 @@
 from typing import Any

 from zigzag.datatypes import MemoryOperand
-from zigzag.stages.Stage import Stage, StageCallable
+from zigzag.stages.stage import Stage, StageCallable

 from stream.classes.hardware.architecture.accelerator import Accelerator
 from stream.classes.workload.computation_node import ComputationNode

diff --git a/stream/classes/stages/DetermineSchedulingOrderStage.py b/stream/classes/stages/DetermineSchedulingOrderStage.py
index 532889f..73467ce 100644
--- a/stream/classes/stages/DetermineSchedulingOrderStage.py
+++ b/stream/classes/stages/DetermineSchedulingOrderStage.py
@@ -1,7 +1,7 @@
 import logging
 from typing import Any

-from zigzag.stages.Stage import Stage, StageCallable
+from zigzag.stages.stage import Stage, StageCallable

 from stream.classes.hardware.architecture.accelerator import Accelerator
 from stream.classes.workload.computation_node import ComputationNode

diff --git a/stream/classes/stages/GenerateCNWorkloadHybridStage.py b/stream/classes/stages/GenerateCNWorkloadHybridStage.py
index 635a8fd..d9e31a5 100644
--- a/stream/classes/stages/GenerateCNWorkloadHybridStage.py
+++ b/stream/classes/stages/GenerateCNWorkloadHybridStage.py
@@ -5,7 +5,7 @@
 from rtree import index
 from zigzag.datatypes import Constants, LayerDim, LayerOperand
-from zigzag.stages.Stage import Stage, StageCallable
+from zigzag.stages.stage import Stage, StageCallable
 from zigzag.utils import pickle_deepcopy

 from stream.classes.cost_model.group_allocation import GroupIdManager

diff --git a/stream/classes/stages/InterCoreMappingStage.py b/stream/classes/stages/InterCoreMappingStage.py
index 6c8c03b..5387f4b 100644
--- a/stream/classes/stages/InterCoreMappingStage.py
+++ b/stream/classes/stages/InterCoreMappingStage.py
@@ -3,8 +3,8 @@
 from zigzag.cost_model.cost_model import CostModelEvaluation
 from zigzag.datatypes import LayerOperand
-from zigzag.hardware.architecture.Core import Core
-from zigzag.stages.Stage import Stage, StageCallable
+from zigzag.hardware.architecture.core import Core
+from zigzag.stages.stage import Stage, StageCallable

 from stream.classes.hardware.architecture.accelerator import Accelerator
 from stream.classes.opt.allocation.genetic_algorithm.fitness_evaluator import (
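The old-to-new module mapping applied by this patch can be distilled directly from the hunks. A small codemod along these lines would perform the same sweep; an illustrative sketch only, with paths taken verbatim from this diff (subset shown; the IntraCoreMappingStage hunk below adds the stages.evaluation/stages.mapping/stages.results moves):

    import pathlib
    import re

    # Old zigzag module path -> new one, as renamed by PATCH 6/7.
    RENAMES = {
        "zigzag.stages.MainStage": "zigzag.stages.main",
        "zigzag.stages.Stage": "zigzag.stages.stage",
        "zigzag.stages.WorkloadParserStage": "zigzag.stages.parser.workload_parser",
        "zigzag.hardware.architecture.Core": "zigzag.hardware.architecture.core",
        "zigzag.hardware.architecture.MemoryInstance": "zigzag.hardware.architecture.memory_instance",
        "zigzag.parser.AcceleratorValidator": "zigzag.parser.accelerator_validator",
        "zigzag.parser.onnx.GemmParser": "zigzag.parser.onnx.gemm_parser",
        "zigzag.parser.onnx.ONNXOperatorParser": "zigzag.parser.onnx.onnx_operator_parser",
        "zigzag.workload.ONNXWorkload": "zigzag.workload.onnx_workload",
        "zigzag.workload.LayerNodeABC": "zigzag.workload.layer_node_abc",
        "zigzag.workload.DummyNode": "zigzag.workload.dummy_node",
    }

    def rewrite_imports(path: pathlib.Path) -> None:
        """Rewrite old zigzag module paths in one file, longest key first
        so a shorter key never clips a longer overlapping one."""
        text = path.read_text()
        for old in sorted(RENAMES, key=len, reverse=True):
            text = re.sub(rf"\b{re.escape(old)}\b", RENAMES[old], text)
        path.write_text(text)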
diff --git a/stream/classes/stages/IntraCoreMappingStage.py b/stream/classes/stages/IntraCoreMappingStage.py
index 8efcb7d..9dc951c 100644
--- a/stream/classes/stages/IntraCoreMappingStage.py
+++ b/stream/classes/stages/IntraCoreMappingStage.py
@@ -4,16 +4,16 @@
 from zigzag.cost_model.cost_model import CostModelEvaluation
 from zigzag.datatypes import MemoryOperand
-from zigzag.hardware.architecture.Core import Core
+from zigzag.hardware.architecture.core import Core
 from zigzag.hardware.architecture.memory_level import MemoryLevel
 from zigzag.hardware.architecture.memory_port import DataDirection, PortAllocation
 from zigzag.mapping.spatial_mapping import SpatialMapping
-from zigzag.stages.CostModelStage import CostModelStage
-from zigzag.stages.MainStage import MainStage
-from zigzag.stages.reduce_stages import MinimalLatencyStage
-from zigzag.stages.SpatialMappingGeneratorStage import SpatialMappingGeneratorStage
-from zigzag.stages.Stage import Stage, StageCallable
-from zigzag.stages.temporal_mapping_generator_stage import TemporalMappingGeneratorStage
+from zigzag.stages.evaluation.cost_model_evaluation import CostModelStage
+from zigzag.stages.main import MainStage
+from zigzag.stages.mapping.spatial_mapping_generation import SpatialMappingGeneratorStage
+from zigzag.stages.mapping.temporal_mapping_generator_stage import TemporalMappingGeneratorStage
+from zigzag.stages.results.reduce_stages import MinimalLatencyStage
+from zigzag.stages.stage import Stage, StageCallable
 from zigzag.utils import pickle_deepcopy

 from stream.classes.hardware.architecture.accelerator import Accelerator

diff --git a/stream/classes/stages/LayerSplittingStage.py b/stream/classes/stages/LayerSplittingStage.py
index 945f3ee..f71be36 100644
--- a/stream/classes/stages/LayerSplittingStage.py
+++ b/stream/classes/stages/LayerSplittingStage.py
@@ -8,7 +8,7 @@
 from onnx import ModelProto, helper, numpy_helper
 from onnx.shape_inference import infer_shapes
 from zigzag.datatypes import Constants, LayerOperand
-from zigzag.stages.Stage import Stage, StageCallable
+from zigzag.stages.stage import Stage, StageCallable

 from stream.classes.hardware.architecture.accelerator import Accelerator
 from stream.classes.stages import utils

diff --git a/stream/classes/stages/ModelParserStage.py b/stream/classes/stages/ModelParserStage.py
index d0339a9..9c59811 100644
--- a/stream/classes/stages/ModelParserStage.py
+++ b/stream/classes/stages/ModelParserStage.py
@@ -1,8 +1,8 @@
 import logging
 from typing import Any

-from zigzag.stages.Stage import Stage, StageCallable
-from zigzag.stages.WorkloadParserStage import WorkloadParserStage as ZigZagWorkloadParserStage
+from zigzag.stages.parser.workload_parser import WorkloadParserStage as ZigZagWorkloadParserStage
+from zigzag.stages.stage import Stage, StageCallable

 from stream.classes.hardware.architecture.accelerator import Accelerator
 from stream.classes.io.onnx.model import ONNXModelParser

diff --git a/stream/classes/workload/concat_node.py b/stream/classes/workload/concat_node.py
index dbf150b..0c11f58 100644
--- a/stream/classes/workload/concat_node.py
+++ b/stream/classes/workload/concat_node.py
@@ -1,5 +1,5 @@
 from zigzag.datatypes import LayerOperand
-from zigzag.workload.LayerNodeABC import LayerNodeABC
+from zigzag.workload.layer_node_abc import LayerNodeABC

 from stream.classes.workload.node import Node
 from stream.utils import NodeTensor

diff --git a/stream/classes/workload/dummy_node.py b/stream/classes/workload/dummy_node.py
index 4bdd283..5ec4a1c 100644
--- a/stream/classes/workload/dummy_node.py
+++ b/stream/classes/workload/dummy_node.py
@@ -1,4 +1,4 @@
-from zigzag.workload.DummyNode import DummyNode as DummyNodeZigZag
+from zigzag.workload.dummy_node import DummyNode as DummyNodeZigZag

 from stream.classes.workload.node import Node

diff --git a/stream/classes/workload/flatten_node.py b/stream/classes/workload/flatten_node.py
index fa9e3ca..25d9fc0 100644
--- a/stream/classes/workload/flatten_node.py
+++ b/stream/classes/workload/flatten_node.py
@@ -1,6 +1,6 @@
 import numpy as np
 from zigzag.datatypes import LayerOperand
-from zigzag.workload.LayerNodeABC import LayerNodeABC
+from zigzag.workload.layer_node_abc import LayerNodeABC

 from stream.classes.workload.node import Node
 from stream.utils import NodeTensor

diff --git a/stream/classes/workload/gather_node.py b/stream/classes/workload/gather_node.py
index 1ace385..06526c8 100644
--- a/stream/classes/workload/gather_node.py
+++ b/stream/classes/workload/gather_node.py
@@ -1,5 +1,5 @@
 from zigzag.datatypes import LayerOperand
-from zigzag.workload.LayerNodeABC import LayerNodeABC
+from zigzag.workload.layer_node_abc import LayerNodeABC

 from stream.classes.workload.node import Node
 from stream.utils import NodeTensor
diff --git a/stream/classes/workload/node.py b/stream/classes/workload/node.py
index 963000f..a6f932c 100644
--- a/stream/classes/workload/node.py
+++ b/stream/classes/workload/node.py
@@ -1,6 +1,6 @@
 from abc import ABCMeta

-from zigzag.workload.LayerNodeABC import LayerNodeABC
+from zigzag.workload.layer_node_abc import LayerNodeABC


 class Node(LayerNodeABC, metaclass=ABCMeta):

diff --git a/stream/classes/workload/reshape_node.py b/stream/classes/workload/reshape_node.py
index 3ef857b..afb888d 100644
--- a/stream/classes/workload/reshape_node.py
+++ b/stream/classes/workload/reshape_node.py
@@ -1,5 +1,5 @@
 from zigzag.datatypes import Constants
-from zigzag.workload.LayerNodeABC import LayerNodeABC
+from zigzag.workload.layer_node_abc import LayerNodeABC

 from stream.classes.workload.node import Node
 from stream.utils import NodeTensor

diff --git a/stream/classes/workload/tensor.py b/stream/classes/workload/tensor.py
index 530658c..8a47646 100644
--- a/stream/classes/workload/tensor.py
+++ b/stream/classes/workload/tensor.py
@@ -3,7 +3,7 @@
 from zigzag.datatypes import LayerDim, LayerOperand

 if TYPE_CHECKING:
-    from zigzag.hardware.architecture.MemoryInstance import MemoryInstance
+    from zigzag.hardware.architecture.memory_instance import MemoryInstance

     from stream.classes.cost_model.memory_manager import MemoryManager
     from stream.classes.hardware.architecture.accelerator import Accelerator

diff --git a/stream/classes/workload/transpose_node.py b/stream/classes/workload/transpose_node.py
index 779ba97..05fee95 100644
--- a/stream/classes/workload/transpose_node.py
+++ b/stream/classes/workload/transpose_node.py
@@ -1,5 +1,5 @@
 from zigzag.datatypes import LayerOperand
-from zigzag.workload.LayerNodeABC import LayerNodeABC
+from zigzag.workload.layer_node_abc import LayerNodeABC

 from stream.classes.workload.node import Node
 from stream.utils import NodeTensor

From 3d3dc779a0059cb64dbce5fb33a5dbbf65e64441 Mon Sep 17 00:00:00 2001
From: RobinGeens
Date: Thu, 5 Sep 2024 16:14:23 +0200
Subject: [PATCH 7/7] bugfix in softmax exp equation

---
 stream/classes/io/onnx/softmax.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/stream/classes/io/onnx/softmax.py b/stream/classes/io/onnx/softmax.py
index 1a429b7..f8a43b9 100644
--- a/stream/classes/io/onnx/softmax.py
+++ b/stream/classes/io/onnx/softmax.py
@@ -120,10 +120,10 @@ def get_layer_node_user_format(self, input_shape: list[int], output_shape: list[
                 data["loop_dims"] = ["K", "C"]
             case 3:
                 data["equation"] = "O[b][k][c]+=I[b][k][c]+W[b][k]"
-                data["loop_dims"] = ["B", "C", "k"]
+                data["loop_dims"] = ["B", "K", "C"]
             case 4:
                 data["equation"] = "O[b][h][k][c]+=I[b][h][k][c]+W[b][h][k]"
-                data["loop_dims"] = ["B", "H", "C", "k"]
+                data["loop_dims"] = ["B", "H", "K", "C"]
             case _:
                 raise NotImplementedError
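The fix replaces the malformed loop_dims entries, which mixed cases and mis-ordered the dimensions ("k" instead of "K"), with exactly the dimensions named in each equation. A numpy sketch of what the rank-3 equation O[b][k][c] += I[b][k][c] + W[b][k] computes, with hypothetical dimension sizes:

    import numpy as np

    # Hypothetical sizes; loop_dims ["B", "K", "C"] must name exactly the
    # dimensions that appear in the equation string.
    B, K, C = 2, 4, 8
    I = np.random.rand(B, K, C)
    W = np.random.rand(B, K)

    # O[b][k][c] += I[b][k][c] + W[b][k]: W is broadcast over the C dimension.
    O = I + W[:, :, np.newaxis]
    assert O.shape == (B, K, C)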