diff --git a/teaal/hifiber/op.py b/teaal/hifiber/op.py index 81e051f..f5e3073 100644 --- a/teaal/hifiber/op.py +++ b/teaal/hifiber/op.py @@ -148,6 +148,18 @@ def gen(self) -> str: return "*" +class ONotIn(Operator): + """ + The HiFiber not in operator + """ + + def gen(self) -> str: + """ + Generate the HiFiber code for the ONotIn operator + """ + return "not in" + + class OOr(Operator): """ The HiFiber or operator diff --git a/teaal/ir/component.py b/teaal/ir/component.py index 9949b0b..05f5ecf 100644 --- a/teaal/ir/component.py +++ b/teaal/ir/component.py @@ -24,7 +24,9 @@ Representation an hardware component """ -from typing import Any, Dict, Iterable, List, Optional, Union +from typing import Any, Dict, List, Optional, Set, Tuple, Type, TypeVar, Union + +S = TypeVar("S") class Component: @@ -32,13 +34,14 @@ class Component: Representation an hardware component """ - def __init__(self, name: str, attrs: dict, bindings: List[dict]) -> None: + def __init__(self, name: str, num_instances: int, attrs: dict, + bindings: Dict[str, List[dict]]) -> None: """ Construct a component """ self.name = name - self.attrs = attrs - self.bindings: Any = {} + self.num_instances = num_instances + self.bindings = bindings def get_name(self) -> str: """ @@ -46,6 +49,18 @@ def get_name(self) -> str: """ return self.name + def get_num_instances(self) -> int: + """ + Get the number of instances + """ + return self.num_instances + + def get_bindings(self) -> Dict[str, List[dict]]: + """ + Get the operations that are bound to this component + """ + return self.bindings + def __eq__(self, other: object) -> bool: """ The == operator for components @@ -55,11 +70,17 @@ def __eq__(self, other: object) -> bool: return self.__key() == other.__key() return False - def __key(self) -> Iterable[Any]: + def __hash__(self) -> int: + """ + Hash the component + """ + return hash(repr(self)) + + def __key(self) -> Tuple[Any, ...]: """ A tuple of all fields of a component """ - return (self.name, self.attrs, self.bindings) + return (self.name, self.num_instances, self.bindings) def __repr__(self) -> str: """ @@ -69,38 +90,97 @@ def __repr__(self) -> str: for key in self.__key()] return "(" + type(self).__name__ + ", " + ", ".join(strs) + ")" - -class ComputeComponent(Component): - """ - A Component for compute (acting also as a superclass for all compute - operations) - """ - - def __init__(self, name: str, attrs: dict, bindings: List[dict]) -> None: + def _check_attr( + self, + attrs: dict, + key: str, + type_: Type[S]) -> Optional[S]: """ - Construct a compute component + Check that the attribute is correctly specified """ - super().__init__(name, attrs, bindings) - self.bindings = {} + if key not in attrs.keys(): + return None - for binding in bindings: - einsum = binding["einsum"] - if einsum not in self.bindings.keys(): - self.bindings[einsum] = [] + if not isinstance(attrs[key], type_): + class_ = type(self).__name__[:-9] + raise ValueError("Bad " + + key + + " " + + str(attrs[key]) + + " for " + + class_ + + " " + + self.name) - # Append the dictionary containing the other properties - info = binding.copy() - del info["einsum"] - self.bindings[einsum].append(info) + return attrs[key] - def get_bindings(self, einsum: str) -> List[dict]: + def _check_float_attr(self, attrs: dict, key: str) -> Optional[float]: """ - Get the operations that are bound for this einsum + Check that the attribute is correctly specified """ - if einsum not in self.bindings.keys(): - return [] + if key not in attrs.keys(): + return None + + if 
attrs[key] == "inf": + return float("inf") + + if not isinstance( + attrs[key], + float) and not isinstance( + attrs[key], + int): + class_ = type(self).__name__[:-9] + raise ValueError("Bad " + + key + + " " + + str(attrs[key]) + + " for " + + class_ + + " " + + self.name) + + return attrs[key] + + def _check_str_attr( + self, + attrs: dict, + key: str, + options: Set[str]) -> Optional[str]: + """ + Check that a string attribute is correctly specified + """ + if key not in attrs.keys(): + return None - return self.bindings[einsum] + class_ = type(self).__name__[:-9] + if not isinstance(attrs[key], str): + raise ValueError("Bad " + + key + + " " + + str(attrs[key]) + + " for " + + class_ + + " " + + self.name) + + if attrs[key] not in options: + raise ValueError( + attrs[key] + + " is not a valid value for attribute " + + key + + " of class " + + class_ + + ". Choose one of " + + str(options)) + + return attrs[key] + + +class FunctionalComponent(Component): + """ + Superclass for all functional unit components (compute, intersection, mergers, etc.) + """ + pass class MemoryComponent(Component): @@ -108,74 +188,331 @@ class MemoryComponent(Component): Superclass for all memory components """ - def __init__(self, name: str, attrs: dict, bindings: List[dict]) -> None: + def __init__(self, name: str, num_instances: int, attrs: dict, + bindings: Dict[str, List[dict]]) -> None: """ Construct a memory component """ - super().__init__(name, attrs, bindings) - self.bindings = {} + super().__init__(name, num_instances, attrs, bindings) + + self.bandwidth = self._check_attr(attrs, "bandwidth", int) + + self.tensor_bindings: Dict[str, Dict[str, List[dict]]] = {} + for einsum in self.bindings.keys(): + self.tensor_bindings[einsum] = {} + for binding in self.bindings[einsum]: + if "tensor" not in binding: + raise ValueError( + "Tensor not specified for Einsum " + + einsum + + " in binding to " + + self.name) + + tensor = binding["tensor"] + if "rank" not in binding: + raise ValueError( + "Rank not specified for tensor " + + tensor + + " in Einsum " + + einsum + + " in binding to " + + self.name) + + if "type" not in binding: + raise ValueError( + "Type not specified for tensor " + + tensor + + " in Einsum " + + einsum + + " in binding to " + + self.name) + + types = {"coord", "payload", "elem"} + if binding["type"] not in types: + raise ValueError("Type " + + str(binding["type"]) + + " for " + + self.name + + " on tensor " + + tensor + + " in Einsum " + + einsum + + " not one of " + + str(types)) + + if "format" not in binding: + raise ValueError( + "Format not specified for tensor " + + tensor + + " in Einsum " + + einsum + + " in binding to " + + self.name) + + if binding["tensor"] not in self.tensor_bindings[einsum]: + self.tensor_bindings[einsum][binding["tensor"]] = [] + self.tensor_bindings[einsum][binding["tensor"]].append(binding) + + def get_bandwidth(self) -> int: + """ + Get the bandwidth + """ + if self.bandwidth is None: + raise ValueError( + "Bandwidth unspecified for component " + + self.name) - for binding in bindings: - self.bindings[binding["tensor"]] = binding["rank"] + return self.bandwidth - def get_binding(self, tensor: str) -> Optional[str]: + def get_binding(self, einsum: str, tensor: str, rank: str, + type_: str, format_: str) -> Optional[Dict[str, Any]]: """ - Given a tensor, give the rank bound to this memory + Given a tensor, get a list of bindings to that rank """ - if tensor not in self.bindings.keys(): + if einsum not in self.tensor_bindings: + return None + + if tensor 
not in self.tensor_bindings[einsum]: return None - return self.bindings[tensor] + final_binding: Optional[Dict[str, Any]] = None + for binding in self.tensor_bindings[einsum][tensor]: + if binding["rank"] == rank and binding["type"] == type_ and binding["format"] == format_: + + if final_binding is None: + final_binding = binding + + else: + raise ValueError("Multiple bindings for " + str( + [("einsum", einsum), ("tensor", tensor), ("rank", rank), ("type", type_), ("format", format_)])) + + return final_binding + def _Component__key(self) -> Tuple[Any, ...]: + """ + A tuple of all fields + """ + return (self.name, self.num_instances, self.bindings, self.bandwidth) -class BuffetComponent(MemoryComponent): + +class BufferComponent(MemoryComponent): """ - A Component for Buffet + A Component for a buffer """ - pass + def __init__(self, name: str, num_instances: int, attrs: dict, + bindings: Dict[str, List[dict]]) -> None: + """ + Construct a buffer component + """ + super().__init__(name, num_instances, attrs, bindings) + + self.depth = self._check_float_attr(attrs, "depth") + self.width = self._check_attr(attrs, "width", int) + + def get_depth(self) -> float: + """ + Get the buffer depth + """ + if self.depth is None: + raise ValueError("Depth unspecified for component " + self.name) + + return self.depth + + def get_width(self) -> int: + """ + Get the buffer width + """ + if self.width is None: + raise ValueError("Width unspecified for component " + self.name) + + return self.width + + def _Component__key(self) -> Tuple[Any, ...]: + """ + A tuple of all fields + """ + return ( + self.name, + self.num_instances, + self.bindings, + self.bandwidth, + self.depth, + self.width) + + +class BuffetComponent(BufferComponent): + """ + A Component for a Buffet + """ -class CacheComponent(MemoryComponent): + def __init__(self, name: str, num_instances: int, attrs: dict, + bindings: Dict[str, List[dict]]) -> None: + """ + Construct a buffet component + """ + super().__init__(name, num_instances, attrs, bindings) + for einsum in self.tensor_bindings: + for tensor, tensor_bindings in self.tensor_bindings[einsum].items( + ): + for binding in tensor_bindings: + if "evict-on" not in binding: + raise ValueError( + "Evict-on not specified for tensor " + + tensor + + " in Einsum " + + einsum + + " in binding to " + + self.name) + + if "style" not in binding: + binding["style"] = "lazy" + + styles = {"lazy", "eager"} + if binding["style"] not in styles: + raise ValueError("Style " + + str(binding["style"]) + + " for " + + self.name + + " on tensor " + + tensor + + " in Einsum " + + einsum + + " not one of " + + str(styles)) + + if binding["style"] == "eager": + binding["root"] = binding["rank"] + + def expand_eager(self, + einsum: str, + tensor: str, + format_: str, + ranks: List[str], + types: List[List[str]]) -> None: + """ + Expand eager bindings to have separate bindings for each rank + """ + if tensor not in self.tensor_bindings[einsum]: + return + + for binding in self.tensor_bindings[einsum][tensor].copy(): + if binding["style"] != "eager": + continue + + if binding["format"] != format_: + continue + + root_rank = binding["rank"] + + new_binding_template = { + "tensor": tensor, + "evict-on": binding["evict-on"], + "style": "eager", + "format": binding["format"], + "root": root_rank} + start_i = ranks.index(root_rank) + if binding["type"] == "coord" and "payload" in types[start_i]: + new_binding = {**new_binding_template, ** + {"rank": root_rank, "type": "payload"}} + 
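# An eager "coord" binding at the root rank implies its payloads are
+                # fetched as well, so the matching payload binding is added
+                # before the deeper ranks are expanded below.
+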
self.tensor_bindings[einsum][tensor].append(new_binding) + self.bindings[einsum].append(new_binding) + + for rank, rank_types in zip( + ranks[start_i + 1:], types[start_i + 1:]): + for type_ in rank_types: + new_binding = {**new_binding_template, + **{"rank": rank, "type": type_}} + self.tensor_bindings[einsum][tensor].append(new_binding) + self.bindings[einsum].append(new_binding) + + +class CacheComponent(BufferComponent): """ A Component for a Cache """ + pass + + +class ComputeComponent(FunctionalComponent): + """ + A Component for a compute functional unit + """ - def get_depth(self) -> int: + def __init__(self, name: str, num_instances: int, attrs: dict, + bindings: Dict[str, List[dict]]) -> None: """ - Get the cache depth + Construct a compute component """ - return self.attrs["depth"] + super().__init__(name, num_instances, attrs, bindings) - def get_width(self) -> int: + type_ = self._check_str_attr(attrs, "type", {"mul", "add"}) + if type_ is None: + raise ValueError("Type unspecified for component " + self.name) + self.type = type_ + + def get_type(self) -> str: """ - Get the cache width + Get the type of compute component """ - return self.attrs["width"] + return self.type + + def _Component__key(self) -> Tuple[Any, ...]: + """ + A tuple of all fields + """ + return (self.name, self.num_instances, self.bindings, self.type) class DRAMComponent(MemoryComponent): """ A Component for DRAM """ + pass + + +class IntersectorComponent(FunctionalComponent): + """ + A Component superclass for all intersectors + """ - def get_bandwidth(self) -> int: + def __init__(self, name: str, num_instances: int, attrs: dict, + bindings: Dict[str, List[dict]]) -> None: """ - Get the bandwidth + Construct an intersector component """ - return self.attrs["bandwidth"] + super().__init__(name, num_instances, attrs, bindings) - def get_datawidth(self) -> int: - """ - Get the datawidth - """ - return self.attrs["datawidth"] + for einsum, einsum_bindings in bindings.items(): + for binding in einsum_bindings: + if "rank" not in binding: + raise ValueError( + "Rank unspecified in Einsum " + + einsum + + " in binding to " + + self.name) -class LeaderFollowerComponent(ComputeComponent): +class LeaderFollowerComponent(IntersectorComponent): """ A Component for leader-follower intersection """ - pass + + def __init__(self, name: str, num_instances: int, attrs: dict, + bindings: Dict[str, List[dict]]) -> None: + """ + Construct a leader-follower intersector component + """ + super().__init__(name, num_instances, attrs, bindings) + + for einsum, einsum_bindings in bindings.items(): + for binding in einsum_bindings: + if "leader" not in binding: + raise ValueError( + "Leader unspecified in Einsum " + + einsum + + " in binding to " + + self.name) class MergerComponent(Component): @@ -183,47 +520,198 @@ class MergerComponent(Component): A Component for a merger """ - def __init__(self, name: str, attrs: dict, bindings: List[dict]) -> None: + def __init__(self, name: str, num_instances: int, attrs: dict, + bindings: Dict[str, List[dict]]) -> None: + """ + Construct a merger component + """ + super().__init__(name, num_instances, attrs, bindings) + + # TODO: change back to int + inputs = self._check_float_attr(attrs, "inputs") + if inputs is None: + raise ValueError("Inputs unspecified for component " + self.name) + self.inputs = inputs + + # TODO: change back to int + comparator_radix = self._check_float_attr(attrs, "comparator_radix") + if comparator_radix is None: + raise ValueError( + "Comparator radix unspecified 
for component " + + self.name) + self.comparator_radix = comparator_radix + + outputs = self._check_attr(attrs, "outputs", int) + if outputs is None: + self.outputs = 1 + else: + self.outputs = outputs + + order = self._check_str_attr(attrs, "order", {"fifo", "opt"}) + if order is None: + self.order = "fifo" + else: + self.order = order + + reduce_ = self._check_attr(attrs, "reduce", bool) + if reduce_: + raise NotImplementedError( + "Concurrent merge and reduction not supported") + self.reduce = False + + self.tensor_bindings: Dict[str, Dict[str, List[dict]]] = {} + for einsum, einsum_bindings in self.bindings.items(): + self.tensor_bindings[einsum] = {} + for binding in einsum_bindings: + if "tensor" not in binding: + raise ValueError( + "Tensor not specified for Einsum " + + einsum + + " in binding to " + + self.name) + + tensor = binding["tensor"] + if tensor not in self.tensor_bindings[einsum]: + self.tensor_bindings[einsum][tensor] = [] + + if "init-ranks" not in binding: + raise ValueError( + "Initial ranks not specified for tensor " + + tensor + + " in Einsum " + + einsum + + " in binding to " + + self.name) + + if "final-ranks" not in binding: + raise ValueError( + "Final ranks not specified for tensor " + + tensor + + " in Einsum " + + einsum + + " in binding to " + + self.name) + + self.tensor_bindings[einsum][tensor].append(binding) + + def get_comparator_radix(self) -> float: + """ + Get the comparator_radix + """ + return self.comparator_radix + + def get_init_ranks(self, einsum: str, tensor: str, + final_ranks: List[str]) -> Optional[List[str]]: + """ + Get the initial ranks for the given merge + """ + if einsum not in self.tensor_bindings: + return None + + if tensor not in self.tensor_bindings[einsum]: + return None + + init_ranks: Optional[List[str]] = None + for binding in self.tensor_bindings[einsum][tensor]: + if binding["final-ranks"] == final_ranks: + if init_ranks is not None: + raise ValueError("Merge binding from both " + + str(init_ranks) + + " and " + + str(binding["init-ranks"]) + + " to " + + str(final_ranks)) + + init_ranks = binding["init-ranks"] + + return init_ranks + + def get_inputs(self) -> float: """ - Construct a compute component + Get the number of inputs """ - super().__init__(name, attrs, bindings) + return self.inputs - self.bindings = [] - for binding in bindings: - init = binding["init_ranks"] - d = binding["swap_depth"] - final = init[:d] + [init[d + 1]] + [init[d]] + init[(d + 2):] - - info = binding.copy() - info["final_ranks"] = final + def get_order(self) -> str: + """ + Get the order + """ + return self.order - self.bindings.append(info) + def get_outputs(self) -> int: + """ + Get the number of outputs + """ + return self.outputs - def get_bindings(self) -> List[dict]: + def get_reduce(self) -> bool: """ - Get the operations that are bound to this merger + Get whether or not the merger performs concurrent reduction """ - return self.bindings + return self.reduce - def get_next_latency(self) -> Union[int, str]: + def _Component__key(self) -> Tuple[Any, ...]: """ - Get the latency of accessing the next element + A tuple of all fields """ - return self.attrs["next_latency"] + return ( + self.name, + self.num_instances, + self.bindings, + self.inputs, + self.comparator_radix, + self.outputs, + self.order, + self.reduce) + - def get_radix(self) -> float: +class SequencerComponent(FunctionalComponent): + """ + A Component for a sequencer + """ + + def __init__(self, name: str, num_instances: int, attrs: dict, + bindings: Dict[str, List[dict]]) 
-> None: """ - Get the radix + Construct a sequencer component """ - if self.attrs["radix"] == "inf": - return float("inf") + super().__init__(name, num_instances, attrs, bindings) + + num_ranks = self._check_attr(attrs, "num_ranks", int) + if num_ranks is None: + raise ValueError( + "Number of ranks unspecified for sequencer " + + self.name) + + self.ranks: Dict[str, List[str]] = {} + for einsum, ebindings in self.bindings.items(): + if len(ebindings) > num_ranks: + raise ValueError( + "Too many ranks bound to sequencer " + + self.name + + " during Einsum " + + einsum) + + self.ranks[einsum] = [] + for binding in ebindings: + self.ranks[einsum].append(binding["rank"]) - return self.attrs["radix"] + def get_ranks(self, einsum: str) -> List[str]: + """ + Get the ranks sequenced by this sequencer + """ + return self.ranks[einsum] -class SkipAheadComponent(ComputeComponent): +class SkipAheadComponent(IntersectorComponent): """ A Component for skip-ahead intersection """ pass + + +class TwoFingerComponent(IntersectorComponent): + """ + A Component for two-finger intersection + """ + pass diff --git a/teaal/ir/equation.py b/teaal/ir/equation.py index 3ad21e6..d9b1df0 100644 --- a/teaal/ir/equation.py +++ b/teaal/ir/equation.py @@ -28,7 +28,7 @@ from lark.lexer import Token from lark.tree import Tree -from typing import Any, Dict, Iterable, List, Tuple +from typing import Any, Dict, Iterable, List, Optional, Tuple from teaal.ir.tensor import Tensor from teaal.parse.utils import ParseUtils @@ -56,6 +56,27 @@ def get_factor_order(self) -> Dict[str, Tuple[int, int]]: """ return self.factor_order + def get_iter(self, + tensors: List[Tensor]) -> Tuple[Optional[Tensor], + List[List[Tensor]]]: + """ + Organize the tensors as they are iterated in the for loop + Returns (Optional[output], [[tensors intersected together] unioned together]) + """ + output: Optional[Tensor] = None + inputs: List[List[Tensor]] = [[] for _ in self.term_tensors] + for tensor in tensors: + if tensor.get_is_output(): + output = tensor + continue + + inputs[self.factor_order[tensor.root_name()][0]].append(tensor) + + for term in inputs: + term.sort(key=lambda t: self.factor_order[t.root_name()][1]) + + return output, [term for term in inputs if term] + def get_in_update(self) -> List[List[bool]]: """ Get the information about which values are actually used in the update diff --git a/teaal/ir/flow_graph.py b/teaal/ir/flow_graph.py index 083ad45..52f7aa3 100644 --- a/teaal/ir/flow_graph.py +++ b/teaal/ir/flow_graph.py @@ -29,6 +29,7 @@ from sympy import Symbol from typing import cast, Dict, List, Optional, Tuple +from teaal.ir.component import * from teaal.ir.flow_nodes import * from teaal.ir.iter_graph import IterationGraph from teaal.ir.metrics import Metrics @@ -94,7 +95,7 @@ def __build(self) -> None: self.graph = nx.DiGraph() self.iter_map: Dict[str, List[str]] = {} - self.__build_loop_nest() + chain = self.__build_loop_nest() self.__build_output() # Add Swizzle, GetRoot and FiberNodes for each tensor @@ -116,10 +117,6 @@ def __build(self) -> None: # Get the root fiber self.__build_swizzle_root_fiber(tensor, True) - # Add CollectingNodes - for tensor in self.program.get_equation().get_tensors(): - self.__build_collecting(tensor) - iter_graph = IterationGraph(self.program) while iter_graph.peek_concord()[0] is not None: self.__build_fiber_nodes(iter_graph, flatten_info) @@ -136,31 +133,6 @@ def __build(self) -> None: tensor.reset() tensor.set_is_output(is_output) - def __build_collecting(self, tensor: Tensor) -> None: - """ - 
Build a CollectingNode should it be required - """ - # None if there is no hardware - if not self.metrics: - return - - # None if the tensor is never stored in DRAM - if not self.metrics.in_dram(tensor): - return - - # None if the tensor is stationary - if self.metrics.on_chip_stationary(tensor): - return - - # Otherwise, add a CollectingNode - root = tensor.root_name() - rank = self.metrics.get_on_chip_rank(tensor) - swizzle_node = SwizzleNode(root, tensor.get_ranks(), "loop-order") - collecting_node = CollectingNode(root, rank) - - self.graph.add_edge(swizzle_node, collecting_node) - self.graph.add_edge(collecting_node, MetricsNode("Start")) - def __build_dyn_part( self, tensor: Tensor, partitioning: Tuple[str, ...], flatten_info: Dict[str, List[Tuple[str, ...]]]) -> None: """ @@ -180,6 +152,7 @@ def __build_dyn_part( for rank in partitioning: self.graph.add_edge(RankNode(root, rank), swizzle_node) + self.program.apply_partition_swizzling(tensor) # Add to flattening info flatten_info[root].append(partitioning) @@ -250,7 +223,7 @@ def __build_fiber_nodes(self, iter_graph: IterationGraph, # We need a EagerInputNode and an IntervalNode if at least one tensor # will be projected and it is a partitioned rank (so we don't know the # bounds) - if any(tensor.peek() != rank.lower() for tensor in tensors) and \ + if any(tensor.peek_clean() != rank for tensor in tensors) and \ self.program.get_partitioning().split_rank_name(rank)[1] == "0": self.__build_project_interval(rank) @@ -270,7 +243,8 @@ def __build_fiber_nodes(self, iter_graph: IterationGraph, get_payload_node) for rank in ranks: - loop_rank = part.get_final_rank_id(tensor, rank) + loop_rank = part.get_final_rank_id( + tensor.get_init_ranks(), rank) self.graph.add_edge(LoopNode(loop_rank), get_payload_node) for ranks, tensor in iter_graph.pop_discord(): @@ -279,9 +253,9 @@ def __build_fiber_nodes(self, iter_graph: IterationGraph, get_payload_node, FiberNode( tensor.fiber_name())) - def __build_loop_nest(self) -> None: + def __build_loop_nest(self) -> List[Node]: """ - Build the loop nest + Build the loop nest, returns the chain of nodes """ loop_order = self.program.get_loop_order().get_ranks() @@ -289,23 +263,47 @@ def __build_loop_nest(self) -> None: chain: List[Node] = [OtherNode("StartLoop")] for rank in loop_order: chain.append(LoopNode(rank)) - self.graph.add_edge(chain[-2], chain[-1]) + chain.append(OtherNode("Body")) + for rank in reversed(loop_order): + chain.append(EndLoopNode(rank)) + chain.append(OtherNode("Footer")) + + # Note that the chain is guaranteed to have at least two nodes + for i in range(len(chain) - 1): + self.graph.add_edge(chain[i], chain[i + 1]) - # Add the graphics generation, body, and footer + # Add the graphics generation self.graph.add_edge(OtherNode("Graphics"), OtherNode("StartLoop")) self.graph.add_edge(OtherNode("Output"), OtherNode("Graphics")) - self.graph.add_edge(chain[-1], OtherNode("Body")) - self.graph.add_edge(OtherNode("Body"), OtherNode("Footer")) # If we have Metrics, we need to add the MetricsNodes if self.metrics: self.graph.add_edge(OtherNode("StartLoop"), MetricsNode("Start")) self.graph.add_edge(MetricsNode("Start"), chain[1]) - self.graph.add_edge(OtherNode("Body"), MetricsNode("End")) + metrics_chain: List[Node] = [] + for rank in loop_order: + metrics_chain.append(MetricsHeaderNode(rank)) + metrics_chain.append(MetricsNode("Body")) + for rank in reversed(loop_order): + metrics_chain.append(MetricsFooterNode(rank)) + + j = 0 + for i, metrics_node in enumerate(metrics_chain): + 
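# Weave each metrics node between two consecutive nodes of the
+                # main chain; once MetricsNode("Body") is placed, the offset
+                # j skips OtherNode("Body") so the metrics footers pair with
+                # the EndLoopNodes.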
+                self.graph.add_edge(chain[i + j], metrics_chain[i])
+                self.graph.add_edge(metrics_chain[i], chain[i + j + 1])
+
+                if metrics_node == MetricsNode("Body"):
+                    j = 1
+
+            self.graph.add_edge(MetricsNode("Start"), metrics_chain[0])
+            self.graph.add_edge(metrics_chain[-1], MetricsNode("End"))
+            self.graph.add_edge(chain[-2], MetricsNode("End"))
             self.graph.add_edge(MetricsNode("End"), OtherNode("Footer"))
             self.graph.add_edge(OtherNode("Footer"), MetricsNode("Dump"))
 
+        return chain
+
     def __build_output(self) -> None:
         """
         Build all of the output-specific edges
@@ -403,6 +401,22 @@ def __build_static_part(self, tensor: Tensor,
 
             self.graph.add_edge(swizzle_node, part_node)
 
+            # Add an additional swizzle node to ensure that the tensor always
+            # starts in the correct order before being merged by a hardware
+            # merger
+            if self.metrics:
+                init_ranks = self.metrics.get_merger_init_ranks(
+                    root, tensor.get_ranks())
+                if init_ranks:
+                    metrics_swizzle_node = SwizzleNode(
+                        root, init_ranks, "metrics")
+
+                    for rank in init_ranks:
+                        self.graph.add_edge(
+                            RankNode(root, rank), metrics_swizzle_node)
+
+                    self.graph.add_edge(metrics_swizzle_node, swizzle_node)
+
         # Otherwise, add the edge from the source rank to the partitioning
         else:
             self.graph.add_edge(RankNode(root, partitioning[0]), part_node)
@@ -441,6 +455,23 @@ def __build_swizzle_root_fiber(self, tensor: Tensor, static: bool) -> None:
         if static:
             self.graph.add_edge(swizzle_node, OtherNode("Graphics"))
 
+        # Add an additional swizzle node to ensure that the tensor always
+        # starts in the correct order before being merged by a hardware merger
+        if self.metrics:
+            init_ranks = self.metrics.get_merger_init_ranks(
+                root, tensor.get_ranks())
+            if init_ranks:
+                metrics_swizzle_node = SwizzleNode(root, init_ranks, "metrics")
+
+                for rank in init_ranks:
+                    self.graph.add_edge(
+                        RankNode(
+                            root,
+                            rank),
+                        metrics_swizzle_node)
+
+                self.graph.add_edge(metrics_swizzle_node, swizzle_node)
+
     def __connect_dyn_part(self, tensor: Tensor, rank: str,
                            flatten_info: Dict[str, List[Tuple[str, ...]]]) -> None:
         """
diff --git a/teaal/ir/flow_nodes.py b/teaal/ir/flow_nodes.py
index d063e2f..97ee04c 100644
--- a/teaal/ir/flow_nodes.py
+++ b/teaal/ir/flow_nodes.py
@@ -25,24 +25,22 @@
 """
 
 import abc
-from typing import Any, Iterable, List, Tuple
+from typing import Any, Iterable, List, Optional, Tuple
 
 from teaal.ir.node import Node
 
 
-class CollectingNode(Node):
+class EagerInputNode(Node):
     """
-    A Node to turn on reuse distance collection for a particular rank of a
-    tensor
+    A node that ensures that the inputs are eager
     """
 
-    def __init__(self, tensor: str, rank: str) -> None:
+    def __init__(self, rank: str, tensors: List[str]) -> None:
         """
-        Construct a node for the collection of reuse metrics for a tensor's
-        rank
+        Construct an EagerInputNode
         """
-        self.tensor = tensor
         self.rank = rank
+        self.tensors = tensors
 
     def get_rank(self) -> str:
         """
@@ -50,30 +48,29 @@ def get_rank(self) -> str:
         """
         return self.rank
 
-    def get_tensor(self) -> str:
+    def get_tensors(self) -> List[str]:
         """
         Accessor for the tensor
         """
-        return self.tensor
+        return self.tensors
 
     def _Node__key(self) -> Iterable[Any]:
         """
-        Iterable of fields of a Collecting
+        Iterable of fields of an EagerInputNode
         """
-        return self.tensor, self.rank
+        return self.rank, self.tensors
 
 
-class EagerInputNode(Node):
+class EndLoopNode(Node):
     """
-    A node that ensures that the inputs are eager
+    A Node representing the end of a loop
     """
 
-    def __init__(self, rank: str, tensors: List[str]) -> None:
+    def __init__(self, rank: str) -> None:
         """
-        Construct a EagerInputNode
+        Construct an EndLoopNode
         """
         self.rank = rank
-        self.tensors = tensors
 
     def get_rank(self) -> str:
         """
@@ -81,17 +78,11 @@ def get_rank(self) -> str:
         """
         return self.rank
 
-    def get_tensors(self) -> List[str]:
-        """
-        Accessor for the tensor
-        """
-        return self.tensors
-
     def _Node__key(self) -> Iterable[Any]:
         """
-        Iterable of fields of a FromFiberNode
+        Iterable of fields of an EndLoopNode
         """
-        return self.rank, self.tensors
+        return self.rank,
 
 
 class FiberNode(Node):
@@ -260,6 +251,54 @@ def _Node__key(self) -> Iterable[Any]:
         return self.rank,
 
 
+class MetricsFooterNode(Node):
+    """
+    A Node for collecting metrics after the end of the given loop
+    """
+
+    def __init__(self, rank: str) -> None:
+        """
+        Construct a MetricsFooterNode
+        """
+        self.rank = rank
+
+    def get_rank(self) -> str:
+        """
+        Accessor for the rank
+        """
+        return self.rank
+
+    def _Node__key(self) -> Iterable[Any]:
+        """
+        Iterable of fields of a MetricsFooterNode
+        """
+        return self.rank,
+
+
+class MetricsHeaderNode(Node):
+    """
+    A Node for collecting metrics before the start of the given loop
+    """
+
+    def __init__(self, rank: str) -> None:
+        """
+        Construct a MetricsHeaderNode
+        """
+        self.rank = rank
+
+    def get_rank(self) -> str:
+        """
+        Accessor for the rank
+        """
+        return self.rank
+
+    def _Node__key(self) -> Iterable[Any]:
+        """
+        Iterable of fields of a MetricsHeaderNode
+        """
+        return self.rank,
+
+
 class MetricsNode(Node):
     """
     A Node for metrics collection
diff --git a/teaal/ir/fusion.py b/teaal/ir/fusion.py
new file mode 100644
index 0000000..7ba3ba7
--- /dev/null
+++ b/teaal/ir/fusion.py
@@ -0,0 +1,117 @@
+"""
+MIT License
+
+Copyright (c) 2023 University of Illinois
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+Representation of the fusion schedule of this accelerator
+"""
+
+from typing import Dict, List, Optional, Set
+
+from teaal.ir.component import *
+from teaal.ir.hardware import Hardware
+from teaal.ir.program import Program
+
+
+class Fusion:
+    """
+    Representation of the fusion schedule of the accelerator
+    """
+
+    def __init__(self, hardware: Hardware) -> None:
+        """
+        Construct a new fusion object
+        """
+        self.hardware = hardware
+
+        self.blocks: List[List[str]] = []
+        self.curr_block: List[str] = []
+        self.fused_ranks: List[str] = []
+
+        self.curr_config: Optional[str] = None
+        self.components_used: Set[str] = set()
+
+        self.component_dict: Dict[str, List[str]] = {}
+
+    def add_einsum(self, program: Program) -> None:
+        """
+        Add the information corresponding to this Einsum
+        """
+        einsum = program.get_equation().get_output().root_name()
+        loop_ranks = program.get_loop_order().get_ranks()
+
+        spacetime = program.get_spacetime()
+        if not spacetime:
+            raise ValueError("Undefined spacetime for Einsum " + einsum)
+
+        space_ranks = spacetime.get_space()
+
+        # Get the temporal ranks in all loop orders before the first spatial
+        # rank
+        fused_ranks: List[str]
+        if space_ranks:
+            fused_ranks = loop_ranks[:loop_ranks.index(space_ranks[0])]
+        else:
+            fused_ranks = loop_ranks
+
+        # Get the components used for this Einsum
+        components_used = set()
+        for component in self.hardware.get_components(
+                einsum, FunctionalComponent):
+            if component.get_bindings()[einsum]:
+                components_used.add(component.get_name())
+
+        # Get the config
+        config = self.hardware.get_config(einsum)
+
+        # Check if the fusion conditions are met
+        if config == self.curr_config and fused_ranks == self.fused_ranks and not self.components_used.intersection(
+                components_used):
+            self.curr_block.append(einsum)
+            self.components_used = self.components_used.union(components_used)
+
+        # Otherwise, start a new block
+        else:
+            self.blocks.append([einsum])
+            self.curr_block = self.blocks[-1]
+            self.fused_ranks = fused_ranks
+            self.curr_config = config
+
+        # Prepare to record the components contributing to the execution time
+        self.component_dict[einsum] = []
+
+    def add_component(self, einsum: str, component: str) -> None:
+        """
+        Add a component whose time is being tracked
+        """
+        self.component_dict[einsum].append(component)
+
+    def get_blocks(self) -> List[List[str]]:
+        """
+        Get the Einsums organized by their fusion blocks
+        """
+        return self.blocks
+
+    def get_components(self, einsum: str) -> List[str]:
+        """
+        Get the names of the components used for this Einsum
+        """
+        return self.component_dict[einsum]
diff --git a/teaal/ir/hardware.py b/teaal/ir/hardware.py
index cecd586..eb54047 100644
--- a/teaal/ir/hardware.py
+++ b/teaal/ir/hardware.py
@@ -24,33 +24,58 @@
 Representation of the hardware of an accelerator
 """
 
-from typing import Dict, Type
+from typing import Dict, Set, Type, TypeVar
 
 from teaal.ir.component import *
 from teaal.ir.level import Level
+from teaal.ir.program import Program
+
 from teaal.parse import *
 
+T = TypeVar("T")
+
 
 class Hardware:
     """
     Representation of the hardware of an accelerator
     """
 
-    def __init__(self, arch: Architecture, bindings: Bindings) -> None:
+    def __init__(
+            self,
+            arch: Architecture,
+            bindings: Bindings,
+            program: Program) -> None:
         """
         Construct the hardware
+
+        TODO: The program is only used to get the Einsum name; standardize
+        so all use program or all take it as an argument
         """
+        self.bindings = bindings
+        self.program = program
+        self.components: Dict[str, Component] = {}
+        # Get the
configuration for each Einsum + self.configs = {} + for einsum in self.program.get_all_einsums(): + self.configs[einsum] = self.bindings.get_config(einsum) + spec = arch.get_spec() if spec is None: raise ValueError("Empty architecture specification") - subtree = spec["architecture"]["subtree"] - if len(subtree) != 1: - raise ValueError("Architecture must have a single root level") + # Build the architecture tree for each configuration + self.tree = {} + for config in spec["architecture"]: + subtree = spec["architecture"][config] + if len(subtree) != 1: + raise ValueError( + "Configuration " + + config + + " must have a single root level") - self.tree = self.__build_level(subtree[0], bindings) + self.tree[config] = self.__build_level(subtree[0]) def get_component(self, name: str) -> Component: """ @@ -58,198 +83,158 @@ def get_component(self, name: str) -> Component: """ return self.components[name] - def get_compute_path(self, einsum: str) -> List[Level]: + def get_components(self, einsum: str, class_: Type[T]) -> List[T]: """ - Get a list of levels with dataflow corresponding to this einsum + Get a list of components relevant to this einsum """ - return self.__compute_helper(einsum, self.tree) + components: List[T] = [] + for name in self.bindings.get_bindings()[einsum]: + component = self.components[name] + if isinstance(component, class_): + components.append(component) + return components - def get_compute_components(self, einsum: str) -> List[ComputeComponent]: + def get_config(self, einsum: str) -> str: """ - Get a list of compute components relevant to this einsum + Get the name of the hardware configuration for this Einsum """ - path = self.get_compute_path(einsum) + return self.configs[einsum] - components = [] - for level in path: - for component in level.get_local(): - if isinstance(component, ComputeComponent) and \ - component.get_bindings(einsum): - components.append(component) + def get_frequency(self, einsum: str) -> int: + """ + The clock_frequency (in Hz) should be specified as an attribute at the + top level + """ + top_level = self.tree[self.configs[einsum]] + freq = top_level.get_attr("clock_frequency") - return components + if freq is None: + raise ValueError( + "Unspecified clock frequency for config " + + self.configs[einsum]) - def get_merger_components(self) -> List[MergerComponent]: + if isinstance(freq, str): + raise ValueError( + "Bad clock frequency for config " + + self.configs[einsum]) + + return freq + + def get_prefix(self, einsum: str) -> str: """ - Get all merger components + Get the prefix for collected metrics for the given Einsum """ - mergers = [] - for component in self.components.values(): - if isinstance(component, MergerComponent): - mergers.append(component) - - return mergers + return self.bindings.get_prefix(einsum) def get_traffic_path( self, - einsum: str, - tensor: str) -> List[MemoryComponent]: + tensor: str, + rank: str, + type_: str, + format_: str) -> List[Tuple[MemoryComponent, str]]: """ - Get a list of paths this tensor will be loaded into + Get a list of components this tensor will be loaded into and either + a lazy style or the source rank of the eager load """ - paths = self.__traffic_helper(tensor, self.tree) - - # Merge all paths together - final: List[MemoryComponent] = [] - compute_path = self.get_compute_path(einsum) - - for path in paths: - sub_path = Hardware.__sub_path(path, compute_path) - - if len(final) < len(sub_path) and final == sub_path[:len(final)]: - final = sub_path + einsum = 
self.program.get_equation().get_output().root_name() - elif len(sub_path) < len(final) and sub_path == final[:len(sub_path)]: - pass + components: List[Tuple[MemoryComponent, str]] = [] - elif sub_path == final: - pass + levels = [(self.tree[self.configs[einsum]], 0)] + depths_covered = set() + while levels: + level, depth = levels.pop() - else: - raise ValueError( - "Multiple bindings for einsum " + - einsum + - " and tensor " + - tensor) + for component in level.get_local(): + if not isinstance(component, MemoryComponent): + continue + + binding = component.get_binding( + einsum, tensor, rank, type_, format_) + if binding: + if isinstance( + component, + BuffetComponent) and binding["style"] == "eager": + components.append((component, binding["root"])) + else: + components.append((component, "lazy")) + + if depth in depths_covered: + raise ValueError( + "Multiple traffic paths for tensor " + + tensor + + " in Einsum " + + einsum) + depths_covered.add(depth) + + levels.extend((tree, depth + 1) for tree in level.get_subtrees()) - return final + return components def get_tree(self) -> Level: """ Get the architecture tree """ - return self.tree - - @staticmethod - def __sub_path( - mem_path: List[MemoryComponent], - compute_path: List[Level]) -> List[MemoryComponent]: - """ - Return the prefix of the mem_path captured by this compute_path - """ - i = 0 - for level in compute_path: - if mem_path[i] in level.get_local(): - i += 1 - - if i == len(mem_path): - break + einsum = self.program.get_equation().get_output().root_name() + return self.tree[self.configs[einsum]] - return mem_path[:i] - - def __build_component(self, local: dict, bindings: Bindings) -> Component: + def __build_component(self, local: dict, num_instances: int) -> Component: """ Build a component """ class_: Type[Component] - if local["class"].lower() == "buffet": + class_name = local["class"].lower() + if class_name == "buffet": class_ = BuffetComponent - elif local["class"].lower() == "cache": + elif class_name == "cache": class_ = CacheComponent - elif local["class"].lower() == "compute": + elif class_name == "compute": class_ = ComputeComponent - elif local["class"].lower() == "dram": + elif class_name == "dram": class_ = DRAMComponent - elif local["class"].lower() == "leaderfollower": - class_ = LeaderFollowerComponent + elif class_name == "intersector": + type_ = local["attributes"]["type"].lower() + if type_ == "leader-follower": + class_ = LeaderFollowerComponent + + elif type_ == "skip-ahead": + class_ = SkipAheadComponent + + elif type_ == "two-finger": + class_ = TwoFingerComponent - elif local["class"].lower() == "merger": + else: + raise ValueError("Unknown intersection type: " + type_) + + elif class_name == "merger": class_ = MergerComponent - elif local["class"].lower() == "skipahead": - class_ = SkipAheadComponent + elif class_name == "sequencer": + class_ = SequencerComponent else: raise ValueError("Unknown class: " + local["class"]) name = local["name"] - binding = bindings.get(name) + binding = self.bindings.get_component(name) - component = class_(name, local["attributes"], binding) + component = class_(name, num_instances, local["attributes"], binding) self.components[component.get_name()] = component return component - def __build_level(self, tree: dict, bindings: Bindings) -> Level: + def __build_level(self, tree: dict) -> Level: """ Build the levels of the architecture tree """ attrs = tree["attributes"] - local = [self.__build_component(comp, bindings) + local = [self.__build_component(comp, 
tree["num"]) for comp in tree["local"]] - subtrees = [self.__build_level(subtree, bindings) + subtrees = [self.__build_level(subtree) for subtree in tree["subtree"]] return Level(tree["name"], tree["num"], attrs, local, subtrees) - - def __compute_helper(self, einsum: str, level: Level) -> List[Level]: - """ - Recursive implementation to find the dataflow to compute for a given - einsum - """ - # Recurse down the tree - paths = [] - for subtree in level.get_subtrees(): - sub_path = self.__compute_helper(einsum, subtree) - if sub_path: - paths.append(sub_path) - - if len(paths) > 1: - raise ValueError("Only one compute path allowed per einsum") - - if paths: - return [level] + paths[0] - - # Check if a local component performs compute for this einsum - root = False - for comp in level.get_local(): - if isinstance(comp, ComputeComponent) and \ - comp.get_bindings(einsum): - return [level] - - return [] - - def __traffic_helper(self, tensor: str, - level: Level) -> List[List[MemoryComponent]]: - """ - Recursive implementation to find the memory traffic pattern of a tensor - from a given subtree - """ - # Recurse down the tree - paths = [] - for subtree in level.get_subtrees(): - paths.extend(self.__traffic_helper(tensor, subtree)) - - # Check if the memory components at this level store the tensor - mem_components = [] - for comp in level.get_local(): - if isinstance(comp, MemoryComponent) and comp.get_binding(tensor): - mem_components.append(comp) - - # Return a list of paths - if not paths: - return [[mem] for mem in mem_components] - - if not mem_components: - return paths - - final = [] - for mem in mem_components: - for path in paths: - final.append([mem] + path) - - return final diff --git a/teaal/ir/loop_order.py b/teaal/ir/loop_order.py index 9ba668a..93af527 100644 --- a/teaal/ir/loop_order.py +++ b/teaal/ir/loop_order.py @@ -83,7 +83,9 @@ def apply(self, tensor: Tensor) -> None: # Get the names of the final rank ids for the tensor final_ids = [] for rank in tensor.get_ranks(): - final_ids.append(self.partitioning.get_final_rank_id(tensor, rank)) + final_ids.append( + self.partitioning.get_final_rank_id( + tensor.get_init_ranks(), rank)) # Order the current rank ids based on their final posititon expanded: List[List[str]] = [[] for _ in range(len(self.ranks))] diff --git a/teaal/ir/metrics.py b/teaal/ir/metrics.py index 24c138d..19a03f9 100644 --- a/teaal/ir/metrics.py +++ b/teaal/ir/metrics.py @@ -24,10 +24,11 @@ Representation of the metrics that need to be collected for this accelerator """ -from typing import Tuple +from typing import Dict, List, Optional, Tuple, Union from teaal.ir.component import * from teaal.ir.hardware import Hardware +from teaal.ir.iter_graph import IterationGraph from teaal.ir.program import Program from teaal.ir.tensor import Tensor from teaal.parse.format import Format @@ -45,256 +46,512 @@ def __init__( hardware: Hardware, format_: Format) -> None: """ - Construct a new metrics object + Construct a new Metrics object """ self.program = program self.hardware = hardware self.format = format_ - # Check that we can collect metrics for this accelerator - self.__check_configuration() + self.__build_format_options() + self.__build_eager_evicts() + self.__expand_eager() - # Get the final form of all tensors - for tensor in self.program.get_equation().get_tensors(): - self.program.apply_all_partitioning(tensor) - self.program.get_loop_order().apply(tensor) + self.__build_coiter_ranks() + self.__build_fiber_traces() + self.__build_traffic_paths() - # Collect the 
memory traffic information - self.__build_dram_tensors() - self.__build_off_chip_traffic_info() - self.__build_stationary() + def get_coiter(self, rank: str) -> Optional[Component]: + """ + Get the coiterator used for this rank + """ + if rank not in self.coiterators: + return None - # Reset all tensors - for tensor in self.program.get_equation().get_tensors(): - is_output = tensor.get_is_output() - tensor.reset() - tensor.set_is_output(is_output) + return self.coiterators[rank] - # Collect other information - self.__build_mergers() + def get_coiter_traces(self, coiter: str, rank: str) -> List[str]: + """ + Get the trace names used for this coiterator + """ + return self.coiter_traces[coiter][rank] - def get_compute_components(self) -> List[ComputeComponent]: + def get_collected_iter_info(self) -> Set[str]: """ - Get all relevant compute components for this Einsum + Get the specification for which ranks iteration needs to be traced """ + ranks = set() einsum = self.program.get_equation().get_output().root_name() - return self.hardware.get_compute_components(einsum) + for sequencer in self.hardware.get_components( + einsum, SequencerComponent): + ranks.update(sequencer.get_ranks(einsum)) - def get_format(self, tensor: Tensor) -> dict: + return ranks + + def get_collected_tensor_info( + self, tensor: str) -> Set[Tuple[str, str, bool]]: """ - Get the format specification for the given tensor + Get a specification for which ranks need to be collected in the form + {(rank, type, consumable)}, where type is one of + - "fiber" - corresponding to iteration over that fiber + - "iter" - corresponding to the iteration of the loop nest + - rank - the rank that the eager iteration starts at """ - return self.format.get_spec(tensor.root_name()) - - def get_merger_components(self) -> List[Tuple[MergerComponent, dict]]: + # Collect traces for data traffic + info = set() + einsum = self.program.get_equation().get_output().root_name() + if tensor in self.traffic_paths: + for rank, paths in self.traffic_paths[tensor][1].items(): + for i, path in enumerate(paths): + for component, style in path: + if isinstance(component, DRAMComponent): + continue + + if style == "lazy": + info.add((rank, "fiber", False)) + fiber_trace = self.get_fiber_trace( + tensor, rank, True) + if i == 1 and fiber_trace != "iter" and fiber_trace[:11] != "get_payload": + info.add((rank, "iter", False)) + + else: + info.add((rank, style, False)) + + # Collect traces for intersection + if not tensor == einsum: + tensor_ir = self.program.get_equation().get_tensor(tensor) + part_ir = self.program.get_partitioning() + final_ranks = part_ir.partition_ranks( + tensor_ir.get_init_ranks(), part_ir.get_all_parts(), True, True) + + for intersector in self.hardware.get_components( + einsum, IntersectorComponent): + for binding in intersector.get_bindings()[einsum]: + if isinstance(intersector, LeaderFollowerComponent) and \ + binding["leader"] != tensor: + continue + + if binding["rank"] not in final_ranks: + continue + + info.add((binding["rank"], "fiber", True)) + + return info + + def get_eager_evict_on(self, tensor: str, rank: str) -> List[str]: """ - Get all relevant merger components and the relevant tensor being merged + Get the ranks eager load should be evicted on in loop order """ - return self.mergers + ranks = [] + for loop_rank, evicts in self.eager_evicts.items(): + if (tensor, rank) in evicts: + ranks.append(loop_rank) + + ranks.sort(key=self.program.get_loop_order().get_ranks().index) + return ranks - def get_on_chip_buffer(self, 
tensor: Tensor) -> MemoryComponent:
+    def get_eager_evicts(self, rank: str) -> List[Tuple[str, str]]:
         """
-        Gets the on-chip buffer for a particular tensor
+        Get the subtrees that were eager loaded and should be evicted on this
+        rank
         """
-        if not self.in_dram(tensor):
-            raise ValueError(
-                "Tensor " +
-                tensor.root_name() +
-                " not stored in DRAM")
+        if rank not in self.eager_evicts:
+            return []
 
-        return self.on_chip_buffer[tensor.root_name()]
+        return self.eager_evicts[rank]
 
-    def get_on_chip_rank(self, tensor: Tensor) -> str:
+    def get_eager_write(self) -> bool:
         """
-        Returns the rank of the given tensor that is used for memory traffic
+        Returns True if the kernel performs an eager write
         """
-        if not self.in_dram(tensor):
-            raise ValueError(
-                "Tensor " +
-                tensor.root_name() +
-                " not stored in DRAM")
+        return self.eager_write
 
-        return self.on_chip_rank[tensor.root_name()][1]
+    def get_fiber_trace(
+            self,
+            tensor: str,
+            rank: str,
+            is_read_trace: bool) -> str:
+        """
+        Get the name of the fiber trace for this fiber
+        """
+        # If the rank is not in the set of fiber_traces (not in the loop
+        # order), it must be iterated with a get_payload
+        if rank not in self.fiber_traces:
+            return "get_payload_" + tensor
+        return self.fiber_traces[rank][tensor][is_read_trace]
 
-    def in_dram(self, tensor: Tensor) -> bool:
+    def get_format(self) -> Format:
         """
-        Returns True if the tensor is stored in DRAM
+        Get the parsed format yaml
         """
-        return tensor.root_name() in self.dram_tensors
+        return self.format
 
-    def on_chip_stationary(self, tensor: Tensor) -> bool:
+    def get_hardware(self) -> Hardware:
         """
-        Returns True if this tensor is stationary (i.e., its DRAM traffic
-        can be computed by calculating its footprint)
+        Get the hardware IR
         """
-        return tensor.root_name() in self.stationary
+        return self.hardware
 
-    def __build_dram_tensors(self) -> None:
+    def get_loop_formats(self) -> Dict[str, str]:
         """
-        Build the set of tensors stored in DRAM
+        Get the tensors that have assigned formats during the loop nest as
+        well as the corresponding format
         """
-        self.dram_tensors = set()
-        einsum = self.program.get_equation().get_output().root_name()
+        loop_formats = {}
+        for tensor, (format_, _) in self.traffic_paths.items():
+            loop_formats[tensor] = format_
+        return loop_formats
 
-        # For each tensor
-        for tensor in self.program.get_equation().get_tensors():
-            path = self.hardware.get_traffic_path(einsum, tensor.root_name())
+    def get_merger_init_ranks(self, tensor: str,
+                              final_ranks: List[str]) -> Optional[List[str]]:
+        """
+        Get the initial ranks for merges that must be tracked by the hardware
+        """
+        einsum = self.program.get_equation().get_output().root_name()
+        mergers = self.hardware.get_components(einsum, MergerComponent)
+        init_ranks: Optional[List[str]] = None
+        for merger in mergers:
+            opt_init_ranks = merger.get_init_ranks(einsum, tensor, final_ranks)
 
-            if not path or not isinstance(path[0], DRAMComponent):
+            if opt_init_ranks is None:
                 continue
 
-            if len(path) < 2:
+            if init_ranks is not None:
                 raise ValueError(
-                    "Tensor " +
-                    tensor.root_name() +
-                    " never buffered on chip")
+                    "Multiple bindings for merge of tensor " +
+                    tensor +
+                    " to final rank order " +
+                    str(final_ranks))
 
-            self.dram_tensors.add(tensor.root_name())
+            init_ranks = opt_init_ranks
 
-    def __build_mergers(self) -> None:
+        return init_ranks
+
+    def get_source_memory(
+            self,
+            component: str,
+            tensor: str,
+            rank: str,
+            type_: str) -> Optional[MemoryComponent]:
         """
-        Build a list of mergers that will be relevant
+        Get the source
for this data """ - all_mergers = self.hardware.get_merger_components() - easy_access = {} + t = ["coord", "payload", "elem"].index(type_) - for merger in all_mergers: - for binding in merger.get_bindings(): - # Create a map from - # (tensor name, init ranks, final ranks) to the component - name = binding["tensor"] - init = tuple(binding["init_ranks"]) - final = tuple(binding["final_ranks"]) + if tensor not in self.traffic_paths: + return None - easy_access[(name, init, final)] = (merger, binding) - - self.mergers = [] - part = self.program.get_partitioning() - - def check_tensor(tensor): - """ - Check if the tensor matches a merge operation, if so, add it - """ - name = tensor.root_name() - init = tuple(tensor.get_ranks()) - self.program.get_loop_order().apply(tensor) - final = tuple(tensor.get_ranks()) - - if (name, init, final) in easy_access.keys(): - self.mergers.append(easy_access[(name, init, final)]) - - for tensor in self.program.get_equation().get_tensors(): - # If it is the output, we swizzle on the way out - is_output = tensor.get_is_output() - if is_output: - name = tensor.root_name() + path = self.traffic_paths[tensor][1][rank][t] + component_ir = self.hardware.get_component(component) + if not isinstance(component_ir, MemoryComponent): + raise ValueError( + "Destination component " + + component + + " not a memory") - # With the output, we first swizzle back, and then flatten - self.program.apply_all_partitioning(tensor) - self.program.get_loop_order().apply(tensor) + inds = [i for i, (comp, _) in enumerate(path) if comp == component_ir] + if not inds: + return None - init = tuple(tensor.get_ranks()) - tensor.reset() - self.program.apply_all_partitioning(tensor) - final = tuple(tensor.get_ranks()) + if inds[0] == 0: + return None - if (name, init, final) in easy_access.keys(): - self.mergers.append(easy_access[(name, init, final)]) + return path[inds[0] - 1][0] - else: - name = tensor.root_name() + def __build_coiter_ranks(self) -> None: + """ + Map the ranks to the coiterators that coiterate over them + """ + self.coiterators: Dict[str, Component] = {} + einsum = self.program.get_equation().get_output().root_name() + for intersector in self.hardware.get_components( + einsum, IntersectorComponent): + for binding in intersector.get_bindings()[einsum]: + rank = binding["rank"] + # Not clear how to map co-iterators onto multiple components + if rank in self.coiterators: + raise NotImplementedError - # First apply all static partitioning - for ranks in part.get_static_parts(): - # TODO: allow flattening - if len(ranks) > 1: - raise ValueError("Cannot deal with this yet") - rank = ranks[0] - if rank in tensor.get_ranks(): - # TODO Support flattening - self.program.apply_partitioning(tensor, (rank,)) + self.coiterators[rank] = intersector - check_tensor(tensor) + def __build_eager_evicts(self) -> None: + """ + Build a dictionary describing the ranks eager accesses will be evicted on - # Now check any dynamic swizzling after partitioning - # opt_rank = tensor.peek() - # while opt_rank is not None: - # if opt_rank.upper() in part.get_dyn_parts().keys(): - # tensor.from_fiber() - # self.program.apply_partitioning( - # tensor, (opt_rank.upper(),)) + self.eager_evicts: Dict[evict_rank, List[Tuple[tensor, root_rank]]] + """ + einsum = self.program.get_equation().get_output().root_name() - # check_tensor(tensor) + self.eager_evicts: Dict[str, List[Tuple[str, str]]] = {} + for buffet in self.hardware.get_components(einsum, BuffetComponent): + for binding in buffet.get_bindings()[einsum]: 
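+                # Only eager bindings need eviction bookkeeping; each
+                # (tensor, root) pair is filed under its evict-on rank.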
+ if binding["style"] != "eager": + continue - # tensor.pop() - # opt_rank = tensor.peek() + evict_on = binding["evict-on"] + if evict_on not in self.eager_evicts: + self.eager_evicts[evict_on] = [] - tensor.reset() - tensor.set_is_output(is_output) + self.eager_evicts[evict_on].append( + (binding["tensor"], binding["root"])) - def __build_off_chip_traffic_info(self) -> None: + def __build_fiber_traces(self) -> None: """ - Build a mapping from tensors to the rank buffered on chip + Build the fiber traces + + self.fiber_traces: Dict[rank, Dict[tensor, Dict[is_read_trace, trace]]] + self.coiter_traces: Dict[component, Dict[rank, List[trace]]] """ - self.on_chip_rank = {} - self.on_chip_buffer = {} + part_ir = self.program.get_partitioning() einsum = self.program.get_equation().get_output().root_name() - # For each tensor + iter_graph = IterationGraph(self.program) for tensor in self.program.get_equation().get_tensors(): - # We don't care about tensors not in DRAM - if not self.in_dram(tensor): - continue - - name = tensor.root_name() - path = self.hardware.get_traffic_path(einsum, name) - - # Get the bindings - mem_binding = path[0].get_binding(name) - on_chip_binding = path[1].get_binding(name) + self.program.apply_all_partitioning(tensor) + self.program.get_loop_order().apply(tensor) - # Indicates an error with Hardware.get_traffic_path() - if not mem_binding or not on_chip_binding: - raise ValueError("Something is wrong...") # pragma: no cover + # Get the corresponding traces + self.fiber_traces: Dict[str, Dict[str, Dict[bool, str]]] = {} + self.coiter_traces: Dict[str, Dict[str, List[str]]] = {} + + # TODO: Think about when we want the pre-projected and when we want the + # post-projected traces + + rank, tensors = iter_graph.peek_concord() + while rank: + # Create empty dictionaries for new ranks + for tensor in tensors: + trank = tensor.peek_clean() + if trank not in self.fiber_traces: + self.fiber_traces[trank] = {} + + output, inputs = self.program.get_equation().get_iter(tensors) + + parent = "iter" + next_label = 0 + if output and not inputs: + # If there is only an output, there is no separate read and + # write trace + self.fiber_traces[rank][output.root_name()] = { + True: parent, False: parent} + + # Advance the iteration graph + iter_graph.pop_concord() + iter_graph.pop_discord() + rank, tensors = iter_graph.peek_concord() + continue - # Build a dictionary of tensors to the - # (rank in DRAM, rank in last on-chip buffer) - self.on_chip_rank[name] = (mem_binding, on_chip_binding) + if output: + self.fiber_traces[rank][output.root_name()] = { + True: "populate_read_0", False: "populate_write_0"} + + parent = "populate_1" + + next_label = 2 + + union_label: Optional[int] = None + if len(inputs) > 1: + union_label = next_label + next_label += 2 + + for i, term in enumerate(inputs): + if len(term) == 1: + trank = term[0].peek_clean() + + if i + 1 < len(inputs): + self.fiber_traces[trank][term[0].root_name()] = { + True: "union_" + str(union_label)} + # i + 1 == len(inputs) + else: + self.fiber_traces[trank][term[0].root_name()] = { + True: parent} + + # Otherwise we have multiple tensors intersected together + else: + # Not clear which intersection should performed + # with this component + if rank in self.coiterators and len(inputs) > 1: + raise NotImplementedError + + # Reorganize the leader to be first + tensors = term.copy() + if rank in self.coiterators and isinstance( + self.coiterators[rank], LeaderFollowerComponent): + for binding in self.coiterators[rank].get_bindings()[ + 
einsum]: + if binding["rank"] == rank: + leader = binding["leader"] + break + + leader_tensor = self.program.get_equation().get_tensor(leader) + tensors.remove(leader_tensor) + tensors.insert(0, leader_tensor) + + for j, tensor in enumerate(tensors[:-1]): + trank = tensor.peek_clean() + self.fiber_traces[trank][tensor.root_name()] = { + True: "intersect_" + str(next_label)} + + if rank in self.coiterators and isinstance( + self.coiterators[rank], LeaderFollowerComponent) and j + 2 < len(tensors): + next_label += 1 + else: + next_label += 2 + + trank = tensors[-1].peek_clean() + + self.fiber_traces[trank][tensors[-1].root_name() + ] = {True: "intersect_" + str(next_label - 1)} + + if rank in self.coiterators: + coiter = self.coiterators[rank] + if coiter.get_name() not in self.coiter_traces: + self.coiter_traces[coiter.get_name()] = {} + self.coiter_traces[coiter.get_name()][rank] = [] + + traces = self.coiter_traces[coiter.get_name()][rank] + if isinstance(coiter, LeaderFollowerComponent): + # TODO: Can the leader-follower component store + # this info itself + leader = "" + for binding in coiter.get_bindings()[einsum]: + if binding["rank"] == rank: + leader = binding["leader"] + break + traces.append( + self.fiber_traces[rank][leader][True]) + + else: + # Do not support tracing intersection of more than + # two components + if len(tensors) > 2: + raise NotImplementedError + + for tensor in tensors: + trank = tensor.peek_clean() + traces.append( + self.fiber_traces[rank][tensor.root_name()][True]) + + if union_label is not None: + parent = "union_" + str(union_label + 1) + union_label = next_label + next_label += 2 + + # Advance the iteration graph + iter_graph.pop_concord() + iter_graph.pop_discord() + rank, tensors = iter_graph.peek_concord() - # Save the component where the tensor is buffered on-chip - self.on_chip_buffer[name] = path[1] + # Reset all tensors + for tensor in self.program.get_equation().get_tensors(): + is_output = tensor.get_is_output() + tensor.reset() + tensor.set_is_output(is_output) - def __build_stationary(self) -> None: + def __build_format_options(self) -> None: """ - Build a set of DRAM -> on chip stationary tensors + Build a set of possible formats for each tensor + + self.format_options: Dict[tensor, List[format]] """ - self.stationary = set() - einsum = self.program.get_equation().get_output().root_name() + self.format_options: Dict[str, List[str]] = {} + for tensor_ir in self.program.get_equation().get_tensors(): + tensor = tensor_ir.root_name() + self.format_options[tensor] = [] - for name, (mem_rank, on_chip_rank) in self.on_chip_rank.items(): - tensor = self.program.get_equation().get_tensor(name) + spec = self.format.get_spec(tensor) - if mem_rank != "root": - raise NotImplementedError + # Identify the formats that can correspond to the iteration of this + # loop nest + loop_order = self.program.get_loop_order() + for format_ in spec: + format_ranks = spec[format_]["rank-order"] - prefix = tensor.get_prefix(on_chip_rank) + temp_tensor = Tensor(tensor, format_ranks) + loop_order.apply(temp_tensor) - # The tensor is stationary if its prefix is also a prefix to the - # loop order - if prefix == self.program.get_loop_order().get_ranks()[ - :len(prefix)]: - self.stationary.add(name) + if temp_tensor.get_ranks() == format_ranks: + self.format_options[tensor].append(format_) - def __check_configuration(self) -> None: + def __build_traffic_paths(self) -> None: """ - There are many mappings that we cannot model right now. 
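[Editor's note] To make the trace-labeling scheme in __build_fiber_traces concrete: for a single-term Einsum Z[M,N] = A[M,K] * B[K,N] with loop order M, N, K and no co-iterator bound, the walk above would produce roughly the mapping below (a sketch inferred from the logic in this hunk; tensor names and labels are illustrative, and the inner keys are is_read_trace flags):

    fiber_traces = {
        "M": {"Z": {True: "populate_read_0", False: "populate_write_0"},
              "A": {True: "populate_1"}},   # single input term at M
        "N": {"Z": {True: "populate_read_0", False: "populate_write_0"},
              "B": {True: "populate_1"}},
        "K": {"A": {True: "intersect_0"},   # two inputs co-iterate at K,
              "B": {True: "intersect_1"}},  # so both carry intersect labels
    }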
Make sure this - is a legal configuration + Build a dictionary of used loop formats: + Dict[tensor, Tuple[format, Dict[rank, Tuple[coord_path, payload_path, elem_path]]]] """ - # Check that there is no dynamic partitioning - if self.program.get_partitioning().get_dyn_parts() != set(): - raise NotImplementedError + self.traffic_paths: Dict[str, + Tuple[str, + Dict[str, + Tuple[List[Tuple[MemoryComponent, str]], + List[Tuple[MemoryComponent, str]], + List[Tuple[MemoryComponent, str]]]]]] = {} + for tensor_ir in self.program.get_equation().get_tensors(): + tensor = tensor_ir.root_name() + spec = self.format.get_spec(tensor) + + # Build the set of specs to collect + einsum = self.program.get_equation().get_output().root_name() + + for format_ in self.format_options[tensor]: + for rank in spec[format_]: + if rank == "rank-order": + continue + + coord_path = self.hardware.get_traffic_path( + tensor, rank, "coord", format_) + payload_path = self.hardware.get_traffic_path( + tensor, rank, "payload", format_) + elem_path = self.hardware.get_traffic_path( + tensor, rank, "elem", format_) + + if tensor in self.traffic_paths and self.traffic_paths[ + tensor][0] != format_: + raise ValueError("Multiple potential formats " + + str({self.traffic_paths[tensor][0], format_}) + + " for tensor " + + tensor + + " in Einsum " + + einsum) + + if tensor not in self.traffic_paths: + self.traffic_paths[tensor] = (format_, {}) + + self.traffic_paths[tensor][1][rank] = ( + coord_path, payload_path, elem_path) + + def __expand_eager(self): + """ + Expand all eager bindings + """ + einsum = self.program.get_equation().get_output().root_name() - # Check that there are at most three tensors (no danger of multiple - # intersections per rank) - if len(self.program.get_equation().get_tensors()) > 3: - raise NotImplementedError + self.eager_write = False + for tensor_ir in self.program.get_equation().get_tensors(): + tensor = tensor_ir.root_name() + spec = self.format.get_spec(tensor) + + for format_ in self.format_options[tensor]: + types = [] + for rank in spec[format_]["rank-order"]: + types.append([]) + if "layout" in spec[format_][rank] and \ + spec[format_][rank]["layout"] == "interleaved": + types[-1].append("elem") + continue + + if "cbits" in spec[format_][rank] and \ + spec[format_][rank]["cbits"] > 0: + types[-1].append("coord") + + if "pbits" in spec[format_][rank] and \ + spec[format_][rank]["pbits"] > 0: + types[-1].append("payload") + + for component in self.hardware.get_components( + einsum, BuffetComponent): + + if tensor_ir.get_is_output(): + for binding in component.get_bindings()[einsum]: + if binding["style"] == "eager": + self.eager_write = True + + component.expand_eager( + einsum, tensor, format_, spec[format_]["rank-order"], types) diff --git a/teaal/ir/partitioning.py b/teaal/ir/partitioning.py index c091d0b..1033b2c 100644 --- a/teaal/ir/partitioning.py +++ b/teaal/ir/partitioning.py @@ -94,6 +94,8 @@ def get_all_parts(self) -> Set[Tuple[str, ...]]: def get_available(self, rank: str) -> Set[str]: """ Get the tensor ranks that may be available with this rank + + TODO: Cache this information """ avail: Set[str] = set() avail.add(rank) @@ -146,7 +148,7 @@ def get_dyn_parts(self) -> Set[Tuple[str, ...]]: """ return self.dyn_parts - def get_final_rank_id(self, tensor: Tensor, rank: str) -> str: + def get_final_rank_id(self, init_ranks: Iterable[str], rank: str) -> str: """ Get the name of this rank in the final loop order """ @@ -165,7 +167,7 @@ def get_final_rank_id(self, tensor: Tensor, rank: str) -> 
str: # If all flattened ranks do not appear in the tensor, the final # rank ID is the bottom flattened rank for rank in node.get_ranks(): - if self.get_root_name(rank) not in tensor.get_init_ranks(): + if self.get_root_name(rank) not in init_ranks: comp = min else: node = comp( diff --git a/teaal/ir/program.py b/teaal/ir/program.py index 1dca8b4..f04216e 100644 --- a/teaal/ir/program.py +++ b/teaal/ir/program.py @@ -71,6 +71,12 @@ def __init__(self, einsum: Einsum, mapping: Mapping) -> None: self.tensors[tensor.root_name()] = tensor + # Get all einsums + self.einsums = [] + for expr in self.einsum.get_expressions(): + self.einsums.append( + str(next(expr.find_data("output")).children[0])) + self.einsum_ind: Optional[int] = None self.equation: Optional[Equation] = None self.es_tensors: List[Tensor] = [] @@ -175,6 +181,12 @@ def apply_partition_swizzling(self, tensor: Tensor) -> None: tensor.get_ranks()) tensor.update_ranks(new_ranks) + def get_all_einsums(self) -> List[str]: + """ + Get a list of all of the Einsums (as specified by their output tensor) + """ + return self.einsums + def get_equation(self) -> Equation: """ Get the parse tree representation of the einsum diff --git a/teaal/ir/spacetime.py b/teaal/ir/spacetime.py index acacade..374807e 100644 --- a/teaal/ir/spacetime.py +++ b/teaal/ir/spacetime.py @@ -124,7 +124,8 @@ def get_style(self, rank: str) -> str: """ Get the style of display for the given rank """ - return self.styles[rank] + final = self.partitioning.get_final_rank_id([rank], rank) + return self.styles[final] def get_time(self) -> List[str]: """ diff --git a/teaal/ir/tensor.py b/teaal/ir/tensor.py index 7e30536..f59c7ff 100644 --- a/teaal/ir/tensor.py +++ b/teaal/ir/tensor.py @@ -120,6 +120,12 @@ def peek(self) -> Optional[str]: return self.__get_rank() return None + def peek_clean(self) -> str: + """ + Peek at the top rank; should only be called if there is a rank to look at + """ + return self.ranks[self.iter_ptr] + def peek_rest(self) -> List[str]: """ Return the list of ranks that have not yet been iterated over for this diff --git a/teaal/parse/arch.py b/teaal/parse/arch.py index 2dad9c0..aedbd75 100644 --- a/teaal/parse/arch.py +++ b/teaal/parse/arch.py @@ -52,57 +52,60 @@ def __init__(self, yaml: Optional[dict]) -> None: if not isinstance(self.yaml["architecture"], dict): raise ValueError("Bad architecture spec: " + str(self.yaml)) - subtrees = self.yaml["architecture"]["subtree"].copy() + subtrees = {} + for config in self.yaml["architecture"]: + subtrees[config] = self.yaml["architecture"][config].copy() - while subtrees: - tree = subtrees.pop() + for config in subtrees: + while subtrees[config]: + tree = subtrees[config].pop() - if "name" not in tree.keys(): - raise ValueError("Unnamed subtree: " + repr(tree)) + if "name" not in tree.keys(): + raise ValueError("Unnamed subtree: " + repr(tree)) - name_tree = LevelParser.parse(tree["name"]) + name_tree = LevelParser.parse(tree["name"]) - if name_tree.data == "single": - tree["name"] = str(name_tree.children[0]) - tree["num"] = 1 + if name_tree.data == "single": + tree["name"] = str(name_tree.children[0]) + tree["num"] = 1 - elif name_tree.data == "multiple": - tree["name"] = str(name_tree.children[0]) + elif name_tree.data == "multiple": + tree["name"] = str(name_tree.children[0]) - num = name_tree.children[1] - if isinstance(num, Tree): - # This error should be caught by the LevelParser - raise ValueError( - "Unknown num: " + repr(num)) # pragma: no cover + num = name_tree.children[1] + if isinstance(num, 
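[Editor's note] The reworked Architecture constructor above now expects one named configuration per key under "architecture", each holding its own subtree list, rather than a single top-level "subtree" key. A minimal spec it would walk without error might look like this (a sketch; every name and attribute value here is hypothetical):

    arch_yaml = """
    architecture:
      accel:                      # configuration name, selected per Einsum
      - name: System
        local:
        - name: MainMemory
          class: DRAM
          attributes: {bandwidth: 1024}
        subtree:
        - name: PE[0..63]         # parsed as "multiple"; num becomes 64
          local:
          - name: RegFile
            class: Buffet
    """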
Tree): + # This error should be caught by the LevelParser + raise ValueError( + "Unknown num: " + repr(num)) # pragma: no cover - tree["num"] = int(num) + 1 + tree["num"] = int(num) + 1 - else: - # This error should be caught by the LevelParser - raise ValueError( - "Unknown level name: " + - repr(name_tree)) # pragma: no cover + else: + # This error should be caught by the LevelParser + raise ValueError( + "Unknown level name: " + + repr(name_tree)) # pragma: no cover - if "attributes" not in tree.keys(): - tree["attributes"] = {} + if "attributes" not in tree.keys(): + tree["attributes"] = {} - if "local" not in tree.keys(): - tree["local"] = [] + if "local" not in tree.keys(): + tree["local"] = [] - for local in tree["local"]: - if "name" not in local.keys(): - raise ValueError("Unnamed local: " + repr(local)) + for local in tree["local"]: + if "name" not in local.keys(): + raise ValueError("Unnamed local: " + repr(local)) - if "class" not in local.keys(): - raise ValueError("Unclassed local: " + repr(local)) + if "class" not in local.keys(): + raise ValueError("Unclassed local: " + repr(local)) - if "attributes" not in local.keys(): - local["attributes"] = {} + if "attributes" not in local.keys(): + local["attributes"] = {} - if "subtree" not in tree.keys(): - tree["subtree"] = [] + if "subtree" not in tree.keys(): + tree["subtree"] = [] - subtrees.extend(tree["subtree"]) + subtrees[config].extend(tree["subtree"]) @classmethod def from_file(cls, filename: str) -> "Architecture": diff --git a/teaal/parse/bindings.py b/teaal/parse/bindings.py index 3fb33b4..ece3406 100644 --- a/teaal/parse/bindings.py +++ b/teaal/parse/bindings.py @@ -24,7 +24,7 @@ Parse the input YAML for the bindings """ -from typing import List, Optional +from typing import Dict, List, Optional from teaal.parse.yaml import YamlParser @@ -39,12 +39,30 @@ def __init__(self, yaml: Optional[dict]) -> None: Read the YAML input """ - self.components = {} + self.components: Dict[str, Dict[str, List[dict]]] = {} + self.configs = {} + self.prefixes = {} if yaml is None or "bindings" not in yaml.keys(): return - for binding in yaml["bindings"]: - self.components[binding["name"]] = binding["bindings"] + for einsum in yaml["bindings"]: + self.components[einsum] = {} + + configured = False + for binding in yaml["bindings"][einsum]: + if "config" in binding: + self.configs[einsum] = binding["config"] + self.prefixes[einsum] = binding["prefix"] + + configured = True + + else: + self.components[einsum][binding["component"] + ] = binding["bindings"] + + if not configured: + raise ValueError( + "Accelerator config and prefix missing for Einsum " + einsum) @classmethod def from_file(cls, filename: str) -> "Bindings": @@ -60,11 +78,32 @@ def from_str(cls, string: str) -> "Bindings": """ return cls(YamlParser.parse_str(string)) - def get(self, name) -> List[dict]: + def get_component(self, name: str) -> Dict[str, List[dict]]: """ Get the binding information for a component """ - if name not in self.components.keys(): - return [] + info = {} + + for einsum in self.components: + if name in self.components[einsum].keys(): + info[einsum] = self.components[einsum][name] + + return info + + def get_bindings(self) -> Dict[str, Dict[str, List[dict]]]: + """ + Get the binding information for all components + """ + return self.components - return self.components[name] + def get_config(self, einsum: str) -> str: + """ + Get the hardware configuration for a given Einsum + """ + return self.configs[einsum] + + def get_prefix(self, einsum: str) -> str: 
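[Editor's note] Bindings are now grouped per Einsum, and the constructor above raises unless each Einsum carries a config/prefix entry alongside its per-component bindings. A minimal accepted spec might be (a sketch; the component, tensor, and path names are hypothetical):

    bindings_yaml = """
    bindings:
      Z:
      - config: accel             # architecture configuration to use
        prefix: tmp/Z             # file prefix for collected traces
      - component: LLB
        bindings:
        - tensor: A
          rank: K
          type: payload
          format: default
    """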
+ """ + Get the metrics prefix for the given Einsum + """ + return self.prefixes[einsum] diff --git a/teaal/parse/format.py b/teaal/parse/format.py index 7f3ad90..8cb2528 100644 --- a/teaal/parse/format.py +++ b/teaal/parse/format.py @@ -45,6 +45,15 @@ def __init__(self, yaml: Optional[dict]) -> None: self.yaml = yaml["format"] + for tensor, formats in self.yaml.items(): + for format_, spec in formats.items(): + if "rank-order" not in spec.keys(): + raise ValueError( + "Rank order not specified for tensor " + + tensor + + " in format " + + format_) + @classmethod def from_file(cls, filename: str) -> "Format": """ @@ -64,6 +73,6 @@ def get_spec(self, tensor: str) -> dict: Get the specification for a particular tensor """ if tensor not in self.yaml.keys(): - raise ValueError("Format unspecified for tensor " + tensor) + return {} return self.yaml[tensor] diff --git a/teaal/trans/collector.py b/teaal/trans/collector.py index 00f2ee5..6202c98 100644 --- a/teaal/trans/collector.py +++ b/teaal/trans/collector.py @@ -26,6 +26,7 @@ from teaal.hifiber import * from teaal.ir.component import * +from teaal.ir.fusion import Fusion from teaal.ir.metrics import Metrics from teaal.ir.program import Program from teaal.ir.tensor import Tensor @@ -37,12 +38,60 @@ class Collector: Translate the metrics collection """ - def __init__(self, program: Program, metrics: Metrics) -> None: + def __init__( + self, + program: Program, + metrics: Metrics, + fusion: Fusion) -> None: """ Construct a collector object """ self.program = program self.metrics = metrics + self.fusion = fusion + + # tree_traces: Optional[Dict[rank, Dict[is_read, Set[tensor]]]] + self.tree_traces: Optional[Dict[str, Dict[bool, Set[str]]]] = None + + def create_component(self, component: Component, rank: str) -> Statement: + """ + Create a component to track metrics + """ + name = component.get_name() + if isinstance(component, LeaderFollowerComponent): + constructor = "LeaderFollowerIntersector" + elif isinstance(component, SkipAheadComponent): + constructor = "SkipAheadIntersector" + elif isinstance(component, TwoFingerComponent): + constructor = "TwoFingerIntersector" + else: + raise ValueError( + "Unable to create consumable metrics component for " + + name + " of type " + type(component).__name__) + + return SAssign(AVar(name + "_" + rank), EFunc(constructor, [])) + + def consume_traces(self, component: str, rank: str) -> Statement: + """ + Consume the traces to track this component + """ + component_ir = self.metrics.get_hardware().get_component(component) + + if isinstance(component_ir, IntersectorComponent): + tracker_name = EVar(component + "_" + rank) + traces = self.metrics.get_coiter_traces(component, rank) + consume_args = [[AJust(EString(rank)), + AJust(EString(trace))] for trace in traces] + args = [AJust(EMethod(EVar("Metrics"), "consumeTrace", arg)) + for arg in consume_args] + return SExpr(EMethod(tracker_name, "addTraces", args)) + + else: + raise ValueError( + "Unable to consume traces for component " + + component + + " of type " + + type(component_ir).__name__) def dump(self) -> Statement: """ @@ -54,33 +103,35 @@ def dump(self) -> Statement: if self.program.get_einsum_ind() == 0: block.add(SAssign(AVar("metrics"), EDict({}))) - einsum = EString(self.program.get_equation().get_output().root_name()) - block.add(SAssign(AAccess(EVar("metrics"), einsum), EDict({}))) + einsum = self.program.get_equation().get_output().root_name() + block.add( + SAssign( + AAccess( + EVar("metrics"), EString(einsum)), EDict( + {}))) - # Add the 
memory traffic information - for tensor in self.program.get_equation().get_tensors(): - # First revert the output to its loop nest form - if tensor.get_is_output(): - tensor.reset() - tensor.set_is_output(True) - self.program.apply_all_partitioning(tensor) - self.program.get_loop_order().apply(tensor) + # Create the formats + block.add(self.__build_formats()) + + # Track the traffic + block.add(self.__build_traffic()) + + # Track the merges + block.add(self.__build_merges()) - # Add the memory traffic information - block.add(self.__mem_metrics(tensor)) + # Track the compute + block.add(self.__build_compute()) - # Fix the output tensor - if tensor.get_is_output(): - tensor.reset() - tensor.set_is_output(True) + # Track the intersections + block.add(self.__build_intersections()) - # Add the compute information - for compute in self.metrics.get_compute_components(): - block.add(self.__compute_metrics(compute)) + # Track the sequences + block.add(self.__build_sequencers()) - # Add the merger information - for merge, name in self.metrics.get_merger_components(): - block.add(self.__merger_metrics(merge, name)) + # Add the final execution time modeling + num_einsums = len(self.program.get_all_einsums()) + if self.program.get_einsum_ind() + 1 == num_einsums: + block.add(self.__build_time()) return block @@ -91,195 +142,990 @@ def end() -> Statement: """ return SExpr(EMethod(EVar("Metrics"), "endCollect", [])) - def set_collecting(self, tensor_name: str, rank: str) -> Statement: + def make_body(self) -> Statement: + """ + Make the body of the loop + """ + return self.__make_iter_num("body") + + def make_loop_footer(self, rank: str) -> Statement: + """ + Make a footer for the loop + """ + block = SBlock([]) + + if self.tree_traces is None: + raise ValueError( + "Unconfigured collector. Make sure to first call start()") + + # Collect the iteration number if necessary + block.add(self.__make_iter_num(rank)) + + # Consume a trace if necessary + coiter = self.metrics.get_coiter(rank) + if coiter is not None: + block.add(self.consume_traces(coiter.get_name(), rank)) + + # Eagerly store subtrees as necessary + for tensor in self.tree_traces[rank][False]: + block.add(self.trace_tree(tensor, rank, False)) + + return block + + def make_loop_header(self, rank: str) -> Statement: + """ + Make a header for a loop + """ + block = SBlock([]) + + if self.tree_traces is None: + raise ValueError( + "Unconfigured collector. 
Make sure to first call start()") + + loop_ranks = ["root"] + self.program.get_loop_order().get_ranks() + i = loop_ranks.index(rank) + + # Save the set of subtrees already eagerly loaded + eager_evicts = self.metrics.get_eager_evicts(loop_ranks[i - 1]) + for tensor, root in eager_evicts: + tracker = "eager_" + tensor.lower() + "_" + root.lower() + "_read" + block.add(SAssign(AVar(tracker), EFunc("set", ()))) + + # Eagerly load new subtrees as necessary + for tensor in self.tree_traces[rank][True]: + block.add(self.trace_tree(tensor, rank, True)) + + return block + + def register_ranks(self) -> Statement: + """ + Register the given ranks + """ + block = SBlock([]) + for rank in self.program.get_loop_order().get_ranks(): + block.add( + SExpr( + EMethod( + EVar("Metrics"), "registerRank", [ + AJust( + EString(rank))]))) + + return block + + def set_collecting( + self, + tensor: Optional[str], + rank: str, + type_: str, + consumable: bool, + is_read_trace: bool) -> Statement: """ Collect the statistics about a tensor """ - tensor = self.program.get_equation().get_tensor(tensor_name) - args = [AJust(EString(rank)), AJust(EBool(True))] - call = EMethod(EVar(tensor.tensor_name()), "setCollecting", args) + block = SBlock([]) + if tensor is None: + if type_ != "iter": + raise ValueError( + "Tensor must be specified for trace type " + type_) + trace = "iter" + + elif type_ == "fiber": + trace = self.metrics.get_fiber_trace(tensor, rank, is_read_trace) - return SExpr(call) + # Type is an eager rank + else: + trace = "eager_" + tensor.lower() + "_" + type_.lower() + if is_read_trace: + trace += "_read" + else: + trace += "_write" + + # We want to collect the iteration number for the last loop + # rank + output = self.program.get_equation().get_tensor(tensor) + final_tensor = Tensor( + output.root_name(), output.get_init_ranks()) + self.program.apply_all_partitioning(final_tensor) + self.program.get_loop_order().apply(final_tensor) + + iter_var = final_tensor.get_ranks()[-1].lower() + "_iter_num" + # TODO: Add a separate None type + block.add(SAssign(AVar(iter_var), EVar("None"))) + + args: List[Argument] = [ + AJust( + EString(rank)), AParam( + "type_", EString(trace)), AParam( + "consumable", EBool(consumable))] + + block.add(SExpr(EMethod(EVar("Metrics"), "trace", args))) + return block def start(self) -> Statement: """ Start metrics collection """ - loop_order = self.program.get_loop_order() - order = [EString(rank) for rank in loop_order.get_ranks()] - call = EMethod(EVar("Metrics"), "beginCollect", [AJust(EList(order))]) + block = SBlock([]) + + einsum = self.program.get_equation().get_output().root_name() + prefix = EString(self.metrics.get_hardware().get_prefix(einsum)) + call = EMethod(EVar("Metrics"), "beginCollect", [AJust(prefix)]) + + block.add(SExpr(call)) + + block.add(self.__build_components()) + + block.add(self.__build_match_ranks()) + + stmt, register = self.__build_trace_ranks() + block.add(stmt) + + if register: + block.add(self.register_ranks()) + + return block + + def trace_tree( + self, + tensor: str, + rank: str, + is_read_trace: bool) -> Statement: + """ + Trace a subtree under the fiber specified + """ + fiber = tensor.lower() + "_" + rank.lower() + + trace = "eager_" + fiber + if is_read_trace: + trace += "_read" + else: + trace += "_write" + + args: List[Argument] = [AJust(EString(trace))] + if not is_read_trace: + # We want to use the iteration number for the last loop rank + output = self.program.get_equation().get_tensor(tensor) + final_tensor = 
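[Editor's note] Taken together, start(), register_ranks(), and set_collecting() above generate a collection preamble of roughly this shape (a sketch; the prefix, the ranks, and the trace name are illustrative):

    Metrics.beginCollect("tmp/Z")
    Metrics.registerRank("M")    # only emitted when eager loads force
    Metrics.registerRank("K")    # explicit rank registration
    Metrics.trace("K", type_="intersect_0", consumable=True)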
Tensor(output.root_name(), output.get_init_ranks()) + self.program.apply_all_partitioning(final_tensor) + self.program.get_loop_order().apply(final_tensor) + + iter_var = final_tensor.get_ranks()[-1].lower() + "_iter_num" + args.append(AParam("iteration_num", EVar(iter_var))) + + trace_stmt = SExpr(EMethod(EVar(fiber), "trace", args)) + if not is_read_trace: + return trace_stmt - return SExpr(call) + # If read, only read the first time + loop_ranks = self.program.get_loop_order().get_ranks() + tensor_ir = self.program.get_equation().get_tensor(tensor) - def __compute_metrics(self, component: ComputeComponent) -> Statement: + get_final = self.program.get_partitioning().get_final_rank_id + evict_rank = self.metrics.get_eager_evict_on(tensor, rank)[-1] + er_ind = loop_ranks.index(get_final([evict_rank], evict_rank)) + tree_ind = loop_ranks.index(get_final([rank], rank)) + + key = [] + for loop_rank in loop_ranks[er_ind + 1:tree_ind]: + if loop_rank in tensor_ir.get_ranks(): + key.append(EVar(loop_rank.lower())) + key_tuple = ETuple(tuple(key)) + + cond = EBinOp(key_tuple, ONotIn(), EVar(trace)) + add_key = SExpr(EMethod(EVar(trace), "add", [AJust(key_tuple)])) + return SIf((cond, SBlock([add_key, trace_stmt])), [], None) + + def __add_collection(self, + trace: Tuple[Optional[str], + str, + str, + bool, + bool], + traces: Set[Tuple[Optional[str], + str, + str, + bool, + bool]]) -> Statement: """ - Get the compute metrics for this hardware + Add a collection and update the set of traces + """ + if trace not in traces: + traces.add(trace) + return self.set_collecting(*trace) + + return SBlock([]) + + def __get_trace(self, binding: dict, + is_read: bool) -> Tuple[str, Statement]: + """ + Get the (trace, HiFiber to produce the trace) """ einsum = self.program.get_equation().get_output().root_name() - metrics = EAccess(EVar("metrics"), EString(einsum)) + prefix = self.metrics.get_hardware().get_prefix(einsum) + \ + "-" + binding["rank"] + "-" + block = SBlock([]) + if "style" in binding and binding["style"] == "eager": + trace_fn = prefix + "eager_" + \ + binding["tensor"].lower() + "_" + binding["root"].lower() + if is_read: + trace_fn += "_read" + else: + trace_fn += "_write" + trace_fn += ".csv" - for binding in component.get_bindings(einsum): - if isinstance(component, LeaderFollowerComponent): - rank = binding["rank"] - leader = self.__get_leader(rank, binding["leader"]) + # Otherwise binding is lazy + else: + fiber_trace = self.metrics.get_fiber_trace( + binding["tensor"], binding["rank"], is_read) - args = [] - args.append(AJust(EMethod(EVar("Metrics"), "dump", []))) - args.append(AJust(EString(rank))) - args.append(AJust(EInt(leader))) + if binding["type"] == "payload" and fiber_trace != "iter" and \ + fiber_trace[:11] != "get_payload": + input_fn = prefix + fiber_trace + ".csv" + filter_fn = prefix + "iter.csv" + trace_fn = prefix + fiber_trace + "_payload.csv" - access = AAccess(metrics, EString(rank + " intersections")) - count = EMethod(EVar("Compute"), "lfCount", args) - block.add(SAssign(access, count)) + args = [AJust(EString(fn)) + for fn in [input_fn, filter_fn, trace_fn]] + block.add(SExpr(EMethod(EVar("Traffic"), "filterTrace", args))) - elif isinstance(component, SkipAheadComponent): - rank = binding["rank"] + else: + trace_fn = prefix + fiber_trace + ".csv" - args = [] - args.append(AJust(EMethod(EVar("Metrics"), "dump", []))) - args.append(AJust(EString(rank))) + return trace_fn, block + + def __build_components(self) -> Statement: + """ + Build the creation of any necessary 
hardware components + """ + block = SBlock([]) + einsum = self.program.get_equation().get_output().root_name() - access = AAccess(metrics, EString(rank + " intersections")) - count = EMethod(EVar("Compute"), "skipCount", args) - block.add(SAssign(access, count)) + for component in self.metrics.get_hardware().get_components(einsum, + IntersectorComponent): + name = component.get_name() - else: + for binding in component.get_bindings()[einsum]: + block.add(self.create_component(component, binding["rank"])) + + return block + + def __build_compute(self) -> Statement: + """ + Add the code to count compute operations + """ + block = SBlock([]) + einsum = self.program.get_equation().get_output().root_name() + + metrics_einsum = EAccess(EVar("metrics"), EString(einsum)) + metrics_dump = EAccess( + EMethod( + EVar("Metrics"), + "dump", + []), + EString("Compute")) + for fu in self.metrics.get_hardware().get_components(einsum, ComputeComponent): + block.add( + SAssign( + AAccess( + metrics_einsum, EString( + fu.get_name())), EDict( + {}))) + + metrics_fu = EAccess(metrics_einsum, EString(fu.get_name())) + ops = [] + for binding in fu.get_bindings()[einsum]: op = binding["op"] + ops.append(EString(op)) + block.add( + SAssign( + AAccess(metrics_fu, EString(op)), + EAccess(metrics_dump, EString("payload_" + op)))) - args = [] - args.append(AJust(EMethod(EVar("Metrics"), "dump", []))) - args.append(AJust(EString(op))) + # TODO: Handle multi-op functional units + assert len(ops) == 1 - access = AAccess(metrics, EString(op)) - count = EMethod(EVar("Compute"), "opCount", args) - block.add(SAssign(access, count)) + # op_freq = cycles / s * ops / cycle + op_freq = self.metrics.get_hardware().get_frequency(einsum) * \ + fu.get_num_instances() + time = EBinOp(EAccess(metrics_fu, ops[0]), ODiv(), EInt(op_freq)) + + metrics_time = AAccess(metrics_fu, EString("time")) + block.add(SAssign(metrics_time, time)) + self.fusion.add_component(einsum, fu.get_name()) return block - def __get_leader(self, rank: str, leader: str) -> int: + def __build_formats(self) -> Statement: """ - Get the index of the leader + Add the code to build the formats dictionary """ - i = 0 - for tensor in self.program.get_equation().get_tensors(): - if tensor.get_is_output(): + formats_dict: Dict[Expression, Expression] = {} + part_ir = self.program.get_partitioning() + for tensor, format_ in self.metrics.get_loop_formats().items(): + loop_format = self.metrics.get_format().get_spec(tensor)[format_] + rank_order = loop_format["rank-order"] + + # If there is dynamic partitioning applied we cannot use the + # existing tensor + build_new = False + + # TODO: This should be in teaal.ir.partitioning + tensor_ir = self.program.get_equation().get_tensor(tensor) + old_ranks: List[str] = [] + new_ranks = tensor_ir.get_init_ranks() + while old_ranks != new_ranks: + old_ranks = new_ranks + new_ranks = part_ir.partition_ranks( + new_ranks, part_ir.get_static_parts(), False, True) + + for static_rank in new_ranks: + if (static_rank,) in part_ir.get_dyn_parts(): + build_new = True + break + + if part_ir.is_flattened(static_rank): + build_new = True + break + + tensor_expr: Expression + if build_new: + rank_ids = TransUtils.build_expr(rank_order) + + shape: List[Expression] = [] + for rank in rank_order: + if not part_ir.is_flattened(rank): + shape.append(EVar(part_ir.get_root_name(rank))) + continue + + unpacked = part_ir.unpack(rank) + roots = [part_ir.get_root_name(src) for src in unpacked] + rank_shape: Expression = EVar(roots[0]) + for root in 
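[Editor's note] For a functional unit "FU" with a single bound op "add", running at 1 GHz with one instance (all hypothetical values), the compute-tracking block above reduces to generated HiFiber like:

    metrics["Z"]["FU"] = {}
    metrics["Z"]["FU"]["add"] = Metrics.dump()["Compute"]["payload_add"]
    metrics["Z"]["FU"]["time"] = metrics["Z"]["FU"]["add"] / 1000000000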
roots[1:]: + rank_shape = EBinOp(rank_shape, OMul(), EVar(root)) + shape.append(rank_shape) + + args = [ + AParam( + "rank_ids", rank_ids), AParam( + "shape", EList(shape))] + tensor_expr = EFunc("Tensor", args) + + else: + tensor_expr = EVar( + tensor + "_" + "".join(rank_order)) + + format_yaml = TransUtils.build_expr(loop_format) + + formats_dict[EString(tensor)] = EFunc( + "Format", [AJust(tensor_expr), AJust(format_yaml)]) + + return SAssign(AVar("formats"), EDict(formats_dict)) + + def __build_intersections(self) -> Statement: + """ + Add the code to compute the intersection operations + """ + block = SBlock([]) + einsum = self.program.get_equation().get_output().root_name() + + metrics_einsum = EAccess(EVar("metrics"), EString(einsum)) + for intersector in self.metrics.get_hardware().get_components(einsum, + IntersectorComponent): + isect_name = intersector.get_name() + metrics_isect = AAccess(metrics_einsum, EString(isect_name)) + block.add(SAssign(metrics_isect, EInt(0))) + + for binding in intersector.get_bindings()[einsum]: + isects = EMethod( + EVar( + isect_name + + "_" + + binding["rank"]), + "getNumIntersects", + []) + block.add(SIAssign(metrics_isect, OAdd(), isects)) + + # op_freq = cycles / s * ops / cycle + op_freq = self.metrics.get_hardware().get_frequency(einsum) * \ + intersector.get_num_instances() + metrics_isect_expr = EAccess(metrics_einsum, EString(isect_name)) + time = EBinOp(metrics_isect_expr, ODiv(), EInt(op_freq)) + + metrics_time = AAccess(metrics_isect_expr, EString("time")) + block.add(SAssign(metrics_time, time)) + self.fusion.add_component(einsum, intersector.get_name()) + + return block + + def __build_match_ranks(self) -> Statement: + """ + Add the code to match ranks, e.g., if we have flattening + """ + block = SBlock([]) + + part_ir = self.program.get_partitioning() + for rank in self.program.get_loop_order().get_ranks(): + if not part_ir.is_flattened(rank): continue - # TODO: Cover this when we allow more than two tensors - # See test test_dump_leader_follower_not_intersected - if rank not in tensor.get_ranks(): - continue # pragma: no cover + unpacked = part_ir.unpack(rank) + roots = [] + for unpack_rank in unpacked: + if part_ir.get_final_rank_id( + [unpack_rank], unpack_rank) == rank: + args = [AJust(EString(rank)), AJust(EString(unpack_rank))] + block.add( + SExpr( + EMethod( + EVar("Metrics"), + "matchRanks", + args))) - if tensor.root_name() == leader: - return i + roots.append(EVar(part_ir.get_root_name(unpack_rank))) - i += 1 + args = [AJust(EString(rank)), AJust(ETuple(tuple(roots)))] + block.add(SExpr(EMethod(EVar("Metrics"), "associateShape", args))) - raise ValueError("Tensor " + leader + " has no rank " + rank) + return block - def __mem_metrics(self, tensor: Tensor) -> Statement: + def __build_merges(self) -> Statement: """ - Get the memory metrics for a given tensor + Add the code to compute the merge operations """ block = SBlock([]) + einsum = self.program.get_equation().get_output().root_name() - # Dictionary accesses - einsum = EString(self.program.get_equation().get_output().root_name()) - metrics = EAccess(EVar("metrics"), einsum) - fp_access = (metrics, EString(tensor.root_name() + " footprint")) - tf_access = (metrics, EString(tensor.root_name() + " traffic")) - - # No memory traffic if the tensor is not stored in DRAM - if not self.metrics.in_dram(tensor): - block.add(SAssign(AAccess(*fp_access), EInt(0))) - block.add(SAssign(AAccess(*tf_access), EInt(0))) - return block - - # Make a format for this tensor - name = 
tensor.tensor_name() - spec = TransUtils.build_expr(self.metrics.get_format(tensor)) - constr = EFunc("Format", [AJust(EVar(name)), AJust(spec)]) - format_ = name + "_format" - block.add(SAssign(AVar(format_), constr)) - - # Compute its memory footprint - footprint = EMethod(EVar(format_), "getTensor", []) - block.add(SAssign(AAccess(*fp_access), footprint)) - - # If it is stationary, its footprint is its traffic, else compue - # the traffic - if self.metrics.on_chip_stationary(tensor): - block.add(SAssign(AAccess(*tf_access), EAccess(*fp_access))) + metrics_einsum = EAccess(EVar("metrics"), EString(einsum)) + for merger in self.metrics.get_hardware().get_components(einsum, MergerComponent): + merger_name = merger.get_name() + block.add( + SAssign( + AAccess( + metrics_einsum, EString(merger_name)), EDict( + {}))) + metrics_merger = EAccess(metrics_einsum, EString(merger_name)) + tensors = [] + for binding in merger.get_bindings()[einsum]: + init_ranks = binding["init-ranks"] + final_ranks = binding["final-ranks"] - else: - # First compute the traffic from loading the buffered subtrees - traffic = self.__mem_traffic(tensor) + input_ = binding["tensor"] + "_" + "".join(init_ranks) + tensor_name = EVar(input_) + tensors.append(tensor_name) + + # TODO: Way more complicated merges are possible than a single + # swap + depth = EInt([i == f for i, f in zip( + init_ranks, final_ranks)].index(False)) - # TODO: Make this more realistic - # We assume that the other ranks are secretly buffered - # somewhere else - buffer_rank = self.metrics.get_on_chip_rank(tensor) - prefix = tensor.get_prefix(buffer_rank) + # TODO: Need to first update the HiFiber to use new merge + # hardware spec + radix = TransUtils.build_expr(merger.get_comparator_radix()) + next_latency: Expression + if merger.get_inputs() < float("inf"): + next_latency = EInt(1) + else: + next_latency = EString("N") - for rank in prefix: - arg = AJust(EString(rank)) - rank_fp = EMethod(EVar(format_), "getRank", [arg]) + args = [ + AJust(expr) for expr in [ + tensor_name, + depth, + radix, + next_latency]] + swaps_call = EMethod(EVar("Compute"), "numSwaps", args) + block.add( + SAssign( + AAccess( + metrics_merger, + EString(input_)), + swaps_call)) - traffic = EBinOp(traffic, OAdd(), rank_fp) + # Compute the time required + # TODO: Support more than one tensor per merger + assert len(tensors) == 1 - block.add(SAssign(AAccess(*tf_access), traffic)) + # op_freq = cycles / s * ops / cycle + op_freq = self.metrics.get_hardware().get_frequency(einsum) * \ + merger.get_num_instances() + time = EBinOp( + EAccess( + metrics_merger, + tensors[0]), + ODiv(), + EInt(op_freq)) + + metrics_time = AAccess(metrics_merger, EString("time")) + block.add(SAssign(metrics_time, time)) + self.fusion.add_component(einsum, merger.get_name()) return block - def __mem_traffic(self, tensor: Tensor) -> Expression: + def __build_sequencers(self) -> Statement: """ - Get the expression for computing the memory traffic for this tensor + Add a block to track the sequencers """ - buffer_ = self.metrics.get_on_chip_buffer(tensor) + block = SBlock([]) - if isinstance(buffer_, BuffetComponent): - args = [] - args.append(AJust(EVar(tensor.tensor_name()))) - args.append(AJust(EString(self.metrics.get_on_chip_rank(tensor)))) - args.append(AJust(EVar(tensor.tensor_name() + "_format"))) + einsum = self.program.get_equation().get_output().root_name() + metrics_einsum = EAccess(EVar("metrics"), EString(einsum)) - return EMethod(EVar("Traffic"), "buffetTraffic", args) + for seq in 
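[Editor's note] The merge depth above is the index of the first rank that moves between the initial and final orders. A worked example of the depth computation and the resulting generated call (a sketch; the tensor name, the radix of 64, and the unit next-latency are hypothetical):

    init_ranks  = ["M", "K", "N"]
    final_ranks = ["M", "N", "K"]
    # [True, False, False].index(False) == 1
    depth = [i == f for i, f in zip(init_ranks, final_ranks)].index(False)

    metrics["Z"]["Merger0"]["T_MKN"] = Compute.numSwaps(T_MKN, 1, 64, 1)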
self.metrics.get_hardware().get_components(einsum, SequencerComponent): + seq_assn = AAccess(metrics_einsum, EString(seq.get_name())) + block.add(SAssign(seq_assn, EDict({}))) + seq_expr = EAccess(metrics_einsum, EString(seq.get_name())) - elif isinstance(buffer_, CacheComponent): - capacity = buffer_.get_depth() * buffer_.get_width() + ranks = [] + for rank in seq.get_ranks(einsum): + ranks.append(rank) + trace = self.metrics.get_hardware().get_prefix(einsum) + \ + "-" + rank + "-iter.csv" + num_iters = EMethod( + EVar("Compute"), "numIters", [ + AJust( + EString(trace))]) + seq_rank = AAccess(seq_expr, EString(rank)) + block.add(SAssign(seq_rank, num_iters)) - args = [] - args.append(AJust(EVar(tensor.tensor_name()))) - args.append(AJust(EString(self.metrics.get_on_chip_rank(tensor)))) - args.append(AJust(EVar(tensor.tensor_name() + "_format"))) - args.append(AJust(EInt(capacity))) + # Compute time + steps: Optional[Expression] = None + for rank in ranks: + new_steps = EAccess(seq_expr, EString(rank)) + if steps: + steps = EBinOp(steps, OAdd(), new_steps) + else: + steps = new_steps - return EMethod(EVar("Traffic"), "cacheTraffic", args) + assert steps is not None - else: - # This error should be caught by the Hardware constructor - raise ValueError( - "Unknown MemoryComponent " + - repr(buffer_)) # pragma: no cover + op_freq = self.metrics.get_hardware().get_frequency(einsum) * \ + seq.get_num_instances() + time = EBinOp(EParens(steps), ODiv(), EInt(op_freq)) - def __merger_metrics( - self, - component: MergerComponent, - binding: dict) -> Statement: + metrics_time = AAccess(seq_expr, EString("time")) + block.add(SAssign(metrics_time, time)) + self.fusion.add_component(einsum, seq.get_name()) + + return block + + def __build_time(self) -> Statement: + """ + Add the code necessary to compute the final execution time + """ + sblock = SBlock([]) + + # Save the Einsum blocks + metrics = EVar("metrics") + blocks = TransUtils.build_expr(self.fusion.get_blocks()) + sblock.add(SAssign(AAccess(metrics, EString("blocks")), blocks)) + + # Compute the execution time + time: Optional[Expression] = None + for block in self.fusion.get_blocks(): + + # Collect up the statistics for the block + component_time: Dict[str, Expression] = {} + for einsum in block: + metrics_einsum = EAccess(metrics, EString(einsum)) + for comp in self.fusion.get_components(einsum): + new_time = EAccess( + EAccess( + metrics_einsum, + EString(comp)), + EString("time")) + + if comp in component_time: + component_time[comp] = EBinOp( + component_time[comp], OAdd(), new_time) + else: + component_time[comp] = new_time + + # Sort components to enable testing + comps = sorted(component_time.keys()) + + # Compute block time by taking the max + block_time: Expression + if len(comps) == 0: + block_time = EInt(0) + elif len(comps) == 1: + block_time = component_time[comp] + else: + comp_args = [AJust(component_time[comp]) for comp in comps] + block_time = EFunc("max", comp_args) + + # The execution time is the sum of all of the blocks + if time: + time = EBinOp(time, OAdd(), block_time) + else: + time = block_time + + assert time is not None + + sblock.add(SAssign(AAccess(metrics, EString("time")), time)) + + return sblock + + def __build_trace_ranks(self) -> Tuple[Statement, bool]: + """ + Add code to trace all necessary ranks + Returns (new code, need to register ranks explicitly) + + Note: explicit rank registration is necessary if we have eager loading + of fibers + """ + block = SBlock([]) + einsum = 
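[Editor's note] The final timing model in __build_time sums each component's time across the Einsums of a fused block, takes the max over components within each block, and then sums the blocks. For one block fusing two Einsums it generates roughly (a sketch; the Einsum and component names are hypothetical):

    metrics["blocks"] = [["Z0", "Z1"]]
    metrics["time"] = max(
        metrics["Z0"]["FU"]["time"] + metrics["Z1"]["FU"]["time"],
        metrics["Z0"]["Mem"]["time"] + metrics["Z1"]["Mem"]["time"])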
self.program.get_equation().get_output().root_name() + loop_order = self.program.get_loop_order().get_ranks() + + traces: Set[Tuple[Optional[str], str, str, bool, bool]] = set() + trace: Tuple[Optional[str], str, str, bool, bool] + + register = False + self.tree_traces = {rank: {True: set(), False: set()} + for rank in loop_order} + available = [(rank, self.program.get_partitioning().get_available(rank)) + for rank in reversed(loop_order)] + + for sequencer in self.metrics.get_hardware().get_components(einsum, + SequencerComponent): + for rank in sequencer.get_ranks(einsum): + trace = (None, rank, "iter", False, True) + block.add(self.__add_collection(trace, traces)) + + for tensor in self.program.get_equation().get_tensors(): + tensor_name = tensor.root_name() + + # Collect the necessary traces for each tensor + for rank, type_, consumable in self.metrics.get_collected_tensor_info( + tensor_name): + + # If we are collecting the loop's trace + if type_ == "iter": + trace = (None, rank, type_, consumable, True) + block.add(self.__add_collection(trace, traces)) + + # Otherwise, get the fiber's read (and maybe write) + else: + trace = (tensor_name, rank, type_, consumable, True) + block.add(self.__add_collection(trace, traces)) + + if tensor.get_is_output(): + trace = (tensor_name, rank, type_, consumable, False) + block.add(self.__add_collection(trace, traces)) + + # Type is fiber if lazy and root of the eager access if + # lazy + if type_ != "fiber": + + # Register the rank order explicitly + register = True + + # Eagerly load a subtree right before the given loop + loaded = False + for loop_rank, avail in available: + if type_ in avail: + self.tree_traces[loop_rank][True].add( + tensor_name) + loaded = True + break + assert loaded + + # Eagerly store a subtree right before we move onto the + # next subtree + if tensor.get_is_output(): + final_tensor = Tensor( + tensor.root_name(), tensor.get_init_ranks()) + self.program.apply_all_partitioning(final_tensor) + self.program.get_loop_order().apply(final_tensor) + + i = final_tensor.get_ranks().index(type_) + if i == 0: + store_rank = loop_order[0] + else: + one_above_rank = final_tensor.get_ranks()[ + i - 1] + + stored = False + for j, (loop_rank, avail) in enumerate( + available): + if one_above_rank in avail: + stored = True + break + assert stored + + # Unreversed index -> len(loop_order) - j - 1 + # Store rank is one below -> + 1 + store_rank = loop_order[len(loop_order) - j] + + # Trace the eager tree + self.tree_traces[store_rank][False].add( + tensor_name) + + return block, register + + def __build_traffic(self) -> Statement: """ - Get the merge metrics for this component + Add the code to compute traffic """ + block = SBlock([]) einsum = self.program.get_equation().get_output().root_name() - metrics = EAccess(EVar("metrics"), EString(einsum)) - name = EVar(binding["tensor"] + "_" + "".join(binding["init_ranks"])) - depth = EInt(binding["swap_depth"]) - radix = TransUtils.build_expr(component.get_radix()) - next_latency = TransUtils.build_expr(component.get_next_latency()) + active_bindings: Dict[str, List[dict]] = {} + # Filter out the bindings to ignore + for buffer_ in self.metrics.get_hardware().get_components(einsum, BufferComponent): + active_bindings[buffer_.get_name()] = [] + for binding in buffer_.get_bindings()[einsum]: + format_ = self.metrics.get_format().get_spec( + binding["tensor"])[binding["format"]] + rank = binding["rank"] + type_ = binding["type"] + + # First make sure that this binding actually corresponds to + # 
traffic + check_cbits = type_ == "coord" or type_ == "elem" + check_pbits = type_ == "payload" or type_ == "elem" + if check_cbits and ( + "cbits" not in format_[rank] or format_[rank]["cbits"] == 0): + # Inconsequential line to make the coverage test go in here + x = 1 + continue + if check_pbits and ( + "pbits" not in format_[rank] or format_[rank]["pbits"] == 0): + # Inconsequential line to make the coverage test go in here + x = 1 + continue + + active_bindings[buffer_.get_name()].append(binding) + + metrics_einsum = EAccess(EVar("metrics"), EString(einsum)) + traffic_dict: Dict[str, Set[str]] = {} + for buffer_ in self.metrics.get_hardware().get_components(einsum, BufferComponent): + bindings = TransUtils.build_expr( + active_bindings[buffer_.get_name()]) + bindings_var = AVar("bindings") + + block.add(SAssign(bindings_var, bindings)) + + # Create the traces for each buffer + # TODO: What if the binding is for an unswizzled tensor + traces = {} + ranks = set() + for binding in active_bindings[buffer_.get_name()]: + format_ = self.metrics.get_format().get_spec( + binding["tensor"])[binding["format"]] + rank = binding["rank"] + ranks.add(rank) + type_ = binding["type"] + + # Now add the trace + trace, create_trace = self.__get_trace(binding, True) + block.add(create_trace) + traces[(binding["tensor"], rank, type_, "read")] = trace + tensor_ir = self.program.get_equation( + ).get_tensor(binding["tensor"]) + if tensor_ir.get_is_output(): + trace, create_trace = self.__get_trace(binding, False) + block.add(create_trace) + traces[(binding["tensor"], rank, type_, "write")] = trace + + # Also need to add the evict-on rank to the set of ranks if one + # exists + if "evict-on" in binding: + ranks.add(binding["evict-on"]) + + traces_dict = TransUtils.build_expr(traces) + block.add(SAssign(AVar("traces"), traces_dict)) + + args = [ + AJust( + EVar("bindings")), + AJust( + EVar("formats")), + AJust( + EVar("traces")), + AJust( + TransUtils.build_expr( + buffer_.get_width() * + buffer_.get_depth())), + AJust( + TransUtils.build_expr( + buffer_.get_width()))] + + # Match ranks not in the loop order to their corresponding rank in + # the loop order + rank_map = {} + for rank in ranks: + if rank == "root": + continue + + final_rank = self.program.get_partitioning( + ).get_final_rank_id([rank], rank) + if final_rank != rank: + rank_map[rank] = final_rank + + if rank_map: + args.append(AJust(TransUtils.build_expr(rank_map))) + + if isinstance(buffer_, BuffetComponent): + traffic_func = "buffetTraffic" + # Buffer is a cache + else: + traffic_func = "cacheTraffic" + + block.add( + SAssign( + AVar("traffic"), + EMethod( + EVar("Traffic"), + traffic_func, + args))) + + # Now add it to the metrics dictionary + added = set() + for binding in active_bindings[buffer_.get_name()]: + tensor = binding["tensor"] + rank = binding["rank"] + type_ = binding["type"] + + tensor_ir = self.program.get_equation().get_tensor(tensor) + src_component = self.metrics.get_source_memory( + buffer_.get_name(), tensor, rank, type_) + + if src_component is None: + continue + + src = src_component.get_name() + + if src not in traffic_dict: + traffic_dict[src] = set() + block.add( + SAssign( + AAccess( + metrics_einsum, EString(src)), EDict( + {}))) + + metrics_src = EAccess(metrics_einsum, EString(src)) + metrics_tensor = EAccess(metrics_src, EString(tensor)) + if tensor not in traffic_dict[src]: + traffic_dict[src].add(tensor) + block.add( + SAssign( + AAccess( + metrics_src, EString(tensor)), EDict( + {}))) + block.add( + SAssign( 
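[Editor's note] For a buffet of width 64 and depth 32 (hypothetical sizes, giving a capacity argument of 2048 bits), the generated traffic call and the accumulation that follows take roughly this shape (a sketch; "Memory" stands in for whatever source component the binding resolves to, and an optional rank-remapping dict may be appended to the arguments):

    traffic = Traffic.buffetTraffic(bindings, formats, traces, 2048, 64)
    metrics["Z"]["Memory"]["A"] = {}
    metrics["Z"]["Memory"]["A"]["read"] = 0
    metrics["Z"]["Memory"]["A"]["read"] += traffic[0]["A"]["read"]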
+ AAccess( + metrics_tensor, + EString("read")), + EInt(0))) + + if tensor_ir.get_is_output(): + block.add( + SAssign( + AAccess( + metrics_tensor, + EString("write")), + EInt(0))) + + if (src, tensor) not in added: + traffic_access = EAccess( + EAccess( + EVar("traffic"), + EInt(0)), + EString(tensor)) + block.add( + SIAssign( + AAccess( + metrics_tensor, + EString("read")), + OAdd(), + EAccess( + traffic_access, + EString("read")))) + + if tensor_ir.get_is_output(): + block.add( + SIAssign( + AAccess( + metrics_tensor, EString("write")), + OAdd(), + EAccess(traffic_access, EString("write")))) + + added.add((src, tensor)) + + # Compute the time it took to perform this traffic + for src, tensors in traffic_dict.items(): + bits: Optional[Expression] = None + metrics_src = EAccess(metrics_einsum, EString(src)) + + # Note: not technically necessary, just to make the testing + # deterministic + sorted_tensors = sorted(tensors) + + for tensor in sorted_tensors: + metrics_tensor = EAccess(metrics_src, EString(tensor)) + new_bits: Expression = EAccess(metrics_tensor, EString("read")) + + if tensor == einsum: + new_bits = EBinOp( + new_bits, OAdd(), EAccess( + metrics_tensor, EString("write"))) + + if bits: + bits = EBinOp(bits, OAdd(), new_bits) + else: + bits = new_bits + + # Should always have at least some traffic (error above if not) + assert bits is not None + bits = EParens(bits) + + component = self.metrics.get_hardware().get_component(src) + assert isinstance(component, MemoryComponent) + + metrics_time = AAccess(metrics_src, EString("time")) + # Note: the current model assumes perfect load balance + time = EBinOp( + bits, + ODiv(), + EInt( + component.get_bandwidth() * + component.get_num_instances())) + + block.add(SAssign(metrics_time, time)) + self.fusion.add_component(einsum, src) + + return block + + def __make_iter_num(self, rank: str) -> Statement: + """ + Save the iteration number if necessary + """ + # We don't need the iteration number if we are not doing an eager write + if not self.metrics.get_eager_write(): + return SBlock([]) + + loop_order = self.program.get_loop_order().get_ranks() + ["body"] + output = self.program.get_equation().get_output() + + # We want to collect the iteration number for the last loop rank + final_tensor = Tensor(output.root_name(), output.get_init_ranks()) + self.program.apply_all_partitioning(final_tensor) + self.program.get_loop_order().apply(final_tensor) + + # We don't need the iteration number of this rank if it is the top rank + # since we can never eager access a 0-tensor + i = loop_order.index(rank) + if i == 0: + return SBlock([]) + + # We only want the iteration number of the output's bottom rank + if loop_order[i - 1] != final_tensor.get_ranks()[-1]: + return SBlock([]) - args = [AJust(arg) for arg in [name, depth, radix, next_latency]] + iter_var = AVar(final_tensor.get_ranks()[-1].lower() + "_iter_num") + iter_num = EMethod(EMethod(EVar("Metrics"), "getIter", []), "copy", []) - access = AAccess(metrics, EString(name.gen() + " merge ops")) - count = EMethod(EVar("Compute"), "swapCount", args) - return SAssign(access, count) + return SAssign(iter_var, iter_num) diff --git a/teaal/trans/equation.py b/teaal/trans/equation.py index 7ab1235..db0b8e2 100644 --- a/teaal/trans/equation.py +++ b/teaal/trans/equation.py @@ -28,6 +28,8 @@ from typing import cast, Dict, List, Optional, Type from teaal.hifiber import * +from teaal.ir.component import * +from teaal.ir.metrics import Metrics from teaal.ir.program import Program from teaal.ir.tensor 
import Tensor from teaal.parse.utils import ParseUtils @@ -40,11 +42,12 @@ class Equation: equation at the bottom of the loop nest """ - def __init__(self, program: Program) -> None: + def __init__(self, program: Program, metrics: Optional[Metrics]) -> None: """ Construct a new Equation """ self.program = program + self.metrics = metrics def make_eager_inputs(self, rank: str, inputs: List[str]) -> Statement: """ @@ -52,7 +55,8 @@ def make_eager_inputs(self, rank: str, inputs: List[str]) -> Statement: """ tensors = [self.program.get_equation().get_tensor(input_) for input_ in inputs] - iter_expr = self.__make_input_iter_expr(rank, tensors) + _, input_tensors = self.program.get_equation().get_iter(tensors) + iter_expr = self.__make_input_iter_expr(rank, input_tensors) # Use Fiber.fromLazy() to translate # Note: Assume that if we are making eager inputs, then we are @@ -106,29 +110,27 @@ def make_iter_expr(self, rank: str, tensors: List[Tensor]) -> Expression: if not tensors: raise ValueError("Must iterate over at least one tensor") + output, inputs = self.program.get_equation().get_iter(tensors) + # If there are no input tensors, we need to iterRangeShapeRef on the # output - output_tensor = self.__get_output_tensor(tensors) - if len(tensors) == 1 and output_tensor: + if len(tensors) == 1 and output: iter_output = self.__make_output_only_iter_expr(rank) return self.__add_enumerate(rank, iter_output) # Build the expression of the inputs - expr = self.__make_input_iter_expr(rank, tensors) + expr = self.__make_input_iter_expr(rank, inputs) # Finally, add in the output - if output_tensor: - trank = output_tensor.peek() - if trank is not None: - trank = trank.upper() - + if output: + trank = output.peek_clean() if trank != rank: raise ValueError( "Cannot project into the output tensor. 
Replace " + rank + " with " + str(trank) + " in the loop order") expr = Equation.__add_operator( - EVar(output_tensor.fiber_name()), OLtLt(), expr) + EVar(output.fiber_name()), OLtLt(), expr) return self.__add_enumerate(rank, expr) @@ -193,14 +195,13 @@ def make_payload(self, rank: str, tensors: List[Tensor]) -> Payload: "Must have at least one tensor to make the payload") # Separate the tensors into terms - terms = self.__separate_terms(tensors) - output_tensor = self.__get_output_tensor(tensors) + output, inputs = self.program.get_equation().get_iter(tensors) payload: Payload - if terms: + if inputs: # Construct the term payloads term_payloads = [] - for term in terms: + for term in inputs: payload = PVar(term[-1].fiber_name()) for factor in reversed(term[:-1]): payload = PTuple([PVar(factor.fiber_name()), payload]) @@ -212,11 +213,11 @@ def make_payload(self, rank: str, tensors: List[Tensor]) -> Payload: payload = PTuple([PVar("_"), term_payload, payload]) # Put the output on the outside - if output_tensor: - payload = PTuple([PVar(output_tensor.fiber_name()), payload]) + if output: + payload = PTuple([PVar(output.fiber_name()), payload]) - elif output_tensor: - payload = PVar(output_tensor.fiber_name()) + elif output: + payload = PVar(output.fiber_name()) else: # We should never get to this state @@ -317,7 +318,9 @@ def __need_enumerate(self, rank: str) -> bool: spacetime = self.program.get_spacetime() enum_st = spacetime is not None and spacetime.emit_pos(rank) - return enum_int or enum_st + enum_metrics = self.metrics is None + + return (enum_int or enum_st) and enum_metrics @staticmethod def __frac_coords(sexpr: Basic) -> bool: @@ -329,16 +332,6 @@ def __frac_coords(sexpr: Basic) -> bool: return any(Equation.__frac_coords(arg) for arg in sexpr.args) - def __get_output_tensor(self, tensors: List[Tensor]) -> Optional[Tensor]: - """ - Get the output tensor if it exists - """ - output = self.program.get_equation().get_output() - if output in tensors: - return output - else: - return None - def __in_update(self, factor: str) -> bool: """ Returns true if the factor should be included in the update @@ -350,23 +343,18 @@ def __iter_fiber(self, rank: str, tensor: Tensor) -> Expression: """ Get fiber for iteration (may involve projection) """ - trank = tensor.peek() - if trank is None: - raise ValueError( - "Cannot iterate over payload " + - tensor.fiber_name()) + trank = tensor.peek_clean() # If this fiber is already over the correct rank, we can iterate over # it directly - rank = rank.lower() if trank == rank: return EVar(tensor.fiber_name()) # Otherwise, we need to project partitioning = self.program.get_partitioning() - root, suffix = partitioning.split_rank_name(rank.upper()) + root, suffix = partitioning.split_rank_name(rank) root = root.lower() - troot = partitioning.get_root_name(trank.upper()).lower() + troot = partitioning.get_root_name(trank).lower() # If we are going to project, get the iteration rank in terms of the # tensor rank @@ -384,19 +372,20 @@ def __iter_fiber(self, rank: str, tensor: Tensor) -> Expression: # If not, we do not need to translate the halo else: sexpr = CoordAccess.isolate_rank(sexpr, troot) - sexpr = sexpr.subs(troot, trank) + sexpr = sexpr.subs(troot, trank.lower()) - lambda_ = ELambda([trank], CoordAccess.build_expr(sexpr)) + lambda_ = ELambda([trank.lower()], CoordAccess.build_expr(sexpr)) args = [AParam("trans_fn", lambda_)] # Build the interval if we need to make sure that the halo does not # add extra computation if suffix == "": - interval = 
@@ -317,7 +318,9 @@ def __need_enumerate(self, rank: str) -> bool:
         spacetime = self.program.get_spacetime()
         enum_st = spacetime is not None and spacetime.emit_pos(rank)

-        return enum_int or enum_st
+        enum_metrics = self.metrics is None
+
+        return (enum_int or enum_st) and enum_metrics

     @staticmethod
     def __frac_coords(sexpr: Basic) -> bool:
@@ -329,16 +332,6 @@ def __frac_coords(sexpr: Basic) -> bool:

         return any(Equation.__frac_coords(arg) for arg in sexpr.args)

-    def __get_output_tensor(self, tensors: List[Tensor]) -> Optional[Tensor]:
-        """
-        Get the output tensor if it exists
-        """
-        output = self.program.get_equation().get_output()
-        if output in tensors:
-            return output
-        else:
-            return None
-
     def __in_update(self, factor: str) -> bool:
         """
         Returns true if the factor should be included in the update
@@ -350,23 +343,18 @@ def __iter_fiber(self, rank: str, tensor: Tensor) -> Expression:
         """
         Get fiber for iteration (may involve projection)
         """
-        trank = tensor.peek()
-        if trank is None:
-            raise ValueError(
-                "Cannot iterate over payload " +
-                tensor.fiber_name())
+        trank = tensor.peek_clean()

         # If this fiber is already over the correct rank, we can iterate over
         # it directly
-        rank = rank.lower()
         if trank == rank:
             return EVar(tensor.fiber_name())

         # Otherwise, we need to project
         partitioning = self.program.get_partitioning()
-        root, suffix = partitioning.split_rank_name(rank.upper())
+        root, suffix = partitioning.split_rank_name(rank)
         root = root.lower()
-        troot = partitioning.get_root_name(trank.upper()).lower()
+        troot = partitioning.get_root_name(trank).lower()

         # If we are going to project, get the iteration rank in terms of the
         # tensor rank
@@ -384,19 +372,20 @@
         # If not, we do not need to translate the halo
         else:
             sexpr = CoordAccess.isolate_rank(sexpr, troot)
-            sexpr = sexpr.subs(troot, trank)
+            sexpr = sexpr.subs(troot, trank.lower())

-        lambda_ = ELambda([trank], CoordAccess.build_expr(sexpr))
+        lambda_ = ELambda([trank.lower()], CoordAccess.build_expr(sexpr))
         args = [AParam("trans_fn", lambda_)]

         # Build the interval if we need to make sure that the halo does not
         # add extra computation
         if suffix == "":
-            interval = ETuple([EInt(0), EVar(rank.upper())])
+            interval = ETuple([EInt(0), EVar(rank)])
             args.append(AParam("interval", interval))

         elif suffix == "0":
-            interval = ETuple([EVar(rank + "_start"), EVar(rank + "_end")])
+            interval = ETuple([EVar(rank.lower() + "_start"),
+                               EVar(rank.lower() + "_end")])
             args.append(AParam("interval", interval))

         project = EMethod(EVar(tensor.fiber_name()), "project", args)
@@ -416,20 +405,53 @@
     def __make_input_iter_expr(
             self,
             rank: str,
-            tensors: List[Tensor]) -> Expression:
+            tensors: List[List[Tensor]]) -> Expression:
         """
         Make the iteration expression for the inputs
         """
-        # Separate the tensors into terms
-        terms = self.__separate_terms(tensors)
+        leader_follower = False
+        leader = ""
+        if self.metrics is not None:
+            intersector = self.metrics.get_coiter(rank)
+
+            # If this uses leader-follower intersection
+            if isinstance(intersector, LeaderFollowerComponent):
+                leader_follower = True
+
+                einsum = self.program.get_equation().get_output().root_name()
+                for binding in intersector.get_bindings()[einsum]:
+                    if binding["rank"] == rank:
+                        leader = binding["leader"]
+                        break

         # Combine terms with intersections
         intersections = []
-        for term in terms:
-            expr = self.__iter_fiber(rank, term[-1])
-            for factor in reversed(term[:-1]):
-                fiber = self.__iter_fiber(rank, factor)
-                expr = Equation.__add_operator(fiber, OAnd(), expr)
+        for term in tensors:
+            expr: Expression
+            if leader_follower:
+                # If there is more than one term, there is ambiguity we are
+                # not capturing
+                assert len(tensors) == 1
+
+                leader_tensor = self.program.get_equation().get_tensor(leader)
+                fiber_args = [self.__iter_fiber(rank, leader_tensor)]
+
+                for factor in term:
+                    if factor.root_name() == leader:
+                        continue
+
+                    # TODO: Only uncompressed fibers can follow
+                    fiber_args.append(self.__iter_fiber(rank, factor))
+
+                args: List[Argument] = [AJust(fiber) for fiber in fiber_args]
+                args.append(AParam("style", EString("leader-follower")))
+                expr = EMethod(EVar("Fiber"), "intersection", args)
+
+            else:
+                expr = self.__iter_fiber(rank, term[-1])
+                for factor in reversed(term[:-1]):
+                    fiber = self.__iter_fiber(rank, factor)
+                    expr = Equation.__add_operator(fiber, OAnd(), expr)
             intersections.append(expr)

         # Combine intersections with a union
@@ -438,20 +460,3 @@ def __make_input_iter_expr(
             expr = Equation.__add_operator(intersection, OOr(), expr)

         return expr
-
-    def __separate_terms(self, tensors: List[Tensor]) -> List[List[Tensor]]:
-        """
-        Separate a list of tensors according to which term they belong to
-        """
-        # Separate the tensors
-        terms: List[List[Tensor]] = [[]
-                                     for _ in self.program.get_equation().get_term_tensors()]
-        for tensor in tensors:
-            if tensor.get_is_output():
-                continue
-
-            terms[self.program.get_equation().get_factor_order()[
-                tensor.root_name()][0]].append(tensor)
-
-        # Remove any empty lists
-        return [term for term in terms if term]
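# --- Editor's note (hedged sketch, not part of the patch) --------------------
# When the metrics model reports a LeaderFollowerComponent as the co-iterator
# for the current rank, the plain two-finger intersection (a_k & b_k) is
# replaced by an explicit call whose first fiber argument is the leader. For
# a hypothetical binding with leader A at rank K, the emitted HiFiber is
# roughly:
#
#     Fiber.intersection(a_k, b_k, style="leader-follower")
#
# so the follower fibers are only probed at the leader's coordinates.
# ------------------------------------------------------------------------------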
""" self.program = program + self.metrics = metrics self.canvas = Canvas(program) def make_body(self) -> Statement: @@ -49,7 +51,7 @@ def make_body(self) -> Statement: body = SBlock([]) spacetime = self.program.get_spacetime() - if spacetime is not None: + if spacetime is not None and self.metrics is None: # If we are using slip, increment the timestamp if spacetime.get_slip(): @@ -77,7 +79,7 @@ def make_footer(self) -> Statement: Create the loop footer for graphics """ spacetime = self.program.get_spacetime() - if spacetime is not None: + if spacetime is not None and self.metrics is None: return self.canvas.display_canvas() else: return SBlock([]) @@ -90,7 +92,7 @@ def make_header(self) -> Statement: # If displayable, add the graphics information spacetime = self.program.get_spacetime() - if spacetime is not None: + if spacetime is not None and self.metrics is None: header.add(self.canvas.create_canvas()) # Create the timestamp dictionary if we want slip diff --git a/teaal/trans/header.py b/teaal/trans/header.py index 28676f5..70007f0 100644 --- a/teaal/trans/header.py +++ b/teaal/trans/header.py @@ -25,9 +25,10 @@ """ from sympy import Symbol -from typing import Iterable, Set +from typing import Iterable, Optional, Set from teaal.hifiber import * +from teaal.ir.metrics import Metrics from teaal.ir.program import Program from teaal.ir.tensor import Tensor from teaal.parse.utils import ParseUtils @@ -41,15 +42,22 @@ class Header: Generate the HiFiber code for loop headers """ - def __init__(self, program: Program, partitioner: Partitioner) -> None: + def __init__( + self, + program: Program, + metrics: Optional[Metrics], + partitioner: Partitioner) -> None: """ Construct a new Header object """ self.program = program + self.metrics = metrics self.partitioner = partitioner - @staticmethod - def make_get_payload(tensor: Tensor, ranks: Iterable[str]) -> Statement: + def make_get_payload( + self, + tensor: Tensor, + ranks: Iterable[str]) -> Statement: """ Make a call to getPayload() or getPayloadRef() """ @@ -58,8 +66,15 @@ def make_get_payload(tensor: Tensor, ranks: Iterable[str]) -> Statement: else: func = "getPayload" - rank_arg = [AJust(EVar(rank.lower())) for rank in ranks] - call = EMethod(EVar(tensor.fiber_name()), func, rank_arg) + args: List[Argument] = [AJust(EVar(rank.lower())) for rank in ranks] + if self.metrics: + args.append( + AParam( + "trace", + EString( + "get_payload_" + + tensor.root_name()))) + call = EMethod(EVar(tensor.fiber_name()), func, args) for _ in ranks: tensor.pop() @@ -88,7 +103,11 @@ def make_output(self) -> Statement: constr = EFunc("Tensor", args) return SAssign(AVar(tensor.tensor_name()), constr) - def make_swizzle(self, tensor: Tensor, type_: str) -> Statement: + def make_swizzle( + self, + tensor: Tensor, + ranks: List[str], + type_: str) -> Statement: """ Make call to swizzleRanks() (as necessary) """ @@ -99,6 +118,8 @@ def make_swizzle(self, tensor: Tensor, type_: str) -> Statement: self.program.get_loop_order().apply(tensor) elif type_ == "partitioning": self.program.apply_partition_swizzling(tensor) + elif type_ == "metrics": + tensor.swizzle(ranks) else: raise ValueError("Unknown swizzling reason: " + type_) @@ -147,7 +168,7 @@ def __make_shape(self, args: List[Argument]) -> List[Argument]: if loop_order.is_ready( part.get_final_rank_id( - output, ranks[i]), pos): + output.get_init_ranks(), ranks[i]), pos): final_pos[ranks[i]] = pos i += 1 @@ -167,7 +188,10 @@ def __make_shape(self, args: List[Argument]) -> List[Argument]: avail[i] = True # If at 
@@ -88,7 +103,11 @@ def make_output(self) -> Statement:
         constr = EFunc("Tensor", args)
         return SAssign(AVar(tensor.tensor_name()), constr)

-    def make_swizzle(self, tensor: Tensor, type_: str) -> Statement:
+    def make_swizzle(
+            self,
+            tensor: Tensor,
+            ranks: List[str],
+            type_: str) -> Statement:
         """
         Make call to swizzleRanks() (as necessary)
         """
@@ -99,6 +118,8 @@
             self.program.get_loop_order().apply(tensor)
         elif type_ == "partitioning":
             self.program.apply_partition_swizzling(tensor)
+        elif type_ == "metrics":
+            tensor.swizzle(ranks)
         else:
             raise ValueError("Unknown swizzling reason: " + type_)

@@ -147,7 +168,7 @@ def __make_shape(self, args: List[Argument]) -> List[Argument]:
             if loop_order.is_ready(
                 part.get_final_rank_id(
-                    output, ranks[i]), pos):
+                    output.get_init_ranks(), ranks[i]), pos):
                 final_pos[ranks[i]] = pos

             i += 1
@@ -167,7 +188,10 @@
                 avail[i] = True

         # If at least one rank is not available, we need an explicit shape
-        if not all(avail):
-            args.append(TransUtils.build_shape(output))
+        if not all(avail) or self.metrics is not None:
+            # TODO: Test that this removes the partitioning
+            unpart_ranks = [part.get_root_name(
+                rank) for rank in output.get_ranks()]
+            args.append(TransUtils.build_shape(unpart_ranks))

         return args
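# --- Editor's note (hedged sketch, not part of the patch) --------------------
# When metrics are collected, the output tensor is always constructed with an
# explicit shape, expressed in the unpartitioned root rank names so the shape
# stays meaningful however the ranks were split (e.g. M1 and M0 both map back
# to M via get_root_name()). For a hypothetical output Z: [M, N], the emitted
# constructor is roughly:
#
#     Z_MN = Tensor(rank_ids=["M", "N"], shape=[M, N])
#
# rather than leaving the shape to be inferred from the fibers.
# ------------------------------------------------------------------------------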
diff --git a/teaal/trans/hifiber.py b/teaal/trans/hifiber.py
index 2dcc5d1..025e1bb 100644
--- a/teaal/trans/hifiber.py
+++ b/teaal/trans/hifiber.py
@@ -29,6 +29,7 @@
 from teaal.hifiber import *
 from teaal.ir.flow_graph import FlowGraph
 from teaal.ir.flow_nodes import *
+from teaal.ir.fusion import Fusion
 from teaal.ir.hardware import Hardware
 from teaal.ir.iter_graph import IterationGraph
 from teaal.ir.metrics import Metrics
@@ -64,9 +65,10 @@ def __init__(
         self.hardware: Optional[Hardware] = None
         self.format = format_
         if arch and bindings and arch.get_spec():
-            self.hardware = Hardware(arch, bindings)
+            self.hardware = Hardware(arch, bindings, self.program)
+            self.fusion = Fusion(self.hardware)

-        self.trans_utils = TransUtils()
+        self.trans_utils = TransUtils(self.program)

         self.hifiber = SBlock([])
         for i in range(len(einsum.get_expressions())):
@@ -83,30 +85,28 @@ def __translate(self, i: int) -> Statement:
         self.metrics: Optional[Metrics] = None
         if self.hardware and self.format:
             self.metrics = Metrics(self.program, self.hardware, self.format)
+            self.fusion.add_einsum(self.program)

         # Create the flow graph and get the relevant nodes
         flow_graph = FlowGraph(self.program, self.metrics, ["hoist"])
         nodes = flow_graph.get_sorted()

         # Create all relevant translator objects
-        self.graphics = Graphics(self.program)
+        self.graphics = Graphics(self.program, self.metrics)
         self.partitioner = Partitioner(self.program, self.trans_utils)
-        self.header = Header(self.program, self.partitioner)
+        self.header = Header(self.program, self.metrics, self.partitioner)
         self.graph = IterationGraph(self.program)
-        self.eqn = Equation(self.program)
+        self.eqn = Equation(self.program, self.metrics)
         if self.metrics:
-            self.collector = Collector(self.program, self.metrics)
+            self.collector = Collector(self.program, self.metrics, self.fusion)

-        stmt = self.__trans_nodes(nodes, 0)[1]
+        stmt = self.__trans_nodes(nodes)[1]

         self.program.reset()
         return stmt

-    def __trans_nodes(self,
-                      nodes: List[Node],
-                      depth: int) -> Tuple[int,
-                                           Statement]:
+    def __trans_nodes(self, nodes: List[Node]) -> Tuple[int, Statement]:
         """
         Recursive function to generate the actual HiFiber program
         """
@@ -115,10 +115,33 @@
         i = 0
         while i < len(nodes):
             node = nodes[i]
-            if isinstance(node, FromFiberNode):
+
+            if isinstance(node, EagerInputNode):
+                code.add(
+                    self.eqn.make_eager_inputs(
+                        node.get_rank(),
+                        node.get_tensors()))
+
+            elif isinstance(node, EndLoopNode):
+                return i + 1, code
+
+            elif isinstance(node, FromFiberNode):
                 tensor = self.program.get_equation().get_tensor(node.get_tensor())
                 code.add(Header.make_tensor_from_fiber(tensor))

+            elif isinstance(node, GetPayloadNode):
+                tensor = self.program.get_equation().get_tensor(node.get_tensor())
+                code.add(
+                    self.header.make_get_payload(
+                        tensor, node.get_ranks()))
+
+            elif isinstance(node, GetRootNode):
+                tensor = self.program.get_equation().get_tensor(node.get_tensor())
+                code.add(Header.make_get_root(tensor))
+
+            elif isinstance(node, IntervalNode):
+                code.add(self.eqn.make_interval(node.get_rank()))
+
             elif isinstance(node, LoopNode):
                 # Generate the for loop
                 rank, tensors = self.graph.peek_concord()
@@ -127,26 +150,45 @@ def __trans_nodes(
                 payload = self.eqn.make_payload(cast(str, rank), tensors)

                 # Recurse for the for loop body
-                j, body = self.__trans_nodes(nodes[(i + 1):], depth + 1)
+                j, body = self.__trans_nodes(nodes[(i + 1):])
                 code.add(SFor(payload, expr, body))
                 i += j

+            elif isinstance(node, MetricsNode):
+                if node.get_type() == "Body":
+                    code.add(self.collector.make_body())
+
+                elif node.get_type() == "Dump":
+                    code.add(self.collector.dump())
+
+                elif node.get_type() == "End":
+                    code.add(self.collector.end())
+
+                elif node.get_type() == "Start":
+                    code.add(self.collector.start())
+
+                else:
+                    raise ValueError(
+                        "Unknown node: " +
+                        repr(node))  # pragma: no cover
+
+            elif isinstance(node, MetricsFooterNode):
+                code.add(self.collector.make_loop_footer(node.get_rank()))
+
+            elif isinstance(node, MetricsHeaderNode):
+                code.add(self.collector.make_loop_header(node.get_rank()))
+
             elif isinstance(node, OtherNode):
                 if node.get_type() == "Body":
                     code.add(self.eqn.make_update())
                     code.add(self.graphics.make_body())

                 elif node.get_type() == "Footer":
-                    if depth == 0:
-                        code.add(
-                            Footer.make_footer(
-                                self.program,
-                                self.graphics,
-                                self.partitioner))
-
-                    else:
-                        # Pop back up a level and retry this node
-                        return i, code
+                    code.add(
+                        Footer.make_footer(
+                            self.program,
+                            self.graphics,
+                            self.partitioner))

                 elif node.get_type() == "Graphics":
                     code.add(self.graphics.make_header())
@@ -168,49 +210,11 @@

             elif isinstance(node, SwizzleNode):
                 tensor = self.program.get_equation().get_tensor(node.get_tensor())
-                code.add(self.header.make_swizzle(tensor, node.get_type()))
-
-            elif isinstance(node, GetRootNode):
-                tensor = self.program.get_equation().get_tensor(node.get_tensor())
-                code.add(Header.make_get_root(tensor))
-
-            elif isinstance(node, EagerInputNode):
-                code.add(
-                    self.eqn.make_eager_inputs(
-                        node.get_rank(),
-                        node.get_tensors()))
-
-            elif isinstance(node, IntervalNode):
-                code.add(self.eqn.make_interval(node.get_rank()))
-
-            elif isinstance(node, MetricsNode):
-                if node.get_type() == "Dump":
-                    code.add(self.collector.dump())
-
-                elif node.get_type() == "End":
-                    if depth == 0:
-                        code.add(self.collector.end())
-                    else:
-                        # Pop back up a level and retry this node
-                        return i, code
-
-                elif node.get_type() == "Start":
-                    code.add(self.collector.start())
-
-                else:
-                    raise ValueError(
-                        "Unknown node: " +
-                        repr(node))  # pragma: no cover
-
-            elif isinstance(node, CollectingNode):
-                code.add(
-                    self.collector.set_collecting(
-                        node.get_tensor(),
-                        node.get_rank()))
-
-            elif isinstance(node, GetPayloadNode):
-                tensor = self.program.get_equation().get_tensor(node.get_tensor())
-                code.add(Header.make_get_payload(tensor, node.get_ranks()))
+                code.add(
+                    self.header.make_swizzle(
+                        tensor,
+                        node.get_ranks(),
+                        node.get_type()))

             else:
                 raise ValueError(
diff --git a/teaal/trans/partitioner.py b/teaal/trans/partitioner.py
index a00c5c6..88aba61 100644
--- a/teaal/trans/partitioner.py
+++ b/teaal/trans/partitioner.py
@@ -396,9 +396,8 @@ def __split_follower(
         # Make sure there is no translation needed between the leader and
         # follower tensors' ranks
         leader_tensor = self.program.get_equation().get_tensor(leader)
-        leader_rank = leader_tensor.peek()
-        assert leader_rank is not None
-        lroot = self.program.get_partitioning().get_root_name(leader_rank.upper())
+        leader_rank = leader_tensor.peek_clean()
+        lroot = self.program.get_partitioning().get_root_name(leader_rank)
         root = self.program.get_partitioning().get_root_name(rank)
         if root != lroot:
             raise ValueError(
diff --git a/teaal/trans/utils.py
b/teaal/trans/utils.py index 3a81e63..ea4abee 100644 --- a/teaal/trans/utils.py +++ b/teaal/trans/utils.py @@ -29,6 +29,7 @@ from typing import Any from teaal.hifiber import * +from teaal.ir.program import Program from teaal.ir.tensor import Tensor @@ -37,8 +38,9 @@ class TransUtils: Different utilities for generating HiFiber programs """ - def __init__(self) -> None: + def __init__(self, program: Program) -> None: self.count = -1 + self.program = program @staticmethod def build_expr(obj: Any) -> Expression: @@ -63,6 +65,10 @@ def build_expr(obj: Any) -> Expression: for key, val in obj.items()} return EDict(dict_) + elif isinstance(obj, tuple): + tuple_ = [TransUtils.build_expr(elem) for elem in obj] + return ETuple(tuple(tuple_)) + else: raise ValueError("Unable to translate " + str(obj) + " with type " + str(type(obj))) @@ -85,12 +91,12 @@ def build_set_rank_ids(tensor: Tensor, name: str) -> Statement: return SExpr(set_call) @staticmethod - def build_shape(tensor: Tensor) -> Argument: + def build_shape(ranks: Sequence[str]) -> Argument: """ Build the shape argument """ - ranks = [EVar(rank) for rank in tensor.get_ranks()] - return AParam("shape", EList(ranks)) + rank_vars = [EVar(rank) for rank in ranks] + return AParam("shape", EList(rank_vars)) @staticmethod def build_swizzle( diff --git a/tests/hifiber/test_op.py b/tests/hifiber/test_op.py index ad8006e..5cb21ca 100644 --- a/tests/hifiber/test_op.py +++ b/tests/hifiber/test_op.py @@ -51,6 +51,11 @@ def test_omul(): assert mul.gen() == "*" +def test_onotin(): + notin = ONotIn() + assert notin.gen() == "not in" + + def test_oor(): or_ = OOr() assert or_.gen() == "|" diff --git a/tests/integration/demo.yaml b/tests/integration/demo.yaml index 097b804..72e2543 100644 --- a/tests/integration/demo.yaml +++ b/tests/integration/demo.yaml @@ -1,24 +1,66 @@ einsum: declaration: - I: [B, C, H, W] - F: [C, M, R, S] - O: [B, M, P, Q] + SOB: [UA, UB] + T: [UA, UB, K] + I: [K] + OB: [] expressions: - - O[b, m, p, q] = I[b, c, p+r, q+s]*F[c, m, r, s] -mapping: - rank-order: - I: [B, C, H, W] - F: [M, C, R, S] - O: [B, M, P, Q] - partitioning: - O: - M: # filter partitioning - - uniform_shape(32) # 2 filters happening simultaneously per array - - uniform_shape(16) # 16 filters happening in interleaving fashion per PE - B: - - uniform_shape(4) - C: - - uniform_shape(6) # 2 channels running simultaneously per PE - - uniform_shape(3) # 3 channels running per PE - loop-order: - O: [B1, C2, M2, B0, M1, P, C1, R, Q, S, M0, C0] + - SOB[ua, ub] = T[ua, ub, k] * I[k] + - OB[] = SOB[ua, ub] +# einsum: +# declaration: +# A: [S] +# Z: [T] +# expressions: +# - Z[t] = A[2 * t] +# format: +# A: +# default: +# rank-order: [S] +# S: +# format: C +# pbits: 32 +# Z: +# default: +# rank-order: [T] +# T: +# format: C +# pbits: 32 +# architecture: +# accel: +# - name: System +# local: +# - name: MainMemory +# class: DRAM +# subtree: +# - name: Chip +# local: +# - name: LLB +# class: Cache +# attributes: +# width: 32 +# depth: 1024 +# bindings: +# Z: +# - config: accel +# prefix: tmp/demo +# - component: MainMemory +# bindings: +# - tensor: A +# rank: S +# type: payload +# format: default +# - tensor: Z +# rank: T +# type: payload +# format: default +# - component: LLB +# bindings: +# - tensor: A +# rank: S +# type: payload +# format: default +# - tensor: Z +# rank: T +# type: payload +# format: default diff --git a/tests/integration/extensor-energy.yaml b/tests/integration/extensor-energy.yaml new file mode 100644 index 0000000..b37b80f --- /dev/null +++ 
b/tests/integration/extensor-energy.yaml @@ -0,0 +1,320 @@ +einsum: + declaration: + A: [K, M] + B: [K, N] + Z: [M, N] + expressions: + - Z[m,n] = A[k,m] * B[k,n] +mapping: + rank-order: + A: [K, M] + B: [K, N] + Z: [M, N] + partitioning: + Z: + K: [uniform_shape(K1), uniform_shape(K0)] + M: [uniform_shape(M1), uniform_shape(M0)] + N: [uniform_shape(N1), uniform_shape(N0)] + loop-order: + Z: [N2, K2, M2, M1, N1, K1, M0, N0, K0] + spacetime: + Z: + space: [K1] + time: [N2, K2, M2, M1, N1, M0, N0, K0] +format: + A: + default: + rank-order: [K2, M2, M1, K1, M0, K0] + # These formats do not matter because there are no cbits or pbits + K2: + format: C + M2: + format: C + M1: + format: C + # M1 and K1 metadata are stored as a tuple (M1, K1) + # of size (32 bits, 32 bits) + K1: + format: C + cbits: 64 + # Microtiles are in CSF-like format + M0: + format: C + cbits: 32 + pbits: 32 + K0: + format: C + cbits: 32 + pbits: 64 + + B: + default: + rank-order: [N2, K2, N1, K1, N0, K0] + # These formats do not matter because there are no cbits or pbits + N2: + format: C + K2: + format: C + # B is CSF the rest of the way down + N1: + format: C + cbits: 32 + pbits: 32 + K1: + format: C + cbits: 32 + pbits: 32 + N0: + format: C + cbits: 32 + pbits: 32 + K0: + format: C + cbits: 32 + pbits: 64 + Z: + default: + rank-order: [N2, M2, M1, N1, M0, N0] + # These ranks do not matter because there are no cbits or pbits + N2: + format: U + M2: + format: U + # There is never any traffic counted for the M1 and N1 ranks + M1: + format: U + N1: + format: U + M0: + format: U + # The bottom ranks are in a COO-like format (M0, N0) of size + # (32 bits, 32 bits) + N0: + format: C + cbits: 64 + pbits: 64 +architecture: + Accelerator: + - name: System + attributes: + clock_frequency: 1000000000 # 1 GHz = 1000000000 Hz + local: + - name: MainMemory + class: DRAM + attributes: + bandwidth: 586314575512 # BW is 68.256 GB/s * 2^30 B/GB * 8 bits/B + - name: K2Intersect + class: Intersector + attributes: + type: skip-ahead + - name: TopSequencer + class: Sequencer + attributes: + num_ranks: 3 + subtree: + - name: Chip + local: + - name: LLB + class: Buffet + attributes: + width: 64 + depth: 3932160 # 30 MB / 8B/line + bandwidth: 9223372036854775807 # Max int64 (i.e., inf) + - name: K1Intersect + class: Intersector + attributes: + type: skip-ahead + - name: MiddleSequencer + class: Sequencer + attributes: + num_ranks: 3 + subtree: + - name: PE[0..127] # 128 PEs + local: + - name: PEB + class: Buffet + attributes: + width: 64 + depth: 8192 # 64kB / 8B/line + - name: K0Intersection + class: Intersector + attributes: + type: skip-ahead + - name: BottomSequencer + class: Sequencer + attributes: + num_ranks: 3 + - name: FPMul + class: Compute + attributes: + type: mul + - name: FPAdd + class: Compute + attributes: + type: add +bindings: + Z: + - config: Accelerator + prefix: tmp/extensor_energy + - component: MainMemory + bindings: + - tensor: A + rank: K1 + type: coord + format: default + - tensor: A + rank: M0 + type: coord + format: default + - tensor: A + rank: M0 + type: payload + format: default + - tensor: A + rank: K0 + type: coord + format: default + - tensor: A + rank: K0 + type: payload + format: default + - tensor: B + rank: N1 + type: coord + format: default + - tensor: B + rank: N1 + type: payload + format: default + - tensor: B + rank: K1 + type: coord + format: default + - tensor: B + rank: K1 + type: payload + format: default + - tensor: B + rank: N0 + type: coord + format: default + - tensor: B + rank: N0 + type: payload 
+ format: default + - tensor: B + rank: K0 + type: coord + format: default + - tensor: B + rank: K0 + type: payload + format: default + - tensor: Z + rank: N0 + type: coord + format: default + - tensor: Z + rank: N0 + type: payload + format: default + - component: K2Intersect + bindings: + - rank: K2 + - component: TopSequencer + bindings: + - rank: N2 + - rank: K2 + - rank: M2 + - component: LLB + bindings: + - tensor: A + rank: K1 + type: coord + evict-on: M2 + format: default + style: lazy + - tensor: A + rank: M0 + type: coord + evict-on: M2 + format: default + style: eager + - tensor: B + rank: N1 + type: coord + evict-on: K2 + format: default + style: lazy + - tensor: B + rank: N1 + type: payload + evict-on: K2 + format: default + style: lazy + - tensor: B + rank: K1 + type: coord + evict-on: K2 + format: default + style: lazy + - tensor: B + rank: K1 + type: payload + evict-on: K2 + format: default + style: lazy + - tensor: B + rank: N0 + type: coord + evict-on: K2 + format: default + style: eager + - tensor: Z + rank: M0 + type: coord + evict-on: M2 + format: default + style: eager + - component: K1Intersect + bindings: + - rank: K1 + - component: MiddleSequencer + bindings: + - rank: M1 + - rank: N1 + - rank: K1 + - component: PEB + bindings: + - tensor: A + rank: M0 + type: coord + evict-on: K1 + format: default + style: eager + - tensor: B + rank: N0 + type: coord + evict-on: K1 + format: default + style: eager + - tensor: Z + rank: M0 + type: coord + evict-on: N1 + format: default + style: eager + - component: K0Intersection + bindings: + - rank: K0 + - component: BottomSequencer + bindings: + - rank: M0 + - rank: N0 + - rank: K0 + - component: FPMul + bindings: + - op: mul + - component: FPAdd + bindings: + - op: add diff --git a/tests/integration/extensor.yaml b/tests/integration/extensor.yaml new file mode 100644 index 0000000..4a4728f --- /dev/null +++ b/tests/integration/extensor.yaml @@ -0,0 +1,272 @@ +einsum: + declaration: + A: [K, M] + B: [K, N] + Z: [M, N] + expressions: + - Z[m,n] = A[k,m] * B[k,n] +mapping: + rank-order: + A: [K, M] + B: [K, N] + Z: [M, N] + partitioning: + Z: + K: [uniform_shape(K1), uniform_shape(K0)] + M: [uniform_shape(M1), uniform_shape(M0)] + N: [uniform_shape(N1), uniform_shape(N0)] + loop-order: + Z: [N2, K2, M2, M1, N1, K1, M0, N0, K0] + spacetime: + Z: + space: [K1] + time: [N2, K2, M2, M1, N1, M0, N0, K0] +format: + A: + default: + rank-order: [K2, M2, M1, K1, M0, K0] + # These formats do not matter because there are no cbits or pbits + K2: + format: C + M2: + format: C + M1: + format: C + # M1 and K1 metadata are stored as a tuple (M1, K1) + # of size (32 bits, 32 bits) + K1: + format: C + cbits: 64 + # Microtiles are in CSF-like format + M0: + format: C + cbits: 32 + pbits: 32 + K0: + format: C + cbits: 32 + pbits: 64 + + B: + default: + rank-order: [N2, K2, N1, K1, N0, K0] + # These formats do not matter because there are no cbits or pbits + N2: + format: C + K2: + format: C + # B is CSF the rest of the way down + N1: + format: C + cbits: 32 + pbits: 32 + K1: + format: C + cbits: 32 + pbits: 32 + N0: + format: C + cbits: 32 + pbits: 32 + K0: + format: C + cbits: 32 + pbits: 64 + Z: + default: + rank-order: [N2, M2, M1, N1, M0, N0] + # These ranks do not matter because there are no cbits or pbits + N2: + format: U + M2: + format: U + # There is never any traffic counted for the M1 and N1 ranks + M1: + format: U + N1: + format: U + M0: + format: U + # The bottom ranks are in a COO-like format (M0, N0) of size + # (32 bits, 32 bits) + N0: 
+ format: C + cbits: 64 + pbits: 64 +architecture: + Accelerator: + - name: System + attributes: + clock_frequency: 1000000000 # 1 GHz = 1000000000 Hz + local: + - name: MainMemory + class: DRAM + attributes: + bandwidth: 586314575512 # BW is 68.256 GB/s * 2^30 B/GB * 8 bits/B + - name: K2Intersect + class: Intersector + attributes: + type: skip-ahead + subtree: + - name: Chip + local: + - name: LLB + class: Buffet + attributes: + width: 64 + depth: 3932160 # 30 MB / 8B/line + - name: K1Intersect + class: Intersector + attributes: + type: skip-ahead + subtree: + - name: PE[0..127] # 128 PEs + local: + - name: PEB + class: Buffet + attributes: + width: 64 + depth: 8192 # 64kB / 8B/line + - name: K0Intersection + class: Intersector + attributes: + type: skip-ahead + - name: FPMul + class: Compute + attributes: + type: mul + - name: FPAdd + class: Compute + attributes: + type: add +bindings: + Z: + - config: Accelerator + prefix: tmp/extensor + - component: MainMemory + bindings: + - tensor: A + rank: K1 + type: coord + format: default + - tensor: A + rank: M0 + type: coord + format: default + - tensor: A + rank: M0 + type: payload + format: default + - tensor: A + rank: K0 + type: coord + format: default + - tensor: A + rank: K0 + type: payload + format: default + - tensor: B + rank: N1 + type: coord + format: default + - tensor: B + rank: N1 + type: payload + format: default + - tensor: B + rank: K1 + type: coord + format: default + - tensor: B + rank: K1 + type: payload + format: default + - tensor: B + rank: N0 + type: coord + format: default + - tensor: B + rank: N0 + type: payload + format: default + - tensor: B + rank: K0 + type: coord + format: default + - tensor: B + rank: K0 + type: payload + format: default + - tensor: Z + rank: N0 + type: coord + format: default + - tensor: Z + rank: N0 + type: payload + format: default + - component: K2Intersect + bindings: + - rank: K2 + - component: LLB + bindings: + - tensor: A + rank: K1 + type: coord + evict-on: M2 + format: default + style: lazy + - tensor: A + rank: M0 + type: coord + evict-on: M2 + format: default + style: eager + - tensor: B + rank: N1 + type: coord + evict-on: K2 + format: default + style: lazy + - tensor: B + rank: N1 + type: payload + evict-on: K2 + format: default + style: lazy + - tensor: B + rank: K1 + type: coord + evict-on: K2 + format: default + style: lazy + - tensor: B + rank: K1 + type: payload + evict-on: K2 + format: default + style: lazy + - tensor: B + rank: N0 + type: coord + evict-on: K2 + format: default + style: eager + - tensor: Z + rank: M0 + type: coord + evict-on: M2 + format: default + style: eager + - component: K1Intersect + bindings: + - rank: K1 + - component: K0Intersection + bindings: + - rank: K0 + - component: FPMul + bindings: + - op: mul + - component: FPAdd + bindings: + - op: add diff --git a/tests/integration/gamma.yaml b/tests/integration/gamma.yaml index 4f80205..d534b4b 100644 --- a/tests/integration/gamma.yaml +++ b/tests/integration/gamma.yaml @@ -4,155 +4,238 @@ einsum: B: [K, N] T: [K, M, N] Z: [M, N] - expressions: - T[k,m,n] = take(A[k,m], B[k,n], 1) - - Z[m,n] = T[k,m,n] * A[k,m] - + - Z[m,n] = T[k,m,n]*A[k,m] mapping: rank-order: A: [M, K] B: [K, N] T: [M, K, N] Z: [M, N] - loop-order: T: [M, K, N] Z: [M, N, K] - + spacetime: + T: + space: [M] + time: [K, N] + Z: + space: [M] + time: [N, K] +format: + A: + default: + rank-order: [M, K] + M: + format: U + pbits: 32 + K: + format: C + cbits: 32 + pbits: 64 + B: + default: + rank-order: [K, N] + K: + format: U + pbits: 32 + 
N: + format: C + cbits: 32 + pbits: 64 + Z: + default: + rank-order: [M, N] + M: + format: U + pbits: 32 + N: + format: C + cbits: 32 + pbits: 64 architecture: - subtree: + Accelerator: - name: System attributes: - clock_frequency: 1000000000 - + clock_frequency: 1000000000 # 1 GHz = 1 * 10^9 Hz = 1000000000 Hz local: - name: MainMemory class: DRAM attributes: - datawidth: 8 - bandwidth: 128 - + bandwidth: 1099511627776 # 128 GB/s = 128 * 2^30 * 8 bits/s subtree: - name: Chip - local: - - name: FiberCache # 3MB FiberCache + - name: FiberCache class: Cache attributes: - width: 8 - depth: 3145728 - + width: 64 # Block size is not mentioned in the paper; minimum correct value + depth: 393216 # 3 MB / 8B = 393216 lines subtree: - name: PE[0..31] # 32 PEs - + local: + - name: Stage0RegFile + class: Buffet + attributes: + width: 64 + depth: inf # Not specified by Gamma + - name: Stage1RegFile + class: Buffet + attributes: + width: 64 + depth: inf # Not specified by Gamma subtree: - - name: Stage0 - + - name: Stage0[0..31] local: - - name: RegFile0 - class: Buffet - - - name: Intersection - class: LeaderFollower - - - name: Stage0to1 - + - name: Intersect + class: Intersector + attributes: + type: leader-follower + - name: Stage0to1[0..31] local: - name: HighRadixMerger class: Merger attributes: - radix: 64 - next_latency: 1 - - - name: Stage1 - + inputs: 64 + comparator_radix: 64 + outputs: 1 + order: fifo + reduce: False + - name: Stage1[0..31] local: - - name: RegFile1 - class: Buffet - - - name: MAC + - name: FPMul class: compute + attributes: + type: mul + - name: FPAdd + class: compute + attributes: + type: add bindings: -- name: MainMemory - bindings: - - tensor: A - rank: root - - tensor: B - rank: root - - tensor: Z - rank: root - -- name: FiberCache - bindings: - - tensor: B - rank: K - -- name: RegFile0 - bindings: - - tensor: A - rank: M - - tensor: B - rank: N - - tensor: T - rank: M - -- name: Intersection - bindings: - - einsum: T - rank: K - leader: A - -- name: HighRadixMerger - bindings: - # T[M, K, N] -> T[M, N, K] - - tensor: T - init_ranks: [M, K, N] - swap_depth: 1 - -- name: RegFile1 - bindings: - - tensor: A - rank: M - - tensor: T - rank: M - - tensor: Z - rank: N - -- name: MAC - bindings: - - einsum: Z - op: mul - - einsum: Z - op: add - -format: - A: - M: - format: U - rhbits: 32 - pbits: 32 - K: - format: C - cbits: 32 - pbits: 64 - - B: - K: - format: U - rhbits: 32 - pbits: 32 - N: - format: C - cbits: 32 - pbits: 64 - + T: + - config: Accelerator + prefix: tmp/gamma_T + - component: MainMemory + bindings: + - tensor: A + rank: M + type: payload + format: default + - tensor: A + rank: K + type: coord + format: default + - tensor: A + rank: K + type: payload + format: default + - tensor: B + rank: K + type: payload + format: default + - tensor: B + rank: N + type: coord + format: default + - tensor: B + rank: N + type: payload + format: default + - component: FiberCache + bindings: + - tensor: B + rank: K + type: payload + format: default + - tensor: B + rank: N + type: coord + format: default + - tensor: B + rank: N + type: payload + format: default + - component: Stage0RegFile + bindings: + - tensor: A + rank: M + type: payload + format: default + evict-on: root + - tensor: A + rank: K + type: coord + format: default + evict-on: M + - tensor: A + rank: K + type: payload + format: default + evict-on: M + - component: Intersect + bindings: + - rank: K + leader: A Z: - M: - format: U - rhbits: 32 - pbits: 32 - N: - format: C - cbits: 32 - pbits: 64 + - config: 
Accelerator + prefix: tmp/gamma_Z + - component: MainMemory + bindings: + - tensor: Z + rank: M + type: payload + format: default + - tensor: Z + rank: N + type: coord + format: default + - tensor: Z + rank: N + type: payload + format: default + - component: Stage0RegFile + bindings: + - tensor: A + rank: M + type: payload + format: default + evict-on: root + - tensor: A + rank: K + type: coord + format: default + evict-on: M + - tensor: A + rank: K + type: payload + format: default + evict-on: M + - component: Stage1RegFile + bindings: + - tensor: Z + rank: M + type: payload + format: default + evict-on: root + - tensor: Z + rank: N + type: coord + format: default + evict-on: M + - tensor: Z + rank: N + type: payload + format: default + evict-on: M + - component: HighRadixMerger + bindings: + - tensor: T + init-ranks: [M, K, N] + final-ranks: [M, N, K] + - component: FPMul + bindings: + - op: mul + - component: FPAdd + bindings: + - op: add diff --git a/tests/integration/outerspace.yaml b/tests/integration/outerspace.yaml index 69b8a08..bee355e 100644 --- a/tests/integration/outerspace.yaml +++ b/tests/integration/outerspace.yaml @@ -2,142 +2,328 @@ einsum: declaration: A: [K, M] B: [K, N] - T: [K, M, N] + T0: [K, M, N] + T1: [K, M, N] Z: [M, N] expressions: - - T[k, m, n] = A[k, m] * B[k, n] - - Z[m, n] = T[k, m, n] - + - T0[k, m, n] = A[k, m] * B[k, n] + - T1[k, m, n] = T0[k, m, n] + - Z[m, n] = T1[k, m, n] mapping: rank-order: A: [K, M] B: [K, N] - T: [M, K, N] + T0: [M, K, N] + T1: [M, K, N] Z: [M, N] loop-order: - T: [K, M, N] + T0: [K, M, N] + T1: [M, K, N] Z: [M, N, K] - + spacetime: + T0: + space: [M] + time: [K, N] + T1: + space: [M] + time: [K, N] + Z: + space: [M] + time: [N, K] +format: + A: + default: + rank-order: [K, M] + K: + format: U + pbits: 32 + M: + format: C + cbits: 32 + pbits: 64 + B: + default: + rank-order: [K, N] + K: + format: U + pbits: 32 + N: + format: C + cbits: 32 + pbits: 64 + T0: + LinkedLists: + rank-order: [M, K, N] + M: + format: U + pbits: 32 + K: + format: C + pbits: 32 + N: + format: C + layout: interleaved + cbits: 32 + pbits: 64 + T1: + LinkedLists: + rank-order: [M, K, N] + M: + format: U + pbits: 32 + K: + format: C + pbits: 32 + N: + format: C + layout: interleaved + cbits: 32 + pbits: 64 + Z: + default: + rank-order: [M, N] + M: + format: U + pbits: 32 + N: + format: C + cbits: 32 + pbits: 64 architecture: - subtree: + MultiplyPhase: - name: System attributes: - clock_frequency: 1500000000 - + clock_frequency: 1500000000 # 1.5 GHz = 1.5 * 10^9 Hz = 1500000000 Hz local: - name: MainMemory class: DRAM attributes: - datawidth: 8 - bandwidth: 85.333 - + bandwidth: 1099511627776 # 128 GB/s = 128 * 2^30 * 8 bits/s subtree: - name: Chip subtree: - - name: PT[0..15] - + - name: PT[0..15] # 16 PTs local: - - name: CacheSPM + - name: Cache class: Cache attributes: - width: 512 - depth: inf - - subtree: - - name: PE[0..256] # 16 PEs per PT - local: - - name: RegFile - class: Buffet - - - name: Sort - class: Merger - attributes: - radix: inf - next_latency: N - - - name: Compute - class: compute - + width: 64 + depth: 2048 # 16kB / 8B = 2048 lines + subtree: + - name: PE[0..256] # 16 PEs per PT + local: + - name: RegFile + class: Buffet + attributes: + width: 64 # Block size is not mentioned in the paper; minimum correct value + depth: 128 # 1kB / 8B = 128 lines + - name: FPMul + class: compute + attributes: + type: mul + MergePhase: + - name: System + attributes: + clock_frequency: 1500000000 # 1.5 GHz = 1.5 * 10^9 Hz = 1500000000 Hz + local: + - name: 
MainMemory + class: DRAM + attributes: + bandwidth: 1099511627776 # 128 GB/s = 128 * 2^30 * 8 bits/s + subtree: + - name: Chip + subtree: + - name: PT[0..15] # 16 PTs + subtree: + - name: PE[0..128] + local: + - name: SPM + class: Buffet + attributes: + width: 96 + depth: 171 # 2kB / 12B = 170.666 + - name: SortHW + class: Merger + attributes: + inputs: inf + comparator_radix: inf + outputs: 1 + order: fifo + reduce: False + - name: RegFile + class: Buffet + attributes: + width: 64 # Block size is not mentioned in the paper; minimum correct value + depth: 128 # 1kB / 8B = 16 lines + - name: FPAdd + class: compute + attributes: + type: add bindings: - - name: MainMemory + T0: + - config: MultiplyPhase + prefix: tmp/outerspace_T0 + - component: MainMemory bindings: - tensor: A - rank: root + rank: K + type: payload + format: default + - tensor: A + rank: M + type: coord + format: default + - tensor: A + rank: M + type: payload + format: default - tensor: B - rank: root - - tensor: T - rank: root - - tensor: Z - rank: root - - - name: CacheSPM + rank: K + type: payload + format: default + - tensor: B + rank: N + type: coord + format: default + - tensor: B + rank: N + type: payload + format: default + - component: Cache bindings: - tensor: B - ranks: root - - - name: RegFile + rank: N + type: coord + format: default + - tensor: B + rank: N + type: payload + format: default + - component: RegFile bindings: + - tensor: A + rank: K + type: payload + evict-on: root + format: default + - tensor: A + rank: M + type: coord + evict-on: K + format: default - tensor: A rank: M + type: payload + evict-on: K + format: default - tensor: B + rank: K + type: payload + evict-on: root + format: default + - component: FPMul + bindings: + - op: mul + T1: + - config: MergePhase + prefix: tmp/outerspace_T1 + - component: MainMemory + bindings: + - tensor: T0 + rank: M + type: payload + format: LinkedLists + - tensor: T0 + rank: K + type: payload + format: LinkedLists + - tensor: T0 + rank: N + type: elem + format: LinkedLists + - tensor: T1 + rank: M + type: payload + format: LinkedLists + - tensor: T1 + rank: K + type: payload + format: LinkedLists + - tensor: T1 + rank: N + type: elem + format: LinkedLists + - component: SPM + bindings: + - tensor: T0 + rank: M + type: payload + format: LinkedLists + evict-on: root + - tensor: T0 + rank: K + type: payload + format: LinkedLists + evict-on: M + - tensor: T0 + rank: N + type: elem + format: LinkedLists + evict-on: K + - tensor: T1 + rank: M + type: payload + format: LinkedLists + evict-on: root + - tensor: T1 + rank: K + type: payload + format: LinkedLists + evict-on: M + - tensor: T1 rank: N - - tensor: T - rank: root + type: elem + format: LinkedLists + evict-on: K + Z: + - config: MergePhase + prefix: tmp/outerspace_Z + - component: MainMemory + bindings: + - tensor: Z + rank: M + type: payload + format: default - tensor: Z rank: N - - - name: Sort + type: coord + format: default + - tensor: Z + rank: N + type: payload + format: default + - component: SortHW bindings: - - tensor: T - init_ranks: [M, K, N] - swap_depth: 1 - - - name: Compute + - tensor: T1 + init-ranks: [M, K, N] + final-ranks: [M, N, K] + - component: RegFile bindings: - - einsum: T - op: mul - - einsum: Z - op: add - -format: - A: - M: - format: U - rhbits: 32 - pbits: 32 - K: - format: C - cbits: 32 - pbits: 64 - - B: - K: - format: U - rhbits: 32 - pbits: 32 - N: - format: C - cbits: 32 - pbits: 64 - - T: - M: - format: U - pbits: 32 - K: - format: C - pbits: 32 - N: - format: C - cbits: 32 - 
pbits: 64 - - Z: - M: - format: U - rhbits: 32 - pbits: 32 - N: - format: C - cbits: 32 - pbits: 64 + - tensor: Z + rank: M + type: payload + format: default + evict-on: root + - tensor: Z + rank: N + type: coord + format: default + evict-on: M + - tensor: Z + rank: N + type: payload + format: default + evict-on: M + - component: FPAdd + bindings: + - op: add diff --git a/tests/integration/sigma.yaml b/tests/integration/sigma.yaml new file mode 100644 index 0000000..8456032 --- /dev/null +++ b/tests/integration/sigma.yaml @@ -0,0 +1,117 @@ +einsum: + declaration: + A: [K, M] + B: [K, N] + Z: [M, N] + expressions: + - Z[m, n] = A[k, m] * B[k, n] +mapping: + rank-order: + A: [K, M] + B: [K, N] + Z: [M, N] + partitioning: + Z: + K: + - uniform_shape(128) + (M, K0): + - flatten() + MK0: + - uniform_occupancy(A.16384) + loop-order: + Z: [K1, MK01, N, MK00] + spacetime: + Z: + space: [MK00] + time: [K1, MK01, N] +architecture: + Accelerator: + - name: System + attributes: + clock_frequency: 500000000 # 500 MHz = 500000000 Hz + local: + - name: MainMemory + class: DRAM + attributes: + bandwidth: 8796093022208 # 1024 GB/s * 2^30 B/GB * 8 bits/B + subtree: + - name: Chip + local: + - name: DataSRAMBanks + class: Buffet + attributes: + width: 32 # Not specified; minimum possible + depth: 8388608 # 32MB / 4B/line = 8388608 lines + bandwidth: 8246337208320 # 960 GB/s * 2^30 B/GB * 8 bits/B + subtree: + - name: FlexDPE[0..127] # 128 FlexDPEs + subtree: + - name: PE[0..16383] # 128 PEs per FlexDPE + local: + - name: RegFile + class: Buffet + attributes: + width: 4096 # Distribution network width: 128 * 32 bits + depth: 256 # 128 * 128 PEs * 32 bits * 2 inputs / width + - name: Multiplier + class: Compute + attributes: + type: mul +format: + A: + flattened: + rank-order: [K1, MK01, MK00] + K1: + format: U + MK01: + format: U + MK00: + # TODO: Support B format + format: C + pbits: 32 + B: + partitioned: + rank-order: [K1, N, K0] + K1: + format: U + N: + format: U + K0: + format: U + pbits: 32 + +bindings: + Z: + - config: Accelerator + prefix: tmp/sigma + - component: DataSRAMBanks + bindings: + - tensor: A + rank: MK00 + type: payload + evict-on: root + format: flattened + style: eager + - tensor: B + rank: K0 + type: payload + evict-on: root + format: partitioned + style: eager + - component: RegFile + bindings: + - tensor: A + rank: MK00 + format: flattened + type: payload + evict-on: MK01 + style: eager + - tensor: B + rank: K0 + format: partitioned + type: payload + evict-on: N + style: eager + - component: Multiplier + bindings: + - op: mul diff --git a/tests/integration/test_arch.yaml b/tests/integration/test_arch.yaml index 1eead4c..a29d1bb 100644 --- a/tests/integration/test_arch.yaml +++ b/tests/integration/test_arch.yaml @@ -1,22 +1,53 @@ - architecture: - subtree: - - name: System +architecture: + Config0: + - name: System + attributes: + clock_frequency: 1000000000 + + local: + - name: Memory + class: DRAM attributes: - clock_frequency: 1000000000 + datawidth: 8 + bandwidth: 128 + + subtree: + - name: PE[0..7] local: - - name: Memory - class: DRAM + - name: Registers + class: Buffet + + - name: MAC + class: compute attributes: - datawidth: 8 - bandwidth: 128 + type: mul - subtree: - - name: PE[0..7] + Config1: + - name: System + attributes: + clock_frequency: 1000000000 - local: - - name: Registers - class: Buffet + local: + - name: Memory + class: DRAM + attributes: + datawidth: 8 + bandwidth: 128 - - name: MAC - class: compute + subtree: + - name: PE[0..7] + + local: + - name: Registers + 
class: Buffet + + - name: MAC0 + class: compute + attributes: + type: mul + + - name: MAC1 + class: compute + attributes: + type: add diff --git a/tests/integration/test_bindings.yaml b/tests/integration/test_bindings.yaml index abb98c7..6e5a254 100644 --- a/tests/integration/test_bindings.yaml +++ b/tests/integration/test_bindings.yaml @@ -1,19 +1,34 @@ bindings: - - name: Memory + Z: + - config: Config0 + prefix: tmp/Z + - component: Memory bindings: - tensor: A - rank: root + format: A_default + rank: M + type: payload + - tensor: Z - rank: root + format: Z_default + rank: M + type: payload - - name: Registers + - component: Registers bindings: - tensor: A + format: A_default rank: M + type: payload + style: eager + evict-on: M + - tensor: Z + format: Z_default rank: M + type: payload + evict-on: root - - name: MAC + - component: MAC bindings: - - einsum: Z - op: add + - op: add diff --git a/tests/integration/test_integration.py b/tests/integration/test_integration.py index 2ae362f..6059872 100644 --- a/tests/integration/test_integration.py +++ b/tests/integration/test_integration.py @@ -41,4 +41,6 @@ def test_integration(): output = str(HiFiber(einsum, mapping)) hifiber = read_hifiber(filename + ".py") + if output != hifiber: + print(output) assert output == hifiber, test_name + " integration test failed!" \ No newline at end of file diff --git a/tests/ir/test_component.py b/tests/ir/test_component.py index 89eb55b..1ee82a9 100644 --- a/tests/ir/test_component.py +++ b/tests/ir/test_component.py @@ -1,95 +1,566 @@ +import pytest + from teaal.ir.component import * def test_component_get_name(): - component = Component("Test", {}, []) + component = Component("Test", 1, {}, {}) assert component.get_name() == "Test" +def test_component_get_num_instances(): + component = Component("Test", 5, {}, {}) + assert component.get_num_instances() == 5 + + def test_component_eq(): - component0 = Component("Test", {"attr0": 5}, []) - component1 = Component("Test", {"attr0": 5}, []) + component0 = Component("Test", 1, {"attr0": 5}, {}) + component1 = Component("Test", 1, {"attr0": 5}, {}) assert component0 == component1 assert component0 != "foo" +def test_component_hash(): + set_ = set() + set_.add(Component("foo", 1, {}, {"Z": [{"foo": "bar"}]})) + + assert Component("foo", 1, {}, {"Z": [{"foo": "bar"}]}) in set_ + assert "" not in set_ + + def test_component_repr(): - component = Component("Test", {"attrs0": 5}, []) - assert repr(component) == "(Component, Test, {'attrs0': 5}, {})" + component = Component("Test", 1, {"attrs0": 5}, {"Z": [{"foo": "bar"}]}) + assert repr(component) == "(Component, Test, 1, {'Z': [{'foo': 'bar'}]})" -def test_component_subclass_repr(): - bindings = [{"einsum": "Z", "op": "add"}, {"einsum": "Z", "op": "mul"}] - compute = ComputeComponent("MAC", {}, bindings) +def test_component_get_bindings(): + component = Component("Test", 1, {"attrs0": 5}, {"Z": [{"foo": "bar"}]}) - assert repr( - compute) == "(ComputeComponent, MAC, {}, {'Z': [{'op': 'add'}, {'op': 'mul'}]})" + assert component.get_bindings() == {"Z": [{"foo": "bar"}]} -def test_compute_component(): - bindings = [{"einsum": "Z", "op": "add"}, {"einsum": "Z", "op": "mul"}] - compute = ComputeComponent("MAC", {}, bindings) +def test_functional_component(): + bindings = {"Z": [{"op": "add"}], "T": [{"op": "mul"}]} + compute = FunctionalComponent("MAC", 1, {}, bindings) + + assert compute.get_bindings() == bindings + + assert repr(compute) in { + "(FunctionalComponent, MAC, 1, {'T': [{'op': 'mul'}], 'Z': [{'op': 
'add'}]})", + "(FunctionalComponent, MAC, 1, {'Z': [{'op': 'add'}], 'T': [{'op': 'mul'}]})"} + + +def test_memory_attr_errs(): + with pytest.raises(ValueError) as excinfo: + MemoryComponent("Mem", 1, {"bandwidth": "foo"}, {}) + assert str(excinfo.value) == "Bad bandwidth foo for Memory Mem" + + memory = MemoryComponent("Mem", 1, {}, {}) + with pytest.raises(ValueError) as excinfo: + memory.get_bandwidth() + assert str(excinfo.value) == "Bandwidth unspecified for component Mem" + + +def test_memory_binding_errs(): + binding = {"Z": [{"rank": "M", "type": "elem", "format": "default"}]} + with pytest.raises(ValueError) as excinfo: + MemoryComponent("Mem", 1, {"bandwidth": 256}, binding) + assert str( + excinfo.value) == "Tensor not specified for Einsum Z in binding to Mem" + + binding = {"Z": [{"tensor": "A", "type": "elem", "format": "default"}]} + with pytest.raises(ValueError) as excinfo: + MemoryComponent("Mem", 1, {"bandwidth": 256}, binding) + assert str( + excinfo.value) == "Rank not specified for tensor A in Einsum Z in binding to Mem" + + binding = {"Z": [{"tensor": "A", "rank": "M", "format": "default"}]} + with pytest.raises(ValueError) as excinfo: + MemoryComponent("Mem", 1, {"bandwidth": 256}, binding) + assert str( + excinfo.value) == "Type not specified for tensor A in Einsum Z in binding to Mem" - assert compute.get_bindings("Z") == [{"op": "add"}, {"op": "mul"}] - assert compute.get_bindings("T") == [] + binding = {"Z": [{"tensor": "A", "rank": "M", + "type": "foo", "format": "default"}]} + with pytest.raises(ValueError) as excinfo: + MemoryComponent("Mem", 1, {"bandwidth": 256}, binding) + assert str( + excinfo.value) in { + "Type foo for Mem on tensor A in Einsum Z not one of {'coord', 'elem', 'payload'}", + "Type foo for Mem on tensor A in Einsum Z not one of {'coord', 'payload', 'elem'}", + "Type foo for Mem on tensor A in Einsum Z not one of {'payload', 'coord', 'elem'}", + "Type foo for Mem on tensor A in Einsum Z not one of {'payload', 'elem', 'coord'}", + "Type foo for Mem on tensor A in Einsum Z not one of {'elem', 'coord', 'payload'}", + "Type foo for Mem on tensor A in Einsum Z not one of {'elem', 'payload', 'coord'}"} + + binding = {"Z": [{"tensor": "A", "rank": "M", "type": "elem"}]} + with pytest.raises(ValueError) as excinfo: + MemoryComponent("Mem", 1, {"bandwidth": 256}, binding) + assert str( + excinfo.value) == "Format not specified for tensor A in Einsum Z in binding to Mem" + + bindings = {"Z": [{"tensor": "A", "rank": "M", "type": "payload", "format": "default"}, + {"tensor": "A", "rank": "M", "type": "payload", "format": "default"}]} + memory = MemoryComponent("Memory", 1, {"bandwidth": 256}, bindings) + with pytest.raises(ValueError) as excinfo: + memory.get_binding("Z", "A", "M", "payload", "default") + assert str( + excinfo.value) == "Multiple bindings for [('einsum', 'Z'), ('tensor', 'A'), ('rank', 'M'), ('type', 'payload'), ('format', 'default')]" def test_memory_component(): - memory = MemoryComponent("Memory", {}, [{"tensor": "A", "rank": "M"}]) + bindings = {"Z": [{"tensor": "A", "rank": "M", + "type": "payload", "format": "default"}]} + memory = MemoryComponent("Memory", 1, {"bandwidth": 256}, bindings) + + assert memory.get_bandwidth() == 256 + + assert memory.get_binding( + "Z", + "A", + "M", + "payload", + "default") == bindings["Z"][0] + assert memory.get_binding("Z", "B", "M", "payload", "default") is None + assert memory.get_binding("T", "A", "M", "payload", "default") is None + + assert repr( + memory) == "(MemoryComponent, Memory, 1, 
{'Z': [{'tensor': 'A', 'rank': 'M', 'type': 'payload', 'format': 'default'}]}, 256)" + + +def test_buffer_attr_errs(): + buffer_ = BufferComponent("Buf", 1, {"width": 8}, {}) + with pytest.raises(ValueError) as excinfo: + buffer_.get_depth() + assert str(excinfo.value) == "Depth unspecified for component Buf" - assert memory.get_binding("A") == "M" - assert memory.get_binding("B") is None + with pytest.raises(ValueError) as excinfo: + BufferComponent("Buf", 1, {"depth": "foo", "width": 8}, {}) + assert str(excinfo.value) == "Bad depth foo for Buffer Buf" - assert repr(memory) == "(MemoryComponent, Memory, {}, {'A': 'M'})" + buffer_ = BufferComponent("Buf", 1, {"depth": 256}, {}) + with pytest.raises(ValueError) as excinfo: + buffer_.get_width() + assert str(excinfo.value) == "Width unspecified for component Buf" + + with pytest.raises(ValueError) as excinfo: + BufferComponent("Buf", 1, {"depth": 256, "width": "foo"}, {}) + assert str(excinfo.value) == "Bad width foo for Buffer Buf" + + +def test_buffer_component(): + attrs = {"width": 8, "depth": 3 * 2 ** 20} + buffer_ = BufferComponent("Buf", 1, attrs, {}) + + assert buffer_.get_width() == 8 + assert buffer_.get_depth() == 3 * 2 ** 20 + + assert repr(buffer_) == "(BufferComponent, Buf, 1, {}, None, 3145728, 8)" + + +def test_buffet_binding_errs(): + attrs = {"width": 8, "depth": 3 * 2 ** 20} + + bindings = {"Z": [{"tensor": "A", "rank": "M", + "type": "payload", "format": "default", "style": "foo"}]} + with pytest.raises(ValueError) as excinfo: + BuffetComponent("LLB", 1, attrs, bindings) + assert str( + excinfo.value) == "Evict-on not specified for tensor A in Einsum Z in binding to LLB" + + bindings = {"Z": [{"tensor": "A", + "rank": "M", + "type": "payload", + "format": "default", + "style": "foo", + "evict-on": "root"}]} + with pytest.raises(ValueError) as excinfo: + BuffetComponent("LLB", 1, attrs, bindings) + assert str( + excinfo.value) in { + "Style foo for LLB on tensor A in Einsum Z not one of {'eager', 'lazy'}", + "Style foo for LLB on tensor A in Einsum Z not one of {'lazy', 'eager'}"} def test_buffet_component(): - bindings = [{"tensor": "A", "rank": "M"}] - buffet = BuffetComponent("LLB", {}, bindings) + attrs = {"width": 8, "depth": 3 * 2 ** 20} + bindings = {"Z": [{"tensor": "A", + "rank": "M", + "type": "payload", + "format": "default", + "style": "eager", + "evict-on": "root"}]} + buffet = BuffetComponent("LLB", 1, attrs, bindings) + + assert buffet.get_binding( + "Z", + "A", + "M", + "payload", + "default") == bindings["Z"][0] + + bindings = {"Z": [{"tensor": "A", + "rank": "M", + "type": "payload", + "format": "default", + "evict-on": "root"}]} + buffet = BuffetComponent("LLB", 1, attrs, bindings) + + bindings_corr = {"tensor": "A", + "rank": "M", + "type": "payload", + "format": "default", + "style": "lazy", + "evict-on": "root"} + + assert buffet.get_binding( + "Z", + "A", + "M", + "payload", + "default") == bindings_corr + + +def test_buffet_component_expand_eager(): + attrs = {"width": 8, "depth": 3 * 2 ** 20} + bindings = {"Z": [{"tensor": "A", + "rank": "M", + "type": "coord", + "format": "default", + "style": "eager", + "evict-on": "N"}]} + buffet = BuffetComponent("LLB", 1, attrs, bindings) + + ranks = ["J", "M", "K", "O"] + types = [["elem"], ["coord", "payload"], ["coord", "payload"], ["elem"]] + buffet.expand_eager("Z", "A", "default", ranks, types) + + expanded_bindings = {"Z": [{"tensor": "A", + "rank": "M", + "type": "coord", + "format": "default", + "style": "eager", + "evict-on": "N", + "root": "M"}, + 
{"tensor": "A", + "rank": "M", + "type": "payload", + "format": "default", + "style": "eager", + "evict-on": "N", + "root": "M"}, + {"tensor": "A", + "rank": "K", + "type": "coord", + "format": "default", + "style": "eager", + "evict-on": "N", + "root": "M"}, + {"tensor": "A", + "rank": "K", + "type": "payload", + "format": "default", + "style": "eager", + "evict-on": "N", + "root": "M"}, + {"tensor": "A", + "rank": "O", + "type": "elem", + "format": "default", + "style": "eager", + "evict-on": "N", + "root": "M"}]} + assert buffet.get_bindings() == expanded_bindings + + buffet.expand_eager("Z", "B", "default", ["N", "K"], [[], []]) + assert buffet.get_bindings() == expanded_bindings + + ranks = ["J", "M", "K", "O"] + types = [["elem"], ["coord", "payload"], ["coord", "payload"], ["elem"]] + buffet.expand_eager("Z", "A", "foo", ranks, types) + + assert buffet.get_bindings() == expanded_bindings def test_cache_component(): attrs = {"width": 8, "depth": 3 * 2 ** 20} - bindings = [{"tensor": "A", "rank": "M"}] - cache = CacheComponent("FiberCache", attrs, bindings) + bindings = {"Z": [{"tensor": "A", "rank": "M", + "type": "payload", "format": "default"}]} + cache = CacheComponent("FiberCache", 1, attrs, bindings) + + +def test_compute_attr_errs(): + with pytest.raises(ValueError) as excinfo: + ComputeComponent("FU", 1, {}, []) + assert str(excinfo.value) == "Type unspecified for component FU" + + with pytest.raises(ValueError) as excinfo: + ComputeComponent("FU", 1, {"type": None}, []) + assert str(excinfo.value) == "Bad type None for Compute FU" + + with pytest.raises(ValueError) as excinfo: + ComputeComponent("FU", 1, {"type": "foo"}, []) + assert str( + excinfo.value) in { + "foo is not a valid value for attribute type of class Compute. Choose one of {'mul', 'add'}", + "foo is not a valid value for attribute type of class Compute. 
Choose one of {'add', 'mul'}"} - assert cache.get_depth() == 3 * 2 ** 20 - assert cache.get_width() == 8 + +def test_compute_component(): + attrs = {"type": "mul"} + compute = ComputeComponent("FU", 1, attrs, {}) + + assert compute.get_type() == "mul" + + assert repr(compute) == "(ComputeComponent, FU, 1, {}, mul)" def test_dram_component(): - bindings = [{"tensor": "A", "rank": "M"}] - dram = DRAMComponent("DRAM", {"datawidth": 8, "bandwidth": 128}, bindings) + bindings = {"Z": [{"tensor": "A", "rank": "M", + "type": "payload", "format": "default"}]} + dram = DRAMComponent( + "DRAM", 1, { + "datawidth": 8, "bandwidth": 128}, bindings) + + +def test_intersector_component_binding_errs(): + bindings = {"Z": [{"foo": "bar"}]} + with pytest.raises(ValueError) as excinfo: + IntersectorComponent("Intersection", 1, {}, bindings) + assert str( + excinfo.value) == "Rank unspecified in Einsum Z in binding to Intersection" + + +def test_intersector_component(): + bindings = {"Z": [{"rank": "K"}]} + intersector = IntersectorComponent("Intersection", 1, {}, bindings) - assert dram.get_bandwidth() == 128 - assert dram.get_datawidth() == 8 + +def test_leader_follower_component_binding_errs(): + bindings = {"Z": [{"rank": "K"}]} + with pytest.raises(ValueError) as excinfo: + LeaderFollowerComponent("Intersection", 1, {}, bindings) + assert str( + excinfo.value) == "Leader unspecified in Einsum Z in binding to Intersection" def test_leader_follower_component(): - bindings = [{"einsum": "Z", "rank": "K"}] - leader_follower = LeaderFollowerComponent("Intersection", {}, bindings) + bindings = {"Z": [{"rank": "K", "leader": "A"}]} + leader_follower = LeaderFollowerComponent("Intersection", 1, {}, bindings) + + +def test_merger_attr_errs(): + attrs = { + "comparator_radix": 32, + "outputs": 2, + "order": "opt", + "reduce": False} + with pytest.raises(ValueError) as excinfo: + MergerComponent("Merger0", 1, attrs, []) + assert str(excinfo.value) == "Inputs unspecified for component Merger0" + + attrs = { + "inputs": "foo", + "comparator_radix": 32, + "outputs": 2, + "order": "opt", + "reduce": False} + with pytest.raises(ValueError) as excinfo: + MergerComponent("Merger1", 1, attrs, []) + assert str(excinfo.value) == "Bad inputs foo for Merger Merger1" + + attrs = {"inputs": 64, "outputs": 2, "order": "opt", "reduce": False} + with pytest.raises(ValueError) as excinfo: + MergerComponent("Merger0", 1, attrs, []) + assert str( + excinfo.value) == "Comparator radix unspecified for component Merger0" + + attrs = { + "inputs": 64, + "comparator_radix": "foo", + "outputs": 2, + "order": "opt", + "reduce": False} + with pytest.raises(ValueError) as excinfo: + MergerComponent("Merger1", 1, attrs, []) + assert str(excinfo.value) == "Bad comparator_radix foo for Merger Merger1" + + attrs = { + "inputs": 64, + "comparator_radix": 32, + "outputs": "foo", + "order": "opt", + "reduce": False} + with pytest.raises(ValueError) as excinfo: + MergerComponent("Merger1", 1, attrs, []) + assert str(excinfo.value) == "Bad outputs foo for Merger Merger1" + + attrs = { + "inputs": 64, + "comparator_radix": 32, + "outputs": 2, + "order": None, + "reduce": False} + with pytest.raises(ValueError) as excinfo: + MergerComponent("Merger1", 1, attrs, []) + assert str(excinfo.value) == "Bad order None for Merger Merger1" + + attrs = { + "inputs": 64, + "comparator_radix": 32, + "outputs": 2, + "order": "foo", + "reduce": False} + with pytest.raises(ValueError) as excinfo: + MergerComponent("Merger1", 1, attrs, []) + assert str( + excinfo.value) 
in { + "foo is not a valid value for attribute order of class Merger. Choose one of {'opt', 'fifo'}", + "foo is not a valid value for attribute order of class Merger. Choose one of {'fifo', 'opt'}"} + + attrs = { + "inputs": 64, + "comparator_radix": 32, + "outputs": 2, + "order": "opt", + "reduce": 2} + with pytest.raises(ValueError) as excinfo: + MergerComponent("Merger1", 1, attrs, []) + assert str(excinfo.value) == "Bad reduce 2 for Merger Merger1" + + attrs = { + "inputs": 64, + "comparator_radix": 32, + "outputs": 2, + "order": "opt", + "reduce": True} + with pytest.raises(NotImplementedError) as excinfo: + MergerComponent("Merger1", 1, attrs, []) + assert str(excinfo.value) == "Concurrent merge and reduction not supported" + + +def test_merger_binding_errs(): + attrs = { + "inputs": 64, + "comparator_radix": 32, + "outputs": 2, + "order": "opt", + "reduce": False} + binding = { + "Z": [{"init-ranks": ["M", "K", "N"], "final-ranks": ["M", "N", "K"]}]} + with pytest.raises(ValueError) as excinfo: + MergerComponent("Merger1", 1, attrs, binding) + assert str( + excinfo.value) == "Tensor not specified for Einsum Z in binding to Merger1" + + binding = {"Z": [{"tensor": "T", "final-ranks": ["M", "N", "K"]}]} + with pytest.raises(ValueError) as excinfo: + MergerComponent("Merger1", 1, attrs, binding) + assert str( + excinfo.value) == "Initial ranks not specified for tensor T in Einsum Z in binding to Merger1" + + binding = {"Z": [{"tensor": "T", "init-ranks": ["M", "N", "K"]}]} + with pytest.raises(ValueError) as excinfo: + MergerComponent("Merger1", 1, attrs, binding) + assert str( + excinfo.value) == "Final ranks not specified for tensor T in Einsum Z in binding to Merger1" + + attrs = { + "inputs": 64, + "comparator_radix": 32, + "outputs": 2, + "order": "opt", + "reduce": False} + binding = {"Z": [{"tensor": "T", + "init-ranks": ["M", + "K", + "N"], + "final-ranks": ["M", + "N", + "K"]}, + {"tensor": "T", + "init-ranks": ["K", + "M", + "N"], + "final-ranks": ["M", + "N", + "K"]}]} + merger = MergerComponent("Merger1", 1, attrs, binding) + with pytest.raises(ValueError) as excinfo: + merger.get_init_ranks("Z", "T", ["M", "N", "K"]) + assert str( + excinfo.value) == "Merge binding from both ['M', 'K', 'N'] and ['K', 'M', 'N'] to ['M', 'N', 'K']" def test_merger_component(): - attrs = {"radix": 64, "next_latency": 1} - binding = [{"tensor": "T", "init_ranks": ["M", "K", "N"], "swap_depth": 1}] - merger = MergerComponent("HighRadixMerger", attrs, binding) - - bindings = [{"tensor": "T", "init_ranks": [ - "M", "K", "N"], "final_ranks": ["M", "N", "K"], "swap_depth": 1}] + attrs = { + "inputs": 64, + "comparator_radix": 32, + "outputs": 2, + "order": "opt", + "reduce": False} + binding = {"Z": [{"tensor": "T", "init-ranks": [ + "M", "K", "N"], "final-ranks": ["M", "N", "K"]}]} + merger = MergerComponent("Merger0", 1, attrs, binding) + + bindings = {"Z": [{"tensor": "T", "init-ranks": [ + "M", "K", "N"], "final-ranks": ["M", "N", "K"]}]} assert merger.get_bindings() == bindings - assert merger.get_next_latency() == 1 - assert merger.get_radix() == 64 + assert merger.get_inputs() == 64 + assert merger.get_comparator_radix() == 32 + assert merger.get_outputs() == 2 + assert merger.get_order() == "opt" + assert merger.get_reduce() == False + + assert merger.get_init_ranks("Z", "T", ["M", "N", "K"]) == ["M", "K", "N"] + assert merger.get_init_ranks("T", "T", ["M", "K", "N"]) is None + assert merger.get_init_ranks("Z", "A", ["M", "K"]) is None + + assert repr( + merger) == "(MergerComponent, 
Merger0, 1, {'Z': [{'tensor': 'T', 'init-ranks': ['M', 'K', 'N'], 'final-ranks': ['M', 'N', 'K']}]}, 64, 32, 2, opt, False)"
+
+    attrs = {"inputs": 200, "comparator_radix": 2}
+    merger = MergerComponent("Merger1", 1, attrs, binding)
+
+    assert merger.get_inputs() == 200
+    assert merger.get_comparator_radix() == 2
+    assert merger.get_outputs() == 1
+    assert merger.get_order() == "fifo"
+    assert merger.get_reduce() == False
+
 
-    merger = MergerComponent(
-        "Sort", {"radix": "inf", "next_latency": "N"}, binding)
+def test_sequencer_component_no_num_ranks():
+    with pytest.raises(ValueError) as excinfo:
+        SequencerComponent("Seq", 1, {}, {})
 
-    assert merger.get_next_latency() == "N"
-    assert merger.get_radix() == float("inf")
+    assert str(excinfo.value) == "Number of ranks unspecified for sequencer Seq"
+
+
+def test_sequencer_component_too_many_ranks():
+    attrs = {"num_ranks": 1}
+    bindings = {"Z": [{"rank": "K"}, {"rank": "M"}]}
+    with pytest.raises(ValueError) as excinfo:
+        SequencerComponent("Seq", 1, attrs, bindings)
+
+    assert str(
+        excinfo.value) == "Too many ranks bound to sequencer Seq during Einsum Z"
+
+
+def test_sequencer_component():
+    attrs = {"num_ranks": 2}
+    bindings = {"Z": [{"rank": "K"}, {"rank": "M"}]}
+    sequencer = SequencerComponent("Seq", 1, attrs, bindings)
+
+    assert sequencer.get_ranks("Z") == ["K", "M"]
 
 
 def test_skip_ahead_component():
-    bindings = [{"einsum": "Z", "rank": "K2"}]
-    skip_ahead = SkipAheadComponent("K2Intersection", {}, bindings)
+    bindings = {"Z": [{"rank": "K2"}]}
+    skip_ahead = SkipAheadComponent("K2Intersection", 1, {}, bindings)
+
+
+def test_two_finger_intersector():
+    bindings = {"Z": [{"rank": "K2"}]}
+    two_finger = TwoFingerComponent("K2Intersection", 1, {}, bindings)
diff --git a/tests/ir/test_equation.py b/tests/ir/test_equation.py
index a3961fa..d0801c2 100644
--- a/tests/ir/test_equation.py
+++ b/tests/ir/test_equation.py
@@ -70,13 +70,12 @@ def test_repeated_tensor():
 def test_get_factor_order():
     equation = create_complex()
     assert equation.get_factor_order() == {
-        "A": (
-            0, 1), "C": (
-            0, 2), "d": (
-            0, 0), "B": (
-            1, 0), "T": (
-            1, 1), "e": (
-            1, 2)}
+        "A": (0, 1),
+        "C": (0, 2),
+        "d": (0, 0),
+        "B": (1, 0),
+        "T": (1, 1),
+        "e": (1, 2)}
 
 
 def test_get_in_update():
@@ -85,6 +84,19 @@ def test_get_in_update():
         False, False, True]]
 
 
+def test_get_iter():
+    equation = create_complex()
+    A = equation.get_tensor("A")
+    B = equation.get_tensor("B")
+    C = equation.get_tensor("C")
+    T = equation.get_tensor("T")
+    Z = equation.get_tensor("Z")
+
+    assert equation.get_iter([A, B]) == (None, [[A], [B]])
+    assert equation.get_iter([C, T, B, Z]) == (Z, [[C], [B, T]])
+    assert equation.get_iter([A, C]) == (None, [[A, C]])
+
+
 def test_get_output():
     equation = create_matmul()
     tensor = equation.get_output()
diff --git a/tests/ir/test_flow_graph.py b/tests/ir/test_flow_graph.py
index e80d647..c0295a2 100644
--- a/tests/ir/test_flow_graph.py
+++ b/tests/ir/test_flow_graph.py
@@ -10,6 +10,27 @@ from teaal.parse import *
 
 
+def print_errs(graph, corr):
+    """
+    Debugging helper: print the edges by which the built and expected flow graphs differ
+    """
+    print("In Graph")
+    for edge in graph.edges:
+        if edge not in corr.edges:
+            print("    corr.add_edge", end="(")
+            print(type(edge[0]).__name__, end="(")
+            print(str(list(edge[0]._Node__key()))[1:-1], end="), ")
+            print(type(edge[1]).__name__, end="(")
+            print(str(list(edge[1]._Node__key()))[1:-1], end="))\n")
+
+    print("In Corr")
+    for edge in corr.edges:
+        if edge not in graph.edges:
+            print(edge)
+
+    print("---")
+
+
 def build_program_no_loops():
     einsum = 
Einsum.from_file("tests/integration/test_translate_no_loops.yaml") mapping = Mapping.from_file( @@ -65,7 +83,41 @@ def build_gamma(): arch = Architecture.from_str(yaml) bindings = Bindings.from_str(yaml) - hardware = Hardware(arch, bindings) + hardware = Hardware(arch, bindings, program) + + format_ = Format.from_str(yaml) + + return program, hardware, format_ + + +def build_extensor(): + with open("tests/integration/extensor.yaml", "r") as f: + yaml = f.read() + + einsum = Einsum.from_str(yaml) + mapping = Mapping.from_str(yaml) + program = Program(einsum, mapping) + + arch = Architecture.from_str(yaml) + bindings = Bindings.from_str(yaml) + hardware = Hardware(arch, bindings, program) + + format_ = Format.from_str(yaml) + + return program, hardware, format_ + + +def build_extensor_energy(): + with open("tests/integration/extensor-energy.yaml", "r") as f: + yaml = f.read() + + einsum = Einsum.from_str(yaml) + mapping = Mapping.from_str(yaml) + program = Program(einsum, mapping) + + arch = Architecture.from_str(yaml) + bindings = Bindings.from_str(yaml) + hardware = Hardware(arch, bindings, program) format_ = Format.from_str(yaml) @@ -83,6 +135,8 @@ def test_graph_no_loops(): corr.add_edge(GetRootNode("A", []), OtherNode("Body")) corr.add_edge(OtherNode("Body"), OtherNode("Footer")) + print_errs(graph, corr) + assert nx.is_isomorphic(graph, corr) @@ -102,7 +156,10 @@ def test_graph(): corr.add_edge(LoopNode("N"), LoopNode("K")) corr.add_edge(LoopNode("N"), OtherNode("Body")) corr.add_edge(LoopNode("K"), OtherNode("Body")) - corr.add_edge(OtherNode("Body"), OtherNode("Footer")) + corr.add_edge(OtherNode("Body"), EndLoopNode("K")) + corr.add_edge(EndLoopNode("K"), EndLoopNode("N")) + corr.add_edge(EndLoopNode("N"), EndLoopNode("M")) + corr.add_edge(EndLoopNode("M"), OtherNode("Footer")) corr.add_edge(SwizzleNode( "A", ["M", "K"], "loop-order"), GetRootNode("A", ["M", "K"])) corr.add_edge(SwizzleNode( @@ -112,6 +169,8 @@ def test_graph(): corr.add_edge(SwizzleNode( "B", ["N", "K"], "loop-order"), OtherNode("Graphics")) + print_errs(graph, corr) + assert nx.is_isomorphic(graph, corr) @@ -135,7 +194,10 @@ def test_graph_loop_order(): corr.add_edge(LoopNode("M"), LoopNode("N")) corr.add_edge(LoopNode("M"), OtherNode("Body")) corr.add_edge(LoopNode("N"), OtherNode("Body")) - corr.add_edge(OtherNode("Body"), OtherNode("Footer")) + corr.add_edge(OtherNode("Body"), EndLoopNode("N")) + corr.add_edge(EndLoopNode("N"), EndLoopNode("M")) + corr.add_edge(EndLoopNode("M"), EndLoopNode("K")) + corr.add_edge(EndLoopNode("K"), OtherNode("Footer")) corr.add_edge(SwizzleNode( "A", ["K", "M"], "loop-order"), GetRootNode("A", ["K", "M"])) corr.add_edge(SwizzleNode( @@ -145,6 +207,8 @@ def test_graph_loop_order(): corr.add_edge(SwizzleNode( "B", ["K", "N"], "loop-order"), OtherNode("Graphics")) + print_errs(graph, corr) + assert nx.is_isomorphic(graph, corr) @@ -175,7 +239,13 @@ def test_graph_static_parts(): corr.add_edge(OtherNode("Graphics"), LoopNode("K2")) corr.add_edge(OtherNode("Output"), OtherNode("Graphics")) corr.add_edge(OtherNode("Output"), GetRootNode("Z", ['M', 'N1', 'N0'])) - corr.add_edge(OtherNode("Body"), OtherNode("Footer")) + corr.add_edge(OtherNode("Body"), EndLoopNode("K0")) + corr.add_edge(EndLoopNode("K0"), EndLoopNode("N0")) + corr.add_edge(EndLoopNode("N0"), EndLoopNode("K1")) + corr.add_edge(EndLoopNode("K1"), EndLoopNode("N1")) + corr.add_edge(EndLoopNode("N1"), EndLoopNode("M")) + corr.add_edge(EndLoopNode("M"), EndLoopNode("K2")) + corr.add_edge(EndLoopNode("K2"), 
OtherNode("Footer")) corr.add_edge(GetRootNode("Z", ['M', 'N1', 'N0']), LoopNode("M")) corr.add_edge(PartNode("A", ('K',)), OtherNode("Graphics")) corr.add_edge( @@ -220,6 +290,9 @@ def test_graph_static_parts(): SwizzleNode( "B", [ "K2", "N1", "K1", "N0", "K0"], "loop-order"), OtherNode("Graphics")) + + print_errs(graph, corr) + assert nx.is_isomorphic(graph, corr) @@ -255,7 +328,13 @@ def test_graph_dyn_parts(): corr.add_edge(OtherNode("Graphics"), LoopNode("K2")) corr.add_edge(OtherNode("Output"), OtherNode("Graphics")) corr.add_edge(OtherNode("Output"), GetRootNode("Z", ['M', 'N1', 'N0'])) - corr.add_edge(OtherNode("Body"), OtherNode("Footer")) + corr.add_edge(OtherNode("Body"), EndLoopNode("K0")) + corr.add_edge(EndLoopNode("K0"), EndLoopNode("N0")) + corr.add_edge(EndLoopNode("N0"), EndLoopNode("K1")) + corr.add_edge(EndLoopNode("K1"), EndLoopNode("N1")) + corr.add_edge(EndLoopNode("N1"), EndLoopNode("M")) + corr.add_edge(EndLoopNode("M"), EndLoopNode("K2")) + corr.add_edge(EndLoopNode("K2"), OtherNode("Footer")) corr.add_edge(GetRootNode("Z", ['M', 'N1', 'N0']), LoopNode("M")) corr.add_edge( PartNode( @@ -344,6 +423,8 @@ def test_graph_dyn_parts(): "B", [ "K1", "N0", "K0"])) + print_errs(graph, corr) + assert nx.is_isomorphic(graph, corr) @@ -376,7 +457,13 @@ def test_graph_mixed_parts(): corr.add_edge(OtherNode("Graphics"), LoopNode("K3")) corr.add_edge(OtherNode("Output"), OtherNode("Graphics")) corr.add_edge(OtherNode("Output"), GetRootNode("Z", ["M", "N"])) - corr.add_edge(OtherNode("Body"), OtherNode("Footer")) + corr.add_edge(OtherNode("Body"), EndLoopNode("K0")) + corr.add_edge(EndLoopNode("K0"), EndLoopNode("N")) + corr.add_edge(EndLoopNode("N"), EndLoopNode("K1")) + corr.add_edge(EndLoopNode("K1"), EndLoopNode("K2")) + corr.add_edge(EndLoopNode("K2"), EndLoopNode("M")) + corr.add_edge(EndLoopNode("M"), EndLoopNode("K3")) + corr.add_edge(EndLoopNode("K3"), OtherNode("Footer")) corr.add_edge(GetRootNode("Z", ["M", "N"]), LoopNode("M")) corr.add_edge(PartNode("A", ("K",)), OtherNode("Graphics")) corr.add_edge(PartNode("A", ("K",)), PartNode("A", ("K2I",))) @@ -471,6 +558,8 @@ def test_graph_mixed_parts(): "B", [ "K1", "N", "K0"])) + print_errs(graph, corr) + assert nx.is_isomorphic(graph, corr) @@ -503,7 +592,11 @@ def test_graph_static_flattening(): corr.add_edge(OtherNode("Graphics"), LoopNode("K1")) corr.add_edge(OtherNode("Output"), OtherNode("Graphics")) corr.add_edge(OtherNode("Output"), GetRootNode("Z", ['N', 'M'])) - corr.add_edge(OtherNode("Body"), OtherNode("Footer")) + corr.add_edge(OtherNode("Body"), EndLoopNode("MK00")) + corr.add_edge(EndLoopNode("MK00"), EndLoopNode("N")) + corr.add_edge(EndLoopNode("N"), EndLoopNode("MK01")) + corr.add_edge(EndLoopNode("MK01"), EndLoopNode("K1")) + corr.add_edge(EndLoopNode("K1"), OtherNode("Footer")) corr.add_edge(GetRootNode("Z", ['N', 'M']), LoopNode("N")) corr.add_edge(PartNode("A", ('K',)), OtherNode("Graphics")) corr.add_edge( @@ -556,6 +649,8 @@ def test_graph_static_flattening(): corr.add_edge(GetPayloadNode("Z", ['M']), OtherNode("Body")) corr.add_edge(GetPayloadNode("B", ['K0']), OtherNode("Body")) + print_errs(graph, corr) + assert nx.is_isomorphic(graph, corr) @@ -591,7 +686,12 @@ def test_graph_dyn_flattening(): corr.add_edge(OtherNode('Graphics'), LoopNode('M1')) corr.add_edge(OtherNode('Output'), OtherNode('Graphics')) corr.add_edge(OtherNode('Output'), GetRootNode('Z', ['M1', 'N', 'M0'])) - corr.add_edge(OtherNode('Body'), OtherNode('Footer')) + corr.add_edge(OtherNode("Body"), EndLoopNode("M0K00")) + 
corr.add_edge(EndLoopNode("M0K00"), EndLoopNode("N")) + corr.add_edge(EndLoopNode("N"), EndLoopNode("M0K01")) + corr.add_edge(EndLoopNode("M0K01"), EndLoopNode("K1")) + corr.add_edge(EndLoopNode("K1"), EndLoopNode("M1")) + corr.add_edge(EndLoopNode("M1"), OtherNode("Footer")) corr.add_edge(GetRootNode('Z', ['M1', 'N', 'M0']), LoopNode('M1')) corr.add_edge( PartNode( @@ -666,6 +766,8 @@ def test_graph_dyn_flattening(): corr.add_edge(GetPayloadNode('Z', ['M0']), OtherNode('Body')) corr.add_edge(GetPayloadNode('B', ['K0']), OtherNode('Body')) + print_errs(graph, corr) + assert nx.is_isomorphic(graph, corr) @@ -684,7 +786,9 @@ def test_graph_conv(): corr.add_edge(OtherNode("Graphics"), LoopNode("W")) corr.add_edge(OtherNode("Output"), OtherNode("Graphics")) corr.add_edge(OtherNode("Output"), GetRootNode("O", ['Q'])) - corr.add_edge(OtherNode("Body"), OtherNode("Footer")) + corr.add_edge(OtherNode("Body"), EndLoopNode("Q")) + corr.add_edge(EndLoopNode("Q"), EndLoopNode("W")) + corr.add_edge(EndLoopNode("W"), OtherNode("Footer")) corr.add_edge(GetRootNode("O", ['Q']), LoopNode("Q")) corr.add_edge(GetRootNode("I", ['W']), LoopNode("W")) corr.add_edge(GetRootNode("F", ['S']), LoopNode("Q")) @@ -707,6 +811,8 @@ def test_graph_conv(): ["S"])) corr.add_edge(SwizzleNode("F", ["S"], "loop-order"), OtherNode("Graphics")) + print_errs(graph, corr) + assert nx.is_isomorphic(graph, corr) @@ -734,7 +840,11 @@ def test_graph_conv_part(): corr.add_edge(OtherNode("Graphics"), LoopNode("Q2")) corr.add_edge(OtherNode("Output"), OtherNode("Graphics")) corr.add_edge(OtherNode("Output"), GetRootNode("O", ["Q2", "Q1", "Q0"])) - corr.add_edge(OtherNode("Body"), OtherNode("Footer")) + corr.add_edge(OtherNode("Body"), EndLoopNode("Q0")) + corr.add_edge(EndLoopNode("Q0"), EndLoopNode("S")) + corr.add_edge(EndLoopNode("S"), EndLoopNode("Q1")) + corr.add_edge(EndLoopNode("Q1"), EndLoopNode("Q2")) + corr.add_edge(EndLoopNode("Q2"), OtherNode("Footer")) corr.add_edge(GetRootNode("O", ["Q2", "Q1", "Q0"]), LoopNode("Q2")) corr.add_edge(PartNode("I", ("W",)), OtherNode("Graphics")) corr.add_edge(PartNode("I", ("W",)), PartNode("I", ("W1I",))) @@ -786,10 +896,67 @@ def test_graph_conv_part(): "I", [ "W1", "W0"])) + print_errs(graph, corr) + + assert nx.is_isomorphic(graph, corr) + + +def test_graph_metrics_no_loops(): + yaml = """ + einsum: + declaration: + Z: [] + expressions: + - Z[] = a + architecture: + accel: + - name: empty + bindings: + Z: + - config: accel + prefix: tmp/Z + format: + Z: + default: + rank-order: [] + """ + einsum = Einsum.from_str(yaml) + mapping = Mapping.from_str(yaml) + program = Program(einsum, mapping) + + arch = Architecture.from_str(yaml) + bindings = Bindings.from_str(yaml) + hardware = Hardware(arch, bindings, program) + + format_ = Format.from_str(yaml) + + program.add_einsum(0) + metrics = Metrics(program, hardware, format_) + graph = FlowGraph(program, metrics, []).get_graph() + + corr = nx.DiGraph() + + corr.add_edge(OtherNode("Body"), OtherNode("Footer")) + corr.add_edge(OtherNode("Body"), MetricsNode("End")) + corr.add_edge(OtherNode("Graphics"), OtherNode("Body")) + corr.add_edge(OtherNode("Graphics"), MetricsNode("Start")) + corr.add_edge(OtherNode("Output"), OtherNode("Graphics")) + corr.add_edge(OtherNode("Output"), GetRootNode("Z", [])) + corr.add_edge(OtherNode("Footer"), MetricsNode("Dump")) + corr.add_edge(MetricsNode("Start"), OtherNode("Body")) + corr.add_edge(MetricsNode("End"), OtherNode("Footer")) + corr.add_edge(GetRootNode("Z", []), OtherNode("Body")) + 
corr.add_edge(OtherNode('Graphics'), MetricsNode('Body')) + corr.add_edge(MetricsNode('Start'), MetricsNode('Body')) + corr.add_edge(MetricsNode('Body'), OtherNode('Body')) + corr.add_edge(MetricsNode('Body'), MetricsNode('End')) + + print_errs(graph, corr) + assert nx.is_isomorphic(graph, corr) -def test_graph_metrics(): +def test_graph_metrics_T(): program, hardware, format_ = build_gamma() program.add_einsum(0) metrics = Metrics(program, hardware, format_) @@ -801,29 +968,590 @@ def test_graph_metrics(): corr.add_edge(LoopNode("K"), LoopNode("N")) corr.add_edge(LoopNode("K"), OtherNode("Body")) corr.add_edge(LoopNode("N"), OtherNode("Body")) + corr.add_edge(OtherNode("Body"), EndLoopNode("N")) + corr.add_edge(EndLoopNode("N"), EndLoopNode("K")) + corr.add_edge(EndLoopNode("K"), EndLoopNode("M")) + corr.add_edge(EndLoopNode("M"), OtherNode("Footer")) corr.add_edge(OtherNode("Graphics"), LoopNode("M")) corr.add_edge(OtherNode("Graphics"), MetricsNode("Start")) corr.add_edge(OtherNode("Output"), OtherNode("Graphics")) - corr.add_edge(OtherNode("Output"), GetRootNode("T", ["M", "K", "N"])) - corr.add_edge(OtherNode("Body"), OtherNode("Footer")) - corr.add_edge(OtherNode("Body"), MetricsNode("End")) + corr.add_edge(OtherNode("Output"), GetRootNode("T", ['M', 'K', 'N'])) corr.add_edge(OtherNode("Footer"), MetricsNode("Dump")) corr.add_edge(MetricsNode("Start"), LoopNode("M")) corr.add_edge(MetricsNode("End"), OtherNode("Footer")) - corr.add_edge(GetRootNode("T", ["M", "K", "N"]), LoopNode("M")) + corr.add_edge(GetRootNode("T", ['M', 'K', 'N']), LoopNode("M")) + corr.add_edge( + SwizzleNode("A", ['M', 'K'], "loop-order"), + GetRootNode("A", ['M', 'K'])) + corr.add_edge( + SwizzleNode("A", ['M', 'K'], "loop-order"), OtherNode("Graphics")) + corr.add_edge(GetRootNode("A", ['M', 'K']), LoopNode("M")) corr.add_edge(SwizzleNode( - "A", ["M", "K"], "loop-order"), GetRootNode("A", ["M", "K"])) + "B", ['K', 'N'], "loop-order"), GetRootNode("B", ['K', 'N'])) + corr.add_edge( + SwizzleNode("B", ['K', 'N'], "loop-order"), OtherNode("Graphics")) + corr.add_edge(GetRootNode("B", ['K', 'N']), LoopNode("K")) + corr.add_edge(LoopNode('M'), MetricsHeaderNode('K')) + corr.add_edge(LoopNode('K'), MetricsHeaderNode('N')) + corr.add_edge(OtherNode('Graphics'), MetricsHeaderNode('M')) + corr.add_edge(MetricsNode('Start'), MetricsHeaderNode('M')) + corr.add_edge(MetricsHeaderNode('M'), LoopNode('M')) + corr.add_edge(MetricsHeaderNode('K'), LoopNode('K')) + corr.add_edge(MetricsHeaderNode('N'), LoopNode('N')) + corr.add_edge(LoopNode('N'), MetricsNode('Body')) + corr.add_edge(EndLoopNode('N'), MetricsFooterNode('N')) + corr.add_edge(EndLoopNode('K'), MetricsFooterNode('K')) + corr.add_edge(EndLoopNode('M'), MetricsFooterNode('M')) + corr.add_edge(MetricsNode('Body'), OtherNode('Body')) + corr.add_edge(MetricsFooterNode('N'), EndLoopNode('K')) + corr.add_edge(MetricsFooterNode('K'), EndLoopNode('M')) + corr.add_edge(MetricsFooterNode('M'), OtherNode('Footer')) + corr.add_edge(MetricsFooterNode('M'), MetricsNode('End')) + corr.add_edge(EndLoopNode("M"), MetricsNode("End")) + + print_errs(graph, corr) + + assert nx.is_isomorphic(graph, corr) + + +def test_graph_metrics_Z(): + program, hardware, format_ = build_gamma() + program.add_einsum(1) + metrics = Metrics(program, hardware, format_) + graph = FlowGraph(program, metrics, []).get_graph() + + corr = nx.DiGraph() + + corr.add_edge(LoopNode("M"), LoopNode("N")) + corr.add_edge(LoopNode("M"), LoopNode("K")) + corr.add_edge(LoopNode("N"), LoopNode("K")) + 
corr.add_edge(LoopNode("N"), OtherNode("Body")) + corr.add_edge(LoopNode("K"), OtherNode("Body")) + corr.add_edge(OtherNode("Body"), EndLoopNode("K")) + corr.add_edge(EndLoopNode("K"), EndLoopNode("N")) + corr.add_edge(EndLoopNode("N"), EndLoopNode("M")) + corr.add_edge(EndLoopNode("M"), OtherNode("Footer")) + corr.add_edge(OtherNode("Graphics"), LoopNode("M")) + corr.add_edge(OtherNode("Graphics"), MetricsNode("Start")) + corr.add_edge(OtherNode("Output"), OtherNode("Graphics")) + corr.add_edge(OtherNode("Output"), GetRootNode("Z", ['M', 'N'])) + corr.add_edge(OtherNode("Footer"), MetricsNode("Dump")) + corr.add_edge(MetricsNode("Start"), LoopNode("M")) + corr.add_edge(MetricsNode("End"), OtherNode("Footer")) + corr.add_edge(GetRootNode("Z", ['M', 'N']), LoopNode("M")) corr.add_edge(SwizzleNode( - "A", ["M", "K"], "loop-order"), OtherNode("Graphics")) - corr.add_edge(GetRootNode("A", ["M", "K"]), LoopNode("M")) + "T", ['M', 'N', 'K'], "loop-order"), GetRootNode("T", ['M', 'N', 'K'])) + corr.add_edge( + SwizzleNode( + "T", [ + 'M', 'N', 'K'], "loop-order"), OtherNode("Graphics")) + corr.add_edge(GetRootNode("T", ['M', 'N', 'K']), LoopNode("M")) + corr.add_edge( + SwizzleNode( + "T", [ + 'M', 'K', 'N'], "metrics"), SwizzleNode( + "T", [ + 'M', 'N', 'K'], "loop-order")) corr.add_edge(SwizzleNode( - "B", ["K", "N"], "loop-order"), GetRootNode("B", ["K", "N"])) + "A", ['M', 'K'], "loop-order"), GetRootNode("A", ['M', 'K'])) + corr.add_edge( + SwizzleNode( + "A", [ + 'M', 'K'], "loop-order"), OtherNode("Graphics")) + corr.add_edge(GetRootNode("A", ['M', 'K']), LoopNode("M")) + corr.add_edge(LoopNode('M'), MetricsHeaderNode('N')) + corr.add_edge(LoopNode('N'), MetricsHeaderNode('K')) + corr.add_edge(OtherNode('Graphics'), MetricsHeaderNode('M')) + corr.add_edge(MetricsNode('Start'), MetricsHeaderNode('M')) + corr.add_edge(MetricsHeaderNode('M'), LoopNode('M')) + corr.add_edge(MetricsHeaderNode('N'), LoopNode('N')) + corr.add_edge(MetricsHeaderNode('K'), LoopNode('K')) + corr.add_edge(LoopNode('K'), MetricsNode('Body')) + corr.add_edge(EndLoopNode('K'), MetricsFooterNode('K')) + corr.add_edge(EndLoopNode('N'), MetricsFooterNode('N')) + corr.add_edge(EndLoopNode('M'), MetricsFooterNode('M')) + corr.add_edge(MetricsNode('Body'), OtherNode('Body')) + corr.add_edge(MetricsFooterNode('K'), EndLoopNode('N')) + corr.add_edge(MetricsFooterNode('N'), EndLoopNode('M')) + corr.add_edge(MetricsFooterNode('M'), OtherNode('Footer')) + corr.add_edge(MetricsFooterNode('M'), MetricsNode('End')) + corr.add_edge(EndLoopNode("M"), MetricsNode("End")) + + print_errs(graph, corr) + + assert nx.is_isomorphic(graph, corr) + + +def test_graph_metrics_extensor(): + program, hardware, format_ = build_extensor() + program.add_einsum(0) + metrics = Metrics(program, hardware, format_) + graph = FlowGraph(program, metrics, []).get_graph() + + corr = nx.DiGraph() + + corr.add_edge(LoopNode("N2"), LoopNode("K2")) + corr.add_edge(LoopNode("N2"), LoopNode("M2")) + corr.add_edge(LoopNode("K2"), LoopNode("M2")) + corr.add_edge(LoopNode("K2"), LoopNode("N1")) + corr.add_edge(LoopNode("M2"), LoopNode("M1")) + corr.add_edge(LoopNode("M1"), LoopNode("N1")) + corr.add_edge(LoopNode("M1"), LoopNode("K1")) + corr.add_edge(LoopNode("N1"), LoopNode("K1")) + corr.add_edge(LoopNode("N1"), LoopNode("M0")) + corr.add_edge(LoopNode("K1"), LoopNode("M0")) + corr.add_edge(LoopNode("K1"), LoopNode("N0")) + corr.add_edge(LoopNode("M0"), LoopNode("N0")) + corr.add_edge(LoopNode("M0"), LoopNode("K0")) + corr.add_edge(LoopNode("N0"), LoopNode("K0")) + 
corr.add_edge(LoopNode("N0"), OtherNode("Body")) + corr.add_edge(LoopNode("K0"), OtherNode("Body")) + corr.add_edge(OtherNode("Body"), EndLoopNode("K0")) + corr.add_edge(EndLoopNode("K0"), EndLoopNode("N0")) + corr.add_edge(EndLoopNode("N0"), EndLoopNode("M0")) + corr.add_edge(EndLoopNode("M0"), EndLoopNode("K1")) + corr.add_edge(EndLoopNode("K1"), EndLoopNode("N1")) + corr.add_edge(EndLoopNode("N1"), EndLoopNode("M1")) + corr.add_edge(EndLoopNode("M1"), EndLoopNode("M2")) + corr.add_edge(EndLoopNode("M2"), EndLoopNode("K2")) + corr.add_edge(EndLoopNode("K2"), EndLoopNode("N2")) + corr.add_edge(EndLoopNode("N2"), OtherNode("Footer")) + corr.add_edge(OtherNode("Footer"), MetricsNode("Dump")) + corr.add_edge(OtherNode("Graphics"), LoopNode("N2")) + corr.add_edge(OtherNode("Graphics"), MetricsNode("Start")) + corr.add_edge(OtherNode("Output"), OtherNode("Graphics")) + corr.add_edge(OtherNode("Output"), GetRootNode( + "Z", ['N2', 'M2', 'M1', 'N1', 'M0', 'N0'])) + corr.add_edge(MetricsNode("Start"), LoopNode("N2")) + corr.add_edge(MetricsNode("End"), OtherNode("Footer")) + corr.add_edge( + GetRootNode( + "Z", [ + 'N2', 'M2', 'M1', 'N1', 'M0', 'N0']), LoopNode("N2")) + corr.add_edge(PartNode("A", ('M',)), OtherNode("Graphics")) + corr.add_edge( + PartNode( + "A", ('M',)), SwizzleNode( + "A", [ + 'K2', 'M2', 'M1', 'K1', 'M0', 'K0'], "loop-order")) + corr.add_edge(PartNode("A", ('K',)), OtherNode("Graphics")) + corr.add_edge( + PartNode( + "A", ('K',)), SwizzleNode( + "A", [ + 'K2', 'M2', 'M1', 'K1', 'M0', 'K0'], "loop-order")) + corr.add_edge(SwizzleNode("A", + ['K2', 'M2', 'M1', 'K1', 'M0', 'K0'], "loop-order"), + GetRootNode("A", ['K2', 'M2', 'M1', 'K1', 'M0', 'K0'])) + corr.add_edge( + SwizzleNode( + "A", [ + 'K2', 'M2', 'M1', 'K1', 'M0', 'K0'], "loop-order"), OtherNode("Graphics")) + corr.add_edge( + GetRootNode( + "A", [ + 'K2', 'M2', 'M1', 'K1', 'M0', 'K0']), LoopNode("K2")) + corr.add_edge(PartNode("B", ('K',)), OtherNode("Graphics")) + corr.add_edge( + PartNode( + "B", ('K',)), SwizzleNode( + "B", [ + 'N2', 'K2', 'N1', 'K1', 'N0', 'K0'], "loop-order")) + corr.add_edge(PartNode("B", ('N',)), OtherNode("Graphics")) + corr.add_edge( + PartNode( + "B", ('N',)), SwizzleNode( + "B", [ + 'N2', 'K2', 'N1', 'K1', 'N0', 'K0'], "loop-order")) + corr.add_edge(SwizzleNode("B", + ['N2', 'K2', 'N1', 'K1', 'N0', 'K0'], + "loop-order"), + GetRootNode("B", + ['N2', 'K2', 'N1', 'K1', 'N0', 'K0'])) + corr.add_edge( + SwizzleNode( + "B", [ + 'N2', 'K2', 'N1', 'K1', 'N0', 'K0'], "loop-order"), OtherNode("Graphics")) + corr.add_edge( + GetRootNode( + "B", [ + 'N2', 'K2', 'N1', 'K1', 'N0', 'K0']), LoopNode("N2")) + corr.add_edge(LoopNode('N2'), MetricsHeaderNode('K2')) + corr.add_edge(LoopNode('K2'), MetricsHeaderNode('M2')) + corr.add_edge(LoopNode('M2'), MetricsHeaderNode('M1')) + corr.add_edge(LoopNode('M1'), MetricsHeaderNode('N1')) + corr.add_edge(LoopNode('N1'), MetricsHeaderNode('K1')) + corr.add_edge(LoopNode('K1'), MetricsHeaderNode('M0')) + corr.add_edge(LoopNode('M0'), MetricsHeaderNode('N0')) + corr.add_edge(LoopNode('N0'), MetricsHeaderNode('K0')) + corr.add_edge(OtherNode('Graphics'), MetricsHeaderNode('N2')) + corr.add_edge(MetricsNode('Start'), MetricsHeaderNode('N2')) + corr.add_edge(MetricsHeaderNode('N2'), LoopNode('N2')) + corr.add_edge(MetricsHeaderNode('K2'), LoopNode('K2')) + corr.add_edge(MetricsHeaderNode('M2'), LoopNode('M2')) + corr.add_edge(MetricsHeaderNode('M1'), LoopNode('M1')) + corr.add_edge(MetricsHeaderNode('N1'), LoopNode('N1')) + corr.add_edge(MetricsHeaderNode('K1'), 
LoopNode('K1')) + corr.add_edge(MetricsHeaderNode('M0'), LoopNode('M0')) + corr.add_edge(MetricsHeaderNode('N0'), LoopNode('N0')) + corr.add_edge(MetricsHeaderNode('K0'), LoopNode('K0')) + corr.add_edge(LoopNode('K0'), MetricsNode('Body')) + corr.add_edge(EndLoopNode('K0'), MetricsFooterNode('K0')) + corr.add_edge(EndLoopNode('N0'), MetricsFooterNode('N0')) + corr.add_edge(EndLoopNode('M0'), MetricsFooterNode('M0')) + corr.add_edge(EndLoopNode('K1'), MetricsFooterNode('K1')) + corr.add_edge(EndLoopNode('N1'), MetricsFooterNode('N1')) + corr.add_edge(EndLoopNode('M1'), MetricsFooterNode('M1')) + corr.add_edge(EndLoopNode('M2'), MetricsFooterNode('M2')) + corr.add_edge(EndLoopNode('K2'), MetricsFooterNode('K2')) + corr.add_edge(EndLoopNode('N2'), MetricsFooterNode('N2')) + corr.add_edge(MetricsNode('Body'), OtherNode('Body')) + corr.add_edge(MetricsFooterNode('K0'), EndLoopNode('N0')) + corr.add_edge(MetricsFooterNode('N0'), EndLoopNode('M0')) + corr.add_edge(MetricsFooterNode('M0'), EndLoopNode('K1')) + corr.add_edge(MetricsFooterNode('K1'), EndLoopNode('N1')) + corr.add_edge(MetricsFooterNode('N1'), EndLoopNode('M1')) + corr.add_edge(MetricsFooterNode('M1'), EndLoopNode('M2')) + corr.add_edge(MetricsFooterNode('M2'), EndLoopNode('K2')) + corr.add_edge(MetricsFooterNode('K2'), EndLoopNode('N2')) + corr.add_edge(MetricsFooterNode('N2'), OtherNode('Footer')) + corr.add_edge(MetricsFooterNode('N2'), MetricsNode('End')) + corr.add_edge(EndLoopNode("N2"), MetricsNode("End")) + + print_errs(graph, corr) + + assert nx.is_isomorphic(graph, corr) + + +def test_graph_metrics_extensor_energy(): + program, hardware, format_ = build_extensor_energy() + program.add_einsum(0) + metrics = Metrics(program, hardware, format_) + graph = FlowGraph(program, metrics, []).get_graph() + + corr = nx.DiGraph() + + corr.add_edge(LoopNode("N2"), LoopNode("K2")) + corr.add_edge(LoopNode("N2"), LoopNode("M2")) + corr.add_edge(LoopNode("K2"), LoopNode("M2")) + corr.add_edge(LoopNode("K2"), LoopNode("N1")) + corr.add_edge(LoopNode("M2"), LoopNode("M1")) + corr.add_edge(LoopNode("M1"), LoopNode("N1")) + corr.add_edge(LoopNode("M1"), LoopNode("K1")) + corr.add_edge(LoopNode("N1"), LoopNode("K1")) + corr.add_edge(LoopNode("N1"), LoopNode("M0")) + corr.add_edge(LoopNode("K1"), LoopNode("M0")) + corr.add_edge(LoopNode("K1"), LoopNode("N0")) + corr.add_edge(LoopNode("M0"), LoopNode("N0")) + corr.add_edge(LoopNode("M0"), LoopNode("K0")) + corr.add_edge(LoopNode("N0"), LoopNode("K0")) + corr.add_edge(LoopNode("N0"), OtherNode("Body")) + corr.add_edge(LoopNode("K0"), OtherNode("Body")) + corr.add_edge(OtherNode("Body"), EndLoopNode("K0")) + corr.add_edge(EndLoopNode("K0"), EndLoopNode("N0")) + corr.add_edge(EndLoopNode("N0"), EndLoopNode("M0")) + corr.add_edge(EndLoopNode("M0"), EndLoopNode("K1")) + corr.add_edge(EndLoopNode("K1"), EndLoopNode("N1")) + corr.add_edge(EndLoopNode("N1"), EndLoopNode("M1")) + corr.add_edge(EndLoopNode("M1"), EndLoopNode("M2")) + corr.add_edge(EndLoopNode("M2"), EndLoopNode("K2")) + corr.add_edge(EndLoopNode("K2"), EndLoopNode("N2")) + corr.add_edge(EndLoopNode("N2"), OtherNode("Footer")) + corr.add_edge(OtherNode("Footer"), MetricsNode("Dump")) + corr.add_edge(OtherNode("Graphics"), LoopNode("N2")) + corr.add_edge(OtherNode("Graphics"), MetricsNode("Start")) + corr.add_edge(OtherNode("Output"), OtherNode("Graphics")) + corr.add_edge(OtherNode("Output"), GetRootNode( + "Z", ['N2', 'M2', 'M1', 'N1', 'M0', 'N0'])) + corr.add_edge(MetricsNode("Start"), LoopNode("N2")) + corr.add_edge(MetricsNode("End"), 
OtherNode("Footer")) + corr.add_edge( + GetRootNode( + "Z", [ + 'N2', 'M2', 'M1', 'N1', 'M0', 'N0']), LoopNode("N2")) + corr.add_edge(PartNode("A", ('M',)), OtherNode("Graphics")) + corr.add_edge( + PartNode( + "A", ('M',)), SwizzleNode( + "A", [ + 'K2', 'M2', 'M1', 'K1', 'M0', 'K0'], "loop-order")) + corr.add_edge(PartNode("A", ('K',)), OtherNode("Graphics")) + corr.add_edge( + PartNode( + "A", ('K',)), SwizzleNode( + "A", [ + 'K2', 'M2', 'M1', 'K1', 'M0', 'K0'], "loop-order")) + corr.add_edge(SwizzleNode("A", + ['K2', 'M2', 'M1', 'K1', 'M0', 'K0'], "loop-order"), + GetRootNode("A", ['K2', 'M2', 'M1', 'K1', 'M0', 'K0'])) + corr.add_edge( + SwizzleNode( + "A", [ + 'K2', 'M2', 'M1', 'K1', 'M0', 'K0'], "loop-order"), OtherNode("Graphics")) + corr.add_edge( + GetRootNode( + "A", [ + 'K2', 'M2', 'M1', 'K1', 'M0', 'K0']), LoopNode("K2")) + corr.add_edge(PartNode("B", ('K',)), OtherNode("Graphics")) + corr.add_edge( + PartNode( + "B", ('K',)), SwizzleNode( + "B", [ + 'N2', 'K2', 'N1', 'K1', 'N0', 'K0'], "loop-order")) + corr.add_edge(PartNode("B", ('N',)), OtherNode("Graphics")) + corr.add_edge( + PartNode( + "B", ('N',)), SwizzleNode( + "B", [ + 'N2', 'K2', 'N1', 'K1', 'N0', 'K0'], "loop-order")) + corr.add_edge(SwizzleNode("B", + ['N2', 'K2', 'N1', 'K1', 'N0', 'K0'], + "loop-order"), + GetRootNode("B", + ['N2', 'K2', 'N1', 'K1', 'N0', 'K0'])) + corr.add_edge( + SwizzleNode( + "B", [ + 'N2', 'K2', 'N1', 'K1', 'N0', 'K0'], "loop-order"), OtherNode("Graphics")) + corr.add_edge( + GetRootNode( + "B", [ + 'N2', 'K2', 'N1', 'K1', 'N0', 'K0']), LoopNode("N2")) + corr.add_edge(LoopNode('N2'), MetricsHeaderNode('K2')) + corr.add_edge(LoopNode('K2'), MetricsHeaderNode('M2')) + corr.add_edge(LoopNode('M2'), MetricsHeaderNode('M1')) + corr.add_edge(LoopNode('M1'), MetricsHeaderNode('N1')) + corr.add_edge(LoopNode('N1'), MetricsHeaderNode('K1')) + corr.add_edge(LoopNode('K1'), MetricsHeaderNode('M0')) + corr.add_edge(LoopNode('M0'), MetricsHeaderNode('N0')) + corr.add_edge(LoopNode('N0'), MetricsHeaderNode('K0')) + corr.add_edge(OtherNode('Graphics'), MetricsHeaderNode('N2')) + corr.add_edge(MetricsNode('Start'), MetricsHeaderNode('N2')) + corr.add_edge(MetricsHeaderNode('N2'), LoopNode('N2')) + corr.add_edge(MetricsHeaderNode('K2'), LoopNode('K2')) + corr.add_edge(MetricsHeaderNode('M2'), LoopNode('M2')) + corr.add_edge(MetricsHeaderNode('M1'), LoopNode('M1')) + corr.add_edge(MetricsHeaderNode('N1'), LoopNode('N1')) + corr.add_edge(MetricsHeaderNode('K1'), LoopNode('K1')) + corr.add_edge(MetricsHeaderNode('M0'), LoopNode('M0')) + corr.add_edge(MetricsHeaderNode('N0'), LoopNode('N0')) + corr.add_edge(MetricsHeaderNode('K0'), LoopNode('K0')) + corr.add_edge(LoopNode('K0'), MetricsNode('Body')) + corr.add_edge(EndLoopNode('K0'), MetricsFooterNode('K0')) + corr.add_edge(EndLoopNode('N0'), MetricsFooterNode('N0')) + corr.add_edge(EndLoopNode('M0'), MetricsFooterNode('M0')) + corr.add_edge(EndLoopNode('K1'), MetricsFooterNode('K1')) + corr.add_edge(EndLoopNode('N1'), MetricsFooterNode('N1')) + corr.add_edge(EndLoopNode('M1'), MetricsFooterNode('M1')) + corr.add_edge(EndLoopNode('M2'), MetricsFooterNode('M2')) + corr.add_edge(EndLoopNode('K2'), MetricsFooterNode('K2')) + corr.add_edge(EndLoopNode('N2'), MetricsFooterNode('N2')) + corr.add_edge(MetricsNode('Body'), OtherNode('Body')) + corr.add_edge(MetricsFooterNode('K0'), EndLoopNode('N0')) + corr.add_edge(MetricsFooterNode('N0'), EndLoopNode('M0')) + corr.add_edge(MetricsFooterNode('M0'), EndLoopNode('K1')) + corr.add_edge(MetricsFooterNode('K1'), 
EndLoopNode('N1')) + corr.add_edge(MetricsFooterNode('N1'), EndLoopNode('M1')) + corr.add_edge(MetricsFooterNode('M1'), EndLoopNode('M2')) + corr.add_edge(MetricsFooterNode('M2'), EndLoopNode('K2')) + corr.add_edge(MetricsFooterNode('K2'), EndLoopNode('N2')) + corr.add_edge(MetricsFooterNode('N2'), OtherNode('Footer')) + corr.add_edge(MetricsFooterNode('N2'), MetricsNode('End')) + corr.add_edge(EndLoopNode("N2"), MetricsNode("End")) + + print_errs(graph, corr) + + assert nx.is_isomorphic(graph, corr) + + +def test_graph_metrics_swizzle_for_part(): + yaml = """ + einsum: + declaration: + Z: [] + A: [K, M] + expressions: + - Z[] = A[k, m] + mapping: + partitioning: + Z: + (M, K): [flatten()] + architecture: + accel: + - name: level0 + local: + - name: Merger + class: Merger + attributes: + inputs: 16 + comparator_radix: 16 + bindings: + Z: + - config: accel + prefix: tmp/Z + - component: Merger + bindings: + - tensor: A + init-ranks: [K, M] + final-ranks: [M, K] + format: + A: + default: + rank-order: [MK] + MK: + format: C + pbits: 64 + Z: + default: + rank-order: [] + """ + einsum = Einsum.from_str(yaml) + mapping = Mapping.from_str(yaml) + program = Program(einsum, mapping) + + arch = Architecture.from_str(yaml) + bindings = Bindings.from_str(yaml) + hardware = Hardware(arch, bindings, program) + + format_ = Format.from_str(yaml) + + program.add_einsum(0) + metrics = Metrics(program, hardware, format_) + graph = FlowGraph(program, metrics, []).get_graph() + + corr = nx.DiGraph() + + corr.add_edge(LoopNode("MK"), OtherNode("Body")) + corr.add_edge(OtherNode("Body"), EndLoopNode("MK")) + corr.add_edge(EndLoopNode("MK"), OtherNode("Footer")) + corr.add_edge(OtherNode("Graphics"), LoopNode("MK")) + corr.add_edge(OtherNode("Graphics"), MetricsNode("Start")) + corr.add_edge(OtherNode("Output"), OtherNode("Graphics")) + corr.add_edge(OtherNode("Output"), GetRootNode("Z", [])) + corr.add_edge(OtherNode("Footer"), MetricsNode("Dump")) + corr.add_edge(MetricsNode("Start"), LoopNode("MK")) + corr.add_edge(MetricsNode("End"), OtherNode("Footer")) + corr.add_edge(GetRootNode("Z", []), OtherNode("Body")) + corr.add_edge( + SwizzleNode("A", ['M', 'K'], "partitioning"), + PartNode("A", ('M', 'K'))) + corr.add_edge(PartNode("A", ('M', 'K')), OtherNode("Graphics")) + corr.add_edge( + PartNode("A", ('M', 'K')), + SwizzleNode("A", ['MK'], "loop-order")) + corr.add_edge( + SwizzleNode("A", ['K', 'M'], "metrics"), + SwizzleNode("A", ['M', 'K'], "partitioning")) + corr.add_edge( + SwizzleNode("A", ['MK'], "loop-order"), + GetRootNode("A", ['MK'])) + corr.add_edge( + SwizzleNode("A", ['MK'], "loop-order"), + OtherNode("Graphics")) + corr.add_edge(GetRootNode("A", ['MK']), LoopNode("MK")) + corr.add_edge(OtherNode('Graphics'), MetricsHeaderNode('MK')) + corr.add_edge(MetricsNode('Start'), MetricsHeaderNode('MK')) + corr.add_edge(MetricsHeaderNode('MK'), LoopNode('MK')) + corr.add_edge(LoopNode('MK'), MetricsNode('Body')) + corr.add_edge(MetricsNode('Body'), OtherNode('Body')) + corr.add_edge(MetricsFooterNode('MK'), MetricsNode('End')) + corr.add_edge(EndLoopNode('MK'), MetricsFooterNode('MK')) + corr.add_edge(MetricsFooterNode('MK'), OtherNode('Footer')) + corr.add_edge(EndLoopNode("MK"), MetricsNode("End")) + + print_errs(graph, corr) + + assert nx.is_isomorphic(graph, corr) + + +def test_graph_metrics_trace_output(): + yaml = """ + einsum: + declaration: + Z: [K, M] + A: [K, M] + expressions: + - Z[k, m] = A[k, m] + architecture: + accel: + - name: level0 + local: + - name: Buffer + class: Buffet + bindings: + 
Z: + - config: accel + prefix: tmp/Z + - component: Buffer + bindings: + - tensor: Z + rank: K + type: payload + style: eager + evict-on: root + format: default + format: + Z: + default: + rank-order: [K, M] + K: + format: C + pbits: 32 + M: + format: C + cbits: 32 + pbits: 64 + """ + einsum = Einsum.from_str(yaml) + mapping = Mapping.from_str(yaml) + program = Program(einsum, mapping) + + arch = Architecture.from_str(yaml) + bindings = Bindings.from_str(yaml) + hardware = Hardware(arch, bindings, program) + + format_ = Format.from_str(yaml) + + program.add_einsum(0) + metrics = Metrics(program, hardware, format_) + graph = FlowGraph(program, metrics, []).get_graph() + + corr = nx.DiGraph() + + corr.add_edge(LoopNode("K"), LoopNode("M")) + corr.add_edge(LoopNode("M"), OtherNode("Body")) + corr.add_edge(OtherNode("Body"), EndLoopNode("M")) + corr.add_edge(EndLoopNode("M"), EndLoopNode("K")) + corr.add_edge(EndLoopNode("K"), OtherNode("Footer")) + corr.add_edge(OtherNode("Footer"), MetricsNode("Dump")) + corr.add_edge(OtherNode("Graphics"), LoopNode("K")) + corr.add_edge(OtherNode("Graphics"), MetricsNode("Start")) + corr.add_edge(OtherNode("Output"), OtherNode("Graphics")) + corr.add_edge(OtherNode("Output"), GetRootNode("Z", ['K', 'M'])) + corr.add_edge(MetricsNode("Start"), LoopNode("K")) + corr.add_edge(MetricsNode("End"), OtherNode("Footer")) + corr.add_edge(GetRootNode("Z", ['K', 'M']), LoopNode("K")) corr.add_edge(SwizzleNode( - "B", ["K", "N"], "loop-order"), OtherNode("Graphics")) - corr.add_edge(GetRootNode("B", ["K", "N"]), LoopNode("K")) - corr.add_edge(SwizzleNode("B", ['K', 'N'], - "loop-order"), CollectingNode("B", "K")) - corr.add_edge(CollectingNode("B", "K"), MetricsNode("Start")) + "A", ['K', 'M'], "loop-order"), GetRootNode("A", ['K', 'M'])) + corr.add_edge( + SwizzleNode( + "A", ['K', 'M'], "loop-order"), OtherNode("Graphics")) + corr.add_edge(GetRootNode("A", ['K', 'M']), LoopNode("K")) + corr.add_edge(LoopNode('K'), MetricsHeaderNode('M')) + corr.add_edge(OtherNode('Graphics'), MetricsHeaderNode('K')) + corr.add_edge(MetricsNode('Start'), MetricsHeaderNode('K')) + corr.add_edge(MetricsHeaderNode('K'), LoopNode('K')) + corr.add_edge(MetricsHeaderNode('M'), LoopNode('M')) + corr.add_edge(LoopNode('M'), MetricsNode('Body')) + corr.add_edge(MetricsNode('Body'), OtherNode('Body')) + corr.add_edge(MetricsFooterNode('K'), MetricsNode('End')) + corr.add_edge(EndLoopNode('M'), MetricsFooterNode('M')) + corr.add_edge(EndLoopNode('K'), MetricsFooterNode('K')) + corr.add_edge(MetricsFooterNode('M'), EndLoopNode('K')) + corr.add_edge(MetricsFooterNode('K'), OtherNode('Footer')) + corr.add_edge(EndLoopNode("K"), MetricsNode("End")) + + print_errs(graph, corr) assert nx.is_isomorphic(graph, corr) diff --git a/tests/ir/test_flow_nodes.py b/tests/ir/test_flow_nodes.py index a86af77..d6d47f7 100644 --- a/tests/ir/test_flow_nodes.py +++ b/tests/ir/test_flow_nodes.py @@ -1,13 +1,6 @@ from teaal.ir.flow_nodes import * -def test_collecting_node(): - assert repr(CollectingNode("A", "K")) == "(CollectingNode, A, K)" - - assert CollectingNode("A", "K").get_tensor() == "A" - assert CollectingNode("A", "K").get_rank() == "K" - - def test_eager_input_node(): assert repr(EagerInputNode("Q1", ["I", "J"]) ) == "(EagerInputNode, Q1, ['I', 'J'])" @@ -16,6 +9,12 @@ def test_eager_input_node(): assert EagerInputNode("Q1", ["I", "J"]).get_tensors() == ["I", "J"] +def test_end_loop_node(): + assert repr(EndLoopNode("K1")) == "(EndLoopNode, K1)" + + assert EndLoopNode("K1").get_rank() == "K1" + + def 
test_fiber_node():
     assert repr(FiberNode("a_k")) == "(FiberNode, a_k)"
 
@@ -55,6 +54,18 @@ def test_loop_node():
     assert LoopNode("K1").get_rank() == "K1"
 
 
+def test_metrics_footer_node():
+    assert repr(MetricsFooterNode("K1")) == "(MetricsFooterNode, K1)"
+
+    assert MetricsFooterNode("K1").get_rank() == "K1"
+
+
+def test_metrics_header_node():
+    assert repr(MetricsHeaderNode("K1")) == "(MetricsHeaderNode, K1)"
+
+    assert MetricsHeaderNode("K1").get_rank() == "K1"
+
+
 def test_metrics_node():
     assert repr(MetricsNode("Start")) == "(MetricsNode, Start)"
 
diff --git a/tests/ir/test_fusion.py b/tests/ir/test_fusion.py
new file mode 100644
index 0000000..a343624
--- /dev/null
+++ b/tests/ir/test_fusion.py
@@ -0,0 +1,272 @@
+import pytest
+
+from teaal.ir.fusion import Fusion
+from teaal.ir.hardware import Hardware
+from teaal.ir.program import Program
+from teaal.parse import *
+
+
+def make_yaml(spacetime, bindings):
+    yaml = """
+    einsum:
+      declaration:
+        A: [K, M]
+        B: [K, N]
+        T: [K, M, N]
+        C: [M, N]
+        Z: [M, N]
+      expressions:
+      - T[k, m, n] = A[k, m] * B[k, n]
+      - Z[m, n] = T[k, m, n] * C[m, n]
+    mapping:
+      loop-order:
+        T: [M, K, N]
+        Z: [M, K, N]
+      spacetime:""" + spacetime + """
+    format:
+      # TODO: allow empty format
+      Z:
+        default:
+          rank-order: [M, N]
+          M:
+            format: C
+          N:
+            format: C
+            pbits: 32
+    architecture:
+      configA:
+      - name: System
+        local:
+        - name: FPMul0
+          class: compute
+          attributes:
+            type: mul
+        - name: FPMul1
+          class: compute
+          attributes:
+            type: mul
+      configB:
+      - name: System
+        local:
+        - name: FPMul
+          class: compute
+          attributes:
+            type: mul
+    bindings:""" + bindings
+
+    return yaml
+
+
+def parse_yamls(yaml):
+    einsum = Einsum.from_str(yaml)
+    mapping = Mapping.from_str(yaml)
+    program = Program(einsum, mapping)
+    program.add_einsum(0)
+
+    arch = Architecture.from_str(yaml)
+    bindings = Bindings.from_str(yaml)
+    hardware = Hardware(arch, bindings, program)
+
+    format_ = Format.from_str(yaml)
+
+    return program, hardware, format_
+
+
+def test_no_spacetime():
+    spacetime = ""
+    bindings = """
+        T:
+        - config: configA
+          prefix: tmp/T
+        Z:
+        - config: configB
+          prefix: tmp/Z
+    """
+    yaml = make_yaml(spacetime, bindings)
+
+    program, hardware, format_ = parse_yamls(yaml)
+    fusion = Fusion(hardware)
+
+    program.add_einsum(0)
+
+    with pytest.raises(ValueError) as excinfo:
+        fusion.add_einsum(program)
+    assert str(
+        excinfo.value) == "Undefined spacetime for Einsum T"
+
+
+def test_add_einsum_diff_configs():
+    spacetime = """
+        T:
+          space: [N]
+          time: [M, K]
+        Z:
+          space: [N]
+          time: [M, K]
+    """
+
+    bindings = """
+        T:
+        - config: configA
+          prefix: tmp/T
+        Z:
+        - config: configB
+          prefix: tmp/Z
+    """
+    yaml = make_yaml(spacetime, bindings)
+
+    program, hardware, format_ = parse_yamls(yaml)
+    fusion = Fusion(hardware)
+
+    program.add_einsum(0)
+    fusion.add_einsum(program)
+
+    program.add_einsum(1)
+    fusion.add_einsum(program)
+
+    assert fusion.get_blocks() == [["T"], ["Z"]]
+
+
+def test_add_einsum_diff_temporal_ranks():
+    spacetime = """
+        T:
+          space: [N]
+          time: [M, K]
+        Z:
+          space: [K]
+          time: [M, N]
+    """
+
+    bindings = """
+        T:
+        - config: configA
+          prefix: tmp/T
+        Z:
+        - config: configA
+          prefix: tmp/Z
+    """
+    yaml = make_yaml(spacetime, bindings)
+
+    program, hardware, format_ = parse_yamls(yaml)
+    fusion = Fusion(hardware)
+
+    program.add_einsum(0)
+    fusion.add_einsum(program)
+
+    program.add_einsum(1)
+    fusion.add_einsum(program)
+
+    assert fusion.get_blocks() == [["T"], ["Z"]]
+
+
+def test_add_einsum_overlapping_components():
+    spacetime = """
+        T:
+          space: [N]
+          time: [M, K]
+        Z:
+          space: [K]
+          time: [M, N]
+    """
+
+    bindings = """
+        T:
+        - config: configA
+          prefix: tmp/T
+        - component: FPMul0
+          bindings:
+          - op: mul
+        Z:
+        - config: configA
+          prefix: tmp/Z
+        - component: FPMul0
+          bindings:
+          - op: mul
+    """
+    yaml = make_yaml(spacetime, bindings)
+
+    program, hardware, format_ = parse_yamls(yaml)
+    fusion = Fusion(hardware)
+
+    program.add_einsum(0)
+    fusion.add_einsum(program)
+
+    program.add_einsum(1)
+    fusion.add_einsum(program)
+
+    assert fusion.get_blocks() == [["T"], ["Z"]]
+
+
+def test_add_einsum_fused():
+    spacetime = """
+        T:
+          space: [N]
+          time: [M, K]
+        Z:
+          space: [N]
+          time: [M, K]
+    """
+
+    bindings = """
+        T:
+        - config: configA
+          prefix: tmp/T
+        - component: FPMul0
+          bindings:
+          - op: mul
+        Z:
+        - config: configA
+          prefix: tmp/Z
+        - component: FPMul1
+          bindings:
+          - op: mul
+    """
+    yaml = make_yaml(spacetime, bindings)
+
+    program, hardware, format_ = parse_yamls(yaml)
+    fusion = Fusion(hardware)
+
+    program.add_einsum(0)
+    fusion.add_einsum(program)
+
+    program.add_einsum(1)
+    fusion.add_einsum(program)
+
+    assert fusion.get_blocks() == [["T", "Z"]]
+
+
+def test_add_components():
+    spacetime = """
+        T:
+          space: [N]
+          time: [M, K]
+        Z:
+          space: [N]
+          time: [M, K]
+    """
+
+    bindings = """
+        T:
+        - config: configA
+          prefix: tmp/T
+        - component: FPMul0
+          bindings:
+          - op: mul
+        - component: FPMul1
+          bindings:
+          - op: mul
+        Z:
+        - config: configA
+          prefix: tmp/Z
+    """
+    yaml = make_yaml(spacetime, bindings)
+
+    program, hardware, format_ = parse_yamls(yaml)
+    fusion = Fusion(hardware)
+
+    program.add_einsum(0)
+    fusion.add_einsum(program)
+    fusion.add_component("T", "FPMul0")
+    fusion.add_component("T", "FPMul1")
+
+    assert fusion.get_components("T") == ["FPMul0", "FPMul1"]
diff --git a/tests/ir/test_hardware.py b/tests/ir/test_hardware.py
index b88b78d..306e495 100644
--- a/tests/ir/test_hardware.py
+++ b/tests/ir/test_hardware.py
@@ -3,334 +3,521 @@ from teaal.ir.component import *
 from teaal.ir.hardware import Hardware
 from teaal.ir.level import Level
+from teaal.ir.program import Program
 from teaal.parse import *
 
 
+def build_outerspace_yaml():
+    with open("tests/integration/outerspace.yaml", "r") as f:
+        return f.read()
+
+
+def parse_yamls(yaml):
+    einsum = Einsum.from_str(yaml)
+    mapping = Mapping.from_str(yaml)
+    program = Program(einsum, mapping)
+    program.add_einsum(0)
+
+    arch = Architecture.from_str(yaml)
+    bindings = Bindings.from_str(yaml)
+
+    return Hardware(arch, bindings, program)
+
+
 def test_no_arch():
-    arch = Architecture.from_str("")
-    bindings = Bindings.from_str("")
+    yaml = """
+    einsum:
+      declaration:
+        Z: [M]
+      expressions:
+      - Z[m] = a
+    bindings:
+      Z:
+      - config: arch
+        prefix: tmp/Z
+    """
+    arch = Architecture.from_str(yaml)
+    bindings = Bindings.from_str(yaml)
+    program = Program(Einsum.from_str(yaml), Mapping.from_str(yaml))
 
     with pytest.raises(ValueError) as excinfo:
-        Hardware(arch, bindings)
+        Hardware(arch, bindings, program)
     assert str(excinfo.value) == "Empty architecture 
specification" def test_bad_arch(): yaml = """ + einsum: + declaration: + Z: [M] + expressions: + - Z[m] = a architecture: - subtree: - - name: foo + config0: + - name: foo - name: bar + bindings: + Z: + - config: config0 + prefix: tmp/Z """ arch = Architecture.from_str(yaml) bindings = Bindings.from_str(yaml) + program = Program(Einsum.from_str(yaml), Mapping.from_str(yaml)) with pytest.raises(ValueError) as excinfo: - Hardware(arch, bindings) - assert str(excinfo.value) == "Architecture must have a single root level" + Hardware(arch, bindings, program) + assert str( + excinfo.value) == "Configuration config0 must have a single root level" def test_bad_component(): yaml = """ + einsum: + declaration: + Z: [M] + expressions: + - Z[m] = a architecture: - subtree: + accel: - name: System local: - name: BAD class: foo + bindings: + Z: + - config: accel + prefix: tmp/Z """ arch = Architecture.from_str(yaml) bindings = Bindings.from_str(yaml) + program = Program(Einsum.from_str(yaml), Mapping.from_str(yaml)) with pytest.raises(ValueError) as excinfo: - Hardware(arch, bindings) + Hardware(arch, bindings, program) assert str(excinfo.value) == "Unknown class: foo" +def test_bad_intersector(): + yaml = """ + einsum: + declaration: + Z: [M] + expressions: + - Z[m] = a + architecture: + accel: + - name: System + local: + - name: BAD + class: Intersector + attributes: + type: foo + bindings: + Z: + - config: accel + prefix: tmp/Z + """ + arch = Architecture.from_str(yaml) + bindings = Bindings.from_str(yaml) + program = Program(Einsum.from_str(yaml), Mapping.from_str(yaml)) + + with pytest.raises(ValueError) as excinfo: + Hardware(arch, bindings, program) + assert str(excinfo.value) == "Unknown intersection type: foo" + + def test_no_binding(): yaml = """ + einsum: + declaration: + Z: [M] + expressions: + - Z[m] = a architecture: - subtree: + arch: - name: System local: - name: Cache class: Cache + bindings: + Z: + - config: arch + prefix: tmp/Z """ arch = Architecture.from_str(yaml) bindings = Bindings.from_str(yaml) - hardware = Hardware(arch, bindings) + program = Program(Einsum.from_str(yaml), Mapping.from_str(yaml)) + hardware = Hardware(arch, bindings, program) - cache = CacheComponent("Cache", {}, []) + cache = CacheComponent("Cache", 1, {}, {}) assert hardware.get_component("Cache") == cache def test_get_component(): yaml = """ + einsum: + declaration: + A: [K, M] + B: [K, N] + T: [K, M, N] + Z: [M, N] + expressions: + - T[k,m,n] = take(A[k,m], B[k,n], 1) + - Z[m,n] = T[k,m,n] * A[k,m] + mapping: + rank-order: + A: [M, K] + B: [K, N] + T: [M, K, N] + Z: [M, N] + partitioning: + T: + M: [uniform_occupancy(A.32)] + K: [uniform_occupancy(A.64)] + Z: + M: [uniform_occupancy(A.32)] + K: [uniform_occupancy(A.64)] + loop-order: + T: [M1, M0, K1, K0, N] + Z: [M1, M0, K1, N, K0] + spacetime: + T: + space: [M0, K1] + time: [M1, K0, N] + Z: + space: [M0, K1] + time: [M1, N, K0] architecture: - subtree: + Accelerator: - name: Base local: - name: LLB class: Buffet + attributes: + width: 64 + depth: 3145728 - name: FiberCache class: Cache attributes: - width: 8 + width: 64 depth: 3145728 - name: Compute class: Compute + attributes: + type: mul - name: Memory class: DRAM attributes: - datawidth: 8 bandwidth: 128 - name: LFIntersect - class: LeaderFollower + class: Intersector + attributes: + type: leader-follower - name: HighRadixMerger class: Merger attributes: - radix: 64 - next_latency: 1 + inputs: 64 + comparator_radix: 64 + outputs: 1 + order: fifo + reduce: False + + - name: TopSequencer + class: 
Sequencer + attributes: + num_ranks: 3 - name: SAIntersect - class: SkipAhead + class: Intersector + attributes: + type: skip-ahead + + - name: TFIntersect + class: Intersector + attributes: + type: two-finger bindings: - - name: LLB + T: + - config: Accelerator + prefix: tmp/T + - component: LLB bindings: - tensor: A rank: K2 + format: default + type: payload + evict-on: root - tensor: B rank: K2 - - tensor: Z - rank: N2 + format: default + type: payload + evict-on: root - - name: FiberCache + - component: FiberCache bindings: - tensor: B rank: K + format: default + type: payload - - name: Compute - bindings: - - einsum: Z - op: mul - - einsum: Z - op: add - - - name: Memory + - component: Memory bindings: - tensor: A - rank: root + rank: K2 + format: default + type: payload - tensor: B - rank: root - - tensor: Z - rank: root + rank: K2 + format: default + type: payload - - name: LFIntersect + - component: LFIntersect bindings: - - einsum: T - rank: K + - rank: K leader: A - - name: HighRadixMerger + Z: + - config: Accelerator + prefix: tmp/Z + - component: LLB + bindings: + - tensor: Z + rank: N2 + format: default + type: payload + evict-on: root + + - component: Compute + bindings: + - op: mul + + - component: Memory + bindings: + - tensor: Z + rank: N2 + format: default + type: payload + + - component: HighRadixMerger bindings: - tensor: T - init_ranks: [M, K, N] - swap_depth: 1 + init-ranks: [M, K, N] + final-ranks: [M, N, K] - - name: SAIntersect + - component: TopSequencer bindings: - - einsum: Z - rank: K2 + - rank: M2 + - rank: K2 + - rank: N1 + + - component: SAIntersect + bindings: + - rank: K2 + + - component: TFIntersect + bindings: + - rank: K1 """ arch = Architecture.from_str(yaml) bindings = Bindings.from_str(yaml) - hardware = Hardware(arch, bindings) + program = Program(Einsum.from_str(yaml), Mapping.from_str(yaml)) + hardware = Hardware(arch, bindings, program) def assert_component(type_, name, attrs): - binding = bindings.get(name) - component = type_(name, attrs, binding) + binding = bindings.get_component(name) + component = type_(name, 1, attrs, binding) + assert hardware.get_component(name) == component - assert_component(BuffetComponent, "LLB", {}) + attrs = {"width": 64, "depth": 3145728} + assert_component(BuffetComponent, "LLB", attrs) - attrs = {"width": 8, "depth": 3145728} + attrs = {"width": 64, "depth": 3145728} assert_component(CacheComponent, "FiberCache", attrs) - assert_component(ComputeComponent, "Compute", {}) + assert_component(ComputeComponent, "Compute", {"type": "mul"}) attrs = {"datawidth": 8, "bandwidth": 128} assert_component(DRAMComponent, "Memory", attrs) assert_component(LeaderFollowerComponent, "LFIntersect", {}) - attrs = {"radix": 64, "next_latency": 1} + attrs = { + "inputs": 64, + "comparator_radix": 64, + "outputs": 1, + "order": "fifo", + "reduce": False + } assert_component(MergerComponent, "HighRadixMerger", attrs) - assert_component(SkipAheadComponent, "SAIntersect", {}) - + attrs = {"num_ranks": 3} + assert_component(SequencerComponent, "TopSequencer", attrs) -def test_bad_compute_path(): - yaml = """ - architecture: - subtree: - - name: System - - subtree: - - name: Stage0 - local: - - name: BAD0 - class: compute - - - name: Stage1 - local: - - name: BAD1 - class: compute - - bindings: - - name: BAD0 - bindings: - - einsum: Z - op: mul - - name: BAD1 - bindings: - - einsum: Z - op: add - """ - arch = Architecture.from_str(yaml) - bindings = Bindings.from_str(yaml) - hardware = Hardware(arch, bindings) - - with 
pytest.raises(ValueError) as excinfo: - hardware.get_compute_path("Z") - assert str(excinfo.value) == "Only one compute path allowed per einsum" - - -def test_get_compute_path(): - arch = Architecture.from_file("tests/integration/test_arch.yaml") - bindings = Bindings.from_file("tests/integration/test_bindings.yaml") - hardware = Hardware(arch, bindings) - - system = hardware.get_tree() - pe = system.get_subtrees()[0] + assert_component(SkipAheadComponent, "SAIntersect", {}) - assert hardware.get_compute_path("Z") == [system, pe] - assert hardware.get_compute_path("T") == [] + assert_component(TwoFingerComponent, "TFIntersect", {}) -def test_get_compute_components(): +def test_get_components(): yaml = """ + einsum: + declaration: + Z: [M] + X: [M] + A: [K, M] + D: [J, M] + expressions: + - Z[m] = A[k, m] + - X[m] = D[j, m] + architecture: - subtree: + accel: - name: System local: - name: Intersect0 - class: SkipAhead + class: Intersector + attributes: + type: skip-ahead subtree: - name: PE local: - name: Intersect1 - class: SkipAhead + class: Intersector + attributes: + type: skip-ahead - name: MAC class: compute + attributes: + type: add bindings: - - name: Intersect0 + Z: + - config: accel + prefix: tmp/Z + + - component: Intersect0 bindings: - - einsum: Z - rank: K + - rank: K - - name: Intersect1 + - component: MAC bindings: - - einsum: X - rank: J + - op: add - - name: MAC + X: + - config: accel + prefix: tmp/X + - component: Intersect1 bindings: - - einsum: Z - op: add + - rank: J + """ arch = Architecture.from_str(yaml) bindings = Bindings.from_str(yaml) - hardware = Hardware(arch, bindings) + program = Program(Einsum.from_str(yaml), Mapping.from_str(yaml)) + hardware = Hardware(arch, bindings, program) intersect = SkipAheadComponent( - "Intersect0", {}, bindings.get("Intersect0")) - mac = ComputeComponent("MAC", {}, bindings.get("MAC")) + "Intersect0", 1, {}, bindings.get_component("Intersect0")) + mac = ComputeComponent("MAC", 1, + {"type": "add"}, + bindings.get_component("MAC")) + + assert hardware.get_components( + "Z", FunctionalComponent) == [ + intersect, mac] + - assert hardware.get_compute_components("Z") == [intersect, mac] +def test_get_config(): + yaml = build_outerspace_yaml() + hardware = parse_yamls(yaml) + assert hardware.get_config("T0") == "MultiplyPhase" + assert hardware.get_config("T1") == "MergePhase" + assert hardware.get_config("Z") == "MergePhase" -def test_get_merger_components(): + +def test_get_frequency_unspecified(): yaml = """ + einsum: + declaration: + Z: [M] + expressions: + - Z[m] = a architecture: - subtree: + accel: - name: System + bindings: + Z: + - config: accel + prefix: tmp/Z + """ + arch = Architecture.from_str(yaml) + bindings = Bindings.from_str(yaml) + program = Program(Einsum.from_str(yaml), Mapping.from_str(yaml)) + hardware = Hardware(arch, bindings, program) - subtree: - - name: SwapStage0 - local: - - name: Merger0 - class: Merger - attributes: - radix: 64 - next_latency: 1 - - - name: ComputeStage - local: - - name: Compute - class: compute + with pytest.raises(ValueError) as excinfo: + hardware.get_frequency("Z") + assert str(excinfo.value) == "Unspecified clock frequency for config accel" - - name: SwapStage1 - local: - - name: Merger1 - class: Merger - attributes: - radix: 64 - next_latency: 1 +def test_get_frequency_bad(): + yaml = """ + einsum: + declaration: + Z: [M] + expressions: + - Z[m] = a + architecture: + accel: + - name: System + attributes: + clock_frequency: foo bindings: - - name: Merger0 - bindings: - - tensor: T 
- init_ranks: [M, K, N] - swap_depth: 1 - - - name: Compute - bindings: - - einsum: Z - op: add - - - name: Merger1 - bindings: - - tensor: Z - init_ranks: [N, M] - swap_depth: 0 + Z: + - config: accel + prefix: tmp/Z """ arch = Architecture.from_str(yaml) bindings = Bindings.from_str(yaml) - hardware = Hardware(arch, bindings) + program = Program(Einsum.from_str(yaml), Mapping.from_str(yaml)) + hardware = Hardware(arch, bindings, program) + + with pytest.raises(ValueError) as excinfo: + hardware.get_frequency("Z") + assert str(excinfo.value) == "Bad clock frequency for config accel" - attrs = {"radix": 64, "next_latency": 1} - merger0 = MergerComponent("Merger0", attrs, bindings.get("Merger0")) - merger1 = MergerComponent("Merger1", attrs, bindings.get("Merger1")) - assert hardware.get_merger_components() == [merger0, merger1] +def test_get_frequency(): + yaml = build_outerspace_yaml() + hardware = parse_yamls(yaml) + + assert hardware.get_frequency("Z") == 1500000000 def test_get_traffic_path_multiple_bindings(): yaml = """ + einsum: + declaration: + Z: [M] + A: [M] + expressions: + - Z[m] = A[m] + architecture: - subtree: + accel: - name: BAD local: @@ -342,36 +529,56 @@ def test_get_traffic_path_multiple_bindings(): - name: Compute class: compute + attributes: + type: add bindings: - - name: Memory0 + Z: + - config: accel + prefix: tmp/Z + + - component: Memory0 bindings: - tensor: A - rank: root + rank: M + type: payload + format: default - - name: Memory1 + - component: Memory1 bindings: - tensor: A - rank: root + rank: M + type: payload + format: default - - name: Compute + - component: Compute bindings: - - einsum: Z - op: add + - op: add """ arch = Architecture.from_str(yaml) bindings = Bindings.from_str(yaml) - hardware = Hardware(arch, bindings) + program = Program(Einsum.from_str(yaml), Mapping.from_str(yaml)) + program.add_einsum(0) + hardware = Hardware(arch, bindings, program) with pytest.raises(ValueError) as excinfo: - hardware.get_traffic_path("Z", "A") - assert str(excinfo.value) == "Multiple bindings for einsum Z and tensor A" + hardware.get_traffic_path("A", "M", "payload", "default") + assert str(excinfo.value) == "Multiple traffic paths for tensor A in Einsum Z" def test_get_traffic_path(): yaml = """ + einsum: + declaration: + A: [M] + B: [M, K] + X: [M] + Z: [M] + expressions: + - X[m] = A[m] * B[m, k] + - Z[m] = A[m] + B[m] architecture: - subtree: + accel: - name: System local: @@ -382,7 +589,9 @@ def test_get_traffic_path(): - name: Stages local: - name: Intersection - class: SkipAhead + class: Intersector + attributes: + type: skip-ahead - name: LLB class: Buffet @@ -395,6 +604,8 @@ def test_get_traffic_path(): - name: MAC0 class: compute + attributes: + type: mul - name: Stage1 local: @@ -403,6 +614,8 @@ def test_get_traffic_path(): - name: MAC1 class: compute + attributes: + type: mul - name: Stage2 local: @@ -411,76 +624,160 @@ def test_get_traffic_path(): - name: MAC2 class: compute + attributes: + type: mul bindings: - - name: Memory + Z: + - config: accel + prefix: tmp/Z + - component: Memory bindings: - tensor: A - rank: root + rank: M + format: default + type: payload + evict-on: root + - tensor: Z - rank: root + rank: M + format: default + type: payload + evict-on: root - - name: S0B + - component: S0B bindings: - tensor: A rank: M + format: default + type: payload + evict-on: root - tensor: Z rank: M + format: default + type: payload + evict-on: root - - name: MAC0 + - component: MAC0 bindings: - - einsum: A - op: mul + - op: mul - - name: S1B + - 
component: S1B bindings: - tensor: Z rank: M - - - name: MAC1 + format: default + type: coord + evict-on: root + + X: + - config: accel + prefix: tmp/X + - component: MAC1 bindings: - - einsum: X - op: add + - op: add - - name: S2B + - component: S2B bindings: - tensor: A rank: M - - tensor: Z + format: default + type: payload + evict-on: root + - tensor: X rank: M - - - name: MAC2 - bindings: - - einsum: Z - op: add + format: default + type: payload + evict-on: root """ arch = Architecture.from_str(yaml) bindings = Bindings.from_str(yaml) - hardware = Hardware(arch, bindings) + program = Program(Einsum.from_str(yaml), Mapping.from_str(yaml)) + program.add_einsum(1) + hardware = Hardware(arch, bindings, program) + + mem = DRAMComponent("Memory", 1, {}, bindings.get_component("Memory")) + s0b = BuffetComponent("S0B", 1, {}, bindings.get_component("S0B")) + s1b = BuffetComponent("S1B", 1, {}, bindings.get_component("S1B")) + s2b = BuffetComponent("S2B", 1, {}, bindings.get_component("S2B")) + + assert hardware.get_traffic_path( + "A", "M", "payload", "default") == [(mem, "lazy"), (s0b, "lazy")] + assert hardware.get_traffic_path( + "Z", "M", "payload", "default") == [(mem, "lazy"), (s0b, "lazy")] + assert hardware.get_traffic_path( + "Z", "M", "coord", "default") == [(s1b, "lazy")] + + program.add_einsum(0) + assert hardware.get_traffic_path("B", "M", "payload", "default") == [] + - mem = DRAMComponent("Memory", {}, bindings.get("Memory")) - s0b = BuffetComponent("S0B", {}, bindings.get("S0B")) - s1b = BuffetComponent("S1B", {}, bindings.get("S1B")) - s2b = BuffetComponent("S2B", {}, bindings.get("S2B")) +def test_get_traffic_eager(): + extensor = "tests/integration/extensor.yaml" + arch = Architecture.from_file(extensor) + bindings = Bindings.from_file(extensor) + program = Program(Einsum.from_file(extensor), Mapping.from_file(extensor)) + program.add_einsum(0) + hardware = Hardware(arch, bindings, program) - assert hardware.get_traffic_path("A", "A") == [mem, s0b] - assert hardware.get_traffic_path("Z", "A") == [mem, s2b] - assert hardware.get_traffic_path("Z", "Z") == [mem, s2b] - assert hardware.get_traffic_path("X", "B") == [] + dram = hardware.get_component("MainMemory") + llb = hardware.get_component("LLB") + + ranks = ["K2", "M2", "M1", "K1", "M0", "K0"] + types = [[], [], [], ["coord"], ["coord", "payload"], ["coord", "payload"]] + llb.expand_eager("Z", "A", "default", ranks, types) + + assert hardware.get_traffic_path( + "A", "K1", "coord", "default") == [ + (dram, "lazy"), (llb, "lazy")] + assert hardware.get_traffic_path( + "A", "K0", "coord", "default") == [ + (dram, "lazy"), (llb, "M0")] + + +def test_get_prefix(): + gamma = "tests/integration/gamma.yaml" + arch = Architecture.from_file(gamma) + bindings = Bindings.from_file(gamma) + program = Program(Einsum.from_file(gamma), Mapping.from_file(gamma)) + hardware = Hardware(arch, bindings, program) + + assert hardware.get_prefix("T") == "tmp/gamma_T" + assert hardware.get_prefix("Z") == "tmp/gamma_Z" def test_get_tree(): + yaml = """ + einsum: + declaration: + A: [M] + Z: [M] + expressions: + - Z[m] = A[m] + """ arch = Architecture.from_file("tests/integration/test_arch.yaml") bindings = Bindings.from_file("tests/integration/test_bindings.yaml") - hardware = Hardware(arch, bindings) - - regs = BuffetComponent("Registers", {}, bindings.get("Registers")) - mac = ComputeComponent("MAC", {}, bindings.get("MAC")) + program = Program(Einsum.from_str(yaml), Mapping.from_str(yaml)) + hardware = Hardware(arch, bindings, program) + + regs 
= BuffetComponent( + "Registers", 8, + {}, + bindings.get_component("Registers")) + mac = ComputeComponent("MAC", 8, + {"type": "mul"}, + bindings.get_component("MAC")) pe = Level("PE", 8, {}, [regs, mac], []) mem_attrs = {"datawidth": 8, "bandwidth": 128} - mem = DRAMComponent("Memory", mem_attrs, bindings.get("Memory")) + mem = DRAMComponent( + "Memory", + 1, + mem_attrs, + bindings.get_component("Memory")) attrs = {"clock_frequency": 10 ** 9} tree = Level("System", 1, attrs, [mem], [pe]) + + program.add_einsum(0) assert hardware.get_tree() == tree diff --git a/tests/ir/test_level.py b/tests/ir/test_level.py index dac2bd2..a368182 100644 --- a/tests/ir/test_level.py +++ b/tests/ir/test_level.py @@ -4,8 +4,9 @@ def build_local(): attrs = {"datawidth": 8, "bandwidth": 128} - bindings = [{"tensor": "A", "rank": "M"}] - return DRAMComponent("DRAM", attrs, bindings) + bindings = {"Z": [{"tensor": "A", "rank": "M", + "format": "default", "type": "payload"}]} + return DRAMComponent("DRAM", 1, attrs, bindings) def build_level(): @@ -19,7 +20,7 @@ def build_level(): def build_subtree(): - return Level("PE", 8, {}, [ComputeComponent("MAC", {}, [])], []) + return Level("PE", 8, {}, [FunctionalComponent("MAC", 8, {}, {})], []) def test_get_attr(): @@ -53,6 +54,6 @@ def test_eq(): def test_repr(): level = build_level() - repr_ = "(Level, System, 1, {'clock_frequency': 1000000000}, [(DRAMComponent, DRAM, {'datawidth': 8, 'bandwidth': 128}, {'A': 'M'})], [(Level, PE, 8, {}, [(ComputeComponent, MAC, {}, {})], [])])" + repr_ = "(Level, System, 1, {'clock_frequency': 1000000000}, [(DRAMComponent, DRAM, 1, {'Z': [{'tensor': 'A', 'rank': 'M', 'format': 'default', 'type': 'payload'}]}, 128)], [(Level, PE, 8, {}, [(FunctionalComponent, MAC, 8, {})], [])])" assert repr(level) == repr_ diff --git a/tests/ir/test_metrics.py b/tests/ir/test_metrics.py index 9a9ad24..a61fa0c 100644 --- a/tests/ir/test_metrics.py +++ b/tests/ir/test_metrics.py @@ -8,17 +8,27 @@ from teaal.parse import * +def build_extensor_yaml(): + with open("tests/integration/extensor.yaml", "r") as f: + return f.read() + + +def build_extensor_energy_yaml(): + with open("tests/integration/extensor-energy.yaml", "r") as f: + return f.read() + + def build_gamma_yaml(): with open("tests/integration/gamma.yaml", "r") as f: return f.read() -def build_metrics(): - yaml = build_gamma_yaml() - return Metrics(*build_program_hardware(yaml)) +def build_sigma_yaml(): + with open("tests/integration/sigma.yaml", "r") as f: + return f.read() -def build_program_hardware(yaml): +def parse_yamls(yaml): einsum = Einsum.from_str(yaml) mapping = Mapping.from_str(yaml) program = Program(einsum, mapping) @@ -26,398 +36,1019 @@ def build_program_hardware(yaml): arch = Architecture.from_str(yaml) bindings = Bindings.from_str(yaml) - hardware = Hardware(arch, bindings) - format_ = Format.from_str(yaml) - return program, hardware, format_ + return program, arch, bindings, format_ -def test_check_configuration_no_dyn_part(): +def test_used_traffic_paths(): yaml = """ einsum: declaration: - A: [M] - Z: [M] + A: [M, N] + Z: [M, N] expressions: - - Z[m] = A[m] - mapping: - partitioning: - Z: - M: [uniform_occupancy(A.10)] - + - Z[m, n] = A[m, n] architecture: - subtree: + accel: - name: System local: - - name: Compute - class: compute - - binding: - - name: Compute - bindings: - - einsum: Z - op: add + - name: Memory + class: DRAM + subtree: + - name: PE + local: + - name: Registers + class: Buffet + bindings: + Z: + - config: accel + prefix: tmp/Z + - component: Memory + 
bindings: + - tensor: A + rank: N + type: payload + format: default0 + - tensor: A + rank: N + type: payload + format: default1 + - component: Registers + bindings: + - tensor: A + rank: N + type: payload + format: default0 + evict-on: M + - tensor: A + rank: N + type: payload + format: default1 + evict-on: M + format: + A: + default0: + rank-order: [M, N] + M: + format: U + N: + format: U + pbits: 32 + default1: + rank-order: [M, N] + M: + format: U + N: + format: U + pbits: 32 """ - program, hardware, format_ = build_program_hardware(yaml) + program, arch, bindings, format_ = parse_yamls(yaml) + hardware = Hardware(arch, bindings, program) - with pytest.raises(NotImplementedError): + with pytest.raises(ValueError) as excinfo: Metrics(program, hardware, format_) + assert str( + excinfo.value) in { + "Multiple potential formats {'default0', 'default1'} for tensor A in Einsum Z", + "Multiple potential formats {'default1', 'default0'} for tensor A in Einsum Z"} -def test_check_configuration_three_tensors(): +def test_expand_eager(): + program, arch, bindings, format_ = parse_yamls(build_extensor_yaml()) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) + + bindings = {'Z': [ + {'tensor': 'A', 'rank': 'K1', 'type': 'coord', 'evict-on': 'M2', 'format': 'default', 'style': 'lazy'}, + {'tensor': 'A', 'rank': 'M0', 'type': 'coord', 'evict-on': 'M2', 'format': 'default', 'style': 'eager', 'root': 'M0'}, + {'tensor': 'B', 'rank': 'N1', 'type': 'coord', 'evict-on': 'K2', 'format': 'default', 'style': 'lazy'}, + {'tensor': 'B', 'rank': 'N1', 'type': 'payload', 'evict-on': 'K2', 'format': 'default', 'style': 'lazy'}, + {'tensor': 'B', 'rank': 'K1', 'type': 'coord', 'evict-on': 'K2', 'format': 'default', 'style': 'lazy'}, + {'tensor': 'B', 'rank': 'K1', 'type': 'payload', 'evict-on': 'K2', 'format': 'default', 'style': 'lazy'}, + {'tensor': 'B', 'rank': 'N0', 'type': 'coord', 'evict-on': 'K2', 'format': 'default', 'style': 'eager', 'root': 'N0'}, + {'tensor': 'Z', 'rank': 'M0', 'type': 'coord', 'evict-on': 'M2', 'format': 'default', 'style': 'eager', 'root': 'M0'}, + {'tensor': 'Z', 'evict-on': 'M2', 'style': 'eager', 'format': 'default', 'root': 'M0', 'rank': 'N0', 'type': 'coord'}, + {'tensor': 'Z', 'evict-on': 'M2', 'style': 'eager', 'format': 'default', 'root': 'M0', 'rank': 'N0', 'type': 'payload'}, + {'tensor': 'A', 'evict-on': 'M2', 'style': 'eager', 'format': 'default', 'root': 'M0', 'rank': 'M0', 'type': 'payload'}, + {'tensor': 'A', 'evict-on': 'M2', 'style': 'eager', 'format': 'default', 'root': 'M0', 'rank': 'K0', 'type': 'coord'}, + {'tensor': 'A', 'evict-on': 'M2', 'style': 'eager', 'format': 'default', 'root': 'M0', 'rank': 'K0', 'type': 'payload'}, + {'tensor': 'B', 'evict-on': 'K2', 'style': 'eager', 'format': 'default', 'root': 'N0', 'rank': 'N0', 'type': 'payload'}, + {'tensor': 'B', 'evict-on': 'K2', 'style': 'eager', 'format': 'default', 'root': 'N0', 'rank': 'K0', 'type': 'coord'}, + {'tensor': 'B', 'evict-on': 'K2', 'style': 'eager', 'format': 'default', 'root': 'N0', 'rank': 'K0', 'type': 'payload'}]} + + assert hardware.get_component("LLB").get_bindings()["Z"] == bindings["Z"] + + +def test_expand_eager_elem(): yaml = """ einsum: declaration: - A: [M] - B: [M] - C: [M] - Z: [M] + Z: [] + A: [K, M] + B: [K] expressions: - - Z[m] = A[m] * B[m] * C[m] - + - Z[] = A[k, m] * B[k] architecture: - subtree: - - name: System + accel: + - name: level0 local: - - name: Compute - class: compute - - binding: - - name: Compute - bindings: - - 
einsum: Z - op: add + - name: Buffer + class: Buffet + bindings: + Z: + - config: accel + prefix: tmp/Z + - component: Buffer + bindings: + - tensor: A + rank: K + type: payload + evict-on: root + format: default + style: eager + format: + A: + default: + rank-order: [K, M] + K: + format: C + pbits: 32 + M: + format: C + cbits: 32 + pbits: 32 + layout: interleaved """ - program, hardware, format_ = build_program_hardware(yaml) + program, arch, bindings, format_ = parse_yamls(yaml) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) - with pytest.raises(NotImplementedError): - Metrics(program, hardware, format_) + bindings = {'Z': [{'tensor': 'A', + 'rank': 'K', + 'type': 'payload', + 'evict-on': 'root', + 'format': 'default', + 'style': 'eager', + 'root': 'K'}, + {'tensor': 'A', + 'evict-on': 'root', + 'style': 'eager', + 'format': 'default', + 'root': 'K', + 'rank': 'M', + 'type': 'elem'}]} + + assert hardware.get_component("Buffer").get_bindings() == bindings + + +def test_get_coiter(): + program, arch, bindings, format_ = parse_yamls(build_gamma_yaml()) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) + + assert metrics.get_coiter("K") == hardware.get_component("Intersect") -def test_not_loaded_on_chip(): +def test_get_coiter_traces_leader_follower(): + program, arch, bindings, format_ = parse_yamls(build_gamma_yaml()) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) + + assert metrics.get_coiter_traces("Intersect", "K") == ["intersect_2"] + + +def test_get_coiter_traces_two_finger_more_than_two(): yaml = """ einsum: declaration: - Z: [M] + Z: [] + A: [K] + B: [K] + C: [K] expressions: - - - Z[m] = a - + - Z[] = A[k] * B[k] * C[k] architecture: - subtree: - - name: System + accel: + - name: level0 local: - - name: Memory - class: DRAM - - subtree: - - name: PE - local: - - name: MAC - class: compute - + - name: Intersect + class: Intersector + attributes: + type: two-finger bindings: - - name: Memory - bindings: - - tensor: Z - rank: M - - - name: MAC - bindings: - - einsum: Z - op: add + Z: + - config: accel + prefix: tmp/Z + - component: Intersect + bindings: + - rank: K + # TODO: Allow the format to be empty + format: + Z: + default: + rank-order: [] """ - program, hardware, format_ = build_program_hardware(yaml) + program, arch, bindings, format_ = parse_yamls(yaml) + hardware = Hardware(arch, bindings, program) - with pytest.raises(ValueError) as excinfo: + with pytest.raises(NotImplementedError): Metrics(program, hardware, format_) - assert str(excinfo.value) == "Tensor Z never buffered on chip" -def test_not_implemented_root_not_in_dram(): +def test_get_coiter_traces_two_finger(): yaml = """ einsum: declaration: - Z: [M] + Z: [] + A: [K] + B: [K] expressions: - - Z[m] = a - + - Z[] = A[k] * B[k] architecture: - subtree: - - name: System + accel: + - name: level0 local: - - name: Memory - class: DRAM + - name: Intersect + class: Intersector + attributes: + type: two-finger + bindings: + Z: + - config: accel + prefix: tmp/Z + - component: Intersect + bindings: + - rank: K + # TODO: Allow the format to be empty + format: + Z: + default: + rank-order: [] + """ + program, arch, bindings, format_ = parse_yamls(yaml) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) - subtree: - - name: PE - local: - - name: Buffer - class: Buffet + assert metrics.get_coiter_traces("Intersect", "K") == [ + "intersect_0", 
"intersect_1"] + + +def test_get_collected_iter_info(): + program, arch, bindings, format_ = parse_yamls(build_gamma_yaml()) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) + + assert metrics.get_collected_iter_info() == set() + + program, arch, bindings, format_ = parse_yamls( + build_extensor_energy_yaml()) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) + + assert metrics.get_collected_iter_info() == { + "N2", "K2", "M2", "M1", "N1", "K1", "M0", "N0", "K0"} + + +def test_get_collected_tensor_info(): + program, arch, bindings, format_ = parse_yamls(build_gamma_yaml()) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) + + assert metrics.get_collected_tensor_info("A") == {("K", "fiber", False), ( + "M", "iter", False), ("M", "fiber", False), ("K", "iter", False), ("K", "fiber", True)} + assert metrics.get_collected_tensor_info("B") == {( + "N", "iter", False), ("K", "fiber", False), ("N", "fiber", False), ("K", "iter", False)} + assert metrics.get_collected_tensor_info("T") == set() + + program.reset() + program.add_einsum(1) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) + + assert metrics.get_collected_tensor_info("A") == {( + "K", "fiber", False), ("M", "iter", False), ("M", "fiber", False), ("K", "iter", False)} + assert metrics.get_collected_tensor_info("T") == set() + assert metrics.get_collected_tensor_info("Z") == {( + "M", "iter", False), ("N", "iter", False), ("M", "fiber", False), ("N", "fiber", False)} - - name: MAC - class: compute +def test_get_collected_tensor_info_eager(): + program, arch, bindings, format_ = parse_yamls(build_extensor_yaml()) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) + + assert metrics.get_collected_tensor_info("A") == {('M0', 'M0', False), ('K1', 'fiber', True), ( + 'K2', 'fiber', True), ('K1', 'fiber', False), ('K0', 'M0', False), ('K0', 'fiber', True)} + assert metrics.get_collected_tensor_info("B") == { + ('N1', 'fiber', False), + ('K0', 'N0', False), + ('N1', 'iter', False), + ('N0', 'N0', False), + ('K1', 'fiber', True), + ('K2', 'fiber', True), + ('K1', 'fiber', False), + ('K1', 'iter', False), + ('K0', 'fiber', True)} + assert metrics.get_collected_tensor_info( + "Z") == {('N0', 'M0', False), ("M0", "M0", False)} + + +def test_get_collected_tensor_info_extra_intersection_test(): + yaml = """ + einsum: + declaration: + Z: [M, N] + A: [M] + B: [M] + C: [N] + expressions: + - Z[m, n] = A[m] * B[m] * C[n] + architecture: + accel: + - name: level0 + local: + - name: Intersector + class: Intersector + attributes: + type: two-finger bindings: - - name: Memory - bindings: - - tensor: Z - rank: M - - - name: Buffer - bindings: - - tensor: Z - rank: M - - - name: MAC - bindings: - - einsum: Z - op: add + Z: + - config: accel + prefix: tmp/Z + - component: Intersector + bindings: + - rank: M + format: + Z: + default: + rank-order: [M, N] + M: + format: C + N: + format: C + pbits: 64 """ - program, hardware, format_ = build_program_hardware(yaml) + program, arch, bindings, format_ = parse_yamls(yaml) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) - with pytest.raises(NotImplementedError): - Metrics(program, hardware, format_) + assert metrics.get_collected_tensor_info("A") == {("M", "fiber", True)} + assert metrics.get_collected_tensor_info("B") == {("M", "fiber", True)} + 
assert metrics.get_collected_tensor_info("C") == set() + assert metrics.get_collected_tensor_info("Z") == set() -def test_get_compute_components(): - metrics = build_metrics() - bindings = Bindings.from_str(build_gamma_yaml()) +def test_get_collected_tensor_info_flattening(): + program, arch, bindings, format_ = parse_yamls(build_sigma_yaml()) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) - intersect = LeaderFollowerComponent( - "Intersection", {}, bindings.get("Intersection")) + assert metrics.get_collected_tensor_info("A") == {("MK00", "MK00", False)} + assert metrics.get_collected_tensor_info("B") == {("K0", "K0", False)} - assert metrics.get_compute_components() == [intersect] +def test_get_eager_evict_on(): + program, arch, bindings, format_ = parse_yamls(build_extensor_yaml()) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) -def test_get_format(): - metrics = build_metrics() - spec = { - "M": { - "format": "U", - "rhbits": 32, - "pbits": 32}, - "K": { - "format": "C", - "cbits": 32, - "pbits": 64}} - assert metrics.get_format(Tensor("A", ["M", "K"])) == spec + assert metrics.get_eager_evict_on("A", "K2") == [] + assert metrics.get_eager_evict_on("A", "M0") == ["M2"] + assert metrics.get_eager_evict_on("B", "N0") == ["K2"] -def test_get_merger_components(): - yaml = build_gamma_yaml() - program, hardware, format_ = build_program_hardware(yaml) +def test_get_eager_evicts(): + program, arch, bindings, format_ = parse_yamls(build_extensor_yaml()) + hardware = Hardware(arch, bindings, program) metrics = Metrics(program, hardware, format_) - assert metrics.get_merger_components() == [] + assert metrics.get_eager_evicts("N2") == [] + assert metrics.get_eager_evicts("K2") == [("B", "N0")] + assert metrics.get_eager_evicts("M2") == [("A", "M0"), ("Z", "M0")] - bindings = Bindings.from_str(yaml) - attrs = {"radix": 64, "next_latency": 1} - merger = MergerComponent( - "HighRadixMerger", - attrs, - bindings.get("HighRadixMerger")) - binding = bindings.get("HighRadixMerger")[0].copy() - binding["final_ranks"] = ["M", "N", "K"] +def test_get_eager_write(): + program, arch, bindings, format_ = parse_yamls(build_gamma_yaml()) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) - program.reset() - program.add_einsum(1) + assert not metrics.get_eager_write() + + program, arch, bindings, format_ = parse_yamls(build_extensor_yaml()) + hardware = Hardware(arch, bindings, program) metrics = Metrics(program, hardware, format_) - assert metrics.get_merger_components() == [(merger, binding)] + assert metrics.get_eager_write() -def test_get_merger_components_output(): +def test_get_fiber_trace(): yaml = """ einsum: declaration: - Z: [M, N] + Z0: [M] + Z1: [M] + Z2: [M] + Z3: [M] + Z4: [M] + A: [M, K] + B: [M, K] + C: [M, K] + D: [M, K] + E: [M, K] + F: [M, K] + G: [M, K] expressions: - - Z[m, n] = a + - Z0[m] = a + - Z1[m] = A[m, k] + - Z2[m] = A[m, k] * B[m, k] + - Z3[m] = A[m, k] + B[m, k] + - Z4[m] = A[m, k] * B[m, k] * C[m, k] + D[m, k] + E[m, k] * F[m, k] + G[m, k] + architecture: + accel: + - name: empty + bindings: + Z0: + - config: accel + prefix: tmp/Z0 + Z1: + - config: accel + prefix: tmp/Z1 + Z2: + - config: accel + prefix: tmp/Z2 + Z3: + - config: accel + prefix: tmp/Z3 + Z4: + - config: accel + prefix: tmp/Z4 + format: + Z0: + default: + rank-order: [M] + M: + format: C + pbits: 64 + """ + program, arch, bindings, format_ = parse_yamls(yaml) + hardware = 
Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) - mapping: - rank-order: - Z: [M, N] - loop-order: - Z: [N, M] + assert metrics.get_fiber_trace("Z0", "M", True) == "iter" + assert metrics.get_fiber_trace("Z0", "M", False) == "iter" - architecture: - subtree: - - name: System - local: - - name: Merger - class: Merger + program.reset() + program.add_einsum(1) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) + + assert metrics.get_fiber_trace("Z1", "M", True) == "populate_read_0" + assert metrics.get_fiber_trace("Z1", "M", False) == "populate_write_0" + assert metrics.get_fiber_trace("A", "M", True) == "populate_1" + assert metrics.get_fiber_trace("A", "K", True) == "iter" + + program.reset() + program.add_einsum(2) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) + + assert metrics.get_fiber_trace("Z2", "M", True) == "populate_read_0" + assert metrics.get_fiber_trace("Z2", "M", False) == "populate_write_0" + assert metrics.get_fiber_trace("A", "M", True) == "intersect_2" + assert metrics.get_fiber_trace("A", "K", True) == "intersect_0" + assert metrics.get_fiber_trace("B", "M", True) == "intersect_3" + assert metrics.get_fiber_trace("B", "K", True) == "intersect_1" - - name: Compute - class: compute + program.reset() + program.add_einsum(3) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) + assert metrics.get_fiber_trace("Z3", "M", True) == "populate_read_0" + assert metrics.get_fiber_trace("Z3", "M", False) == "populate_write_0" + assert metrics.get_fiber_trace("A", "M", True) == "union_2" + assert metrics.get_fiber_trace("A", "K", True) == "union_0" + assert metrics.get_fiber_trace("B", "M", True) == "union_3" + assert metrics.get_fiber_trace("B", "K", True) == "union_1" + + program.reset() + program.add_einsum(4) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) + + assert metrics.get_fiber_trace("Z4", "M", True) == "populate_read_0" + assert metrics.get_fiber_trace("Z4", "M", False) == "populate_write_0" + assert metrics.get_fiber_trace("A", "M", True) == "intersect_4" + assert metrics.get_fiber_trace("A", "K", True) == "intersect_2" + assert metrics.get_fiber_trace("B", "M", True) == "intersect_6" + assert metrics.get_fiber_trace("B", "K", True) == "intersect_4" + assert metrics.get_fiber_trace("C", "M", True) == "intersect_7" + assert metrics.get_fiber_trace("C", "K", True) == "intersect_5" + assert metrics.get_fiber_trace("D", "M", True) == "union_8" + assert metrics.get_fiber_trace("D", "K", True) == "union_6" + assert metrics.get_fiber_trace("E", "M", True) == "intersect_12" + assert metrics.get_fiber_trace("E", "K", True) == "intersect_10" + assert metrics.get_fiber_trace("F", "M", True) == "intersect_13" + assert metrics.get_fiber_trace("F", "K", True) == "intersect_11" + assert metrics.get_fiber_trace("G", "M", True) == "union_11" + assert metrics.get_fiber_trace("G", "K", True) == "union_9" + + +def test_get_fiber_trace_coord_math(): + yaml = """ + einsum: + declaration: + A: [K] + B: [M] + Z0: [M] + Z1: [M] + Z2: [M] + expressions: + - Z0[m] = A[2 * m] + - Z1[m] = A[2 * m] + B[m] + - Z2[m] = A[2 * m] * B[m] + architecture: + accel: + - name: empty bindings: - - name: Merger - bindings: - - tensor: Z - init_ranks: [N, M] - swap_depth: 0 - - - name: Compute - bindings: - - einsum: Z - op: add + Z0: + - config: accel + prefix: tmp/Z0 + Z1: + - config: accel + prefix: 
tmp/Z1 + Z2: + - config: accel + prefix: tmp/Z2 + format: + Z0: + default: + rank-order: [M] + M: + format: C """ - program, hardware, format_ = build_program_hardware(yaml) + program, arch, bindings, format_ = parse_yamls(yaml) + hardware = Hardware(arch, bindings, program) metrics = Metrics(program, hardware, format_) - bindings = Bindings.from_str(yaml) - merger = MergerComponent("Merger", {}, bindings.get("Merger")) - binding = bindings.get("Merger")[0].copy() - binding["final_ranks"] = ["M", "N"] + assert metrics.get_fiber_trace("A", "K", True) == "populate_1" + assert metrics.get_fiber_trace("Z0", "M", True) == "populate_read_0" + assert metrics.get_fiber_trace("Z0", "M", False) == "populate_write_0" - assert metrics.get_merger_components() == [(merger, binding)] + program.reset() + program.add_einsum(1) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) + + assert metrics.get_fiber_trace("A", "K", True) == "union_2" + assert metrics.get_fiber_trace("B", "M", True) == "union_3" + assert metrics.get_fiber_trace("Z1", "M", True) == "populate_read_0" + assert metrics.get_fiber_trace("Z1", "M", False) == "populate_write_0" + + program.reset() + program.add_einsum(2) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) + + assert metrics.get_fiber_trace("A", "K", True) == "intersect_2" + assert metrics.get_fiber_trace("B", "M", True) == "intersect_3" + assert metrics.get_fiber_trace("Z2", "M", True) == "populate_read_0" + assert metrics.get_fiber_trace("Z2", "M", False) == "populate_write_0" -def test_get_merger_components_part_merge(): +def test_get_fiber_trace_leader_follower_multiple_intersectors(): yaml = """ einsum: declaration: - A: [K, M] Z: [M] + A: [M, K] + B: [M, K] + C: [M, K] + D: [M, K] expressions: - - Z[m] = A[k, m] + - Z[m] = A[m, k] * B[m, k] + C[m, k] * D[m, k] + architecture: + accel: + - name: level0 + local: + - name: LeaderFollower0 + class: Intersector + attributes: + type: leader-follower + - name: LeaderFollower1 + class: Intersector + attributes: + type: leader-follower + bindings: + Z: + - config: accel + prefix: tmp/Z + - component: LeaderFollower0 + bindings: + - rank: M + leader: A + - component: LeaderFollower1 + bindings: + - rank: M + leader: A + format: + Z0: + default: + rank-order: [M] + M: + format: C + pbits: 64 + """ + program, arch, bindings, format_ = parse_yamls(yaml) + hardware = Hardware(arch, bindings, program) - mapping: - rank-order: - A: [M, K] + with pytest.raises(NotImplementedError): + metrics = Metrics(program, hardware, format_) - partitioning: - Z: - M: [uniform_shape(10)] - loop-order: - Z: [M1, K, M0] +def test_get_fiber_trace_leader_follower_multiple_terms(): + yaml = """ + einsum: + declaration: + Z: [M] + A: [M, K] + B: [M, K] + C: [M, K] + D: [M, K] + expressions: + - Z[m] = A[m, k] * B[m, k] + C[m, k] * D[m, k] architecture: - subtree: - - name: System + accel: + - name: level0 local: - - name: Merger - class: Merger + - name: LeaderFollower + class: Intersector + attributes: + type: leader-follower + bindings: + Z: + - config: accel + prefix: tmp/Z + - component: LeaderFollower + bindings: + - rank: M + leader: A + - rank: K + leader: A + format: + Z0: + default: + rank-order: [M] + M: + format: C + pbits: 64 + """ + program, arch, bindings, format_ = parse_yamls(yaml) + hardware = Hardware(arch, bindings, program) + + with pytest.raises(NotImplementedError): + metrics = Metrics(program, hardware, format_) - - name: Compute - class: compute 
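+# A minimal sketch of the trace-naming pattern the asserts below rely on
+# (inferred from this test's expectations, not a documented contract): under
+# leader-follower intersection, the configured leader tensor takes the
+# lowest-numbered intersect trace on its rank, and the followers are numbered
+# after it in expression order, e.g. leader C on M -> intersect_2 with
+# followers A, B, D at 3, 4, 5, and leader B on K -> intersect_0 with
+# followers A, C, D at 1, 2, 3.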
+def test_get_fiber_trace_leader_follower(): + yaml = """ + einsum: + declaration: + Z: [M] + A: [M, K] + B: [M, K] + C: [M, K] + D: [M, K] + expressions: + - Z[m] = A[m, k] * B[m, k] * C[m, k] * D[m, k] + architecture: + accel: + - name: level0 + local: + - name: LeaderFollower + class: Intersector + attributes: + type: leader-follower bindings: - - name: Merger - bindings: - - tensor: A - init_ranks: [M1, M0, K] - swap_depth: 1 - - - name: Compute - bindings: - - einsum: Z - op: add + Z: + - config: accel + prefix: tmp/Z + - component: LeaderFollower + bindings: + - rank: M + leader: C + - rank: K + leader: B + format: + Z0: + default: + rank-order: [M] + M: + format: C + pbits: 64 """ - program, hardware, format_ = build_program_hardware(yaml) + program, arch, bindings, format_ = parse_yamls(yaml) + hardware = Hardware(arch, bindings, program) metrics = Metrics(program, hardware, format_) - bindings = Bindings.from_str(yaml) - merger = MergerComponent("Merger", {}, bindings.get("Merger")) - binding = bindings.get("Merger")[0].copy() - binding["final_ranks"] = ["M1", "K", "M0"] + assert metrics.get_fiber_trace("Z", "M", True) == "populate_read_0" + assert metrics.get_fiber_trace("Z", "M", False) == "populate_write_0" + assert metrics.get_fiber_trace("C", "M", True) == "intersect_2" + assert metrics.get_fiber_trace("A", "M", True) == "intersect_3" + assert metrics.get_fiber_trace("B", "M", True) == "intersect_4" + assert metrics.get_fiber_trace("D", "M", True) == "intersect_5" - assert metrics.get_merger_components() == [(merger, binding)] + assert metrics.get_fiber_trace("B", "K", True) == "intersect_0" + assert metrics.get_fiber_trace("A", "K", True) == "intersect_1" + assert metrics.get_fiber_trace("C", "K", True) == "intersect_2" + assert metrics.get_fiber_trace("D", "K", True) == "intersect_3" -def test_get_on_chip_buffer_not_in_dram(): - metrics = build_metrics() +def test_get_fiber_trace_get_payload(): + program, arch, bindings, format_ = parse_yamls(build_sigma_yaml()) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) - with pytest.raises(ValueError) as excinfo: - metrics.get_on_chip_buffer(Tensor("T", ["M", "K", "N"])) - assert str(excinfo.value) == "Tensor T not stored in DRAM" + assert metrics.get_fiber_trace("B", "K0", True) == "get_payload_B" -def test_get_on_chip_buffer(): - metrics = build_metrics() - bindings = Bindings.from_str(build_gamma_yaml()) +def test_get_format(): + program, arch, bindings, format_ = parse_yamls(build_gamma_yaml()) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) - attrs = {"width": 8, "depth": 3145728} - cache = CacheComponent("FiberCache", attrs, bindings.get("FiberCache")) - regs = BuffetComponent("RegFile0", {}, bindings.get("RegFile0")) + assert metrics.get_format() == format_ - assert metrics.get_on_chip_buffer(Tensor("A", ["M", "K"])) == regs - assert metrics.get_on_chip_buffer(Tensor("B", ["K", "N"])) == cache +def test_get_hardware(): + program, arch, bindings, format_ = parse_yamls(build_gamma_yaml()) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) -def test_get_on_chip_rank_not_in_dram(): - metrics = build_metrics() + assert metrics.get_hardware() == hardware - with pytest.raises(ValueError) as excinfo: - metrics.get_on_chip_rank(Tensor("T", ["M", "K", "N"])) - assert str(excinfo.value) == "Tensor T not stored in DRAM" +def test_get_loop_formats(): + program, arch, bindings, format_ = 
parse_yamls(build_gamma_yaml()) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) -def test_get_on_chip_rank(): - metrics = build_metrics() + assert metrics.get_loop_formats() == {"A": "default", "B": "default"} - assert metrics.get_on_chip_rank(Tensor("A", ["M", "K"])) == "M" - assert metrics.get_on_chip_rank(Tensor("B", ["K", "N"])) == "K" +def test_get_merger_init_ranks_multiple_bindings(): + yaml = """ + einsum: + declaration: + A: [M, N] + Z: [M, N] + expressions: + - Z[m, n] = A[m, n] + architecture: + merger: + - name: mergers + local: + - name: Merger0 + class: Merger + attributes: + inputs: 2 + comparator_radix: 2 + - name: Merger1 + class: Merger + attributes: + inputs: 2 + comparator_radix: 2 + bindings: + Z: + - config: merger + prefix: tmp/Z + - component: Merger0 + bindings: + - tensor: A + init-ranks: [M, N] + final-ranks: [N, M] + - component: Merger1 + bindings: + - tensor: A + init-ranks: [M, N] + final-ranks: [N, M] + format: + A: + default: + rank-order: [N, M] + N: + format: U + M: + format: U + pbits: 32 + """ + program, arch, bindings, format_ = parse_yamls(yaml) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) -def test_in_dram(): - metrics = build_metrics() + with pytest.raises(ValueError) as excinfo: + metrics.get_merger_init_ranks("A", ["N", "M"]) + assert str( + excinfo.value) == "Multiple bindings for merge of tensor A to final rank order ['N', 'M']" - assert metrics.in_dram(Tensor("A", ["M", "K"])) - assert metrics.in_dram(Tensor("B", ["M", "K"])) - assert not metrics.in_dram(Tensor("T", ["M", "K", "N"])) +def test_get_merger_init_ranks(): + program, arch, bindings, format_ = parse_yamls(build_gamma_yaml()) + program.reset() + program.add_einsum(1) -def test_on_chip_stationary(): - metrics = build_metrics() + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) - assert metrics.on_chip_stationary(Tensor("A", ["M", "K"])) - assert not metrics.on_chip_stationary(Tensor("B", ["K", "N"])) + assert metrics.get_merger_init_ranks( + "T", [ + "M", "N", "K"]) == [ + "M", "K", "N"] + assert metrics.get_merger_init_ranks( + "T", ["M1", "M0", "K1", "N", "K0"]) is None + assert metrics.get_merger_init_ranks("Z", ["M", "N"]) is None -def test_on_chip_stationary_root_buffered(): +def test_get_source_memory_not_memory(): yaml = """ einsum: declaration: Z: [M] expressions: - Z[m] = a + architecture: + accel: + - name: level0 + local: + - name: LeaderFollower + class: Intersector + attributes: + type: leader-follower + - name: DRAM + class: DRAM + bindings: + Z: + - config: accel + prefix: tmp/Z + - component: DRAM + bindings: + - tensor: Z + rank: M + type: payload + format: default + format: + Z: + default: + rank-order: [M] + M: + format: C + pbits: 64 + """ + program, arch, bindings, format_ = parse_yamls(yaml) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) + + with pytest.raises(ValueError) as excinfo: + metrics.get_source_memory("LeaderFollower", "Z", "M", "payload") + assert str( + excinfo.value) == "Destination component LeaderFollower not a memory" + + +def test_get_source_memory(): + yaml = """ + einsum: + declaration: + A: [K, M] + B: [K, M] + C: [K] + Z: [M] + expressions: + - Z[m] = A[k, m] * B[k, m] * C[k] architecture: - subtree: - - name: System + accel: + - name: level0 local: - - name: Memory + - name: DRAM class: DRAM + attributes: + bandwidth: 512 subtree: - - name: PE + - name: 
level1 local: - - name: Buffer - class: Buffet - - - name: MAC - class: compute + - name: L2Cache + class: Cache + attributes: + width: 64 + depth: 1024 + bandwidth: 2048 + + subtree: + - name: level2 + local: + - name: L1Cache + class: Cache + attributes: + width: 64 + depth: 128 bindings: - - name: Memory - bindings: - - tensor: Z - rank: root - - - name: Buffer - bindings: - - tensor: Z - rank: root - - - name: MAC - bindings: - - einsum: Z - op: add + Z: + - config: accel + prefix: tmp/Z + - component: DRAM + bindings: + - tensor: A + rank: M + type: payload + format: default + - tensor: A + rank: K + type: coord + format: default + - tensor: A + rank: K + type: payload + format: default + - tensor: Z + rank: M + type: elem + format: default + - component: L2Cache + bindings: + - tensor: A + rank: M + type: payload + format: default + - tensor: A + rank: K + type: coord + format: default + - tensor: A + rank: K + type: payload + format: default + - tensor: Z + rank: M + type: elem + format: default + - component: L1Cache + bindings: + - tensor: A + rank: K + type: coord + format: default + - tensor: A + rank: K + type: payload + format: default + - tensor: B + rank: K + type: payload + format: default + - tensor: Z + rank: M + type: elem + format: default + + format: + A: + default: + rank-order: [M, K] + M: + format: U + pbits: 32 + K: + format: C + cbits: 32 + pbits: 64 + B: + default: + rank-order: [M, K] + M: + format: U + K: + format: U + pbits: 64 + Z: + default: + rank-order: [M] + M: + format: C + cbits: 32 + pbits: 64 """ - metrics = Metrics(*build_program_hardware(yaml)) + program, arch, bindings, format_ = parse_yamls(yaml) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) - assert metrics.on_chip_stationary(Tensor("Z", ["M"])) + assert metrics.get_source_memory("L2Cache", "C", "K", "payload") is None + assert metrics.get_source_memory("L1Cache", "B", "M", "payload") is None + assert metrics.get_source_memory("L2Cache", "B", "K", "payload") is None + assert metrics.get_source_memory("L1Cache", "B", "K", "payload") is None + assert metrics.get_source_memory( + "L2Cache", "A", "M", "payload") == hardware.get_component("DRAM") + assert metrics.get_source_memory( + "L1Cache", "Z", "M", "elem") == hardware.get_component("L2Cache") diff --git a/tests/ir/test_part_node.py b/tests/ir/test_part_nodes.py similarity index 100% rename from tests/ir/test_part_node.py rename to tests/ir/test_part_nodes.py diff --git a/tests/ir/test_partitioning.py b/tests/ir/test_partitioning.py index 02619f7..3e35b11 100644 --- a/tests/ir/test_partitioning.py +++ b/tests/ir/test_partitioning.py @@ -358,12 +358,10 @@ def test_get_final_rank_id(): """ partitioning = build_partitioning(all_parts) - assert partitioning.get_final_rank_id(Tensor("B", ["K", "N"]), "N") == "N2" - assert partitioning.get_final_rank_id( - Tensor("B", ["K", "N"]), "N2") == "N2" - assert partitioning.get_final_rank_id( - Tensor("A", ["K", "M"]), "M1I") == "M1" - assert partitioning.get_final_rank_id(Tensor("B", ["K", "N"]), "K") == "K" + assert partitioning.get_final_rank_id(["K", "N"], "N") == "N2" + assert partitioning.get_final_rank_id(["K", "N"], "N2") == "N2" + assert partitioning.get_final_rank_id(["K", "M"], "M1I") == "M1" + assert partitioning.get_final_rank_id(["K", "N"], "K") == "K" def test_final_rank_id_flattening(): @@ -374,23 +372,15 @@ def test_final_rank_id_flattening(): """ partitioning = build_partitioning(all_parts) - assert partitioning.get_final_rank_id( - Tensor("A", 
["K", "M"]), "MK00") == "MK00" - assert partitioning.get_final_rank_id( - Tensor("A", ["K", "M"]), "MK01") == "MK01" - assert partitioning.get_final_rank_id( - Tensor("A", ["K", "M"]), "MK0") == "MK01" - assert partitioning.get_final_rank_id( - Tensor("A", ["K", "M"]), "M") == "MK01" - assert partitioning.get_final_rank_id( - Tensor("Z", ["M", "N"]), "M") == "MK00" - assert partitioning.get_final_rank_id( - Tensor("A", ["K", "M"]), "K0") == "MK01" - assert partitioning.get_final_rank_id( - Tensor("B", ["K", "N"]), "K0") == "MK00" - assert partitioning.get_final_rank_id( - Tensor("B", ["K", "N"]), "K1") == "K1" - assert partitioning.get_final_rank_id(Tensor("B", ["K", "N"]), "N") == "N" + assert partitioning.get_final_rank_id(["K", "M"], "MK00") == "MK00" + assert partitioning.get_final_rank_id(["K", "M"], "MK01") == "MK01" + assert partitioning.get_final_rank_id(["K", "M"], "MK0") == "MK01" + assert partitioning.get_final_rank_id(["K", "M"], "M") == "MK01" + assert partitioning.get_final_rank_id(["M", "N"], "M") == "MK00" + assert partitioning.get_final_rank_id(["K", "M"], "K0") == "MK01" + assert partitioning.get_final_rank_id(["K", "N"], "K0") == "MK00" + assert partitioning.get_final_rank_id(["K", "N"], "K1") == "K1" + assert partitioning.get_final_rank_id(["K", "N"], "N") == "N" def test_get_final_rank_id_conv(): @@ -400,10 +390,10 @@ def test_get_final_rank_id_conv(): """ partitioning = build_partitioning_conv(all_parts) - assert partitioning.get_final_rank_id(Tensor("I", ["W"]), "W") == "W2" - assert partitioning.get_final_rank_id(Tensor("I", ["W"]), "W1I") == "W1" - assert partitioning.get_final_rank_id(Tensor("I", ["W"]), "W0") == "W0" - assert partitioning.get_final_rank_id(Tensor("F", ["S"]), "S") == "S" + assert partitioning.get_final_rank_id(["W"], "W") == "W2" + assert partitioning.get_final_rank_id(["W"], "W1I") == "W1" + assert partitioning.get_final_rank_id(["W"], "W0") == "W0" + assert partitioning.get_final_rank_id(["S"], "S") == "S" def test_get_intermediates(): diff --git a/tests/ir/test_program.py b/tests/ir/test_program.py index 125833d..b66b8d9 100644 --- a/tests/ir/test_program.py +++ b/tests/ir/test_program.py @@ -30,6 +30,22 @@ def create_default(): return Program(Einsum.from_str(yaml), Mapping.from_str(yaml)) +def create_cascade(): + yaml = """ + einsum: + declaration: + Z: [M, N] + A: [K, M] + B: [K, N] + T: [M, N] + C: [M, N] + expressions: + - T[m, n] = A[k, m] * B[k, n] + - Z[m, n] = T[m, n] + C[m, n] + """ + return Program(Einsum.from_str(yaml), Mapping.from_str(yaml)) + + def create_loop_ordered(): yaml = """ einsum: @@ -215,6 +231,11 @@ def test_apply_partition_swizzling(): assert A.get_ranks() == ["J", "K1", "N", "M", "K0"] +def test_get_all_einsums(): + program = create_cascade() + assert program.get_all_einsums() == ["T", "Z"] + + def test_get_equation_unconfigured(): program = create_default() diff --git a/tests/ir/test_spacetime.py b/tests/ir/test_spacetime.py index 25e559c..ae99d9b 100644 --- a/tests/ir/test_spacetime.py +++ b/tests/ir/test_spacetime.py @@ -1,10 +1,34 @@ import pytest from sympy import symbols +from teaal.ir.coord_math import CoordMath from teaal.ir.partitioning import Partitioning from teaal.ir.spacetime import SpaceTime +from teaal.ir.tensor import Tensor from teaal.parse.mapping import Mapping from teaal.parse.spacetime import SpaceTimeParser +from tests.utils.parse_tree import * + + +def parse_mapping(parts, spacetime): + yaml = """ + mapping: + partitioning: + Z:""" + parts + """ + spacetime: + Z:""" + spacetime + return 
Mapping.from_str(yaml) + + +def build_partitioning(mapping): + dict_ = mapping.get_partitioning()["Z"] + + coord_math = CoordMath() + tensor = Tensor("T", ["J", "M", "N", "K"]) + ranks = make_ranks(["j", "m", "n", "k"]) + coord_math.add(tensor, ranks) + + return Partitioning(dict_, ["J", "M", "N", "K"], coord_math) def create_yaml(space, time, opt=None): @@ -24,8 +48,8 @@ def create_yaml(space, time, opt=None): def create_eqn_exprs(): - k, m, n = symbols("k m n") - return {k: k, m: m, n: n} + j, k, m, n = symbols("j k m n") + return {j: j, k: k, m: m, n: n} def test_bad_space(): @@ -148,6 +172,27 @@ def test_get_style(): assert spacetime.get_style("N") == "coord" +def test_get_style_flattening(): + parts = """ + (M, K): [flatten()] + """ + sptm = """ + space: [MK.pos] + time: [J, N] + """ + mapping = parse_mapping(parts, sptm) + partitioning = build_partitioning(mapping) + eqn_exprs = create_eqn_exprs() + spacetime = SpaceTime( + mapping.get_spacetime()["Z"], + partitioning, + eqn_exprs) + + assert spacetime.get_style("MK") == "pos" + assert spacetime.get_style("M") == "pos" + assert spacetime.get_style("K") == "pos" + + def test_get_time(): yaml = create_yaml(["M"], ["K", "N"]) eqn_exprs = create_eqn_exprs() diff --git a/tests/ir/test_tensor.py b/tests/ir/test_tensor.py index 7f5568c..9b9aac1 100644 --- a/tests/ir/test_tensor.py +++ b/tests/ir/test_tensor.py @@ -91,6 +91,11 @@ def test_peek_empty(): assert tensor.peek() is None +def test_peek_clean(): + tensor = Tensor("A", ["I", "J"]) + assert tensor.peek_clean() == "I" + + def test_peek_rest(): tensor = Tensor("A", ["K1", "M", "K0"]) assert tensor.peek_rest() == ["K1", "M", "K0"] diff --git a/tests/parse/test_arch.py b/tests/parse/test_arch.py index 2707296..59f5cee 100644 --- a/tests/parse/test_arch.py +++ b/tests/parse/test_arch.py @@ -31,7 +31,7 @@ def test_bad_architecture(): def test_unnamed_subtree(): yaml = """ architecture: - subtree: + Config0: - attributes: foo: 1 """ @@ -44,7 +44,7 @@ def test_unnamed_subtree(): def test_unnamed_local(): yaml = """ architecture: - subtree: + Config0: - name: System local: - class: DRAM @@ -58,7 +58,7 @@ def test_unnamed_local(): def test_unclassed_local(): yaml = """ architecture: - subtree: + Config0: - name: System local: - name: Memory @@ -105,14 +105,19 @@ def test_unspecified(): def test_all_spec(): regs = build_local("Registers", "Buffet", {}) - mac = build_local("MAC", "compute", {}) - subtree = build_subtree("PE", 8, {}, [regs, mac], []) + mac = build_local("MAC", "compute", {"type": "mul"}) + subtree0 = build_subtree("PE", 8, {}, [regs, mac], []) + + mac0 = build_local("MAC0", "compute", {"type": "mul"}) + mac1 = build_local("MAC1", "compute", {"type": "add"}) + subtree1 = build_subtree("PE", 8, {}, [regs, mac0, mac1], []) mem = build_local("Memory", "DRAM", {"datawidth": 8, "bandwidth": 128}) attrs = {"clock_frequency": 10 ** 9} - tree = build_subtree("System", 1, attrs, [mem], [subtree]) + tree0 = build_subtree("System", 1, attrs, [mem], [subtree0]) + tree1 = build_subtree("System", 1, attrs, [mem], [subtree1]) arch = Architecture.from_file("tests/integration/test_arch.yaml") - spec = {"architecture": {"subtree": [tree]}} + spec = {"architecture": {"Config0": [tree0], "Config1": [tree1]}} assert arch.get_spec() == spec diff --git a/tests/parse/test_bindings.py b/tests/parse/test_bindings.py index dfd650c..0341bfe 100644 --- a/tests/parse/test_bindings.py +++ b/tests/parse/test_bindings.py @@ -1,9 +1,11 @@ +import pytest + from teaal.parse.bindings import Bindings def test_empty(): 
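+ # Bindings are now grouped per Einsum, so a component with no bindings + # (or an unknown one) yields an empty dict rather than the old empty-list + # default.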
bindings = Bindings.from_str("") - assert bindings.get("BAD") == [] + assert bindings.get_component("BAD") == {} def test_no_bindings(): @@ -12,16 +14,50 @@ - bar - baz """ - assert Bindings.from_str(yaml).get("BAD") == [] + assert Bindings.from_str(yaml).get_component("BAD") == {} + + +def test_no_config(): + yaml = """ + bindings: + Z: + - component: foo + bindings: + - tensor: bar + """ + with pytest.raises(ValueError) as excinfo: + Bindings.from_str(yaml) + assert str( + excinfo.value) == "Accelerator config and prefix missing for Einsum Z" def test_defined(): bindings = Bindings.from_file("tests/integration/test_bindings.yaml") - mem = [{"tensor": "A", "rank": "root"}, {"tensor": "Z", "rank": "root"}] - regs = [{"tensor": "A", "rank": "M"}, {"tensor": "Z", "rank": "M"}] - mac = [{"einsum": "Z", "op": "add"}] - - assert bindings.get("Memory") == mem - assert bindings.get("Registers") == regs - assert bindings.get("MAC") == mac - assert bindings.get("BAD") == [] + mem = {"Z": [{"tensor": "A", "rank": "M", "format": "A_default", "type": "payload"}, { + "tensor": "Z", "rank": "M", "type": "payload", "format": "Z_default"}]} + regs = {"Z": [{"tensor": "A", + "rank": "M", + "format": "A_default", + "type": "payload", + "style": "eager", + "evict-on": "M"}, + {"tensor": "Z", + "rank": "M", + "format": "Z_default", + "type": "payload", + "evict-on": "root"}]} + mac = {"Z": [{"op": "add"}]} + + assert bindings.get_config("Z") == "Config0" + assert bindings.get_prefix("Z") == "tmp/Z" + + assert bindings.get_component("Memory") == mem + assert bindings.get_component("Registers") == regs + assert bindings.get_component("MAC") == mac + assert bindings.get_component("BAD") == {} + + assert bindings.get_bindings() == { + "Z": { + "Memory": mem["Z"], + "Registers": regs["Z"], + "MAC": mac["Z"]}} diff --git a/tests/parse/test_format.py b/tests/parse/test_format.py index 80cf3ba..1e83db2 100644 --- a/tests/parse/test_format.py +++ b/tests/parse/test_format.py @@ -7,14 +7,24 @@ def build_format(): yaml = """ format: A: - M: - format: U - rhbits: 32 - pbits: 32 - K: - format: C - cbits: 32 - pbits: 64 + init: + rank-order: [M, K] + M: + format: U + rhbits: 32 + pbits: 32 + K: + format: C + cbits: 32 + pbits: 64 + loop: + rank-order: [K, M] + K: + format: C + M: + format: C + cbits: 32 + pbits: 64 """ return Format.from_str(yaml) @@ -27,25 +37,42 @@ def test_no_format(): Format.from_file("tests/integration/test_arch.yaml") -def test_missing_tensor(): - format_ = build_format() +def test_missing_rank_order(): + yaml = """ + format: + A: + BAD: + M: + format: C + pbits: 32 + """ with pytest.raises(ValueError) as excinfo: - format_.get_spec("B") + Format.from_str(yaml) assert str( - excinfo.value) == "Format unspecified for tensor B" + excinfo.value) == "Rank order not specified for tensor A in format BAD" def test_format(): format_ = build_format() spec = { - "M": { - "format": "U", - "rhbits": 32, - "pbits": 32}, - "K": { - "format": "C", - "cbits": 32, - "pbits": 64}} + "init": { + "rank-order": ["M", "K"], + "M": { + "format": "U", + "rhbits": 32, + "pbits": 32}, + "K": { + "format": "C", + "cbits": 32, + "pbits": 64}}, + "loop": { + "rank-order": ["K", "M"], + "K": {"format": "C"}, + "M": { + "format": "C", + "cbits": 32, + "pbits": 64}}} assert format_.get_spec("A") == spec + assert format_.get_spec("B") == {} diff --git a/tests/trans/test_collector.py b/tests/trans/test_collector.py index 43d0cc4..42608c2 100644 --- a/tests/trans/test_collector.py +++ 
b/tests/trans/test_collector.py @@ -1,5 +1,6 @@ import pytest +from teaal.ir.fusion import Fusion from teaal.ir.hardware import Hardware from teaal.ir.metrics import Metrics from teaal.ir.program import Program @@ -7,11 +8,31 @@ from teaal.trans.collector import Collector +def build_extensor_yaml(): + with open("tests/integration/extensor.yaml", "r") as f: + return f.read() + + +def build_extensor_energy_yaml(): + with open("tests/integration/extensor-energy.yaml", "r") as f: + return f.read() + + def build_gamma_yaml(): with open("tests/integration/gamma.yaml", "r") as f: return f.read() +def build_outerspace_yaml(): + with open("tests/integration/outerspace.yaml", "r") as f: + return f.read() + + +def build_sigma_yaml(): + with open("tests/integration/sigma.yaml", "r") as f: + return f.read() + + def build_collector(yaml, i): einsum = Einsum.from_str(yaml) mapping = Mapping.from_str(yaml) @@ -19,281 +40,1049 @@ def build_collector(yaml, i): arch = Architecture.from_str(yaml) bindings = Bindings.from_str(yaml) - hardware = Hardware(arch, bindings) + hardware = Hardware(arch, bindings, program) format_ = Format.from_str(yaml) program.add_einsum(i) metrics = Metrics(program, hardware, format_) - return Collector(program, metrics) + fusion = Fusion(hardware) + fusion.add_einsum(program) + return Collector(program, metrics, fusion) -def test_dump(): - yaml = build_gamma_yaml() - collector = build_collector(yaml, 0) - hifiber = "metrics = {}\n" + \ - "metrics[\"T\"] = {}\n" + \ - "metrics[\"T\"][\"T footprint\"] = 0\n" + \ - "metrics[\"T\"][\"T traffic\"] = 0\n" + \ - "A_MK_format = Format(A_MK, {\"M\": {\"format\": \"U\", \"rhbits\": 32, \"pbits\": 32}, \"K\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 64}})\n" + \ - "metrics[\"T\"][\"A footprint\"] = A_MK_format.getTensor()\n" + \ - "metrics[\"T\"][\"A traffic\"] = metrics[\"T\"][\"A footprint\"]\n" + \ - "B_KN_format = Format(B_KN, {\"K\": {\"format\": \"U\", \"rhbits\": 32, \"pbits\": 32}, \"N\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 64}})\n" + \ - "metrics[\"T\"][\"B footprint\"] = B_KN_format.getTensor()\n" + \ - "metrics[\"T\"][\"B traffic\"] = Traffic.cacheTraffic(B_KN, \"K\", B_KN_format, 25165824) + B_KN_format.getRank(\"K\")\n" + \ - "metrics[\"T\"][\"K intersections\"] = Compute.lfCount(Metrics.dump(), \"K\", 0)" +def add_einsum(collector, i): + program = collector.program + hardware = collector.metrics.hardware + format_ = collector.metrics.format + fusion = collector.fusion - assert collector.dump().gen(0) == hifiber + program.reset() + program.add_einsum(i) + metrics = Metrics(program, hardware, format_) + fusion.add_einsum(program) + return Collector(program, metrics, fusion) - collector = build_collector(yaml, 1) - hifiber = "metrics[\"Z\"] = {}\n" + \ - "Z_MN_format = Format(Z_MN, {\"M\": {\"format\": \"U\", \"rhbits\": 32, \"pbits\": 32}, \"N\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 64}})\n" + \ - "metrics[\"Z\"][\"Z footprint\"] = Z_MN_format.getTensor()\n" + \ - "metrics[\"Z\"][\"Z traffic\"] = metrics[\"Z\"][\"Z footprint\"]\n" + \ - "metrics[\"Z\"][\"T footprint\"] = 0\n" + \ - "metrics[\"Z\"][\"T traffic\"] = 0\n" + \ - "A_MK_format = Format(A_MK, {\"M\": {\"format\": \"U\", \"rhbits\": 32, \"pbits\": 32}, \"K\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 64}})\n" + \ - "metrics[\"Z\"][\"A footprint\"] = A_MK_format.getTensor()\n" + \ - "metrics[\"Z\"][\"A traffic\"] = metrics[\"Z\"][\"A footprint\"]\n" + \ - "metrics[\"Z\"][\"mul\"] = Compute.opCount(Metrics.dump(), \"mul\")\n" + \ - 
"metrics[\"Z\"][\"add\"] = Compute.opCount(Metrics.dump(), \"add\")\n" + \ - "metrics[\"Z\"][\"T_MKN merge ops\"] = Compute.swapCount(T_MKN, 1, 64, 1)" - assert collector.dump().gen(0) == hifiber +def check_hifiber_lines(gen_lines, corr_lines): + gen_set = set(gen_lines) + corr_set = set(corr_lines) + print("In generated") + for line in gen_lines: + if line not in corr_set: + print(line) -def test_dump_buffet(): + print("In corr") + for line in corr_lines: + if line not in gen_set: + print(line) + + assert gen_set == corr_set + + +def test_create_component_unknown(): yaml = """ einsum: declaration: - A: [M] - Z: [M, N] + Z: [] + A: [K] + B: [K] expressions: - - Z[m, n] = A[m] - + - Z[] = A[k] * B[k] mapping: - loop-order: - Z: [N, M] - + spacetime: + Z: + space: [] + time: [K] architecture: - subtree: - - name: System + accel: + - name: level0 local: - - name: Memory + - name: DRAM class: DRAM + bindings: + Z: + - config: accel + prefix: tmp/Z + # TODO: Allow the format to be empty + format: + Z: + default: + rank-order: [] + """ + collector = build_collector(yaml, 0) - subtree: - - name: PE + dram = collector.metrics.get_hardware().get_component("DRAM") + with pytest.raises(ValueError) as excinfo: + collector.create_component(dram, "K") + assert str( + excinfo.value) == "Unable to create consumable metrics component for DRAM of type DRAMComponent" - local: - - name: RegFile - class: Buffet - - name: Compute - class: Compute +def test_create_component(): + yaml = """ + einsum: + declaration: + Z: [] + A: [I, J, K] + B: [I, J, K] + expressions: + - Z[] = A[i, j, k] * B[i, j, k] + mapping: + spacetime: + Z: + space: [] + time: [I, J, K] + architecture: + accel: + - name: level0 + local: + - name: LF + class: Intersector + attributes: + type: leader-follower + - name: SA + class: Intersector + attributes: + type: skip-ahead + - name: TF + class: Intersector + attributes: + type: two-finger + bindings: + Z: + - config: accel + prefix: tmp/Z + - component: LF + bindings: + - rank: I + leader: A + - component: SA + bindings: + - rank: J + - component: TF + bindings: + - rank: K + # TODO: Allow the format to be empty + format: + Z: + default: + rank-order: [] + """ + collector = build_collector(yaml, 0) + get_comp = collector.metrics.get_hardware().get_component + + assert collector.create_component(get_comp("LF"), "I").gen( + 0) == "LF_I = LeaderFollowerIntersector()" + assert collector.create_component(get_comp("SA"), "J").gen( + 0) == "SA_J = SkipAheadIntersector()" + assert collector.create_component(get_comp("TF"), "K").gen( + 0) == "TF_K = TwoFingerIntersector()" + +def test_consume_traces_unknown(): + yaml = """ + einsum: + declaration: + Z: [] + A: [K] + B: [K] + expressions: + - Z[] = A[k] * B[k] + mapping: + spacetime: + Z: + space: [] + time: [K] + architecture: + accel: + - name: level0 + local: + - name: DRAM + class: DRAM bindings: - - name: Memory - bindings: - - tensor: A - rank: root + Z: + - config: accel + prefix: tmp/Z + # TODO: Allow the format to be empty + format: + Z: + default: + rank-order: [] + """ + collector = build_collector(yaml, 0) - - name: RegFile - bindings: - - tensor: A - rank: M + with pytest.raises(ValueError) as excinfo: + collector.consume_traces("DRAM", "K") + assert str( + excinfo.value) == "Unable to consume traces for component DRAM of type DRAMComponent" - - name: Compute - bindings: - - einsum: Z - op: add +def test_consume_traces(): + yaml = """ + einsum: + declaration: + Z: [] + A: [I, J, K] + B: [I, J, K] + expressions: + - Z[] = A[i, j, k] * B[i, 
j, k] + mapping: + spacetime: + Z: + space: [] + time: [I, J, K] + architecture: + accel: + - name: level0 + local: + - name: LF + class: Intersector + attributes: + type: leader-follower + - name: SA + class: Intersector + attributes: + type: skip-ahead + - name: TF + class: Intersector + attributes: + type: two-finger + bindings: + Z: + - config: accel + prefix: tmp/Z + - component: LF + bindings: + - rank: I + leader: A + - component: SA + bindings: + - rank: J + - component: TF + bindings: + - rank: K + # TODO: Allow the format to be empty format: - A: - M: - format: C - cbits: 32 - pbits: 64 + Z: + default: + rank-order: [] """ collector = build_collector(yaml, 0) + + assert collector.consume_traces("LF", "I").gen( + 0) == "LF_I.addTraces(Metrics.consumeTrace(\"I\", \"intersect_0\"))" + assert collector.consume_traces("SA", "J").gen( + 0) == "SA_J.addTraces(Metrics.consumeTrace(\"J\", \"intersect_0\"), Metrics.consumeTrace(\"J\", \"intersect_1\"))" + assert collector.consume_traces("TF", "K").gen( + 0) == "TF_K.addTraces(Metrics.consumeTrace(\"K\", \"intersect_0\"), Metrics.consumeTrace(\"K\", \"intersect_1\"))" + + +def test_dump_gamma_T(): + yaml = build_gamma_yaml() + collector = build_collector(yaml, 0) + + hifiber = "metrics = {}\n" + \ + "metrics[\"T\"] = {}\n" + \ + "formats = {\"A\": Format(A_MK, {\"rank-order\": [\"M\", \"K\"], \"M\": {\"format\": \"U\", \"pbits\": 32}, \"K\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 64}}), \"B\": Format(B_KN, {\"rank-order\": [\"K\", \"N\"], \"K\": {\"format\": \"U\", \"pbits\": 32}, \"N\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 64}})}\n" + \ + "bindings = [{\"tensor\": \"B\", \"rank\": \"K\", \"type\": \"payload\", \"format\": \"default\"}, {\"tensor\": \"B\", \"rank\": \"N\", \"type\": \"coord\", \"format\": \"default\"}, {\"tensor\": \"B\", \"rank\": \"N\", \"type\": \"payload\", \"format\": \"default\"}]\n" + \ + "Traffic.filterTrace(\"tmp/gamma_T-K-intersect_3.csv\", \"tmp/gamma_T-K-iter.csv\", \"tmp/gamma_T-K-intersect_3_payload.csv\")\n" + \ + "Traffic.filterTrace(\"tmp/gamma_T-N-populate_1.csv\", \"tmp/gamma_T-N-iter.csv\", \"tmp/gamma_T-N-populate_1_payload.csv\")\n" + \ + "traces = {(\"B\", \"K\", \"payload\", \"read\"): \"tmp/gamma_T-K-intersect_3_payload.csv\", (\"B\", \"N\", \"coord\", \"read\"): \"tmp/gamma_T-N-populate_1.csv\", (\"B\", \"N\", \"payload\", \"read\"): \"tmp/gamma_T-N-populate_1_payload.csv\"}\n" + \ + "traffic = Traffic.cacheTraffic(bindings, formats, traces, 25165824, 64)\n" + \ + "metrics[\"T\"][\"MainMemory\"] = {}\n" + \ + "metrics[\"T\"][\"MainMemory\"][\"B\"] = {}\n" + \ + "metrics[\"T\"][\"MainMemory\"][\"B\"][\"read\"] = 0\n" + \ + "metrics[\"T\"][\"MainMemory\"][\"B\"][\"read\"] += traffic[0][\"B\"][\"read\"]\n" + \ + "bindings = [{\"tensor\": \"A\", \"rank\": \"M\", \"type\": \"payload\", \"format\": \"default\", \"evict-on\": \"root\", \"style\": \"lazy\"}, {\"tensor\": \"A\", \"rank\": \"K\", \"type\": \"coord\", \"format\": \"default\", \"evict-on\": \"M\", \"style\": \"lazy\"}, {\"tensor\": \"A\", \"rank\": \"K\", \"type\": \"payload\", \"format\": \"default\", \"evict-on\": \"M\", \"style\": \"lazy\"}]\n" + \ + "Traffic.filterTrace(\"tmp/gamma_T-M-populate_1.csv\", \"tmp/gamma_T-M-iter.csv\", \"tmp/gamma_T-M-populate_1_payload.csv\")\n" + \ + "Traffic.filterTrace(\"tmp/gamma_T-K-intersect_2.csv\", \"tmp/gamma_T-K-iter.csv\", \"tmp/gamma_T-K-intersect_2_payload.csv\")\n" + \ + "traces = {(\"A\", \"M\", \"payload\", \"read\"): \"tmp/gamma_T-M-populate_1_payload.csv\", (\"A\", \"K\", 
\"coord\", \"read\"): \"tmp/gamma_T-K-intersect_2.csv\", (\"A\", \"K\", \"payload\", \"read\"): \"tmp/gamma_T-K-intersect_2_payload.csv\"}\n" + \ + "traffic = Traffic.buffetTraffic(bindings, formats, traces, float(\"inf\"), 64)\n" + \ + "metrics[\"T\"][\"MainMemory\"][\"A\"] = {}\n" + \ + "metrics[\"T\"][\"MainMemory\"][\"A\"][\"read\"] = 0\n" + \ + "metrics[\"T\"][\"MainMemory\"][\"A\"][\"read\"] += traffic[0][\"A\"][\"read\"]\n" + \ + "metrics[\"T\"][\"MainMemory\"][\"time\"] = (metrics[\"T\"][\"MainMemory\"][\"A\"][\"read\"] + metrics[\"T\"][\"MainMemory\"][\"B\"][\"read\"]) / 1099511627776\n" + \ + "metrics[\"T\"][\"Intersect\"] = 0\n" + \ + "metrics[\"T\"][\"Intersect\"] += Intersect_K.getNumIntersects()\n" + \ + "metrics[\"T\"][\"Intersect\"][\"time\"] = metrics[\"T\"][\"Intersect\"] / 32000000000" + + assert collector.dump().gen(0) == hifiber + + +def test_dump_gamma_Z(): + yaml = build_gamma_yaml() + collector = build_collector(yaml, 0) + collector.dump() + + collector = add_einsum(collector, 1) + + hifiber = "metrics[\"Z\"] = {}\n" + \ + "formats = {\"Z\": Format(Z_MN, {\"rank-order\": [\"M\", \"N\"], \"M\": {\"format\": \"U\", \"pbits\": 32}, \"N\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 64}}), \"A\": Format(A_MK, {\"rank-order\": [\"M\", \"K\"], \"M\": {\"format\": \"U\", \"pbits\": 32}, \"K\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 64}})}\n" + \ + "bindings = [{\"tensor\": \"A\", \"rank\": \"M\", \"type\": \"payload\", \"format\": \"default\", \"evict-on\": \"root\", \"style\": \"lazy\"}, {\"tensor\": \"A\", \"rank\": \"K\", \"type\": \"coord\", \"format\": \"default\", \"evict-on\": \"M\", \"style\": \"lazy\"}, {\"tensor\": \"A\", \"rank\": \"K\", \"type\": \"payload\", \"format\": \"default\", \"evict-on\": \"M\", \"style\": \"lazy\"}]\n" + \ + "Traffic.filterTrace(\"tmp/gamma_Z-M-intersect_3.csv\", \"tmp/gamma_Z-M-iter.csv\", \"tmp/gamma_Z-M-intersect_3_payload.csv\")\n" + \ + "Traffic.filterTrace(\"tmp/gamma_Z-K-intersect_1.csv\", \"tmp/gamma_Z-K-iter.csv\", \"tmp/gamma_Z-K-intersect_1_payload.csv\")\n" + \ + "traces = {(\"A\", \"M\", \"payload\", \"read\"): \"tmp/gamma_Z-M-intersect_3_payload.csv\", (\"A\", \"K\", \"coord\", \"read\"): \"tmp/gamma_Z-K-intersect_1.csv\", (\"A\", \"K\", \"payload\", \"read\"): \"tmp/gamma_Z-K-intersect_1_payload.csv\"}\n" + \ + "traffic = Traffic.buffetTraffic(bindings, formats, traces, float(\"inf\"), 64)\n" + \ + "bindings = [{\"tensor\": \"Z\", \"rank\": \"M\", \"type\": \"payload\", \"format\": \"default\", \"evict-on\": \"root\", \"style\": \"lazy\"}, {\"tensor\": \"Z\", \"rank\": \"N\", \"type\": \"coord\", \"format\": \"default\", \"evict-on\": \"M\", \"style\": \"lazy\"}, {\"tensor\": \"Z\", \"rank\": \"N\", \"type\": \"payload\", \"format\": \"default\", \"evict-on\": \"M\", \"style\": \"lazy\"}]\n" + \ + "Traffic.filterTrace(\"tmp/gamma_Z-M-populate_read_0.csv\", \"tmp/gamma_Z-M-iter.csv\", \"tmp/gamma_Z-M-populate_read_0_payload.csv\")\n" + \ + "Traffic.filterTrace(\"tmp/gamma_Z-M-populate_write_0.csv\", \"tmp/gamma_Z-M-iter.csv\", \"tmp/gamma_Z-M-populate_write_0_payload.csv\")\n" + \ + "Traffic.filterTrace(\"tmp/gamma_Z-N-populate_read_0.csv\", \"tmp/gamma_Z-N-iter.csv\", \"tmp/gamma_Z-N-populate_read_0_payload.csv\")\n" + \ + "Traffic.filterTrace(\"tmp/gamma_Z-N-populate_write_0.csv\", \"tmp/gamma_Z-N-iter.csv\", \"tmp/gamma_Z-N-populate_write_0_payload.csv\")\n" + \ + "traces = {(\"Z\", \"M\", \"payload\", \"read\"): \"tmp/gamma_Z-M-populate_read_0_payload.csv\", (\"Z\", \"M\", \"payload\", \"write\"): 
\"tmp/gamma_Z-M-populate_write_0_payload.csv\", (\"Z\", \"N\", \"coord\", \"read\"): \"tmp/gamma_Z-N-populate_read_0.csv\", (\"Z\", \"N\", \"coord\", \"write\"): \"tmp/gamma_Z-N-populate_write_0.csv\", (\"Z\", \"N\", \"payload\", \"read\"): \"tmp/gamma_Z-N-populate_read_0_payload.csv\", (\"Z\", \"N\", \"payload\", \"write\"): \"tmp/gamma_Z-N-populate_write_0_payload.csv\"}\n" + \ + "traffic = Traffic.buffetTraffic(bindings, formats, traces, float(\"inf\"), 64)\n" + \ + "metrics[\"Z\"][\"MainMemory\"] = {}\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"Z\"] = {}\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"Z\"][\"read\"] = 0\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"Z\"][\"write\"] = 0\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"Z\"][\"read\"] += traffic[0][\"Z\"][\"read\"]\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"Z\"][\"write\"] += traffic[0][\"Z\"][\"write\"]\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"time\"] = (metrics[\"Z\"][\"MainMemory\"][\"Z\"][\"read\"] + metrics[\"Z\"][\"MainMemory\"][\"Z\"][\"write\"]) / 1099511627776\n" + \ + "metrics[\"Z\"][\"HighRadixMerger\"] = {}\n" + \ + "metrics[\"Z\"][\"HighRadixMerger\"][\"T_MKN\"] = Compute.numSwaps(T_MKN, 1, 64, 1)\n" + \ + "metrics[\"Z\"][\"HighRadixMerger\"][\"time\"] = metrics[\"Z\"][\"HighRadixMerger\"][T_MKN] / 32000000000\n" + \ + "metrics[\"Z\"][\"FPMul\"] = {}\n" + \ + "metrics[\"Z\"][\"FPMul\"][\"mul\"] = Metrics.dump()[\"Compute\"][\"payload_mul\"]\n" + \ + "metrics[\"Z\"][\"FPMul\"][\"time\"] = metrics[\"Z\"][\"FPMul\"][\"mul\"] / 32000000000\n" + \ + "metrics[\"Z\"][\"FPAdd\"] = {}\n" + \ + "metrics[\"Z\"][\"FPAdd\"][\"add\"] = Metrics.dump()[\"Compute\"][\"payload_add\"]\n" + \ + "metrics[\"Z\"][\"FPAdd\"][\"time\"] = metrics[\"Z\"][\"FPAdd\"][\"add\"] / 32000000000\n" + \ + "metrics[\"blocks\"] = [[\"T\", \"Z\"]]\n" + \ + "metrics[\"time\"] = max(metrics[\"Z\"][\"FPAdd\"][\"time\"], metrics[\"Z\"][\"FPMul\"][\"time\"], metrics[\"Z\"][\"HighRadixMerger\"][\"time\"], metrics[\"T\"][\"Intersect\"][\"time\"], metrics[\"T\"][\"MainMemory\"][\"time\"] + metrics[\"Z\"][\"MainMemory\"][\"time\"])" + + # print(collector.dump().gen(0)) + # assert False + + assert collector.dump().gen(0) == hifiber + + +def test_dump_outerspace_Z(): + yaml = build_outerspace_yaml() + collector = build_collector(yaml, 0) + collector.dump() + + collector = add_einsum(collector, 1) + collector.dump() + + collector = add_einsum(collector, 2) + + hifiber = "metrics[\"Z\"] = {}\n" + \ + "formats = {\"Z\": Format(Z_MN, {\"rank-order\": [\"M\", \"N\"], \"M\": {\"format\": \"U\", \"pbits\": 32}, \"N\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 64}})}\n" + \ + "bindings = [{\"tensor\": \"Z\", \"rank\": \"M\", \"type\": \"payload\", \"format\": \"default\", \"evict-on\": \"root\", \"style\": \"lazy\"}, {\"tensor\": \"Z\", \"rank\": \"N\", \"type\": \"coord\", \"format\": \"default\", \"evict-on\": \"M\", \"style\": \"lazy\"}, {\"tensor\": \"Z\", \"rank\": \"N\", \"type\": \"payload\", \"format\": \"default\", \"evict-on\": \"M\", \"style\": \"lazy\"}]\n" + \ + "Traffic.filterTrace(\"tmp/outerspace_Z-M-populate_read_0.csv\", \"tmp/outerspace_Z-M-iter.csv\", \"tmp/outerspace_Z-M-populate_read_0_payload.csv\")\n" + \ + "Traffic.filterTrace(\"tmp/outerspace_Z-M-populate_write_0.csv\", \"tmp/outerspace_Z-M-iter.csv\", \"tmp/outerspace_Z-M-populate_write_0_payload.csv\")\n" + \ + "Traffic.filterTrace(\"tmp/outerspace_Z-N-populate_read_0.csv\", \"tmp/outerspace_Z-N-iter.csv\", \"tmp/outerspace_Z-N-populate_read_0_payload.csv\")\n" + \ + 
"Traffic.filterTrace(\"tmp/outerspace_Z-N-populate_write_0.csv\", \"tmp/outerspace_Z-N-iter.csv\", \"tmp/outerspace_Z-N-populate_write_0_payload.csv\")\n" + \ + "traces = {(\"Z\", \"M\", \"payload\", \"read\"): \"tmp/outerspace_Z-M-populate_read_0_payload.csv\", (\"Z\", \"M\", \"payload\", \"write\"): \"tmp/outerspace_Z-M-populate_write_0_payload.csv\", (\"Z\", \"N\", \"coord\", \"read\"): \"tmp/outerspace_Z-N-populate_read_0.csv\", (\"Z\", \"N\", \"coord\", \"write\"): \"tmp/outerspace_Z-N-populate_write_0.csv\", (\"Z\", \"N\", \"payload\", \"read\"): \"tmp/outerspace_Z-N-populate_read_0_payload.csv\", (\"Z\", \"N\", \"payload\", \"write\"): \"tmp/outerspace_Z-N-populate_write_0_payload.csv\"}\n" + \ + "traffic = Traffic.buffetTraffic(bindings, formats, traces, 8192, 64)\n" + \ + "metrics[\"Z\"][\"MainMemory\"] = {}\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"Z\"] = {}\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"Z\"][\"read\"] = 0\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"Z\"][\"write\"] = 0\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"Z\"][\"read\"] += traffic[0][\"Z\"][\"read\"]\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"Z\"][\"write\"] += traffic[0][\"Z\"][\"write\"]\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"time\"] = (metrics[\"Z\"][\"MainMemory\"][\"Z\"][\"read\"] + metrics[\"Z\"][\"MainMemory\"][\"Z\"][\"write\"]) / 1099511627776\n" + \ + "metrics[\"Z\"][\"SortHW\"] = {}\n" + \ + "metrics[\"Z\"][\"SortHW\"][\"T1_MKN\"] = Compute.numSwaps(T1_MKN, 1, float(\"inf\"), \"N\")\n" + \ + "metrics[\"Z\"][\"SortHW\"][\"time\"] = metrics[\"Z\"][\"SortHW\"][T1_MKN] / 193500000000\n" + \ + "metrics[\"Z\"][\"FPAdd\"] = {}\n" + \ + "metrics[\"Z\"][\"FPAdd\"][\"add\"] = Metrics.dump()[\"Compute\"][\"payload_add\"]\n" + \ + "metrics[\"Z\"][\"FPAdd\"][\"time\"] = metrics[\"Z\"][\"FPAdd\"][\"add\"] / 193500000000\n" + \ + "metrics[\"blocks\"] = [[\"T0\"], [\"T1\", \"Z\"]]\n" + \ + "metrics[\"time\"] = max(metrics[\"T0\"][\"FPMul\"][\"time\"], metrics[\"T0\"][\"MainMemory\"][\"time\"]) + max(metrics[\"Z\"][\"FPAdd\"][\"time\"], metrics[\"T1\"][\"MainMemory\"][\"time\"] + metrics[\"Z\"][\"MainMemory\"][\"time\"], metrics[\"Z\"][\"SortHW\"][\"time\"])" + + assert collector.dump().gen(0) == hifiber + + +def test_dump_extensor(): + yaml = build_extensor_yaml() + collector = build_collector(yaml, 0) + + hifiber = "metrics = {}\n" + \ + "metrics[\"Z\"] = {}\n" + \ + "formats = {\"Z\": Format(Z_N2M2M1N1M0N0, {\"rank-order\": [\"N2\", \"M2\", \"M1\", \"N1\", \"M0\", \"N0\"], \"N2\": {\"format\": \"U\"}, \"M2\": {\"format\": \"U\"}, \"M1\": {\"format\": \"U\"}, \"N1\": {\"format\": \"U\"}, \"M0\": {\"format\": \"U\"}, \"N0\": {\"format\": \"C\", \"cbits\": 64, \"pbits\": 64}}), \"A\": Format(A_K2M2M1K1M0K0, {\"rank-order\": [\"K2\", \"M2\", \"M1\", \"K1\", \"M0\", \"K0\"], \"K2\": {\"format\": \"C\"}, \"M2\": {\"format\": \"C\"}, \"M1\": {\"format\": \"C\"}, \"K1\": {\"format\": \"C\", \"cbits\": 64}, \"M0\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 32}, \"K0\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 64}}), \"B\": Format(B_N2K2N1K1N0K0, {\"rank-order\": [\"N2\", \"K2\", \"N1\", \"K1\", \"N0\", \"K0\"], \"N2\": {\"format\": \"C\"}, \"K2\": {\"format\": \"C\"}, \"N1\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 32}, \"K1\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 32}, \"N0\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 32}, \"K0\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 64}})}\n" + \ + "bindings = [{\"tensor\": \"A\", \"rank\": \"K1\", \"type\": \"coord\", \"evict-on\": \"M2\", 
\"format\": \"default\", \"style\": \"lazy\"}, {\"tensor\": \"A\", \"rank\": \"M0\", \"type\": \"coord\", \"evict-on\": \"M2\", \"format\": \"default\", \"style\": \"eager\", \"root\": \"M0\"}, {\"tensor\": \"B\", \"rank\": \"N1\", \"type\": \"coord\", \"evict-on\": \"K2\", \"format\": \"default\", \"style\": \"lazy\"}, {\"tensor\": \"B\", \"rank\": \"N1\", \"type\": \"payload\", \"evict-on\": \"K2\", \"format\": \"default\", \"style\": \"lazy\"}, {\"tensor\": \"B\", \"rank\": \"K1\", \"type\": \"coord\", \"evict-on\": \"K2\", \"format\": \"default\", \"style\": \"lazy\"}, {\"tensor\": \"B\", \"rank\": \"K1\", \"type\": \"payload\", \"evict-on\": \"K2\", \"format\": \"default\", \"style\": \"lazy\"}, {\"tensor\": \"B\", \"rank\": \"N0\", \"type\": \"coord\", \"evict-on\": \"K2\", \"format\": \"default\", \"style\": \"eager\", \"root\": \"N0\"}, {\"tensor\": \"Z\", \"evict-on\": \"M2\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"M0\", \"rank\": \"N0\", \"type\": \"coord\"}, {\"tensor\": \"Z\", \"evict-on\": \"M2\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"M0\", \"rank\": \"N0\", \"type\": \"payload\"}, {\"tensor\": \"A\", \"evict-on\": \"M2\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"M0\", \"rank\": \"M0\", \"type\": \"payload\"}, {\"tensor\": \"A\", \"evict-on\": \"M2\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"M0\", \"rank\": \"K0\", \"type\": \"coord\"}, {\"tensor\": \"A\", \"evict-on\": \"M2\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"M0\", \"rank\": \"K0\", \"type\": \"payload\"}, {\"tensor\": \"B\", \"evict-on\": \"K2\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"N0\", \"rank\": \"N0\", \"type\": \"payload\"}, {\"tensor\": \"B\", \"evict-on\": \"K2\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"N0\", \"rank\": \"K0\", \"type\": \"coord\"}, {\"tensor\": \"B\", \"evict-on\": \"K2\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"N0\", \"rank\": \"K0\", \"type\": \"payload\"}]\n" + \ + "Traffic.filterTrace(\"tmp/extensor-N1-populate_1.csv\", \"tmp/extensor-N1-iter.csv\", \"tmp/extensor-N1-populate_1_payload.csv\")\n" + \ + "Traffic.filterTrace(\"tmp/extensor-K1-intersect_1.csv\", \"tmp/extensor-K1-iter.csv\", \"tmp/extensor-K1-intersect_1_payload.csv\")\n" + \ + "traces = {(\"A\", \"K1\", \"coord\", \"read\"): \"tmp/extensor-K1-intersect_0.csv\", (\"A\", \"M0\", \"coord\", \"read\"): \"tmp/extensor-M0-eager_a_m0_read.csv\", (\"B\", \"N1\", \"coord\", \"read\"): \"tmp/extensor-N1-populate_1.csv\", (\"B\", \"N1\", \"payload\", \"read\"): \"tmp/extensor-N1-populate_1_payload.csv\", (\"B\", \"K1\", \"coord\", \"read\"): \"tmp/extensor-K1-intersect_1.csv\", (\"B\", \"K1\", \"payload\", \"read\"): \"tmp/extensor-K1-intersect_1_payload.csv\", (\"B\", \"N0\", \"coord\", \"read\"): \"tmp/extensor-N0-eager_b_n0_read.csv\", (\"Z\", \"N0\", \"coord\", \"read\"): \"tmp/extensor-N0-eager_z_m0_read.csv\", (\"Z\", \"N0\", \"coord\", \"write\"): \"tmp/extensor-N0-eager_z_m0_write.csv\", (\"Z\", \"N0\", \"payload\", \"read\"): \"tmp/extensor-N0-eager_z_m0_read.csv\", (\"Z\", \"N0\", \"payload\", \"write\"): \"tmp/extensor-N0-eager_z_m0_write.csv\", (\"A\", \"M0\", \"payload\", \"read\"): \"tmp/extensor-M0-eager_a_m0_read.csv\", (\"A\", \"K0\", \"coord\", \"read\"): \"tmp/extensor-K0-eager_a_m0_read.csv\", (\"A\", \"K0\", \"payload\", \"read\"): \"tmp/extensor-K0-eager_a_m0_read.csv\", (\"B\", \"N0\", \"payload\", \"read\"): \"tmp/extensor-N0-eager_b_n0_read.csv\", (\"B\", 
\"K0\", \"coord\", \"read\"): \"tmp/extensor-K0-eager_b_n0_read.csv\", (\"B\", \"K0\", \"payload\", \"read\"): \"tmp/extensor-K0-eager_b_n0_read.csv\"}\n" + \ + "traffic = Traffic.buffetTraffic(bindings, formats, traces, 251658240, 64)\n" + \ + "metrics[\"Z\"][\"MainMemory\"] = {}\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"A\"] = {}\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"A\"][\"read\"] = 0\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"A\"][\"read\"] += traffic[0][\"A\"][\"read\"]\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"B\"] = {}\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"B\"][\"read\"] = 0\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"B\"][\"read\"] += traffic[0][\"B\"][\"read\"]\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"Z\"] = {}\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"Z\"][\"read\"] = 0\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"Z\"][\"write\"] = 0\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"Z\"][\"read\"] += traffic[0][\"Z\"][\"read\"]\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"Z\"][\"write\"] += traffic[0][\"Z\"][\"write\"]\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"time\"] = (metrics[\"Z\"][\"MainMemory\"][\"A\"][\"read\"] + metrics[\"Z\"][\"MainMemory\"][\"B\"][\"read\"] + metrics[\"Z\"][\"MainMemory\"][\"Z\"][\"read\"] + metrics[\"Z\"][\"MainMemory\"][\"Z\"][\"write\"]) / 586314575512\n" + \ + "metrics[\"Z\"][\"FPMul\"] = {}\n" + \ + "metrics[\"Z\"][\"FPMul\"][\"mul\"] = Metrics.dump()[\"Compute\"][\"payload_mul\"]\n" + \ + "metrics[\"Z\"][\"FPMul\"][\"time\"] = metrics[\"Z\"][\"FPMul\"][\"mul\"] / 128000000000\n" + \ + "metrics[\"Z\"][\"FPAdd\"] = {}\n" + \ + "metrics[\"Z\"][\"FPAdd\"][\"add\"] = Metrics.dump()[\"Compute\"][\"payload_add\"]\n" + \ + "metrics[\"Z\"][\"FPAdd\"][\"time\"] = metrics[\"Z\"][\"FPAdd\"][\"add\"] / 128000000000\n" + \ + "metrics[\"Z\"][\"K2Intersect\"] = 0\n" + \ + "metrics[\"Z\"][\"K2Intersect\"] += K2Intersect_K2.getNumIntersects()\n" + \ + "metrics[\"Z\"][\"K2Intersect\"][\"time\"] = metrics[\"Z\"][\"K2Intersect\"] / 1000000000\n" + \ + "metrics[\"Z\"][\"K1Intersect\"] = 0\n" + \ + "metrics[\"Z\"][\"K1Intersect\"] += K1Intersect_K1.getNumIntersects()\n" + \ + "metrics[\"Z\"][\"K1Intersect\"][\"time\"] = metrics[\"Z\"][\"K1Intersect\"] / 1000000000\n" + \ + "metrics[\"Z\"][\"K0Intersection\"] = 0\n" + \ + "metrics[\"Z\"][\"K0Intersection\"] += K0Intersection_K0.getNumIntersects()\n" + \ + "metrics[\"Z\"][\"K0Intersection\"][\"time\"] = metrics[\"Z\"][\"K0Intersection\"] / 128000000000\n" + \ + "metrics[\"blocks\"] = [[\"Z\"]]\n" + \ + "metrics[\"time\"] = max(metrics[\"Z\"][\"FPAdd\"][\"time\"], metrics[\"Z\"][\"FPMul\"][\"time\"], metrics[\"Z\"][\"K0Intersection\"][\"time\"], metrics[\"Z\"][\"K1Intersect\"][\"time\"], metrics[\"Z\"][\"K2Intersect\"][\"time\"], metrics[\"Z\"][\"MainMemory\"][\"time\"])" + + assert collector.dump().gen(0) == hifiber + + +def test_dump_extensor_energy(): + yaml = build_extensor_energy_yaml() + collector = build_collector(yaml, 0) + + hifiber = "metrics = {}\n" + \ + "metrics[\"Z\"] = {}\n" + \ + "formats = {\"Z\": Format(Z_N2M2M1N1M0N0, {\"rank-order\": [\"N2\", \"M2\", \"M1\", \"N1\", \"M0\", \"N0\"], \"N2\": {\"format\": \"U\"}, \"M2\": {\"format\": \"U\"}, \"M1\": {\"format\": \"U\"}, \"N1\": {\"format\": \"U\"}, \"M0\": {\"format\": \"U\"}, \"N0\": {\"format\": \"C\", \"cbits\": 64, \"pbits\": 64}}), \"A\": Format(A_K2M2M1K1M0K0, {\"rank-order\": [\"K2\", \"M2\", \"M1\", \"K1\", \"M0\", \"K0\"], \"K2\": {\"format\": \"C\"}, \"M2\": {\"format\": \"C\"}, \"M1\": {\"format\": \"C\"}, \"K1\": {\"format\": \"C\", 
\"cbits\": 64}, \"M0\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 32}, \"K0\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 64}}), \"B\": Format(B_N2K2N1K1N0K0, {\"rank-order\": [\"N2\", \"K2\", \"N1\", \"K1\", \"N0\", \"K0\"], \"N2\": {\"format\": \"C\"}, \"K2\": {\"format\": \"C\"}, \"N1\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 32}, \"K1\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 32}, \"N0\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 32}, \"K0\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 64}})}\n" + \ + "bindings = [{\"tensor\": \"A\", \"rank\": \"K1\", \"type\": \"coord\", \"evict-on\": \"M2\", \"format\": \"default\", \"style\": \"lazy\"}, {\"tensor\": \"A\", \"rank\": \"M0\", \"type\": \"coord\", \"evict-on\": \"M2\", \"format\": \"default\", \"style\": \"eager\", \"root\": \"M0\"}, {\"tensor\": \"B\", \"rank\": \"N1\", \"type\": \"coord\", \"evict-on\": \"K2\", \"format\": \"default\", \"style\": \"lazy\"}, {\"tensor\": \"B\", \"rank\": \"N1\", \"type\": \"payload\", \"evict-on\": \"K2\", \"format\": \"default\", \"style\": \"lazy\"}, {\"tensor\": \"B\", \"rank\": \"K1\", \"type\": \"coord\", \"evict-on\": \"K2\", \"format\": \"default\", \"style\": \"lazy\"}, {\"tensor\": \"B\", \"rank\": \"K1\", \"type\": \"payload\", \"evict-on\": \"K2\", \"format\": \"default\", \"style\": \"lazy\"}, {\"tensor\": \"B\", \"rank\": \"N0\", \"type\": \"coord\", \"evict-on\": \"K2\", \"format\": \"default\", \"style\": \"eager\", \"root\": \"N0\"}, {\"tensor\": \"Z\", \"evict-on\": \"M2\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"M0\", \"rank\": \"N0\", \"type\": \"coord\"}, {\"tensor\": \"Z\", \"evict-on\": \"M2\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"M0\", \"rank\": \"N0\", \"type\": \"payload\"}, {\"tensor\": \"A\", \"evict-on\": \"M2\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"M0\", \"rank\": \"M0\", \"type\": \"payload\"}, {\"tensor\": \"A\", \"evict-on\": \"M2\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"M0\", \"rank\": \"K0\", \"type\": \"coord\"}, {\"tensor\": \"A\", \"evict-on\": \"M2\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"M0\", \"rank\": \"K0\", \"type\": \"payload\"}, {\"tensor\": \"B\", \"evict-on\": \"K2\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"N0\", \"rank\": \"N0\", \"type\": \"payload\"}, {\"tensor\": \"B\", \"evict-on\": \"K2\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"N0\", \"rank\": \"K0\", \"type\": \"coord\"}, {\"tensor\": \"B\", \"evict-on\": \"K2\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"N0\", \"rank\": \"K0\", \"type\": \"payload\"}]\n" + \ + "Traffic.filterTrace(\"tmp/extensor_energy-N1-populate_1.csv\", \"tmp/extensor_energy-N1-iter.csv\", \"tmp/extensor_energy-N1-populate_1_payload.csv\")\n" + \ + "Traffic.filterTrace(\"tmp/extensor_energy-K1-intersect_1.csv\", \"tmp/extensor_energy-K1-iter.csv\", \"tmp/extensor_energy-K1-intersect_1_payload.csv\")\n" + \ + "traces = {(\"A\", \"K1\", \"coord\", \"read\"): \"tmp/extensor_energy-K1-intersect_0.csv\", (\"A\", \"M0\", \"coord\", \"read\"): \"tmp/extensor_energy-M0-eager_a_m0_read.csv\", (\"B\", \"N1\", \"coord\", \"read\"): \"tmp/extensor_energy-N1-populate_1.csv\", (\"B\", \"N1\", \"payload\", \"read\"): \"tmp/extensor_energy-N1-populate_1_payload.csv\", (\"B\", \"K1\", \"coord\", \"read\"): \"tmp/extensor_energy-K1-intersect_1.csv\", (\"B\", \"K1\", \"payload\", \"read\"): \"tmp/extensor_energy-K1-intersect_1_payload.csv\", (\"B\", \"N0\", 
\"coord\", \"read\"): \"tmp/extensor_energy-N0-eager_b_n0_read.csv\", (\"Z\", \"N0\", \"coord\", \"read\"): \"tmp/extensor_energy-N0-eager_z_m0_read.csv\", (\"Z\", \"N0\", \"coord\", \"write\"): \"tmp/extensor_energy-N0-eager_z_m0_write.csv\", (\"Z\", \"N0\", \"payload\", \"read\"): \"tmp/extensor_energy-N0-eager_z_m0_read.csv\", (\"Z\", \"N0\", \"payload\", \"write\"): \"tmp/extensor_energy-N0-eager_z_m0_write.csv\", (\"A\", \"M0\", \"payload\", \"read\"): \"tmp/extensor_energy-M0-eager_a_m0_read.csv\", (\"A\", \"K0\", \"coord\", \"read\"): \"tmp/extensor_energy-K0-eager_a_m0_read.csv\", (\"A\", \"K0\", \"payload\", \"read\"): \"tmp/extensor_energy-K0-eager_a_m0_read.csv\", (\"B\", \"N0\", \"payload\", \"read\"): \"tmp/extensor_energy-N0-eager_b_n0_read.csv\", (\"B\", \"K0\", \"coord\", \"read\"): \"tmp/extensor_energy-K0-eager_b_n0_read.csv\", (\"B\", \"K0\", \"payload\", \"read\"): \"tmp/extensor_energy-K0-eager_b_n0_read.csv\"}\n" + \ + "traffic = Traffic.buffetTraffic(bindings, formats, traces, 251658240, 64)\n" + \ + "metrics[\"Z\"][\"MainMemory\"] = {}\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"A\"] = {}\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"A\"][\"read\"] = 0\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"A\"][\"read\"] += traffic[0][\"A\"][\"read\"]\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"B\"] = {}\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"B\"][\"read\"] = 0\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"B\"][\"read\"] += traffic[0][\"B\"][\"read\"]\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"Z\"] = {}\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"Z\"][\"read\"] = 0\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"Z\"][\"write\"] = 0\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"Z\"][\"read\"] += traffic[0][\"Z\"][\"read\"]\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"Z\"][\"write\"] += traffic[0][\"Z\"][\"write\"]\n" + \ + "bindings = [{\"tensor\": \"A\", \"rank\": \"M0\", \"type\": \"coord\", \"evict-on\": \"K1\", \"format\": \"default\", \"style\": \"eager\", \"root\": \"M0\"}, {\"tensor\": \"B\", \"rank\": \"N0\", \"type\": \"coord\", \"evict-on\": \"K1\", \"format\": \"default\", \"style\": \"eager\", \"root\": \"N0\"}, {\"tensor\": \"Z\", \"evict-on\": \"N1\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"M0\", \"rank\": \"N0\", \"type\": \"coord\"}, {\"tensor\": \"Z\", \"evict-on\": \"N1\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"M0\", \"rank\": \"N0\", \"type\": \"payload\"}, {\"tensor\": \"A\", \"evict-on\": \"K1\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"M0\", \"rank\": \"M0\", \"type\": \"payload\"}, {\"tensor\": \"A\", \"evict-on\": \"K1\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"M0\", \"rank\": \"K0\", \"type\": \"coord\"}, {\"tensor\": \"A\", \"evict-on\": \"K1\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"M0\", \"rank\": \"K0\", \"type\": \"payload\"}, {\"tensor\": \"B\", \"evict-on\": \"K1\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"N0\", \"rank\": \"N0\", \"type\": \"payload\"}, {\"tensor\": \"B\", \"evict-on\": \"K1\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"N0\", \"rank\": \"K0\", \"type\": \"coord\"}, {\"tensor\": \"B\", \"evict-on\": \"K1\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"N0\", \"rank\": \"K0\", \"type\": \"payload\"}]\n" + \ + "traces = {(\"A\", \"M0\", \"coord\", \"read\"): \"tmp/extensor_energy-M0-eager_a_m0_read.csv\", (\"B\", \"N0\", \"coord\", \"read\"): \"tmp/extensor_energy-N0-eager_b_n0_read.csv\", (\"Z\", \"N0\", 
\"coord\", \"read\"): \"tmp/extensor_energy-N0-eager_z_m0_read.csv\", (\"Z\", \"N0\", \"coord\", \"write\"): \"tmp/extensor_energy-N0-eager_z_m0_write.csv\", (\"Z\", \"N0\", \"payload\", \"read\"): \"tmp/extensor_energy-N0-eager_z_m0_read.csv\", (\"Z\", \"N0\", \"payload\", \"write\"): \"tmp/extensor_energy-N0-eager_z_m0_write.csv\", (\"A\", \"M0\", \"payload\", \"read\"): \"tmp/extensor_energy-M0-eager_a_m0_read.csv\", (\"A\", \"K0\", \"coord\", \"read\"): \"tmp/extensor_energy-K0-eager_a_m0_read.csv\", (\"A\", \"K0\", \"payload\", \"read\"): \"tmp/extensor_energy-K0-eager_a_m0_read.csv\", (\"B\", \"N0\", \"payload\", \"read\"): \"tmp/extensor_energy-N0-eager_b_n0_read.csv\", (\"B\", \"K0\", \"coord\", \"read\"): \"tmp/extensor_energy-K0-eager_b_n0_read.csv\", (\"B\", \"K0\", \"payload\", \"read\"): \"tmp/extensor_energy-K0-eager_b_n0_read.csv\"}\n" + \ + "traffic = Traffic.buffetTraffic(bindings, formats, traces, 524288, 64)\n" + \ + "metrics[\"Z\"][\"LLB\"] = {}\n" + \ + "metrics[\"Z\"][\"LLB\"][\"A\"] = {}\n" + \ + "metrics[\"Z\"][\"LLB\"][\"A\"][\"read\"] = 0\n" + \ + "metrics[\"Z\"][\"LLB\"][\"A\"][\"read\"] += traffic[0][\"A\"][\"read\"]\n" + \ + "metrics[\"Z\"][\"LLB\"][\"B\"] = {}\n" + \ + "metrics[\"Z\"][\"LLB\"][\"B\"][\"read\"] = 0\n" + \ + "metrics[\"Z\"][\"LLB\"][\"B\"][\"read\"] += traffic[0][\"B\"][\"read\"]\n" + \ + "metrics[\"Z\"][\"LLB\"][\"Z\"] = {}\n" + \ + "metrics[\"Z\"][\"LLB\"][\"Z\"][\"read\"] = 0\n" + \ + "metrics[\"Z\"][\"LLB\"][\"Z\"][\"write\"] = 0\n" + \ + "metrics[\"Z\"][\"LLB\"][\"Z\"][\"read\"] += traffic[0][\"Z\"][\"read\"]\n" + \ + "metrics[\"Z\"][\"LLB\"][\"Z\"][\"write\"] += traffic[0][\"Z\"][\"write\"]\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"time\"] = (metrics[\"Z\"][\"MainMemory\"][\"A\"][\"read\"] + metrics[\"Z\"][\"MainMemory\"][\"B\"][\"read\"] + metrics[\"Z\"][\"MainMemory\"][\"Z\"][\"read\"] + metrics[\"Z\"][\"MainMemory\"][\"Z\"][\"write\"]) / 586314575512\n" + \ + "metrics[\"Z\"][\"LLB\"][\"time\"] = (metrics[\"Z\"][\"LLB\"][\"A\"][\"read\"] + metrics[\"Z\"][\"LLB\"][\"B\"][\"read\"] + metrics[\"Z\"][\"LLB\"][\"Z\"][\"read\"] + metrics[\"Z\"][\"LLB\"][\"Z\"][\"write\"]) / 9223372036854775807\n" + \ + "metrics[\"Z\"][\"FPMul\"] = {}\n" + \ + "metrics[\"Z\"][\"FPMul\"][\"mul\"] = Metrics.dump()[\"Compute\"][\"payload_mul\"]\n" + \ + "metrics[\"Z\"][\"FPMul\"][\"time\"] = metrics[\"Z\"][\"FPMul\"][\"mul\"] / 128000000000\n" + \ + "metrics[\"Z\"][\"FPAdd\"] = {}\n" + \ + "metrics[\"Z\"][\"FPAdd\"][\"add\"] = Metrics.dump()[\"Compute\"][\"payload_add\"]\n" + \ + "metrics[\"Z\"][\"FPAdd\"][\"time\"] = metrics[\"Z\"][\"FPAdd\"][\"add\"] / 128000000000\n" + \ + "metrics[\"Z\"][\"K2Intersect\"] = 0\n" + \ + "metrics[\"Z\"][\"K2Intersect\"] += K2Intersect_K2.getNumIntersects()\n" + \ + "metrics[\"Z\"][\"K2Intersect\"][\"time\"] = metrics[\"Z\"][\"K2Intersect\"] / 1000000000\n" + \ + "metrics[\"Z\"][\"K1Intersect\"] = 0\n" + \ + "metrics[\"Z\"][\"K1Intersect\"] += K1Intersect_K1.getNumIntersects()\n" + \ + "metrics[\"Z\"][\"K1Intersect\"][\"time\"] = metrics[\"Z\"][\"K1Intersect\"] / 1000000000\n" + \ + "metrics[\"Z\"][\"K0Intersection\"] = 0\n" + \ + "metrics[\"Z\"][\"K0Intersection\"] += K0Intersection_K0.getNumIntersects()\n" + \ + "metrics[\"Z\"][\"K0Intersection\"][\"time\"] = metrics[\"Z\"][\"K0Intersection\"] / 128000000000\n" + \ + "metrics[\"Z\"][\"TopSequencer\"] = {}\n" + \ + "metrics[\"Z\"][\"TopSequencer\"][\"N2\"] = Compute.numIters(\"tmp/extensor_energy-N2-iter.csv\")\n" + \ + "metrics[\"Z\"][\"TopSequencer\"][\"K2\"] = 
Compute.numIters(\"tmp/extensor_energy-K2-iter.csv\")\n" + \ + "metrics[\"Z\"][\"TopSequencer\"][\"M2\"] = Compute.numIters(\"tmp/extensor_energy-M2-iter.csv\")\n" + \ + "metrics[\"Z\"][\"TopSequencer\"][\"time\"] = (metrics[\"Z\"][\"TopSequencer\"][\"N2\"] + metrics[\"Z\"][\"TopSequencer\"][\"K2\"] + metrics[\"Z\"][\"TopSequencer\"][\"M2\"]) / 1000000000\n" + \ + "metrics[\"Z\"][\"MiddleSequencer\"] = {}\n" + \ + "metrics[\"Z\"][\"MiddleSequencer\"][\"M1\"] = Compute.numIters(\"tmp/extensor_energy-M1-iter.csv\")\n" + \ + "metrics[\"Z\"][\"MiddleSequencer\"][\"N1\"] = Compute.numIters(\"tmp/extensor_energy-N1-iter.csv\")\n" + \ + "metrics[\"Z\"][\"MiddleSequencer\"][\"K1\"] = Compute.numIters(\"tmp/extensor_energy-K1-iter.csv\")\n" + \ + "metrics[\"Z\"][\"MiddleSequencer\"][\"time\"] = (metrics[\"Z\"][\"MiddleSequencer\"][\"M1\"] + metrics[\"Z\"][\"MiddleSequencer\"][\"N1\"] + metrics[\"Z\"][\"MiddleSequencer\"][\"K1\"]) / 1000000000\n" + \ + "metrics[\"Z\"][\"BottomSequencer\"] = {}\n" + \ + "metrics[\"Z\"][\"BottomSequencer\"][\"M0\"] = Compute.numIters(\"tmp/extensor_energy-M0-iter.csv\")\n" + \ + "metrics[\"Z\"][\"BottomSequencer\"][\"N0\"] = Compute.numIters(\"tmp/extensor_energy-N0-iter.csv\")\n" + \ + "metrics[\"Z\"][\"BottomSequencer\"][\"K0\"] = Compute.numIters(\"tmp/extensor_energy-K0-iter.csv\")\n" + \ + "metrics[\"Z\"][\"BottomSequencer\"][\"time\"] = (metrics[\"Z\"][\"BottomSequencer\"][\"M0\"] + metrics[\"Z\"][\"BottomSequencer\"][\"N0\"] + metrics[\"Z\"][\"BottomSequencer\"][\"K0\"]) / 128000000000\n" + \ + "metrics[\"blocks\"] = [[\"Z\"]]\n" + \ + "metrics[\"time\"] = max(metrics[\"Z\"][\"BottomSequencer\"][\"time\"], metrics[\"Z\"][\"FPAdd\"][\"time\"], metrics[\"Z\"][\"FPMul\"][\"time\"], metrics[\"Z\"][\"K0Intersection\"][\"time\"], metrics[\"Z\"][\"K1Intersect\"][\"time\"], metrics[\"Z\"][\"K2Intersect\"][\"time\"], metrics[\"Z\"][\"LLB\"][\"time\"], metrics[\"Z\"][\"MainMemory\"][\"time\"], metrics[\"Z\"][\"MiddleSequencer\"][\"time\"], metrics[\"Z\"][\"TopSequencer\"][\"time\"])" + + assert collector.dump().gen(0) == hifiber + + +def test_dump_sigma(): + yaml = build_sigma_yaml() + collector = build_collector(yaml, 0) + hifiber = "metrics = {}\n" + \ "metrics[\"Z\"] = {}\n" + \ - "metrics[\"Z\"][\"Z footprint\"] = 0\n" + \ - "metrics[\"Z\"][\"Z traffic\"] = 0\n" + \ - "A_M_format = Format(A_M, {\"M\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 64}})\n" + \ - "metrics[\"Z\"][\"A footprint\"] = A_M_format.getTensor()\n" + \ - "metrics[\"Z\"][\"A traffic\"] = Traffic.buffetTraffic(A_M, \"M\", A_M_format) + A_M_format.getRank(\"M\")\n" + \ - "metrics[\"Z\"][\"add\"] = Compute.opCount(Metrics.dump(), \"add\")" + "formats = {\"A\": Format(Tensor(rank_ids=[\"K1\", \"MK01\", \"MK00\"], shape=[K, M * K, M * K]), {\"rank-order\": [\"K1\", \"MK01\", \"MK00\"], \"K1\": {\"format\": \"U\"}, \"MK01\": {\"format\": \"U\"}, \"MK00\": {\"format\": \"C\", \"pbits\": 32}}), \"B\": Format(B_K1NK0, {\"rank-order\": [\"K1\", \"N\", \"K0\"], \"K1\": {\"format\": \"U\"}, \"N\": {\"format\": \"U\"}, \"K0\": {\"format\": \"U\", \"pbits\": 32}})}\n" + \ + "bindings = [{\"tensor\": \"A\", \"rank\": \"MK00\", \"type\": \"payload\", \"evict-on\": \"root\", \"format\": \"flattened\", \"style\": \"eager\", \"root\": \"MK00\"}, {\"tensor\": \"B\", \"rank\": \"K0\", \"type\": \"payload\", \"evict-on\": \"root\", \"format\": \"partitioned\", \"style\": \"eager\", \"root\": \"K0\"}]\n" + \ + "traces = {(\"A\", \"MK00\", \"payload\", \"read\"): \"tmp/sigma-MK00-eager_a_mk00_read.csv\", (\"B\", 
\"K0\", \"payload\", \"read\"): \"tmp/sigma-K0-eager_b_k0_read.csv\"}\n" + \ + "traffic = Traffic.buffetTraffic(bindings, formats, traces, 268435456, 32, {\"K0\": \"MK00\"})\n" + \ + "bindings = [{\"tensor\": \"A\", \"rank\": \"MK00\", \"format\": \"flattened\", \"type\": \"payload\", \"evict-on\": \"MK01\", \"style\": \"eager\", \"root\": \"MK00\"}, {\"tensor\": \"B\", \"rank\": \"K0\", \"format\": \"partitioned\", \"type\": \"payload\", \"evict-on\": \"N\", \"style\": \"eager\", \"root\": \"K0\"}]\n" + \ + "traces = {(\"A\", \"MK00\", \"payload\", \"read\"): \"tmp/sigma-MK00-eager_a_mk00_read.csv\", (\"B\", \"K0\", \"payload\", \"read\"): \"tmp/sigma-K0-eager_b_k0_read.csv\"}\n" + \ + "traffic = Traffic.buffetTraffic(bindings, formats, traces, 1048576, 4096, {\"K0\": \"MK00\"})\n" + \ + "metrics[\"Z\"][\"DataSRAMBanks\"] = {}\n" + \ + "metrics[\"Z\"][\"DataSRAMBanks\"][\"A\"] = {}\n" + \ + "metrics[\"Z\"][\"DataSRAMBanks\"][\"A\"][\"read\"] = 0\n" + \ + "metrics[\"Z\"][\"DataSRAMBanks\"][\"A\"][\"read\"] += traffic[0][\"A\"][\"read\"]\n" + \ + "metrics[\"Z\"][\"DataSRAMBanks\"][\"B\"] = {}\n" + \ + "metrics[\"Z\"][\"DataSRAMBanks\"][\"B\"][\"read\"] = 0\n" + \ + "metrics[\"Z\"][\"DataSRAMBanks\"][\"B\"][\"read\"] += traffic[0][\"B\"][\"read\"]\n" + \ + "metrics[\"Z\"][\"DataSRAMBanks\"][\"time\"] = (metrics[\"Z\"][\"DataSRAMBanks\"][\"A\"][\"read\"] + metrics[\"Z\"][\"DataSRAMBanks\"][\"B\"][\"read\"]) / 8246337208320\n" + \ + "metrics[\"Z\"][\"Multiplier\"] = {}\n" + \ + "metrics[\"Z\"][\"Multiplier\"][\"mul\"] = Metrics.dump()[\"Compute\"][\"payload_mul\"]\n" + \ + "metrics[\"Z\"][\"Multiplier\"][\"time\"] = metrics[\"Z\"][\"Multiplier\"][\"mul\"] / 8192000000000\n" + \ + "metrics[\"blocks\"] = [[\"Z\"]]\n" + \ + "metrics[\"time\"] = max(metrics[\"Z\"][\"DataSRAMBanks\"][\"time\"], metrics[\"Z\"][\"Multiplier\"][\"time\"])" assert collector.dump().gen(0) == hifiber -def test_dump_leader_follower_bad_rank(): +def test_dump_new_flattened_tensor_for_format(): yaml = """ einsum: declaration: + Z: [K, M] A: [K, M] - B: [K] - Z: [M] expressions: - - Z[m] = A[k, m] * B[m] - + - Z[k, m] = A[k, m] + mapping: + partitioning: + Z: + (K, M): [flatten()] + spacetime: + Z: + space: [] + time: [KM] architecture: - subtree: - - name: System + accel: + - name: level0 local: - - name: Intersect - class: LeaderFollower - + - name: Buffer + class: Buffet + attributes: + width: 64 + depth: 1024 bindings: - - name: Intersect - bindings: - - einsum: Z - rank: P - leader: B + Z: + - config: accel + prefix: tmp/Z + - component: Buffer + bindings: + - tensor: A + rank: KM + type: payload + evict-on: root + format: default + format: + A: + default: + rank-order: [KM] + KM: + format: C + pbits: 32 """ collector = build_collector(yaml, 0) - with pytest.raises(ValueError) as excinfo: - collector.dump() - assert str(excinfo.value) == "Tensor B has no rank P" + hifiber = "metrics = {}\n" + \ + "metrics[\"Z\"] = {}\n" + \ + "formats = {\"A\": Format(Tensor(rank_ids=[\"KM\"], shape=[K * M]), {\"rank-order\": [\"KM\"], \"KM\": {\"format\": \"C\", \"pbits\": 32}})}\n" + \ + "bindings = [{\"tensor\": \"A\", \"rank\": \"KM\", \"type\": \"payload\", \"evict-on\": \"root\", \"format\": \"default\", \"style\": \"lazy\"}]\n" + \ + "Traffic.filterTrace(\"tmp/Z-KM-populate_1.csv\", \"tmp/Z-KM-iter.csv\", \"tmp/Z-KM-populate_1_payload.csv\")\n" + \ + "traces = {(\"A\", \"KM\", \"payload\", \"read\"): \"tmp/Z-KM-populate_1_payload.csv\"}\n" + \ + "traffic = Traffic.buffetTraffic(bindings, formats, traces, 65536, 64)\n" + \ + 
"metrics[\"blocks\"] = [[\"Z\"]]\n" + \ + "metrics[\"time\"] = 0" + + assert collector.dump().gen(0) == hifiber -def test_dump_leader_follower(): +def test_dump_skip_zero_bits(): yaml = """ einsum: declaration: + Z: [K, M] A: [K, M] - B: [K] - Z: [M] expressions: - - Z[m] = A[k, m] * B[m] - + - Z[k, m] = A[k, m] + mapping: + spacetime: + Z: + space: [] + time: [K, M] architecture: - subtree: - - name: System + accel: + - name: level0 local: - - name: Intersect - class: LeaderFollower - + - name: Buffer + class: Buffet + attributes: + width: 64 + depth: 1024 bindings: - - name: Intersect - bindings: - - einsum: Z - rank: K - leader: B + Z: + - config: accel + prefix: tmp/Z + - component: Buffer + bindings: + - tensor: Z + rank: K + type: payload + style: eager + evict-on: root + format: default + - tensor: A + rank: K + type: coord + style: eager + evict-on: root + format: default + format: + Z: + default: + rank-order: [K, M] + K: + format: C + cbits: 0 + M: + format: C + pbits: 32 + A: + default: + rank-order: [K, M] + K: + format: C + pbits: 0 + M: + format: C + pbits: 32 """ collector = build_collector(yaml, 0) hifiber = "metrics = {}\n" + \ "metrics[\"Z\"] = {}\n" + \ - "metrics[\"Z\"][\"Z footprint\"] = 0\n" + \ - "metrics[\"Z\"][\"Z traffic\"] = 0\n" + \ - "metrics[\"Z\"][\"A footprint\"] = 0\n" + \ - "metrics[\"Z\"][\"A traffic\"] = 0\n" + \ - "metrics[\"Z\"][\"B footprint\"] = 0\n" + \ - "metrics[\"Z\"][\"B traffic\"] = 0\n" + \ - "metrics[\"Z\"][\"K intersections\"] = Compute.lfCount(Metrics.dump(), \"K\", 1)" + "formats = {\"Z\": Format(Z_KM, {\"rank-order\": [\"K\", \"M\"], \"K\": {\"format\": \"C\", \"cbits\": 0}, \"M\": {\"format\": \"C\", \"pbits\": 32}}), \"A\": Format(A_KM, {\"rank-order\": [\"K\", \"M\"], \"K\": {\"format\": \"C\", \"pbits\": 0}, \"M\": {\"format\": \"C\", \"pbits\": 32}})}\n" + \ + "bindings = [{\"tensor\": \"Z\", \"evict-on\": \"root\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"K\", \"rank\": \"M\", \"type\": \"payload\"}, {\"tensor\": \"A\", \"evict-on\": \"root\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"K\", \"rank\": \"M\", \"type\": \"payload\"}]\n" + \ + "traces = {(\"Z\", \"M\", \"payload\", \"read\"): \"tmp/Z-M-eager_z_k_read.csv\", (\"Z\", \"M\", \"payload\", \"write\"): \"tmp/Z-M-eager_z_k_write.csv\", (\"A\", \"M\", \"payload\", \"read\"): \"tmp/Z-M-eager_a_k_read.csv\"}\n" + \ + "traffic = Traffic.buffetTraffic(bindings, formats, traces, 65536, 64)\n" + \ + "metrics[\"blocks\"] = [[\"Z\"]]\n" + \ + "metrics[\"time\"] = 0" assert collector.dump().gen(0) == hifiber -def test_dump_skip_ahead(): +def test_end(): + hifiber = "Metrics.endCollect()" + + assert Collector.end().gen(0) == hifiber + + +def test_make_body_none(): + yaml = build_extensor_yaml() + collector = build_collector(yaml, 0) + + hifiber = "" + + assert collector.make_body().gen(0) == hifiber + + +def test_make_body_iter_num(): yaml = """ einsum: declaration: + Z: [K, M] A: [K, M] - B: [K] - Z: [M] expressions: - - Z[m] = A[k, m] * B[m] + - Z[k, m] = A[k, m] + mapping: + spacetime: + Z: + space: [] + time: [K, M] + architecture: + accel: + - name: level0 + local: + - name: Buffer + class: Buffet + bindings: + Z: + - config: accel + prefix: tmp/Z + - component: Buffer + bindings: + - tensor: Z + rank: K + type: payload + style: eager + evict-on: root + format: default + format: + Z: + default: + rank-order: [K, M] + K: + format: C + pbits: 32 + M: + format: C + cbits: 32 + pbits: 64 + """ + collector = build_collector(yaml, 0) + + hifiber = 
"m_iter_num = Metrics.getIter().copy()" + + assert collector.make_body().gen(0) == hifiber + + +def test_make_loop_footer_unconfigured(): + yaml = build_gamma_yaml() + collector = build_collector(yaml, 0) + + with pytest.raises(ValueError) as excinfo: + collector.make_loop_footer("K") + assert str( + excinfo.value) == "Unconfigured collector. Make sure to first call start()" + + +def test_make_loop_footer(): + yaml = build_gamma_yaml() + collector = build_collector(yaml, 0) + collector.start() + + hifiber = "Intersect_K.addTraces(Metrics.consumeTrace(\"K\", \"intersect_2\"))" + assert collector.make_loop_footer("K").gen(0) == hifiber + + yaml = build_extensor_yaml() + collector = build_collector(yaml, 0) + collector.start() + + program = collector.program + part_ir = program.get_partitioning() + for tensor in program.get_equation().get_tensors(): + tensor.update_ranks( + part_ir.partition_ranks( + tensor.get_ranks(), + part_ir.get_all_parts(), + True, + True)) + program.get_loop_order().apply(tensor) + + assert collector.make_loop_footer("M2").gen(0) == "" + + hifiber = "n0_iter_num = Metrics.getIter().copy()\n" + \ + "K0Intersection_K0.addTraces(Metrics.consumeTrace(\"K0\", \"intersect_0\"), Metrics.consumeTrace(\"K0\", \"intersect_1\"))" + + assert collector.make_loop_footer("K0").gen(0) == hifiber + + hifiber = "K1Intersect_K1.addTraces(Metrics.consumeTrace(\"K1\", \"intersect_0\"), Metrics.consumeTrace(\"K1\", \"intersect_1\"))\n" + \ + "z_k1.trace(\"eager_z_k1_write\", iteration_num=n0_iter_num)" + + assert collector.make_loop_footer("K1").gen(0) == hifiber + + +def test_make_loop_header_unconfigured(): + yaml = build_gamma_yaml() + collector = build_collector(yaml, 0) + + with pytest.raises(ValueError) as excinfo: + collector.make_loop_header("K") + assert str( + excinfo.value) == "Unconfigured collector. 
Make sure to first call start()" + + +def test_make_loop_header(): + yaml = build_extensor_yaml() + collector = build_collector(yaml, 0) + collector.start() + + assert collector.make_loop_header("N2").gen(0) == "" + hifiber = "eager_a_m0_read = set()\n" + \ + "eager_z_m0_read = set()" + + assert collector.make_loop_header("M1").gen(0) == hifiber + + hifiber_option1 = "if () not in eager_z_m0_read:\n" + \ + " eager_z_m0_read.add(())\n" + \ + " z_m0.trace(\"eager_z_m0_read\")\n" + \ + "if () not in eager_a_m0_read:\n" + \ + " eager_a_m0_read.add(())\n" + \ + " a_m0.trace(\"eager_a_m0_read\")" + + hifiber_option2 = "if () not in eager_a_m0_read:\n" + \ + " eager_a_m0_read.add(())\n" + \ + " a_m0.trace(\"eager_a_m0_read\")\n" + \ + "if () not in eager_z_m0_read:\n" + \ + " eager_z_m0_read.add(())\n" + \ + " z_m0.trace(\"eager_z_m0_read\")" + + assert collector.make_loop_header("M0").gen( + 0) in {hifiber_option1, hifiber_option2} + +# TODO: Multiply buffer size by number of instances + + +def test_make_loop_header_eager_root(): + yaml = """ + einsum: + declaration: + A: [K] + Z: [M] + expressions: + - Z[m] = A[k] + mapping: + loop-order: + Z: [K, M] + spacetime: + Z: + space: [] + time: [K, M] + format: + Z: + default: + rank-order: [M] + M: + format: U + pbits: 32 architecture: - subtree: + Accelerator: - name: System + attributes: + clock_frequency: 1000 local: - - name: Intersect - class: SkipAhead - + - name: MainMemory + class: DRAM + attributes: + bandwidth: 2048 + subtree: + - name: Chip + local: + - name: Buffer + class: Buffet + attributes: + width: 32 + depth: 128 bindings: - - name: Intersect - bindings: - - einsum: Z - rank: K + Z: + - config: Accelerator + prefix: tmp/eager_root + - component: MainMemory + bindings: + - tensor: Z + rank: M + type: payload + format: default + - component: Buffer + bindings: + - tensor: Z + rank: M + type: payload + format: default + style: eager + evict-on: root """ collector = build_collector(yaml, 0) + collector.start() - hifiber = "metrics = {}\n" + \ - "metrics[\"Z\"] = {}\n" + \ - "metrics[\"Z\"][\"Z footprint\"] = 0\n" + \ - "metrics[\"Z\"][\"Z traffic\"] = 0\n" + \ - "metrics[\"Z\"][\"A footprint\"] = 0\n" + \ - "metrics[\"Z\"][\"A traffic\"] = 0\n" + \ - "metrics[\"Z\"][\"B footprint\"] = 0\n" + \ - "metrics[\"Z\"][\"B traffic\"] = 0\n" + \ - "metrics[\"Z\"][\"K intersections\"] = Compute.skipCount(Metrics.dump(), \"K\")" + hifiber = "z_k.trace(\"eager_z_k_write\", iteration_num=m_iter_num)" + assert collector.make_loop_footer("K").gen(0) == hifiber - assert collector.dump().gen(0) == hifiber -# def test_dump_leader_follower_not_intersected(): -# yaml = """ -# einsum: -# declaration: -# A: [M] -# B: [K] -# C: [K, M] -# Z: [M] -# expressions: -# - Z[m] = A[k, m] * B[m] * C[k, m] -# -# architecture: -# subtree: -# - name: System -# local: -# - name: Intersect -# class: LeaderFollower -# -# bindings: -# - name: Intersect -# bindings: -# - einsum: Z -# rank: K -# leader: C -# """ -# collector = build_collector(yaml, 0) -# -# hifiber = "metrics = {}\n" + \ -# "metrics[\"Z\"] = {}\n" + \ -# "metrics[\"Z\"][\"Z footprint\"] = 0\n" + \ -# "metrics[\"Z\"][\"Z traffic\"] = 0\n" + \ -# "metrics[\"Z\"][\"A footprint\"] = 0\n" + \ -# "metrics[\"Z\"][\"A traffic\"] = 0\n" + \ -# "metrics[\"Z\"][\"B footprint\"] = 0\n" + \ -# "metrics[\"Z\"][\"B traffic\"] = 0\n" + \ -# "metrics[\"Z\"][\"C footprint\"] = 0\n" + \ -# "metrics[\"Z\"][\"C traffic\"] = 0\n" + \ -# "metrics[\"Z\"][\"K intersections\"] = Compute.lfCount(Metrics.dump(), \"K\", 1)" -# -# assert
collector.dump().gen(0) == hifiber +def test_register_ranks(): + yaml = build_gamma_yaml() + collector = build_collector(yaml, 0) + + hifiber = "Metrics.registerRank(\"M\")\n" + \ + "Metrics.registerRank(\"K\")\n" + \ + "Metrics.registerRank(\"N\")" + assert collector.register_ranks().gen(0) == hifiber -def test_end(): - hifiber = "Metrics.endCollect()" - assert Collector.end().gen(0) == hifiber +def test_set_collecting_type_err(): + yaml = build_gamma_yaml() + collector = build_collector(yaml, 0) + + with pytest.raises(ValueError) as excinfo: + collector.set_collecting(None, "K", "fiber", False, True) + assert str( + excinfo.value) == "Tensor must be specified for trace type fiber" def test_set_collecting(): yaml = build_gamma_yaml() collector = build_collector(yaml, 0) - hifiber = "B_KN.setCollecting(\"K\", True)" - assert collector.set_collecting("B", "K").gen(0) == hifiber + hifiber = "Metrics.trace(\"K\", type_=\"intersect_3\", consumable=False)" + assert collector.set_collecting( + "B", "K", "fiber", False, True).gen(0) == hifiber + + hifiber = "Metrics.trace(\"K\", type_=\"iter\", consumable=False)" + assert collector.set_collecting( + None, "K", "iter", False, True).gen(0) == hifiber + + +def test_set_collecting_eager(): + yaml = build_extensor_yaml() + collector = build_collector(yaml, 0) + + program = collector.program + part_ir = program.get_partitioning() + for tensor in program.get_equation().get_tensors(): + tensor.update_ranks( + part_ir.partition_ranks( + tensor.get_ranks(), + part_ir.get_all_parts(), + True, + True)) + program.get_loop_order().apply(tensor) + + hifiber = "Metrics.trace(\"N0\", type_=\"eager_a_m0_read\", consumable=False)" + assert collector.set_collecting( + "A", "N0", "M0", False, True).gen(0) == hifiber + + hifiber = "n0_iter_num = None\n" + \ + "Metrics.trace(\"N0\", type_=\"eager_z_m0_write\", consumable=False)" + assert collector.set_collecting( + "Z", "N0", "M0", False, False).gen(0) == hifiber def test_start(): yaml = build_gamma_yaml() collector = build_collector(yaml, 0) - hifiber = "Metrics.beginCollect([\"M\", \"K\", \"N\"])" - assert collector.start().gen(0) == hifiber + generated = collector.start().gen(0).split("\n") + + corr = ["Metrics.beginCollect(\"tmp/gamma_T\")"] + check_hifiber_lines(generated[:1], corr) + + corr = ["Intersect_K = LeaderFollowerIntersector()"] + check_hifiber_lines(generated[1:2], corr) + + corr = ["Metrics.trace(\"K\", type_=\"iter\", consumable=False)", + "Metrics.trace(\"K\", type_=\"intersect_2\", consumable=True)", + "Metrics.trace(\"M\", type_=\"iter\", consumable=False)", + "Metrics.trace(\"M\", type_=\"populate_1\", consumable=False)", + "Metrics.trace(\"K\", type_=\"intersect_2\", consumable=False)", + "Metrics.trace(\"K\", type_=\"intersect_3\", consumable=False)", + "Metrics.trace(\"N\", type_=\"iter\", consumable=False)", + "Metrics.trace(\"N\", type_=\"populate_1\", consumable=False)"] + check_hifiber_lines(generated[2:], corr) + + +def test_start_sequencer(): + yaml = build_extensor_energy_yaml() + collector = build_collector(yaml, 0) + + generated = collector.start().gen(0).split("\n") + + corr = ['Metrics.beginCollect("tmp/extensor_energy")'] + check_hifiber_lines(generated[:1], corr) + + corr = ["K2Intersect_K2 = SkipAheadIntersector()", + "K1Intersect_K1 = SkipAheadIntersector()", + "K0Intersection_K0 = SkipAheadIntersector()"] + check_hifiber_lines(generated[1:4], corr) + + corr = [ + "Metrics.trace(\"N2\", type_=\"iter\", consumable=False)", + "Metrics.trace(\"K2\", type_=\"iter\", 
consumable=False)", + "Metrics.trace(\"M2\", type_=\"iter\", consumable=False)", + "Metrics.trace(\"M1\", type_=\"iter\", consumable=False)", + "Metrics.trace(\"N1\", type_=\"iter\", consumable=False)", + "Metrics.trace(\"K1\", type_=\"iter\", consumable=False)", + "Metrics.trace(\"M0\", type_=\"iter\", consumable=False)", + "Metrics.trace(\"N0\", type_=\"iter\", consumable=False)", + "Metrics.trace(\"K0\", type_=\"iter\", consumable=False)", + "Metrics.trace(\"N0\", type_=\"eager_z_m0_read\", consumable=False)", + "n0_iter_num = None", + "Metrics.trace(\"N0\", type_=\"eager_z_m0_write\", consumable=False)", + "Metrics.trace(\"M0\", type_=\"eager_z_m0_read\", consumable=False)", + "n0_iter_num = None", + "Metrics.trace(\"M0\", type_=\"eager_z_m0_write\", consumable=False)", + "Metrics.trace(\"K0\", type_=\"eager_a_m0_read\", consumable=False)", + "Metrics.trace(\"K1\", type_=\"intersect_0\", consumable=True)", + "Metrics.trace(\"M0\", type_=\"eager_a_m0_read\", consumable=False)", + "Metrics.trace(\"K0\", type_=\"intersect_0\", consumable=True)", + "Metrics.trace(\"K1\", type_=\"intersect_0\", consumable=False)", + "Metrics.trace(\"K2\", type_=\"intersect_0\", consumable=True)", + "Metrics.trace(\"N1\", type_=\"populate_1\", consumable=False)", + "Metrics.trace(\"K1\", type_=\"intersect_1\", consumable=True)", + "Metrics.trace(\"N0\", type_=\"eager_b_n0_read\", consumable=False)", + "Metrics.trace(\"K0\", type_=\"intersect_1\", consumable=True)", + "Metrics.trace(\"K1\", type_=\"intersect_1\", consumable=False)", + "Metrics.trace(\"K2\", type_=\"intersect_1\", consumable=True)", + "Metrics.trace(\"K0\", type_=\"eager_b_n0_read\", consumable=False)"] + check_hifiber_lines(generated[4:32], corr) + + corr = ["Metrics.registerRank(\"N2\")", + "Metrics.registerRank(\"K2\")", + "Metrics.registerRank(\"M2\")", + "Metrics.registerRank(\"M1\")", + "Metrics.registerRank(\"N1\")", + "Metrics.registerRank(\"K1\")", + "Metrics.registerRank(\"M0\")", + "Metrics.registerRank(\"N0\")", + "Metrics.registerRank(\"K0\")"] + check_hifiber_lines(generated[32:], corr) + + +def test_start_flattening(): + yaml = build_sigma_yaml() + collector = build_collector(yaml, 0) + + generated = collector.start().gen(0).split("\n") + + corr = ["Metrics.beginCollect(\"tmp/sigma\")", + "Metrics.associateShape(\"MK01\", (M, K))", + "Metrics.matchRanks(\"MK00\", \"M\")", + "Metrics.matchRanks(\"MK00\", \"K0\")", + "Metrics.associateShape(\"MK00\", (M, K))"] + check_hifiber_lines(generated[:5], corr) + + corr = [ + "Metrics.trace(\"MK00\", type_=\"eager_a_mk00_read\", consumable=False)", + "Metrics.trace(\"K0\", type_=\"eager_b_k0_read\", consumable=False)"] + check_hifiber_lines(generated[5:7], corr) + + corr = ["Metrics.registerRank(\"K1\")", + "Metrics.registerRank(\"MK01\")", + "Metrics.registerRank(\"N\")", + "Metrics.registerRank(\"MK00\")"] + check_hifiber_lines(generated[7:], corr) + + +def test_trace_tree(): + yaml = build_extensor_yaml() + collector = build_collector(yaml, 0) + + program = collector.program + part_ir = program.get_partitioning() + for tensor in program.get_equation().get_tensors(): + tensor.update_ranks( + part_ir.partition_ranks( + tensor.get_ranks(), + part_ir.get_all_parts(), + True, + True)) + program.get_loop_order().apply(tensor) + + hifiber = "if (m1, k1) not in eager_a_m0_read:\n" + \ + " eager_a_m0_read.add((m1, k1))\n" + \ + " a_m0.trace(\"eager_a_m0_read\")" + assert collector.trace_tree("A", "M0", True).gen(0) == hifiber + + hifiber = "z_m0.trace(\"eager_z_m0_write\", 
iteration_num=n0_iter_num)" + assert collector.trace_tree("Z", "M0", False).gen(0) == hifiber diff --git a/tests/trans/test_equation.py b/tests/trans/test_equation.py index 7be9cfe..d6c1774 100644 --- a/tests/trans/test_equation.py +++ b/tests/trans/test_equation.py @@ -1,10 +1,11 @@ import pytest from sympy import symbols +from teaal.ir.hardware import Hardware from teaal.ir.iter_graph import IterationGraph +from teaal.ir.metrics import Metrics from teaal.ir.program import Program -from teaal.parse.einsum import Einsum -from teaal.parse.mapping import Mapping +from teaal.parse import * from teaal.trans.equation import Equation from tests.utils.parse_tree import make_plus @@ -25,7 +26,7 @@ def make_basic(): program = Program(einsum, mapping) program.add_einsum(0) - return IterationGraph(program), Equation(program) + return IterationGraph(program), Equation(program, None) def make_output(): @@ -44,7 +45,7 @@ def make_output(): program = Program(einsum, mapping) program.add_einsum(0) - return IterationGraph(program), Equation(program) + return IterationGraph(program), Equation(program, None) def make_mult_terms(): @@ -65,7 +66,7 @@ def make_mult_terms(): program = Program(einsum, mapping) program.add_einsum(0) - return IterationGraph(program), Equation(program) + return IterationGraph(program), Equation(program, None) def make_take(): @@ -83,7 +84,7 @@ def make_take(): program = Program(einsum, mapping) program.add_einsum(0) - return IterationGraph(program), Equation(program) + return IterationGraph(program), Equation(program, None) def make_other(einsum, mapping): @@ -130,7 +131,7 @@ def make_display(style, opt): program = Program(einsum, mapping) program.add_einsum(0) - return IterationGraph(program), Equation(program) + return IterationGraph(program), Equation(program, None) def make_matmul(mapping): @@ -155,7 +156,7 @@ def make_matmul(mapping): program.apply_all_partitioning(tensor) program.get_loop_order().apply(tensor) - return IterationGraph(program), Equation(program) + return IterationGraph(program), Equation(program, None) def make_conv(expr, loop_order): @@ -181,7 +182,7 @@ def make_conv(expr, loop_order): program.apply_all_partitioning(tensor) program.get_loop_order().apply(tensor) - return IterationGraph(program), Equation(program) + return IterationGraph(program), Equation(program, None) def make_conv_part(expr, loop_order): @@ -211,7 +212,24 @@ def make_conv_part(expr, loop_order): program.apply_all_partitioning(tensor) program.get_loop_order().apply(tensor) - return IterationGraph(program), Equation(program) + return IterationGraph(program), Equation(program, None) + + +def make_gamma(): + fname = "tests/integration/gamma.yaml" + einsum = Einsum.from_file(fname) + mapping = Mapping.from_file(fname) + arch = Architecture.from_file(fname) + bindings = Bindings.from_file(fname) + format_ = Format.from_file(fname) + + program = Program(einsum, mapping) + hardware = Hardware(arch, bindings, program) + + program.add_einsum(0) + metrics = Metrics(program, hardware, format_) + + return IterationGraph(program), Equation(program, metrics) def test_eager_inputs_one_fiber(): @@ -332,6 +350,15 @@ def test_make_iter_expr_display_slip(): assert eqn.make_iter_expr(rank, tensors).gen() == iter_expr +def test_make_iter_expr_leader_follower(): + graph, eqn = make_gamma() + + graph.pop_concord() + iter_expr = "t_k << Fiber.intersection(a_k, b_k, style=\"leader-follower\")" + + assert eqn.make_iter_expr(*graph.peek_concord()).gen() == iter_expr + + def test_flattened_output_only_bad(): mapping = 
""" partitioning: @@ -352,7 +379,7 @@ def test_flattened_output_only_bad(): def test_make_iter_expr_output_only(): program = make_other("A[i] = b", "") graph = IterationGraph(program) - eqn = Equation(program) + eqn = Equation(program, None) rank, tensors = graph.peek_concord() iter_expr = "a_i.iterRangeShapeRef(0, I, 1)" @@ -370,7 +397,7 @@ def test_make_iter_expr_output_only_display(): program = make_other("A[i] = b", mapping) graph = IterationGraph(program) - eqn = Equation(program) + eqn = Equation(program, None) rank, tensors = graph.peek_concord() iter_expr = "enumerate(a_i.iterRangeShapeRef(0, I, 1))" @@ -387,7 +414,7 @@ def test_make_iter_expr_output_only_partition(): program = make_other("A[i] = b", mapping) graph = IterationGraph(program) - eqn = Equation(program) + eqn = Equation(program, None) rank, tensors = graph.peek_concord() iter_expr = "a_i2.iterRangeShapeRef(0, I, I1)" @@ -472,6 +499,13 @@ def test_make_iter_expr_conv_part(): assert eqn.make_iter_expr(*graph.peek_concord()).gen() == hifiber +def test_make_iter_expr_metrics(): + graph, eqn = make_gamma() + hifiber = "t_m << a_m" + + assert eqn.make_iter_expr(*graph.peek_concord()).gen() == hifiber + + def test_make_payload_no_tensors(): _, eqn = make_basic() with pytest.raises(ValueError) as excinfo: @@ -547,7 +581,7 @@ def test_make_payload_display_slip(): def test_make_payload_output_only(): program = make_other("A[i] = b", "") graph = IterationGraph(program) - eqn = Equation(program) + eqn = Equation(program, None) rank, tensors = graph.pop_concord() iter_expr = "i, a_ref" @@ -585,6 +619,13 @@ def test_make_payload_conv_enum(): assert eqn.make_payload(*graph.pop_concord()).gen(parens=False) == hifiber +def test_make_payload_metrics(): + graph, eqn = make_gamma() + hifiber = "m, (t_k, a_k)" + + assert eqn.make_payload(*graph.pop_concord()).gen(parens=False) == hifiber + + def test_make_update(): _, eqn = make_basic() stmt = "a_ref += b_val * c_val * d_val" @@ -593,14 +634,14 @@ def test_make_update(): def test_make_update_vars(): program = make_other("A[i] = b * c * d", "") - eqn = Equation(program) + eqn = Equation(program, None) stmt = "a_ref += b * c * d" assert eqn.make_update().gen(depth=0) == stmt def test_make_update_mult_terms(): program = make_other("A[i] = b * B[i] + c * C[i] + d * D[i]", "") - eqn = Equation(program) + eqn = Equation(program, None) stmt = "a_ref += b * b_val + c * c_val + d * d_val" assert eqn.make_update().gen(depth=0) == stmt @@ -609,17 +650,3 @@ def test_make_update_take(): _, eqn = make_take() stmt = "z_ref += b" assert eqn.make_update().gen(depth=0) == stmt - - -def test_iter_fiber_not_fiber(): - expr = "O[p, q] = I[q + s] * F[s]" - graph, eqn = make_conv(expr, "[P, W, Q]") - graph.pop_concord() - graph.pop_concord() - graph.pop_concord() - _, tensors = graph.peek_concord() - - with pytest.raises(ValueError) as excinfo: - eqn._Equation__iter_fiber(None, tensors[0]) - - assert str(excinfo.value) == "Cannot iterate over payload o_ref" diff --git a/tests/trans/test_footer.py b/tests/trans/test_footer.py index d507922..1ceb22f 100644 --- a/tests/trans/test_footer.py +++ b/tests/trans/test_footer.py @@ -28,7 +28,7 @@ def assert_make_footer(loop_order, partitioning, display, hifiber_options): program = Program(Einsum.from_str(yaml), Mapping.from_str(yaml)) program.add_einsum(0) - graphics = Graphics(program) + graphics = Graphics(program, None) graphics.make_header() for tensor in program.get_equation().get_tensors(): @@ -37,7 +37,7 @@ def assert_make_footer(loop_order, partitioning, display, 
hifiber_options): hifiber = Footer.make_footer( program, graphics, Partitioner( - program, TransUtils())).gen( + program, TransUtils(program))).gen( depth=0) assert hifiber in hifiber_options diff --git a/tests/trans/test_graphics.py b/tests/trans/test_graphics.py index f40385a..04fdbbd 100644 --- a/tests/trans/test_graphics.py +++ b/tests/trans/test_graphics.py @@ -1,6 +1,7 @@ +from teaal.ir.hardware import Hardware +from teaal.ir.metrics import Metrics from teaal.ir.program import Program -from teaal.parse.einsum import Einsum -from teaal.parse.mapping import Mapping +from teaal.parse import * from teaal.trans.graphics import Graphics @@ -16,7 +17,7 @@ def create_default(): """ program = Program(Einsum.from_str(yaml), Mapping.from_str(yaml)) program.add_einsum(0) - return Graphics(program) + return Graphics(program, None) def create_spacetime(opt): @@ -38,7 +39,24 @@ def create_spacetime(opt): opt: """ + opt program = Program(Einsum.from_str(yaml), Mapping.from_str(yaml)) program.add_einsum(0) - return Graphics(program) + return Graphics(program, None) + + +def create_gamma(): + fname = "tests/integration/gamma.yaml" + einsum = Einsum.from_file(fname) + mapping = Mapping.from_file(fname) + arch = Architecture.from_file(fname) + bindings = Bindings.from_file(fname) + format_ = Format.from_file(fname) + + program = Program(einsum, mapping) + hardware = Hardware(arch, bindings, program) + + program.add_einsum(0) + metrics = Metrics(program, hardware, format_) + + return Graphics(program, metrics) def test_make_body_none(): @@ -64,6 +82,11 @@ def test_make_body_slip(): assert graphics.make_body().gen(0) == hifiber +def test_make_body_metrics(): + graphics = create_gamma() + assert graphics.make_body().gen(0) == "" + + def test_make_footer_none(): graphics = create_default() assert graphics.make_footer().gen(0) == "" @@ -76,6 +99,11 @@ def test_make_footer(): assert graphics.make_footer().gen(0) == hifiber +def test_make_footer_metrics(): + graphics = create_gamma() + assert graphics.make_footer().gen(0) == "" + + def test_make_header_none(): graphics = create_default() assert graphics.make_header().gen(0) == "" @@ -92,3 +120,8 @@ def test_make_header_slip(): hifiber = "canvas = createCanvas(A_KM, B_KN, Z_MN)\n" + \ "timestamps = {}" assert graphics.make_header().gen(0) == hifiber + + +def test_make_header_metrics(): + graphics = create_gamma() + assert graphics.make_header().gen(0) == "" diff --git a/tests/trans/test_header.py b/tests/trans/test_header.py index a1ca39a..6c6da39 100644 --- a/tests/trans/test_header.py +++ b/tests/trans/test_header.py @@ -1,10 +1,11 @@ import pytest +from teaal.ir.hardware import Hardware from teaal.ir.iter_graph import IterationGraph +from teaal.ir.metrics import Metrics from teaal.ir.program import Program from teaal.ir.tensor import Tensor -from teaal.parse.einsum import Einsum -from teaal.parse.mapping import Mapping +from teaal.parse import * from teaal.trans.header import Header from teaal.trans.partitioner import Partitioner from teaal.trans.utils import TransUtils @@ -26,7 +27,7 @@ def build_header(exprs, mapping): program = Program(Einsum.from_str(yaml), Mapping.from_str(yaml)) program.add_einsum(0) - header = Header(program, Partitioner(program, TransUtils())) + header = Header(program, None, Partitioner(program, TransUtils(program))) return header @@ -47,11 +48,34 @@ def build_header_conv(loop_order): program = Program(Einsum.from_str(yaml), Mapping.from_str(yaml)) program.add_einsum(0) - header = Header(program, Partitioner(program, TransUtils())) 
+ header = Header(program, None, Partitioner(program, TransUtils(program))) return header +def build_header_gamma(): + fname = "tests/integration/gamma.yaml" + einsum = Einsum.from_file(fname) + mapping = Mapping.from_file(fname) + arch = Architecture.from_file(fname) + bindings = Bindings.from_file(fname) + format_ = Format.from_file(fname) + + program = Program(einsum, mapping) + hardware = Hardware(arch, bindings, program) + + program.add_einsum(0) + metrics = Metrics(program, hardware, format_) + + header = Header( + program, + metrics, + Partitioner( + program, + TransUtils(program))) + return header + + def build_matmul_header(mapping): exprs = """ - Z[m, n] = A[k, m] * B[k, n] @@ -60,18 +84,28 @@ def build_matmul_header(mapping): def test_make_get_payload(): - hifiber = "a_val = a_m.getPayload(m, k)" - + header = build_matmul_header("") tensor = Tensor("A", ["M", "K"]) - assert Header.make_get_payload(tensor, ["M", "K"]).gen(0) == hifiber + hifiber = "a_val = a_m.getPayload(m, k)" + assert header.make_get_payload(tensor, ["M", "K"]).gen(0) == hifiber -def test_make_get_payload_output(): - hifiber = "z_n = z_m.getPayloadRef(m)" +def test_make_get_payload_output(): + header = build_matmul_header("") tensor = Tensor("Z", ["M", "N"]) tensor.set_is_output(True) - assert Header.make_get_payload(tensor, ["M"]).gen(0) == hifiber + + hifiber = "z_n = z_m.getPayloadRef(m)" + assert header.make_get_payload(tensor, ["M"]).gen(0) == hifiber + + +def test_make_get_payload_metrics(): + header = build_header_gamma() + tensor = Tensor("A", ["M", "K"]) + + hifiber = "a_k = a_m.getPayload(m, trace=\"get_payload_A\")" + assert header.make_get_payload(tensor, ["M"]).gen(0) == hifiber def test_make_get_root(): @@ -137,11 +171,18 @@ def test_make_output_conv_shape(): assert header.make_output().gen(0) == hifiber +def test_make_output_metrics_shape(): + hifiber = "T_MKN = Tensor(rank_ids=[\"M\", \"K\", \"N\"], shape=[M, K, N])" + header = build_header_gamma() + + assert header.make_output().gen(0) == hifiber + + def test_make_swizzle_bad(): header = build_matmul_header("") tensor = Tensor("A", ["K", "M"]) with pytest.raises(ValueError) as excinfo: - header.make_swizzle(tensor, "foo") + header.make_swizzle(tensor, [], "foo") assert str( excinfo.value) == "Unknown swizzling reason: foo" @@ -152,7 +193,22 @@ def test_make_swizzle_loop_order(): header = build_matmul_header("") tensor = Tensor("A", ["K", "M"]) - assert header.make_swizzle(tensor, "loop-order").gen(depth=0) == hifiber + assert header.make_swizzle( + tensor, ["M", "K"], "loop-order").gen(depth=0) == hifiber + + +def test_make_swizzle_none(): + hifiber = "" + + mapping = """ + rank-order: + A: [M, K] + """ + + header = build_matmul_header(mapping) + tensor = Tensor("A", ["M", "K"]) + assert header.make_swizzle( + tensor, ["M", "K"], "loop-order").gen(depth=0) == hifiber def test_make_swizzle_partitioning(): @@ -168,7 +224,21 @@ def test_make_swizzle_partitioning(): header = build_matmul_header(mapping) tensor = Tensor("A", ["K1", "K0", "M"]) - assert header.make_swizzle(tensor, "partitioning").gen(depth=0) == hifiber + assert header.make_swizzle( + tensor, [ + "M", "K0"], "partitioning").gen( + depth=0) == hifiber + + +def test_make_swizzle_metrics(): + hifiber = "A_KM = A_MK.swizzleRanks(rank_ids=[\"K\", \"M\"])" + + header = build_matmul_header("") + tensor = Tensor("A", ["M", "K"]) + assert header.make_swizzle( + tensor, [ + "K", "M"], "metrics").gen( + depth=0) == hifiber def test_make_tensor_from_fiber(): diff --git 
a/tests/trans/test_hifiber.py b/tests/trans/test_hifiber.py index 6d7ad94..5c52e59 100644 --- a/tests/trans/test_hifiber.py +++ b/tests/trans/test_hifiber.py @@ -571,58 +571,193 @@ def test_hifiber_dyn_flattening(): assert str(HiFiber(einsum, mapping)) == hifiber -def test_hifiber_hardware(): - fname = "tests/integration/gamma.yaml" - einsum = Einsum.from_file(fname) - mapping = Mapping.from_file(fname) - arch = Architecture.from_file(fname) - bindings = Bindings.from_file(fname) - format_ = Format.from_file(fname) +def test_hifiber_traffic(): + yaml = """ + einsum: + declaration: + A: [K, M] + B: [K, M] + C: [K] + Z: [M] + expressions: + - Z[m] = A[k, m] * B[k, m] * C[k] + mapping: + spacetime: + Z: + space: [] + time: [M, K] + architecture: + accel: + - name: level0 + attributes: + clock_frequency: 2048 + local: + - name: DRAM + class: DRAM + attributes: + bandwidth: 512 + subtree: + - name: level1 + local: + - name: L2Cache + class: Cache + attributes: + width: 64 + depth: 1024 + bindings: + Z: + - config: accel + prefix: tmp/Z + - component: DRAM + bindings: + - tensor: Z + rank: M + type: elem + format: default + - component: L2Cache + bindings: + - tensor: Z + rank: M + type: elem + format: default + format: + Z: + default: + rank-order: [M] + M: + format: C + cbits: 32 + pbits: 64 + """ + einsum = Einsum.from_str(yaml) + mapping = Mapping.from_str(yaml) + arch = Architecture.from_str(yaml) + bindings = Bindings.from_str(yaml) + format_ = Format.from_str(yaml) - hifiber = "T_MKN = Tensor(rank_ids=[\"M\", \"K\", \"N\"])\n" + \ - "t_m = T_MKN.getRoot()\n" + \ + hifiber = "Z_M = Tensor(rank_ids=[\"M\"], shape=[M])\n" + \ + "A_MK = A_KM.swizzleRanks(rank_ids=[\"M\", \"K\"])\n" + \ + "B_MK = B_KM.swizzleRanks(rank_ids=[\"M\", \"K\"])\n" + \ + "z_m = Z_M.getRoot()\n" + \ "a_m = A_MK.getRoot()\n" + \ - "b_k = B_KN.getRoot()\n" + \ - "B_KN.setCollecting(\"K\", True)\n" + \ - "Metrics.beginCollect([\"M\", \"K\", \"N\"])\n" + \ - "for m, (t_k, a_k) in t_m << a_m:\n" + \ - " for k, (t_n, (a_val, b_n)) in t_k << (a_k & b_k):\n" + \ - " for n, (t_ref, b_val) in t_n << b_n:\n" + \ - " t_ref += b_val\n" + \ + "b_m = B_MK.getRoot()\n" + \ + "c_k = C_K.getRoot()\n" + \ + "Metrics.beginCollect(\"tmp/Z\")\n" + \ + "Metrics.trace(\"M\", type_=\"populate_read_0\", consumable=False)\n" + \ + "Metrics.trace(\"M\", type_=\"populate_write_0\", consumable=False)\n" + \ + "for m, (z_ref, (a_k, b_k)) in z_m << (a_m & b_m):\n" + \ + " for k, (a_val, (b_val, c_val)) in a_k & (b_k & c_k):\n" + \ + " z_ref += a_val * b_val * c_val\n" + \ "Metrics.endCollect()\n" + \ "metrics = {}\n" + \ - "metrics[\"T\"] = {}\n" + \ - "metrics[\"T\"][\"T footprint\"] = 0\n" + \ - "metrics[\"T\"][\"T traffic\"] = 0\n" + \ - "A_MK_format = Format(A_MK, {\"M\": {\"format\": \"U\", \"rhbits\": 32, \"pbits\": 32}, \"K\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 64}})\n" + \ - "metrics[\"T\"][\"A footprint\"] = A_MK_format.getTensor()\n" + \ - "metrics[\"T\"][\"A traffic\"] = metrics[\"T\"][\"A footprint\"]\n" + \ - "B_KN_format = Format(B_KN, {\"K\": {\"format\": \"U\", \"rhbits\": 32, \"pbits\": 32}, \"N\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 64}})\n" + \ - "metrics[\"T\"][\"B footprint\"] = B_KN_format.getTensor()\n" + \ - "metrics[\"T\"][\"B traffic\"] = Traffic.cacheTraffic(B_KN, \"K\", B_KN_format, 25165824) + B_KN_format.getRank(\"K\")\n" + \ - "metrics[\"T\"][\"K intersections\"] = Compute.lfCount(Metrics.dump(), \"K\", 0)\n" + \ - "Z_MN = Tensor(rank_ids=[\"M\", \"N\"])\n" + \ - "T_MNK = 
T_MKN.swizzleRanks(rank_ids=[\"M\", \"N\", \"K\"])\n" + \ - "z_m = Z_MN.getRoot()\n" + \ - "t_m = T_MNK.getRoot()\n" + \ - "a_m = A_MK.getRoot()\n" + \ - "Metrics.beginCollect([\"M\", \"N\", \"K\"])\n" + \ - "for m, (z_n, (t_n, a_k)) in z_m << (t_m & a_m):\n" + \ - " for n, (z_ref, t_k) in z_n << t_n:\n" + \ - " for k, (t_val, a_val) in t_k & a_k:\n" + \ - " z_ref += t_val * a_val\n" + \ + "metrics[\"Z\"] = {}\n" + \ + "formats = {\"Z\": Format(Z_M, {\"rank-order\": [\"M\"], \"M\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 64}})}\n" + \ + "bindings = [{\"tensor\": \"Z\", \"rank\": \"M\", \"type\": \"elem\", \"format\": \"default\"}]\n" + \ + "traces = {(\"Z\", \"M\", \"elem\", \"read\"): \"tmp/Z-M-populate_read_0.csv\", (\"Z\", \"M\", \"elem\", \"write\"): \"tmp/Z-M-populate_write_0.csv\"}\n" + \ + "traffic = Traffic.cacheTraffic(bindings, formats, traces, 65536, 64)\n" + \ + "metrics[\"Z\"][\"DRAM\"] = {}\n" + \ + "metrics[\"Z\"][\"DRAM\"][\"Z\"] = {}\n" + \ + "metrics[\"Z\"][\"DRAM\"][\"Z\"][\"read\"] = 0\n" + \ + "metrics[\"Z\"][\"DRAM\"][\"Z\"][\"write\"] = 0\n" + \ + "metrics[\"Z\"][\"DRAM\"][\"Z\"][\"read\"] += traffic[0][\"Z\"][\"read\"]\n" + \ + "metrics[\"Z\"][\"DRAM\"][\"Z\"][\"write\"] += traffic[0][\"Z\"][\"write\"]\n" + \ + "metrics[\"Z\"][\"DRAM\"][\"time\"] = (metrics[\"Z\"][\"DRAM\"][\"Z\"][\"read\"] + metrics[\"Z\"][\"DRAM\"][\"Z\"][\"write\"]) / 512\n" + \ + "metrics[\"blocks\"] = [[\"Z\"]]\n" + \ + "metrics[\"time\"] = metrics[\"Z\"][\"DRAM\"][\"time\"]" + + assert str(HiFiber(einsum, mapping, arch, bindings, format_)) == hifiber + + +def test_hifiber_intersect(): + yaml = """ + einsum: + declaration: + Z: [] + A: [I, J, K] + B: [I, J, K] + expressions: + - Z[] = A[i, j, k] * B[i, j, k] + mapping: + spacetime: + Z: + space: [] + time: [I, J, K] + architecture: + accel: + - name: level0 + attributes: + clock_frequency: 2048 + local: + - name: TF + class: Intersector + attributes: + type: two-finger + bindings: + Z: + - config: accel + prefix: tmp/Z + - component: TF + bindings: + - rank: K + # TODO: Allow the format to be empty + format: + Z: + default: + rank-order: [] + """ + einsum = Einsum.from_str(yaml) + mapping = Mapping.from_str(yaml) + arch = Architecture.from_str(yaml) + bindings = Bindings.from_str(yaml) + format_ = Format.from_str(yaml) + + hifiber = "Z_ = Tensor(rank_ids=[], shape=[])\n" + \ + "z_ref = Z_.getRoot()\n" + \ + "a_i = A_IJK.getRoot()\n" + \ + "b_i = B_IJK.getRoot()\n" + \ + "Metrics.beginCollect(\"tmp/Z\")\n" + \ + "TF_K = TwoFingerIntersector()\n" + \ + "Metrics.trace(\"K\", type_=\"intersect_0\", consumable=True)\n" + \ + "Metrics.trace(\"K\", type_=\"intersect_1\", consumable=True)\n" + \ + "for i, (a_j, b_j) in a_i & b_i:\n" + \ + " for j, (a_k, b_k) in a_j & b_j:\n" + \ + " for k, (a_val, b_val) in a_k & b_k:\n" + \ + " z_ref += a_val * b_val\n" + \ + " TF_K.addTraces(Metrics.consumeTrace(\"K\", \"intersect_0\"), Metrics.consumeTrace(\"K\", \"intersect_1\"))\n" + \ "Metrics.endCollect()\n" + \ + "metrics = {}\n" + \ "metrics[\"Z\"] = {}\n" + \ - "Z_MN_format = Format(Z_MN, {\"M\": {\"format\": \"U\", \"rhbits\": 32, \"pbits\": 32}, \"N\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 64}})\n" + \ - "metrics[\"Z\"][\"Z footprint\"] = Z_MN_format.getTensor()\n" + \ - "metrics[\"Z\"][\"Z traffic\"] = metrics[\"Z\"][\"Z footprint\"]\n" + \ - "metrics[\"Z\"][\"T footprint\"] = 0\n" + \ - "metrics[\"Z\"][\"T traffic\"] = 0\n" + \ - "A_MK_format = Format(A_MK, {\"M\": {\"format\": \"U\", \"rhbits\": 32, \"pbits\": 32}, \"K\": {\"format\": 
\"C\", \"cbits\": 32, \"pbits\": 64}})\n" + \ - "metrics[\"Z\"][\"A footprint\"] = A_MK_format.getTensor()\n" + \ - "metrics[\"Z\"][\"A traffic\"] = metrics[\"Z\"][\"A footprint\"]\n" + \ - "metrics[\"Z\"][\"mul\"] = Compute.opCount(Metrics.dump(), \"mul\")\n" + \ - "metrics[\"Z\"][\"add\"] = Compute.opCount(Metrics.dump(), \"add\")\n" + \ - "metrics[\"Z\"][\"T_MKN merge ops\"] = Compute.swapCount(T_MKN, 1, 64, 1)" + "formats = {}\n" + \ + "metrics[\"Z\"][\"TF\"] = 0\n" + \ + "metrics[\"Z\"][\"TF\"] += TF_K.getNumIntersects()\n" + \ + "metrics[\"Z\"][\"TF\"][\"time\"] = metrics[\"Z\"][\"TF\"] / 2048\n" + \ + "metrics[\"blocks\"] = [[\"Z\"]]\n" + \ + "metrics[\"time\"] = metrics[\"Z\"][\"TF\"][\"time\"]" assert str(HiFiber(einsum, mapping, arch, bindings, format_)) == hifiber + + +def test_hifiber_gamma_no_errors(): + # There is too much variation in the Gamma spec to test if the HiFiber + # remains unchanged + fname = "tests/integration/gamma.yaml" + einsum = Einsum.from_file(fname) + mapping = Mapping.from_file(fname) + arch = Architecture.from_file(fname) + bindings = Bindings.from_file(fname) + format_ = Format.from_file(fname) + + print(HiFiber(einsum, mapping, arch, bindings, format_)) + + +def test_hifiber_extensor_no_errors(): + # There is too much variation in the ExTensor spec to test if the HiFiber + # remains unchanged + fname = "tests/integration/extensor.yaml" + einsum = Einsum.from_file(fname) + mapping = Mapping.from_file(fname) + arch = Architecture.from_file(fname) + bindings = Bindings.from_file(fname) + format_ = Format.from_file(fname) + + print(HiFiber(einsum, mapping, arch, bindings, format_)) diff --git a/tests/trans/test_partitioner.py b/tests/trans/test_partitioner.py index bf928b9..9e48f01 100644 --- a/tests/trans/test_partitioner.py +++ b/tests/trans/test_partitioner.py @@ -29,7 +29,7 @@ def build_partitioner(parts): program = Program(Einsum.from_str(yaml), Mapping.from_str(yaml)) program.add_einsum(0) - partitioner = Partitioner(program, TransUtils()) + partitioner = Partitioner(program, TransUtils(program)) return program, partitioner @@ -51,7 +51,7 @@ def build_partitioner_conv(expr, parts): program = Program(Einsum.from_str(yaml), Mapping.from_str(yaml)) program.add_einsum(0) - partitioner = Partitioner(program, TransUtils()) + partitioner = Partitioner(program, TransUtils(program)) return program, partitioner @@ -70,7 +70,7 @@ def build_partitioner_copy(parts): program = Program(Einsum.from_str(yaml), Mapping.from_str(yaml)) program.add_einsum(0) - partitioner = Partitioner(program, TransUtils()) + partitioner = Partitioner(program, TransUtils(program)) return program, partitioner @@ -90,7 +90,7 @@ def build_partitioner_math_no_halo(parts): program = Program(Einsum.from_str(yaml), Mapping.from_str(yaml)) program.add_einsum(0) - partitioner = Partitioner(program, TransUtils()) + partitioner = Partitioner(program, TransUtils(program)) return program, partitioner @@ -566,7 +566,7 @@ def assert_unpartition(spec, hifiber_options): for tensor in program.get_equation().get_tensors(): program.apply_all_partitioning(tensor) - partitioner = Partitioner(program, TransUtils()) + partitioner = Partitioner(program, TransUtils(program)) hifiber = partitioner.unpartition( program.get_equation().get_output()).gen(0) @@ -643,7 +643,7 @@ def test_unpartition_flatten(): program.apply_all_partitioning(program.get_equation().get_output()) - partitioner = Partitioner(program, TransUtils()) + partitioner = Partitioner(program, TransUtils(program)) hifiber = partitioner.unpartition( 
program.get_equation().get_output()).gen(0) corr = "tmp0 = Z_M1NM01NM00\n" + \ diff --git a/tests/trans/test_utils.py b/tests/trans/test_utils.py index d0f4989..6b13e68 100644 --- a/tests/trans/test_utils.py +++ b/tests/trans/test_utils.py @@ -1,10 +1,25 @@ import pytest from teaal.hifiber import * +from teaal.ir.program import Program from teaal.ir.tensor import Tensor +from teaal.parse import * from teaal.trans.utils import TransUtils +def make_program(): + yaml = """ + einsum: + declaration: + A: [K, M] + B: [K, N] + Z: [M, N] + expressions: + - Z[m, n] = A[k, m] * B[k, n] + """ + return Program(Einsum.from_str(yaml), Mapping.from_str(yaml)) + + def test_build_expr_bad(): with pytest.raises(ValueError) as excinfo: TransUtils.build_expr(range(5)) @@ -18,6 +33,7 @@ def test_build_expr(): assert TransUtils.build_expr("foo").gen() == "\"foo\"" assert TransUtils.build_expr([1, 2, 3, 4]).gen() == "[1, 2, 3, 4]" assert TransUtils.build_expr({1: 2, 3: 4}).gen() == "{1: 2, 3: 4}" + assert TransUtils.build_expr((1, 2, 3, 4)).gen() == "(1, 2, 3, 4)" def test_build_rank_ids(): @@ -33,8 +49,7 @@ def test_build_set_rank_ids(): def test_build_shape(): - tensor = Tensor("A", ["I", "J"]) - assert TransUtils.build_shape(tensor).gen() == "shape=[I, J]" + assert TransUtils.build_shape(["I", "J"]).gen() == "shape=[I, J]" def test_build_swizzle(): @@ -44,14 +59,16 @@ def test_build_swizzle(): def test_next_tmp(): - utils = TransUtils() + program = make_program() + utils = TransUtils(program) assert utils.next_tmp() == "tmp0" assert utils.next_tmp() == "tmp1" assert utils.next_tmp() == "tmp2" def test_curr_tmp(): - utils = TransUtils() + program = make_program() + utils = TransUtils(program) with pytest.raises(ValueError) as excinfo: utils.curr_tmp()
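
The three gamma fixtures added above (make_gamma in test_equation.py, create_gamma in test_graphics.py, and build_header_gamma in test_header.py) repeat the same parse-and-construct boilerplate verbatim. A minimal consolidation sketch, not part of this patch: the helper name build_gamma_ir and wherever it would live are hypothetical, and the body uses only the constructors and call order already exercised by those fixtures.

from teaal.ir.hardware import Hardware
from teaal.ir.metrics import Metrics
from teaal.ir.program import Program
from teaal.parse import *


def build_gamma_ir(fname="tests/integration/gamma.yaml"):
    # Parse each section of the spec once, exactly as the three
    # fixtures above do
    einsum = Einsum.from_file(fname)
    mapping = Mapping.from_file(fname)
    arch = Architecture.from_file(fname)
    bindings = Bindings.from_file(fname)
    format_ = Format.from_file(fname)

    # Preserve the construction order used in the fixtures: Hardware is
    # built before add_einsum(0), Metrics after it
    program = Program(einsum, mapping)
    hardware = Hardware(arch, bindings, program)

    program.add_einsum(0)
    metrics = Metrics(program, hardware, format_)

    return program, metrics

Each fixture would then shrink to program, metrics = build_gamma_ir() followed by the object under test: Equation(program, metrics), Graphics(program, metrics), or Header(program, metrics, Partitioner(program, TransUtils(program))).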
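Similarly, the two trailing smoke tests in test_hifiber.py (test_hifiber_gamma_no_errors and test_hifiber_extensor_no_errors) differ only in the YAML file they load. A sketch of a parametrized alternative, again illustrative rather than part of the patch: the HiFiber import path is assumed to match what test_hifiber.py already uses, and print() is swapped for str() since only exception-free generation is being checked.

import pytest

from teaal.parse import *
from teaal.trans.hifiber import HiFiber


@pytest.mark.parametrize("fname", [
    "tests/integration/gamma.yaml",
    "tests/integration/extensor.yaml",
])
def test_hifiber_no_errors(fname):
    # These specs vary too much to assert on the exact HiFiber output,
    # so only check that translation completes without raising
    einsum = Einsum.from_file(fname)
    mapping = Mapping.from_file(fname)
    arch = Architecture.from_file(fname)
    bindings = Bindings.from_file(fname)
    format_ = Format.from_file(fname)

    str(HiFiber(einsum, mapping, arch, bindings, format_))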