diff --git a/teaal/hifiber/op.py b/teaal/hifiber/op.py index 81e051f..f5e3073 100644 --- a/teaal/hifiber/op.py +++ b/teaal/hifiber/op.py @@ -148,6 +148,18 @@ def gen(self) -> str: return "*" +class ONotIn(Operator): + """ + The HiFiber not in operator + """ + + def gen(self) -> str: + """ + Generate the HiFiber code for the ONotIn operator + """ + return "not in" + + class OOr(Operator): """ The HiFiber or operator diff --git a/teaal/ir/component.py b/teaal/ir/component.py index 9949b0b..05f5ecf 100644 --- a/teaal/ir/component.py +++ b/teaal/ir/component.py @@ -24,7 +24,9 @@ Representation an hardware component """ -from typing import Any, Dict, Iterable, List, Optional, Union +from typing import Any, Dict, List, Optional, Set, Tuple, Type, TypeVar, Union + +S = TypeVar("S") class Component: @@ -32,13 +34,14 @@ class Component: Representation an hardware component """ - def __init__(self, name: str, attrs: dict, bindings: List[dict]) -> None: + def __init__(self, name: str, num_instances: int, attrs: dict, + bindings: Dict[str, List[dict]]) -> None: """ Construct a component """ self.name = name - self.attrs = attrs - self.bindings: Any = {} + self.num_instances = num_instances + self.bindings = bindings def get_name(self) -> str: """ @@ -46,6 +49,18 @@ def get_name(self) -> str: """ return self.name + def get_num_instances(self) -> int: + """ + Get the number of instances + """ + return self.num_instances + + def get_bindings(self) -> Dict[str, List[dict]]: + """ + Get the operations that are bound to this component + """ + return self.bindings + def __eq__(self, other: object) -> bool: """ The == operator for components @@ -55,11 +70,17 @@ def __eq__(self, other: object) -> bool: return self.__key() == other.__key() return False - def __key(self) -> Iterable[Any]: + def __hash__(self) -> int: + """ + Hash the component + """ + return hash(repr(self)) + + def __key(self) -> Tuple[Any, ...]: """ A tuple of all fields of a component """ - return (self.name, self.attrs, self.bindings) + return (self.name, self.num_instances, self.bindings) def __repr__(self) -> str: """ @@ -69,38 +90,97 @@ def __repr__(self) -> str: for key in self.__key()] return "(" + type(self).__name__ + ", " + ", ".join(strs) + ")" - -class ComputeComponent(Component): - """ - A Component for compute (acting also as a superclass for all compute - operations) - """ - - def __init__(self, name: str, attrs: dict, bindings: List[dict]) -> None: + def _check_attr( + self, + attrs: dict, + key: str, + type_: Type[S]) -> Optional[S]: """ - Construct a compute component + Check that the attribute is correctly specified """ - super().__init__(name, attrs, bindings) - self.bindings = {} + if key not in attrs.keys(): + return None - for binding in bindings: - einsum = binding["einsum"] - if einsum not in self.bindings.keys(): - self.bindings[einsum] = [] + if not isinstance(attrs[key], type_): + class_ = type(self).__name__[:-9] + raise ValueError("Bad " + + key + + " " + + str(attrs[key]) + + " for " + + class_ + + " " + + self.name) - # Append the dictionary containing the other properties - info = binding.copy() - del info["einsum"] - self.bindings[einsum].append(info) + return attrs[key] - def get_bindings(self, einsum: str) -> List[dict]: + def _check_float_attr(self, attrs: dict, key: str) -> Optional[float]: """ - Get the operations that are bound for this einsum + Check that the attribute is correctly specified """ - if einsum not in self.bindings.keys(): - return [] + if key not in attrs.keys(): + return None + + if 
attrs[key] == "inf": + return float("inf") + + if not isinstance( + attrs[key], + float) and not isinstance( + attrs[key], + int): + class_ = type(self).__name__[:-9] + raise ValueError("Bad " + + key + + " " + + str(attrs[key]) + + " for " + + class_ + + " " + + self.name) + + return attrs[key] + + def _check_str_attr( + self, + attrs: dict, + key: str, + options: Set[str]) -> Optional[str]: + """ + Check that a string attribute is correctly specified + """ + if key not in attrs.keys(): + return None - return self.bindings[einsum] + class_ = type(self).__name__[:-9] + if not isinstance(attrs[key], str): + raise ValueError("Bad " + + key + + " " + + str(attrs[key]) + + " for " + + class_ + + " " + + self.name) + + if attrs[key] not in options: + raise ValueError( + attrs[key] + + " is not a valid value for attribute " + + key + + " of class " + + class_ + + ". Choose one of " + + str(options)) + + return attrs[key] + + +class FunctionalComponent(Component): + """ + Superclass for all functional unit components (compute, intersection, mergers, etc.) + """ + pass class MemoryComponent(Component): @@ -108,74 +188,331 @@ class MemoryComponent(Component): Superclass for all memory components """ - def __init__(self, name: str, attrs: dict, bindings: List[dict]) -> None: + def __init__(self, name: str, num_instances: int, attrs: dict, + bindings: Dict[str, List[dict]]) -> None: """ Construct a memory component """ - super().__init__(name, attrs, bindings) - self.bindings = {} + super().__init__(name, num_instances, attrs, bindings) + + self.bandwidth = self._check_attr(attrs, "bandwidth", int) + + self.tensor_bindings: Dict[str, Dict[str, List[dict]]] = {} + for einsum in self.bindings.keys(): + self.tensor_bindings[einsum] = {} + for binding in self.bindings[einsum]: + if "tensor" not in binding: + raise ValueError( + "Tensor not specified for Einsum " + + einsum + + " in binding to " + + self.name) + + tensor = binding["tensor"] + if "rank" not in binding: + raise ValueError( + "Rank not specified for tensor " + + tensor + + " in Einsum " + + einsum + + " in binding to " + + self.name) + + if "type" not in binding: + raise ValueError( + "Type not specified for tensor " + + tensor + + " in Einsum " + + einsum + + " in binding to " + + self.name) + + types = {"coord", "payload", "elem"} + if binding["type"] not in types: + raise ValueError("Type " + + str(binding["type"]) + + " for " + + self.name + + " on tensor " + + tensor + + " in Einsum " + + einsum + + " not one of " + + str(types)) + + if "format" not in binding: + raise ValueError( + "Format not specified for tensor " + + tensor + + " in Einsum " + + einsum + + " in binding to " + + self.name) + + if binding["tensor"] not in self.tensor_bindings[einsum]: + self.tensor_bindings[einsum][binding["tensor"]] = [] + self.tensor_bindings[einsum][binding["tensor"]].append(binding) + + def get_bandwidth(self) -> int: + """ + Get the bandwidth + """ + if self.bandwidth is None: + raise ValueError( + "Bandwidth unspecified for component " + + self.name) - for binding in bindings: - self.bindings[binding["tensor"]] = binding["rank"] + return self.bandwidth - def get_binding(self, tensor: str) -> Optional[str]: + def get_binding(self, einsum: str, tensor: str, rank: str, + type_: str, format_: str) -> Optional[Dict[str, Any]]: """ - Given a tensor, give the rank bound to this memory + Given a tensor, get a list of bindings to that rank """ - if tensor not in self.bindings.keys(): + if einsum not in self.tensor_bindings: + return None + + if tensor 
not in self.tensor_bindings[einsum]: return None - return self.bindings[tensor] + final_binding: Optional[Dict[str, Any]] = None + for binding in self.tensor_bindings[einsum][tensor]: + if binding["rank"] == rank and binding["type"] == type_ and binding["format"] == format_: + + if final_binding is None: + final_binding = binding + + else: + raise ValueError("Multiple bindings for " + str( + [("einsum", einsum), ("tensor", tensor), ("rank", rank), ("type", type_), ("format", format_)])) + + return final_binding + def _Component__key(self) -> Tuple[Any, ...]: + """ + A tuple of all fields + """ + return (self.name, self.num_instances, self.bindings, self.bandwidth) -class BuffetComponent(MemoryComponent): + +class BufferComponent(MemoryComponent): """ - A Component for Buffet + A Component for a buffer """ - pass + def __init__(self, name: str, num_instances: int, attrs: dict, + bindings: Dict[str, List[dict]]) -> None: + """ + Construct a buffer component + """ + super().__init__(name, num_instances, attrs, bindings) + + self.depth = self._check_float_attr(attrs, "depth") + self.width = self._check_attr(attrs, "width", int) + + def get_depth(self) -> float: + """ + Get the buffer depth + """ + if self.depth is None: + raise ValueError("Depth unspecified for component " + self.name) + + return self.depth + + def get_width(self) -> int: + """ + Get the buffer width + """ + if self.width is None: + raise ValueError("Width unspecified for component " + self.name) + + return self.width + + def _Component__key(self) -> Tuple[Any, ...]: + """ + A tuple of all fields + """ + return ( + self.name, + self.num_instances, + self.bindings, + self.bandwidth, + self.depth, + self.width) + + +class BuffetComponent(BufferComponent): + """ + A Component for a Buffet + """ -class CacheComponent(MemoryComponent): + def __init__(self, name: str, num_instances: int, attrs: dict, + bindings: Dict[str, List[dict]]) -> None: + """ + Construct a buffet component + """ + super().__init__(name, num_instances, attrs, bindings) + for einsum in self.tensor_bindings: + for tensor, tensor_bindings in self.tensor_bindings[einsum].items( + ): + for binding in tensor_bindings: + if "evict-on" not in binding: + raise ValueError( + "Evict-on not specified for tensor " + + tensor + + " in Einsum " + + einsum + + " in binding to " + + self.name) + + if "style" not in binding: + binding["style"] = "lazy" + + styles = {"lazy", "eager"} + if binding["style"] not in styles: + raise ValueError("Style " + + str(binding["style"]) + + " for " + + self.name + + " on tensor " + + tensor + + " in Einsum " + + einsum + + " not one of " + + str(styles)) + + if binding["style"] == "eager": + binding["root"] = binding["rank"] + + def expand_eager(self, + einsum: str, + tensor: str, + format_: str, + ranks: List[str], + types: List[List[str]]) -> None: + """ + Expand eager bindings to have separate bindings for each rank + """ + if tensor not in self.tensor_bindings[einsum]: + return + + for binding in self.tensor_bindings[einsum][tensor].copy(): + if binding["style"] != "eager": + continue + + if binding["format"] != format_: + continue + + root_rank = binding["rank"] + + new_binding_template = { + "tensor": tensor, + "evict-on": binding["evict-on"], + "style": "eager", + "format": binding["format"], + "root": root_rank} + start_i = ranks.index(root_rank) + if binding["type"] == "coord" and "payload" in types[start_i]: + new_binding = {**new_binding_template, ** + {"rank": root_rank, "type": "payload"}} + 
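# An eager "coord" binding at the root rank implies its payloads are
+                # fetched as well, so the matching payload binding is added
+                # before the deeper ranks are expanded below.
+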
self.tensor_bindings[einsum][tensor].append(new_binding) + self.bindings[einsum].append(new_binding) + + for rank, rank_types in zip( + ranks[start_i + 1:], types[start_i + 1:]): + for type_ in rank_types: + new_binding = {**new_binding_template, + **{"rank": rank, "type": type_}} + self.tensor_bindings[einsum][tensor].append(new_binding) + self.bindings[einsum].append(new_binding) + + +class CacheComponent(BufferComponent): """ A Component for a Cache """ + pass + + +class ComputeComponent(FunctionalComponent): + """ + A Component for a compute functional unit + """ - def get_depth(self) -> int: + def __init__(self, name: str, num_instances: int, attrs: dict, + bindings: Dict[str, List[dict]]) -> None: """ - Get the cache depth + Construct a compute component """ - return self.attrs["depth"] + super().__init__(name, num_instances, attrs, bindings) - def get_width(self) -> int: + type_ = self._check_str_attr(attrs, "type", {"mul", "add"}) + if type_ is None: + raise ValueError("Type unspecified for component " + self.name) + self.type = type_ + + def get_type(self) -> str: """ - Get the cache width + Get the type of compute component """ - return self.attrs["width"] + return self.type + + def _Component__key(self) -> Tuple[Any, ...]: + """ + A tuple of all fields + """ + return (self.name, self.num_instances, self.bindings, self.type) class DRAMComponent(MemoryComponent): """ A Component for DRAM """ + pass + + +class IntersectorComponent(FunctionalComponent): + """ + A Component superclass for all intersectors + """ - def get_bandwidth(self) -> int: + def __init__(self, name: str, num_instances: int, attrs: dict, + bindings: Dict[str, List[dict]]) -> None: """ - Get the bandwidth + Construct an intersector component """ - return self.attrs["bandwidth"] + super().__init__(name, num_instances, attrs, bindings) - def get_datawidth(self) -> int: - """ - Get the datawidth - """ - return self.attrs["datawidth"] + for einsum, einsum_bindings in bindings.items(): + for binding in einsum_bindings: + if "rank" not in binding: + raise ValueError( + "Rank unspecified in Einsum " + + einsum + + " in binding to " + + self.name) -class LeaderFollowerComponent(ComputeComponent): +class LeaderFollowerComponent(IntersectorComponent): """ A Component for leader-follower intersection """ - pass + + def __init__(self, name: str, num_instances: int, attrs: dict, + bindings: Dict[str, List[dict]]) -> None: + """ + Construct a leader-follower intersector component + """ + super().__init__(name, num_instances, attrs, bindings) + + for einsum, einsum_bindings in bindings.items(): + for binding in einsum_bindings: + if "leader" not in binding: + raise ValueError( + "Leader unspecified in Einsum " + + einsum + + " in binding to " + + self.name) class MergerComponent(Component): @@ -183,47 +520,198 @@ class MergerComponent(Component): A Component for a merger """ - def __init__(self, name: str, attrs: dict, bindings: List[dict]) -> None: + def __init__(self, name: str, num_instances: int, attrs: dict, + bindings: Dict[str, List[dict]]) -> None: + """ + Construct a merger component + """ + super().__init__(name, num_instances, attrs, bindings) + + # TODO: change back to int + inputs = self._check_float_attr(attrs, "inputs") + if inputs is None: + raise ValueError("Inputs unspecified for component " + self.name) + self.inputs = inputs + + # TODO: change back to int + comparator_radix = self._check_float_attr(attrs, "comparator_radix") + if comparator_radix is None: + raise ValueError( + "Comparator radix unspecified 
for component " + + self.name) + self.comparator_radix = comparator_radix + + outputs = self._check_attr(attrs, "outputs", int) + if outputs is None: + self.outputs = 1 + else: + self.outputs = outputs + + order = self._check_str_attr(attrs, "order", {"fifo", "opt"}) + if order is None: + self.order = "fifo" + else: + self.order = order + + reduce_ = self._check_attr(attrs, "reduce", bool) + if reduce_: + raise NotImplementedError( + "Concurrent merge and reduction not supported") + self.reduce = False + + self.tensor_bindings: Dict[str, Dict[str, List[dict]]] = {} + for einsum, einsum_bindings in self.bindings.items(): + self.tensor_bindings[einsum] = {} + for binding in einsum_bindings: + if "tensor" not in binding: + raise ValueError( + "Tensor not specified for Einsum " + + einsum + + " in binding to " + + self.name) + + tensor = binding["tensor"] + if tensor not in self.tensor_bindings[einsum]: + self.tensor_bindings[einsum][tensor] = [] + + if "init-ranks" not in binding: + raise ValueError( + "Initial ranks not specified for tensor " + + tensor + + " in Einsum " + + einsum + + " in binding to " + + self.name) + + if "final-ranks" not in binding: + raise ValueError( + "Final ranks not specified for tensor " + + tensor + + " in Einsum " + + einsum + + " in binding to " + + self.name) + + self.tensor_bindings[einsum][tensor].append(binding) + + def get_comparator_radix(self) -> float: + """ + Get the comparator_radix + """ + return self.comparator_radix + + def get_init_ranks(self, einsum: str, tensor: str, + final_ranks: List[str]) -> Optional[List[str]]: + """ + Get the initial ranks for the given merge + """ + if einsum not in self.tensor_bindings: + return None + + if tensor not in self.tensor_bindings[einsum]: + return None + + init_ranks: Optional[List[str]] = None + for binding in self.tensor_bindings[einsum][tensor]: + if binding["final-ranks"] == final_ranks: + if init_ranks is not None: + raise ValueError("Merge binding from both " + + str(init_ranks) + + " and " + + str(binding["init-ranks"]) + + " to " + + str(final_ranks)) + + init_ranks = binding["init-ranks"] + + return init_ranks + + def get_inputs(self) -> float: """ - Construct a compute component + Get the number of inputs """ - super().__init__(name, attrs, bindings) + return self.inputs - self.bindings = [] - for binding in bindings: - init = binding["init_ranks"] - d = binding["swap_depth"] - final = init[:d] + [init[d + 1]] + [init[d]] + init[(d + 2):] - - info = binding.copy() - info["final_ranks"] = final + def get_order(self) -> str: + """ + Get the order + """ + return self.order - self.bindings.append(info) + def get_outputs(self) -> int: + """ + Get the number of outputs + """ + return self.outputs - def get_bindings(self) -> List[dict]: + def get_reduce(self) -> bool: """ - Get the operations that are bound to this merger + Get whether or not the merger performs concurrent reduction """ - return self.bindings + return self.reduce - def get_next_latency(self) -> Union[int, str]: + def _Component__key(self) -> Tuple[Any, ...]: """ - Get the latency of accessing the next element + A tuple of all fields """ - return self.attrs["next_latency"] + return ( + self.name, + self.num_instances, + self.bindings, + self.inputs, + self.comparator_radix, + self.outputs, + self.order, + self.reduce) + - def get_radix(self) -> float: +class SequencerComponent(FunctionalComponent): + """ + A Component for a sequencer + """ + + def __init__(self, name: str, num_instances: int, attrs: dict, + bindings: Dict[str, List[dict]]) 
-> None: """ - Get the radix + Construct a sequencer component """ - if self.attrs["radix"] == "inf": - return float("inf") + super().__init__(name, num_instances, attrs, bindings) + + num_ranks = self._check_attr(attrs, "num_ranks", int) + if num_ranks is None: + raise ValueError( + "Number of ranks unspecified for sequencer " + + self.name) + + self.ranks: Dict[str, List[str]] = {} + for einsum, ebindings in self.bindings.items(): + if len(ebindings) > num_ranks: + raise ValueError( + "Too many ranks bound to sequencer " + + self.name + + " during Einsum " + + einsum) + + self.ranks[einsum] = [] + for binding in ebindings: + self.ranks[einsum].append(binding["rank"]) - return self.attrs["radix"] + def get_ranks(self, einsum: str) -> List[str]: + """ + Get the ranks sequenced by this sequencer + """ + return self.ranks[einsum] -class SkipAheadComponent(ComputeComponent): +class SkipAheadComponent(IntersectorComponent): """ A Component for skip-ahead intersection """ pass + + +class TwoFingerComponent(IntersectorComponent): + """ + A Component for two-finger intersection + """ + pass diff --git a/teaal/ir/equation.py b/teaal/ir/equation.py index 3ad21e6..d9b1df0 100644 --- a/teaal/ir/equation.py +++ b/teaal/ir/equation.py @@ -28,7 +28,7 @@ from lark.lexer import Token from lark.tree import Tree -from typing import Any, Dict, Iterable, List, Tuple +from typing import Any, Dict, Iterable, List, Optional, Tuple from teaal.ir.tensor import Tensor from teaal.parse.utils import ParseUtils @@ -56,6 +56,27 @@ def get_factor_order(self) -> Dict[str, Tuple[int, int]]: """ return self.factor_order + def get_iter(self, + tensors: List[Tensor]) -> Tuple[Optional[Tensor], + List[List[Tensor]]]: + """ + Organize the tensors as they are iterated in the for loop + Returns (Optional[output], [[tensors intersected together] unioned together]) + """ + output: Optional[Tensor] = None + inputs: List[List[Tensor]] = [[] for _ in self.term_tensors] + for tensor in tensors: + if tensor.get_is_output(): + output = tensor + continue + + inputs[self.factor_order[tensor.root_name()][0]].append(tensor) + + for term in inputs: + term.sort(key=lambda t: self.factor_order[t.root_name()][1]) + + return output, [term for term in inputs if term] + def get_in_update(self) -> List[List[bool]]: """ Get the information about which values are actually used in the update diff --git a/teaal/ir/flow_graph.py b/teaal/ir/flow_graph.py index 083ad45..52f7aa3 100644 --- a/teaal/ir/flow_graph.py +++ b/teaal/ir/flow_graph.py @@ -29,6 +29,7 @@ from sympy import Symbol from typing import cast, Dict, List, Optional, Tuple +from teaal.ir.component import * from teaal.ir.flow_nodes import * from teaal.ir.iter_graph import IterationGraph from teaal.ir.metrics import Metrics @@ -94,7 +95,7 @@ def __build(self) -> None: self.graph = nx.DiGraph() self.iter_map: Dict[str, List[str]] = {} - self.__build_loop_nest() + chain = self.__build_loop_nest() self.__build_output() # Add Swizzle, GetRoot and FiberNodes for each tensor @@ -116,10 +117,6 @@ def __build(self) -> None: # Get the root fiber self.__build_swizzle_root_fiber(tensor, True) - # Add CollectingNodes - for tensor in self.program.get_equation().get_tensors(): - self.__build_collecting(tensor) - iter_graph = IterationGraph(self.program) while iter_graph.peek_concord()[0] is not None: self.__build_fiber_nodes(iter_graph, flatten_info) @@ -136,31 +133,6 @@ def __build(self) -> None: tensor.reset() tensor.set_is_output(is_output) - def __build_collecting(self, tensor: Tensor) -> None: - """ - 
Build a CollectingNode should it be required - """ - # None if there is no hardware - if not self.metrics: - return - - # None if the tensor is never stored in DRAM - if not self.metrics.in_dram(tensor): - return - - # None if the tensor is stationary - if self.metrics.on_chip_stationary(tensor): - return - - # Otherwise, add a CollectingNode - root = tensor.root_name() - rank = self.metrics.get_on_chip_rank(tensor) - swizzle_node = SwizzleNode(root, tensor.get_ranks(), "loop-order") - collecting_node = CollectingNode(root, rank) - - self.graph.add_edge(swizzle_node, collecting_node) - self.graph.add_edge(collecting_node, MetricsNode("Start")) - def __build_dyn_part( self, tensor: Tensor, partitioning: Tuple[str, ...], flatten_info: Dict[str, List[Tuple[str, ...]]]) -> None: """ @@ -180,6 +152,7 @@ def __build_dyn_part( for rank in partitioning: self.graph.add_edge(RankNode(root, rank), swizzle_node) + self.program.apply_partition_swizzling(tensor) # Add to flattening info flatten_info[root].append(partitioning) @@ -250,7 +223,7 @@ def __build_fiber_nodes(self, iter_graph: IterationGraph, # We need a EagerInputNode and an IntervalNode if at least one tensor # will be projected and it is a partitioned rank (so we don't know the # bounds) - if any(tensor.peek() != rank.lower() for tensor in tensors) and \ + if any(tensor.peek_clean() != rank for tensor in tensors) and \ self.program.get_partitioning().split_rank_name(rank)[1] == "0": self.__build_project_interval(rank) @@ -270,7 +243,8 @@ def __build_fiber_nodes(self, iter_graph: IterationGraph, get_payload_node) for rank in ranks: - loop_rank = part.get_final_rank_id(tensor, rank) + loop_rank = part.get_final_rank_id( + tensor.get_init_ranks(), rank) self.graph.add_edge(LoopNode(loop_rank), get_payload_node) for ranks, tensor in iter_graph.pop_discord(): @@ -279,9 +253,9 @@ def __build_fiber_nodes(self, iter_graph: IterationGraph, get_payload_node, FiberNode( tensor.fiber_name())) - def __build_loop_nest(self) -> None: + def __build_loop_nest(self) -> List[Node]: """ - Build the loop nest + Build the loop nest, returns the chain of nodes """ loop_order = self.program.get_loop_order().get_ranks() @@ -289,23 +263,47 @@ def __build_loop_nest(self) -> None: chain: List[Node] = [OtherNode("StartLoop")] for rank in loop_order: chain.append(LoopNode(rank)) - self.graph.add_edge(chain[-2], chain[-1]) + chain.append(OtherNode("Body")) + for rank in reversed(loop_order): + chain.append(EndLoopNode(rank)) + chain.append(OtherNode("Footer")) + + # Note that the chain is guaranteed to have at least two nodes + for i in range(len(chain) - 1): + self.graph.add_edge(chain[i], chain[i + 1]) - # Add the graphics generation, body, and footer + # Add the graphics generation self.graph.add_edge(OtherNode("Graphics"), OtherNode("StartLoop")) self.graph.add_edge(OtherNode("Output"), OtherNode("Graphics")) - self.graph.add_edge(chain[-1], OtherNode("Body")) - self.graph.add_edge(OtherNode("Body"), OtherNode("Footer")) # If we have Metrics, we need to add the MetricsNodes if self.metrics: self.graph.add_edge(OtherNode("StartLoop"), MetricsNode("Start")) self.graph.add_edge(MetricsNode("Start"), chain[1]) - self.graph.add_edge(OtherNode("Body"), MetricsNode("End")) + metrics_chain: List[Node] = [] + for rank in loop_order: + metrics_chain.append(MetricsHeaderNode(rank)) + metrics_chain.append(MetricsNode("Body")) + for rank in reversed(loop_order): + metrics_chain.append(MetricsFooterNode(rank)) + + j = 0 + for i, metrics_node in enumerate(metrics_chain): + 
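# Weave each metrics node between two consecutive nodes of the
+                # main chain; once MetricsNode("Body") is placed, the offset
+                # j skips OtherNode("Body") so the metrics footers pair with
+                # the EndLoopNodes.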
+                self.graph.add_edge(chain[i + j], metrics_chain[i])
+                self.graph.add_edge(metrics_chain[i], chain[i + j + 1])
+
+                if metrics_node == MetricsNode("Body"):
+                    j = 1
+
+            self.graph.add_edge(MetricsNode("Start"), metrics_chain[0])
+            self.graph.add_edge(metrics_chain[-1], MetricsNode("End"))
+            self.graph.add_edge(chain[-2], MetricsNode("End"))
             self.graph.add_edge(MetricsNode("End"), OtherNode("Footer"))
             self.graph.add_edge(OtherNode("Footer"), MetricsNode("Dump"))
 
+        return chain
+
     def __build_output(self) -> None:
         """
         Build all of the output-specific edges
@@ -403,6 +401,22 @@ def __build_static_part(self, tensor: Tensor,
 
             self.graph.add_edge(swizzle_node, part_node)
 
+            # Add an additional swizzle node to ensure that the tensor always
+            # starts in the correct order before being merged by a hardware
+            # merger
+            if self.metrics:
+                init_ranks = self.metrics.get_merger_init_ranks(
+                    root, tensor.get_ranks())
+                if init_ranks:
+                    metrics_swizzle_node = SwizzleNode(
+                        root, init_ranks, "metrics")
+
+                    for rank in init_ranks:
+                        self.graph.add_edge(
+                            RankNode(root, rank), metrics_swizzle_node)
+
+                    self.graph.add_edge(metrics_swizzle_node, swizzle_node)
+
         # Otherwise, add the edge from the source rank to the partitioning
         else:
             self.graph.add_edge(RankNode(root, partitioning[0]), part_node)
@@ -441,6 +455,23 @@ def __build_swizzle_root_fiber(self, tensor: Tensor, static: bool) -> None:
         if static:
             self.graph.add_edge(swizzle_node, OtherNode("Graphics"))
 
+        # Add an additional swizzle node to ensure that the tensor always
+        # starts in the correct order before being merged by a hardware merger
+        if self.metrics:
+            init_ranks = self.metrics.get_merger_init_ranks(
+                root, tensor.get_ranks())
+            if init_ranks:
+                metrics_swizzle_node = SwizzleNode(root, init_ranks, "metrics")
+
+                for rank in init_ranks:
+                    self.graph.add_edge(
+                        RankNode(
+                            root,
+                            rank),
+                        metrics_swizzle_node)
+
+                self.graph.add_edge(metrics_swizzle_node, swizzle_node)
+
     def __connect_dyn_part(self, tensor: Tensor, rank: str,
                            flatten_info: Dict[str, List[Tuple[str, ...]]]) -> None:
         """
diff --git a/teaal/ir/flow_nodes.py b/teaal/ir/flow_nodes.py
index d063e2f..97ee04c 100644
--- a/teaal/ir/flow_nodes.py
+++ b/teaal/ir/flow_nodes.py
@@ -25,24 +25,22 @@
 """
 
 import abc
-from typing import Any, Iterable, List, Tuple
+from typing import Any, Iterable, List, Optional, Tuple
 
 from teaal.ir.node import Node
 
 
-class CollectingNode(Node):
+class EagerInputNode(Node):
     """
-    A Node to turn on reuse distance collection for a particular rank of a
-    tensor
+    A node that ensures that the inputs are eager
     """
 
-    def __init__(self, tensor: str, rank: str) -> None:
+    def __init__(self, rank: str, tensors: List[str]) -> None:
         """
-        Construct a node for the collection of reuse metrics for a tensor's
-        rank
+        Construct an EagerInputNode
         """
-        self.tensor = tensor
         self.rank = rank
+        self.tensors = tensors
 
     def get_rank(self) -> str:
         """
@@ -50,30 +48,29 @@ def get_rank(self) -> str:
         """
         return self.rank
 
-    def get_tensor(self) -> str:
+    def get_tensors(self) -> List[str]:
         """
         Accessor for the tensor
         """
-        return self.tensor
+        return self.tensors
 
     def _Node__key(self) -> Iterable[Any]:
         """
-        Iterable of fields of a Collecting
+        Iterable of fields of an EagerInputNode
         """
-        return self.tensor, self.rank
+        return self.rank, self.tensors
 
 
-class EagerInputNode(Node):
+class EndLoopNode(Node):
     """
-    A node that ensures that the inputs are eager
+    A Node representing the end of a loop
     """
 
-    def __init__(self, rank: str, tensors: List[str]) -> None:
+    def __init__(self, rank: str) -> None:
         """
-        Construct a EagerInputNode
+        Construct an EndLoopNode
         """
         self.rank = rank
-        self.tensors = tensors
 
     def get_rank(self) -> str:
         """
@@ -81,17 +78,11 @@ def get_rank(self) -> str:
         """
         return self.rank
 
-    def get_tensors(self) -> List[str]:
-        """
-        Accessor for the tensor
-        """
-        return self.tensors
-
     def _Node__key(self) -> Iterable[Any]:
         """
-        Iterable of fields of a FromFiberNode
+        Iterable of fields of an EndLoopNode
         """
-        return self.rank, self.tensors
+        return self.rank,
 
 
 class FiberNode(Node):
@@ -260,6 +251,54 @@ def _Node__key(self) -> Iterable[Any]:
         return self.rank,
 
 
+class MetricsFooterNode(Node):
+    """
+    A Node for collecting metrics after the end of the given loop
+    """
+
+    def __init__(self, rank: str) -> None:
+        """
+        Construct a MetricsFooterNode
+        """
+        self.rank = rank
+
+    def get_rank(self) -> str:
+        """
+        Accessor for the rank
+        """
+        return self.rank
+
+    def _Node__key(self) -> Iterable[Any]:
+        """
+        Iterable of fields of a MetricsFooterNode
+        """
+        return self.rank,
+
+
+class MetricsHeaderNode(Node):
+    """
+    A Node for collecting metrics before the start of the given loop
+    """
+
+    def __init__(self, rank: str) -> None:
+        """
+        Construct a MetricsHeaderNode
+        """
+        self.rank = rank
+
+    def get_rank(self) -> str:
+        """
+        Accessor for the rank
+        """
+        return self.rank
+
+    def _Node__key(self) -> Iterable[Any]:
+        """
+        Iterable of fields of a MetricsHeaderNode
+        """
+        return self.rank,
+
+
 class MetricsNode(Node):
     """
     A Node for metrics collection
diff --git a/teaal/ir/fusion.py b/teaal/ir/fusion.py
new file mode 100644
index 0000000..7ba3ba7
--- /dev/null
+++ b/teaal/ir/fusion.py
@@ -0,0 +1,117 @@
+"""
+MIT License
+
+Copyright (c) 2023 University of Illinois
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+Representation of the fusion schedule of this accelerator
+"""
+
+from typing import Dict, List, Optional, Set
+
+from teaal.ir.component import *
+from teaal.ir.hardware import Hardware
+from teaal.ir.program import Program
+
+
+class Fusion:
+    """
+    Representation of the fusion schedule of the accelerator
+    """
+
+    def __init__(self, hardware: Hardware) -> None:
+        """
+        Construct a new fusion object
+        """
+        self.hardware = hardware
+
+        self.blocks: List[List[str]] = []
+        self.curr_block: List[str] = []
+        self.fused_ranks: List[str] = []
+
+        self.curr_config: Optional[str] = None
+        self.components_used: Set[str] = set()
+
+        self.component_dict: Dict[str, List[str]] = {}
+
+    def add_einsum(self, program: Program) -> None:
+        """
+        Add the information corresponding to this Einsum
+        """
+        einsum = program.get_equation().get_output().root_name()
+        loop_ranks = program.get_loop_order().get_ranks()
+
+        spacetime = program.get_spacetime()
+        if not spacetime:
+            raise ValueError("Undefined spacetime for Einsum " + einsum)
+
+        space_ranks = spacetime.get_space()
+
+        # Get the temporal ranks in all loop orders before the first spatial
+        # rank
+        fused_ranks: List[str]
+        if space_ranks:
+            fused_ranks = loop_ranks[:loop_ranks.index(space_ranks[0])]
+        else:
+            fused_ranks = loop_ranks
+
+        # Get the components used for this Einsum
+        components_used = set()
+        for component in self.hardware.get_components(
+                einsum, FunctionalComponent):
+            if component.get_bindings()[einsum]:
+                components_used.add(component.get_name())
+
+        # Get the config
+        config = self.hardware.get_config(einsum)
+
+        # Check if the fusion conditions are met
+        if config == self.curr_config and fused_ranks == self.fused_ranks and not self.components_used.intersection(
+                components_used):
+            self.curr_block.append(einsum)
+            self.components_used = self.components_used.union(components_used)
+
+        # Otherwise, start a new block
+        else:
+            self.blocks.append([einsum])
+            self.curr_block = self.blocks[-1]
+            self.fused_ranks = fused_ranks
+            self.curr_config = config
+
+        # Prepare to record the components contributing to the execution time
+        self.component_dict[einsum] = []
+
+    def add_component(self, einsum: str, component: str) -> None:
+        """
+        Add a component whose time is being tracked
+        """
+        self.component_dict[einsum].append(component)
+
+    def get_blocks(self) -> List[List[str]]:
+        """
+        Get the Einsums organized by their fusion blocks
+        """
+        return self.blocks
+
+    def get_components(self, einsum: str) -> List[str]:
+        """
+        Get the names of the components used for this Einsum
+        """
+        return self.component_dict[einsum]
diff --git a/teaal/ir/hardware.py b/teaal/ir/hardware.py
index cecd586..eb54047 100644
--- a/teaal/ir/hardware.py
+++ b/teaal/ir/hardware.py
@@ -24,33 +24,58 @@
 Representation of the hardware of an accelerator
 """
 
-from typing import Dict, Type
+from typing import Dict, Set, Type, TypeVar
 
 from teaal.ir.component import *
 from teaal.ir.level import Level
+from teaal.ir.program import Program
+
 from teaal.parse import *
 
+T = TypeVar("T")
+
 
 class Hardware:
     """
     Representation of the hardware of an accelerator
     """
 
-    def __init__(self, arch: Architecture, bindings: Bindings) -> None:
+    def __init__(
+            self,
+            arch: Architecture,
+            bindings: Bindings,
+            program: Program) -> None:
         """
         Construct the hardware
+
+        TODO: The program is only used to get the Einsum name; standardize
+        so all use program or all take it as an argument
         """
+        self.bindings = bindings
+        self.program = program
+        self.components: Dict[str, Component] = {}
+        # Get the
configuration for each Einsum + self.configs = {} + for einsum in self.program.get_all_einsums(): + self.configs[einsum] = self.bindings.get_config(einsum) + spec = arch.get_spec() if spec is None: raise ValueError("Empty architecture specification") - subtree = spec["architecture"]["subtree"] - if len(subtree) != 1: - raise ValueError("Architecture must have a single root level") + # Build the architecture tree for each configuration + self.tree = {} + for config in spec["architecture"]: + subtree = spec["architecture"][config] + if len(subtree) != 1: + raise ValueError( + "Configuration " + + config + + " must have a single root level") - self.tree = self.__build_level(subtree[0], bindings) + self.tree[config] = self.__build_level(subtree[0]) def get_component(self, name: str) -> Component: """ @@ -58,198 +83,158 @@ def get_component(self, name: str) -> Component: """ return self.components[name] - def get_compute_path(self, einsum: str) -> List[Level]: + def get_components(self, einsum: str, class_: Type[T]) -> List[T]: """ - Get a list of levels with dataflow corresponding to this einsum + Get a list of components relevant to this einsum """ - return self.__compute_helper(einsum, self.tree) + components: List[T] = [] + for name in self.bindings.get_bindings()[einsum]: + component = self.components[name] + if isinstance(component, class_): + components.append(component) + return components - def get_compute_components(self, einsum: str) -> List[ComputeComponent]: + def get_config(self, einsum: str) -> str: """ - Get a list of compute components relevant to this einsum + Get the name of the hardware configuration for this Einsum """ - path = self.get_compute_path(einsum) + return self.configs[einsum] - components = [] - for level in path: - for component in level.get_local(): - if isinstance(component, ComputeComponent) and \ - component.get_bindings(einsum): - components.append(component) + def get_frequency(self, einsum: str) -> int: + """ + The clock_frequency (in Hz) should be specified as an attribute at the + top level + """ + top_level = self.tree[self.configs[einsum]] + freq = top_level.get_attr("clock_frequency") - return components + if freq is None: + raise ValueError( + "Unspecified clock frequency for config " + + self.configs[einsum]) - def get_merger_components(self) -> List[MergerComponent]: + if isinstance(freq, str): + raise ValueError( + "Bad clock frequency for config " + + self.configs[einsum]) + + return freq + + def get_prefix(self, einsum: str) -> str: """ - Get all merger components + Get the prefix for collected metrics for the given Einsum """ - mergers = [] - for component in self.components.values(): - if isinstance(component, MergerComponent): - mergers.append(component) - - return mergers + return self.bindings.get_prefix(einsum) def get_traffic_path( self, - einsum: str, - tensor: str) -> List[MemoryComponent]: + tensor: str, + rank: str, + type_: str, + format_: str) -> List[Tuple[MemoryComponent, str]]: """ - Get a list of paths this tensor will be loaded into + Get a list of components this tensor will be loaded into and either + a lazy style or the source rank of the eager load """ - paths = self.__traffic_helper(tensor, self.tree) - - # Merge all paths together - final: List[MemoryComponent] = [] - compute_path = self.get_compute_path(einsum) - - for path in paths: - sub_path = Hardware.__sub_path(path, compute_path) - - if len(final) < len(sub_path) and final == sub_path[:len(final)]: - final = sub_path + einsum = 
self.program.get_equation().get_output().root_name() - elif len(sub_path) < len(final) and sub_path == final[:len(sub_path)]: - pass + components: List[Tuple[MemoryComponent, str]] = [] - elif sub_path == final: - pass + levels = [(self.tree[self.configs[einsum]], 0)] + depths_covered = set() + while levels: + level, depth = levels.pop() - else: - raise ValueError( - "Multiple bindings for einsum " + - einsum + - " and tensor " + - tensor) + for component in level.get_local(): + if not isinstance(component, MemoryComponent): + continue + + binding = component.get_binding( + einsum, tensor, rank, type_, format_) + if binding: + if isinstance( + component, + BuffetComponent) and binding["style"] == "eager": + components.append((component, binding["root"])) + else: + components.append((component, "lazy")) + + if depth in depths_covered: + raise ValueError( + "Multiple traffic paths for tensor " + + tensor + + " in Einsum " + + einsum) + depths_covered.add(depth) + + levels.extend((tree, depth + 1) for tree in level.get_subtrees()) - return final + return components def get_tree(self) -> Level: """ Get the architecture tree """ - return self.tree - - @staticmethod - def __sub_path( - mem_path: List[MemoryComponent], - compute_path: List[Level]) -> List[MemoryComponent]: - """ - Return the prefix of the mem_path captured by this compute_path - """ - i = 0 - for level in compute_path: - if mem_path[i] in level.get_local(): - i += 1 - - if i == len(mem_path): - break + einsum = self.program.get_equation().get_output().root_name() + return self.tree[self.configs[einsum]] - return mem_path[:i] - - def __build_component(self, local: dict, bindings: Bindings) -> Component: + def __build_component(self, local: dict, num_instances: int) -> Component: """ Build a component """ class_: Type[Component] - if local["class"].lower() == "buffet": + class_name = local["class"].lower() + if class_name == "buffet": class_ = BuffetComponent - elif local["class"].lower() == "cache": + elif class_name == "cache": class_ = CacheComponent - elif local["class"].lower() == "compute": + elif class_name == "compute": class_ = ComputeComponent - elif local["class"].lower() == "dram": + elif class_name == "dram": class_ = DRAMComponent - elif local["class"].lower() == "leaderfollower": - class_ = LeaderFollowerComponent + elif class_name == "intersector": + type_ = local["attributes"]["type"].lower() + if type_ == "leader-follower": + class_ = LeaderFollowerComponent + + elif type_ == "skip-ahead": + class_ = SkipAheadComponent + + elif type_ == "two-finger": + class_ = TwoFingerComponent - elif local["class"].lower() == "merger": + else: + raise ValueError("Unknown intersection type: " + type_) + + elif class_name == "merger": class_ = MergerComponent - elif local["class"].lower() == "skipahead": - class_ = SkipAheadComponent + elif class_name == "sequencer": + class_ = SequencerComponent else: raise ValueError("Unknown class: " + local["class"]) name = local["name"] - binding = bindings.get(name) + binding = self.bindings.get_component(name) - component = class_(name, local["attributes"], binding) + component = class_(name, num_instances, local["attributes"], binding) self.components[component.get_name()] = component return component - def __build_level(self, tree: dict, bindings: Bindings) -> Level: + def __build_level(self, tree: dict) -> Level: """ Build the levels of the architecture tree """ attrs = tree["attributes"] - local = [self.__build_component(comp, bindings) + local = [self.__build_component(comp, 
tree["num"]) for comp in tree["local"]] - subtrees = [self.__build_level(subtree, bindings) + subtrees = [self.__build_level(subtree) for subtree in tree["subtree"]] return Level(tree["name"], tree["num"], attrs, local, subtrees) - - def __compute_helper(self, einsum: str, level: Level) -> List[Level]: - """ - Recursive implementation to find the dataflow to compute for a given - einsum - """ - # Recurse down the tree - paths = [] - for subtree in level.get_subtrees(): - sub_path = self.__compute_helper(einsum, subtree) - if sub_path: - paths.append(sub_path) - - if len(paths) > 1: - raise ValueError("Only one compute path allowed per einsum") - - if paths: - return [level] + paths[0] - - # Check if a local component performs compute for this einsum - root = False - for comp in level.get_local(): - if isinstance(comp, ComputeComponent) and \ - comp.get_bindings(einsum): - return [level] - - return [] - - def __traffic_helper(self, tensor: str, - level: Level) -> List[List[MemoryComponent]]: - """ - Recursive implementation to find the memory traffic pattern of a tensor - from a given subtree - """ - # Recurse down the tree - paths = [] - for subtree in level.get_subtrees(): - paths.extend(self.__traffic_helper(tensor, subtree)) - - # Check if the memory components at this level store the tensor - mem_components = [] - for comp in level.get_local(): - if isinstance(comp, MemoryComponent) and comp.get_binding(tensor): - mem_components.append(comp) - - # Return a list of paths - if not paths: - return [[mem] for mem in mem_components] - - if not mem_components: - return paths - - final = [] - for mem in mem_components: - for path in paths: - final.append([mem] + path) - - return final diff --git a/teaal/ir/loop_order.py b/teaal/ir/loop_order.py index 9ba668a..93af527 100644 --- a/teaal/ir/loop_order.py +++ b/teaal/ir/loop_order.py @@ -83,7 +83,9 @@ def apply(self, tensor: Tensor) -> None: # Get the names of the final rank ids for the tensor final_ids = [] for rank in tensor.get_ranks(): - final_ids.append(self.partitioning.get_final_rank_id(tensor, rank)) + final_ids.append( + self.partitioning.get_final_rank_id( + tensor.get_init_ranks(), rank)) # Order the current rank ids based on their final posititon expanded: List[List[str]] = [[] for _ in range(len(self.ranks))] diff --git a/teaal/ir/metrics.py b/teaal/ir/metrics.py index 24c138d..19a03f9 100644 --- a/teaal/ir/metrics.py +++ b/teaal/ir/metrics.py @@ -24,10 +24,11 @@ Representation of the metrics that need to be collected for this accelerator """ -from typing import Tuple +from typing import Dict, List, Optional, Tuple, Union from teaal.ir.component import * from teaal.ir.hardware import Hardware +from teaal.ir.iter_graph import IterationGraph from teaal.ir.program import Program from teaal.ir.tensor import Tensor from teaal.parse.format import Format @@ -45,256 +46,512 @@ def __init__( hardware: Hardware, format_: Format) -> None: """ - Construct a new metrics object + Construct a new Metrics object """ self.program = program self.hardware = hardware self.format = format_ - # Check that we can collect metrics for this accelerator - self.__check_configuration() + self.__build_format_options() + self.__build_eager_evicts() + self.__expand_eager() - # Get the final form of all tensors - for tensor in self.program.get_equation().get_tensors(): - self.program.apply_all_partitioning(tensor) - self.program.get_loop_order().apply(tensor) + self.__build_coiter_ranks() + self.__build_fiber_traces() + self.__build_traffic_paths() - # Collect the 
memory traffic information - self.__build_dram_tensors() - self.__build_off_chip_traffic_info() - self.__build_stationary() + def get_coiter(self, rank: str) -> Optional[Component]: + """ + Get the coiterator used for this rank + """ + if rank not in self.coiterators: + return None - # Reset all tensors - for tensor in self.program.get_equation().get_tensors(): - is_output = tensor.get_is_output() - tensor.reset() - tensor.set_is_output(is_output) + return self.coiterators[rank] - # Collect other information - self.__build_mergers() + def get_coiter_traces(self, coiter: str, rank: str) -> List[str]: + """ + Get the trace names used for this coiterator + """ + return self.coiter_traces[coiter][rank] - def get_compute_components(self) -> List[ComputeComponent]: + def get_collected_iter_info(self) -> Set[str]: """ - Get all relevant compute components for this Einsum + Get the specification for which ranks iteration needs to be traced """ + ranks = set() einsum = self.program.get_equation().get_output().root_name() - return self.hardware.get_compute_components(einsum) + for sequencer in self.hardware.get_components( + einsum, SequencerComponent): + ranks.update(sequencer.get_ranks(einsum)) - def get_format(self, tensor: Tensor) -> dict: + return ranks + + def get_collected_tensor_info( + self, tensor: str) -> Set[Tuple[str, str, bool]]: """ - Get the format specification for the given tensor + Get a specification for which ranks need to be collected in the form + {(rank, type, consumable)}, where type is one of + - "fiber" - corresponding to iteration over that fiber + - "iter" - corresponding to the iteration of the loop nest + - rank - the rank that the eager iteration starts at """ - return self.format.get_spec(tensor.root_name()) - - def get_merger_components(self) -> List[Tuple[MergerComponent, dict]]: + # Collect traces for data traffic + info = set() + einsum = self.program.get_equation().get_output().root_name() + if tensor in self.traffic_paths: + for rank, paths in self.traffic_paths[tensor][1].items(): + for i, path in enumerate(paths): + for component, style in path: + if isinstance(component, DRAMComponent): + continue + + if style == "lazy": + info.add((rank, "fiber", False)) + fiber_trace = self.get_fiber_trace( + tensor, rank, True) + if i == 1 and fiber_trace != "iter" and fiber_trace[:11] != "get_payload": + info.add((rank, "iter", False)) + + else: + info.add((rank, style, False)) + + # Collect traces for intersection + if not tensor == einsum: + tensor_ir = self.program.get_equation().get_tensor(tensor) + part_ir = self.program.get_partitioning() + final_ranks = part_ir.partition_ranks( + tensor_ir.get_init_ranks(), part_ir.get_all_parts(), True, True) + + for intersector in self.hardware.get_components( + einsum, IntersectorComponent): + for binding in intersector.get_bindings()[einsum]: + if isinstance(intersector, LeaderFollowerComponent) and \ + binding["leader"] != tensor: + continue + + if binding["rank"] not in final_ranks: + continue + + info.add((binding["rank"], "fiber", True)) + + return info + + def get_eager_evict_on(self, tensor: str, rank: str) -> List[str]: """ - Get all relevant merger components and the relevant tensor being merged + Get the ranks eager load should be evicted on in loop order """ - return self.mergers + ranks = [] + for loop_rank, evicts in self.eager_evicts.items(): + if (tensor, rank) in evicts: + ranks.append(loop_rank) + + ranks.sort(key=self.program.get_loop_order().get_ranks().index) + return ranks - def get_on_chip_buffer(self, 
tensor: Tensor) -> MemoryComponent:
+    def get_eager_evicts(self, rank: str) -> List[Tuple[str, str]]:
         """
-        Gets the on-chip buffer for a particular tensor
+        Get the subtrees that were eager loaded and should be evicted on this
+        rank
         """
-        if not self.in_dram(tensor):
-            raise ValueError(
-                "Tensor " +
-                tensor.root_name() +
-                " not stored in DRAM")
+        if rank not in self.eager_evicts:
+            return []
 
-        return self.on_chip_buffer[tensor.root_name()]
+        return self.eager_evicts[rank]
 
-    def get_on_chip_rank(self, tensor: Tensor) -> str:
+    def get_eager_write(self) -> bool:
         """
-        Returns the rank of the given tensor that is used for memory traffic
+        Returns True if the kernel performs an eager write
         """
-        if not self.in_dram(tensor):
-            raise ValueError(
-                "Tensor " +
-                tensor.root_name() +
-                " not stored in DRAM")
+        return self.eager_write
 
-        return self.on_chip_rank[tensor.root_name()][1]
+    def get_fiber_trace(
+            self,
+            tensor: str,
+            rank: str,
+            is_read_trace: bool) -> str:
+        """
+        Get the name of the fiber trace for this fiber
+        """
+        # If the rank is not in the set of fiber_traces (not in the loop
+        # order), it must be iterated with a get_payload
+        if rank not in self.fiber_traces:
+            return "get_payload_" + tensor
+        return self.fiber_traces[rank][tensor][is_read_trace]
 
-    def in_dram(self, tensor: Tensor) -> bool:
+    def get_format(self) -> Format:
         """
-        Returns True if the tensor is stored in DRAM
+        Get the parsed format yaml
         """
-        return tensor.root_name() in self.dram_tensors
+        return self.format
 
-    def on_chip_stationary(self, tensor: Tensor) -> bool:
+    def get_hardware(self) -> Hardware:
         """
-        Returns True if this tensor is stationary (i.e., its DRAM traffic
-        can be computed by calculating its footprint)
+        Get the hardware IR
         """
-        return tensor.root_name() in self.stationary
+        return self.hardware
 
-    def __build_dram_tensors(self) -> None:
+    def get_loop_formats(self) -> Dict[str, str]:
         """
-        Build the set of tensors stored in DRAM
+        Get the tensors that have assigned formats during the loop nest as
+        well as the corresponding format
         """
-        self.dram_tensors = set()
-        einsum = self.program.get_equation().get_output().root_name()
+        loop_formats = {}
+        for tensor, (format_, _) in self.traffic_paths.items():
+            loop_formats[tensor] = format_
+        return loop_formats
 
-        # For each tensor
-        for tensor in self.program.get_equation().get_tensors():
-            path = self.hardware.get_traffic_path(einsum, tensor.root_name())
+    def get_merger_init_ranks(self, tensor: str,
+                              final_ranks: List[str]) -> Optional[List[str]]:
+        """
+        Get the initial ranks for merges that must be tracked by the hardware
+        """
+        einsum = self.program.get_equation().get_output().root_name()
+        mergers = self.hardware.get_components(einsum, MergerComponent)
+        init_ranks: Optional[List[str]] = None
+        for merger in mergers:
+            opt_init_ranks = merger.get_init_ranks(einsum, tensor, final_ranks)
 
-            if not path or not isinstance(path[0], DRAMComponent):
+            if opt_init_ranks is None:
                 continue
 
-            if len(path) < 2:
+            if init_ranks is not None:
                 raise ValueError(
-                    "Tensor " +
-                    tensor.root_name() +
-                    " never buffered on chip")
+                    "Multiple bindings for merge of tensor " +
+                    tensor +
+                    " to final rank order " +
+                    str(final_ranks))
 
-            self.dram_tensors.add(tensor.root_name())
+            init_ranks = opt_init_ranks
 
-    def __build_mergers(self) -> None:
+        return init_ranks
+
+    def get_source_memory(
+            self,
+            component: str,
+            tensor: str,
+            rank: str,
+            type_: str) -> Optional[MemoryComponent]:
         """
-        Build a list of mergers that will be relevant
+        Get the source
for this data """ - all_mergers = self.hardware.get_merger_components() - easy_access = {} + t = ["coord", "payload", "elem"].index(type_) - for merger in all_mergers: - for binding in merger.get_bindings(): - # Create a map from - # (tensor name, init ranks, final ranks) to the component - name = binding["tensor"] - init = tuple(binding["init_ranks"]) - final = tuple(binding["final_ranks"]) + if tensor not in self.traffic_paths: + return None - easy_access[(name, init, final)] = (merger, binding) - - self.mergers = [] - part = self.program.get_partitioning() - - def check_tensor(tensor): - """ - Check if the tensor matches a merge operation, if so, add it - """ - name = tensor.root_name() - init = tuple(tensor.get_ranks()) - self.program.get_loop_order().apply(tensor) - final = tuple(tensor.get_ranks()) - - if (name, init, final) in easy_access.keys(): - self.mergers.append(easy_access[(name, init, final)]) - - for tensor in self.program.get_equation().get_tensors(): - # If it is the output, we swizzle on the way out - is_output = tensor.get_is_output() - if is_output: - name = tensor.root_name() + path = self.traffic_paths[tensor][1][rank][t] + component_ir = self.hardware.get_component(component) + if not isinstance(component_ir, MemoryComponent): + raise ValueError( + "Destination component " + + component + + " not a memory") - # With the output, we first swizzle back, and then flatten - self.program.apply_all_partitioning(tensor) - self.program.get_loop_order().apply(tensor) + inds = [i for i, (comp, _) in enumerate(path) if comp == component_ir] + if not inds: + return None - init = tuple(tensor.get_ranks()) - tensor.reset() - self.program.apply_all_partitioning(tensor) - final = tuple(tensor.get_ranks()) + if inds[0] == 0: + return None - if (name, init, final) in easy_access.keys(): - self.mergers.append(easy_access[(name, init, final)]) + return path[inds[0] - 1][0] - else: - name = tensor.root_name() + def __build_coiter_ranks(self) -> None: + """ + Map the ranks to the coiterators that coiterate over them + """ + self.coiterators: Dict[str, Component] = {} + einsum = self.program.get_equation().get_output().root_name() + for intersector in self.hardware.get_components( + einsum, IntersectorComponent): + for binding in intersector.get_bindings()[einsum]: + rank = binding["rank"] + # Not clear how to map co-iterators onto multiple components + if rank in self.coiterators: + raise NotImplementedError - # First apply all static partitioning - for ranks in part.get_static_parts(): - # TODO: allow flattening - if len(ranks) > 1: - raise ValueError("Cannot deal with this yet") - rank = ranks[0] - if rank in tensor.get_ranks(): - # TODO Support flattening - self.program.apply_partitioning(tensor, (rank,)) + self.coiterators[rank] = intersector - check_tensor(tensor) + def __build_eager_evicts(self) -> None: + """ + Build a dictionary describing the ranks eager accesses will be evicted on - # Now check any dynamic swizzling after partitioning - # opt_rank = tensor.peek() - # while opt_rank is not None: - # if opt_rank.upper() in part.get_dyn_parts().keys(): - # tensor.from_fiber() - # self.program.apply_partitioning( - # tensor, (opt_rank.upper(),)) + self.eager_evicts: Dict[evict_rank, List[Tuple[tensor, root_rank]]] + """ + einsum = self.program.get_equation().get_output().root_name() - # check_tensor(tensor) + self.eager_evicts: Dict[str, List[Tuple[str, str]]] = {} + for buffet in self.hardware.get_components(einsum, BuffetComponent): + for binding in buffet.get_bindings()[einsum]: 
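+                # Only eager bindings need eviction bookkeeping; each
+                # (tensor, root) pair is filed under its evict-on rank.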
+ if binding["style"] != "eager": + continue - # tensor.pop() - # opt_rank = tensor.peek() + evict_on = binding["evict-on"] + if evict_on not in self.eager_evicts: + self.eager_evicts[evict_on] = [] - tensor.reset() - tensor.set_is_output(is_output) + self.eager_evicts[evict_on].append( + (binding["tensor"], binding["root"])) - def __build_off_chip_traffic_info(self) -> None: + def __build_fiber_traces(self) -> None: """ - Build a mapping from tensors to the rank buffered on chip + Build the fiber traces + + self.fiber_traces: Dict[rank, Dict[tensor, Dict[is_read_trace, trace]]] + self.coiter_traces: Dict[component, Dict[rank, List[trace]]] """ - self.on_chip_rank = {} - self.on_chip_buffer = {} + part_ir = self.program.get_partitioning() einsum = self.program.get_equation().get_output().root_name() - # For each tensor + iter_graph = IterationGraph(self.program) for tensor in self.program.get_equation().get_tensors(): - # We don't care about tensors not in DRAM - if not self.in_dram(tensor): - continue - - name = tensor.root_name() - path = self.hardware.get_traffic_path(einsum, name) - - # Get the bindings - mem_binding = path[0].get_binding(name) - on_chip_binding = path[1].get_binding(name) + self.program.apply_all_partitioning(tensor) + self.program.get_loop_order().apply(tensor) - # Indicates an error with Hardware.get_traffic_path() - if not mem_binding or not on_chip_binding: - raise ValueError("Something is wrong...") # pragma: no cover + # Get the corresponding traces + self.fiber_traces: Dict[str, Dict[str, Dict[bool, str]]] = {} + self.coiter_traces: Dict[str, Dict[str, List[str]]] = {} + + # TODO: Think about when we want the pre-projected and when we want the + # post-projected traces + + rank, tensors = iter_graph.peek_concord() + while rank: + # Create empty dictionaries for new ranks + for tensor in tensors: + trank = tensor.peek_clean() + if trank not in self.fiber_traces: + self.fiber_traces[trank] = {} + + output, inputs = self.program.get_equation().get_iter(tensors) + + parent = "iter" + next_label = 0 + if output and not inputs: + # If there is only an output, there is no separate read and + # write trace + self.fiber_traces[rank][output.root_name()] = { + True: parent, False: parent} + + # Advance the iteration graph + iter_graph.pop_concord() + iter_graph.pop_discord() + rank, tensors = iter_graph.peek_concord() + continue - # Build a dictionary of tensors to the - # (rank in DRAM, rank in last on-chip buffer) - self.on_chip_rank[name] = (mem_binding, on_chip_binding) + if output: + self.fiber_traces[rank][output.root_name()] = { + True: "populate_read_0", False: "populate_write_0"} + + parent = "populate_1" + + next_label = 2 + + union_label: Optional[int] = None + if len(inputs) > 1: + union_label = next_label + next_label += 2 + + for i, term in enumerate(inputs): + if len(term) == 1: + trank = term[0].peek_clean() + + if i + 1 < len(inputs): + self.fiber_traces[trank][term[0].root_name()] = { + True: "union_" + str(union_label)} + # i + 1 == len(inputs) + else: + self.fiber_traces[trank][term[0].root_name()] = { + True: parent} + + # Otherwise we have multiple tensors intersected together + else: + # Not clear which intersection should performed + # with this component + if rank in self.coiterators and len(inputs) > 1: + raise NotImplementedError + + # Reorganize the leader to be first + tensors = term.copy() + if rank in self.coiterators and isinstance( + self.coiterators[rank], LeaderFollowerComponent): + for binding in self.coiterators[rank].get_bindings()[ + 
einsum]: + if binding["rank"] == rank: + leader = binding["leader"] + break + + leader_tensor = self.program.get_equation().get_tensor(leader) + tensors.remove(leader_tensor) + tensors.insert(0, leader_tensor) + + for j, tensor in enumerate(tensors[:-1]): + trank = tensor.peek_clean() + self.fiber_traces[trank][tensor.root_name()] = { + True: "intersect_" + str(next_label)} + + if rank in self.coiterators and isinstance( + self.coiterators[rank], LeaderFollowerComponent) and j + 2 < len(tensors): + next_label += 1 + else: + next_label += 2 + + trank = tensors[-1].peek_clean() + + self.fiber_traces[trank][tensors[-1].root_name() + ] = {True: "intersect_" + str(next_label - 1)} + + if rank in self.coiterators: + coiter = self.coiterators[rank] + if coiter.get_name() not in self.coiter_traces: + self.coiter_traces[coiter.get_name()] = {} + self.coiter_traces[coiter.get_name()][rank] = [] + + traces = self.coiter_traces[coiter.get_name()][rank] + if isinstance(coiter, LeaderFollowerComponent): + # TODO: Can the leader-follower component store + # this info itself + leader = "" + for binding in coiter.get_bindings()[einsum]: + if binding["rank"] == rank: + leader = binding["leader"] + break + traces.append( + self.fiber_traces[rank][leader][True]) + + else: + # Do not support tracing intersection of more than + # two components + if len(tensors) > 2: + raise NotImplementedError + + for tensor in tensors: + trank = tensor.peek_clean() + traces.append( + self.fiber_traces[rank][tensor.root_name()][True]) + + if union_label is not None: + parent = "union_" + str(union_label + 1) + union_label = next_label + next_label += 2 + + # Advance the iteration graph + iter_graph.pop_concord() + iter_graph.pop_discord() + rank, tensors = iter_graph.peek_concord() - # Save the component where the tensor is buffered on-chip - self.on_chip_buffer[name] = path[1] + # Reset all tensors + for tensor in self.program.get_equation().get_tensors(): + is_output = tensor.get_is_output() + tensor.reset() + tensor.set_is_output(is_output) - def __build_stationary(self) -> None: + def __build_format_options(self) -> None: """ - Build a set of DRAM -> on chip stationary tensors + Build a set of possible formats for each tensor + + self.format_options: Dict[tensor, List[format]] """ - self.stationary = set() - einsum = self.program.get_equation().get_output().root_name() + self.format_options: Dict[str, List[str]] = {} + for tensor_ir in self.program.get_equation().get_tensors(): + tensor = tensor_ir.root_name() + self.format_options[tensor] = [] - for name, (mem_rank, on_chip_rank) in self.on_chip_rank.items(): - tensor = self.program.get_equation().get_tensor(name) + spec = self.format.get_spec(tensor) - if mem_rank != "root": - raise NotImplementedError + # Identify the formats that can correspond to the iteration of this + # loop nest + loop_order = self.program.get_loop_order() + for format_ in spec: + format_ranks = spec[format_]["rank-order"] - prefix = tensor.get_prefix(on_chip_rank) + temp_tensor = Tensor(tensor, format_ranks) + loop_order.apply(temp_tensor) - # The tensor is stationary if its prefix is also a prefix to the - # loop order - if prefix == self.program.get_loop_order().get_ranks()[ - :len(prefix)]: - self.stationary.add(name) + if temp_tensor.get_ranks() == format_ranks: + self.format_options[tensor].append(format_) - def __check_configuration(self) -> None: + def __build_traffic_paths(self) -> None: """ - There are many mappings that we cannot model right now. 
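[Editor's note] To make the trace-labeling scheme in __build_fiber_traces concrete: for a single-term Einsum Z[M,N] = A[M,K] * B[K,N] with loop order M, N, K and no co-iterator bound, the walk above would produce roughly the mapping below (a sketch inferred from the logic in this hunk; tensor names and labels are illustrative, and the inner keys are is_read_trace flags):

    fiber_traces = {
        "M": {"Z": {True: "populate_read_0", False: "populate_write_0"},
              "A": {True: "populate_1"}},   # single input term at M
        "N": {"Z": {True: "populate_read_0", False: "populate_write_0"},
              "B": {True: "populate_1"}},
        "K": {"A": {True: "intersect_0"},   # two inputs co-iterate at K,
              "B": {True: "intersect_1"}},  # so both carry intersect labels
    }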
Make sure this - is a legal configuration + Build a dictionary of used loop formats: + Dict[tensor, Tuple[format, Dict[rank, Tuple[coord_path, payload_path, elem_path]]]] """ - # Check that there is no dynamic partitioning - if self.program.get_partitioning().get_dyn_parts() != set(): - raise NotImplementedError + self.traffic_paths: Dict[str, + Tuple[str, + Dict[str, + Tuple[List[Tuple[MemoryComponent, str]], + List[Tuple[MemoryComponent, str]], + List[Tuple[MemoryComponent, str]]]]]] = {} + for tensor_ir in self.program.get_equation().get_tensors(): + tensor = tensor_ir.root_name() + spec = self.format.get_spec(tensor) + + # Build the set of specs to collect + einsum = self.program.get_equation().get_output().root_name() + + for format_ in self.format_options[tensor]: + for rank in spec[format_]: + if rank == "rank-order": + continue + + coord_path = self.hardware.get_traffic_path( + tensor, rank, "coord", format_) + payload_path = self.hardware.get_traffic_path( + tensor, rank, "payload", format_) + elem_path = self.hardware.get_traffic_path( + tensor, rank, "elem", format_) + + if tensor in self.traffic_paths and self.traffic_paths[ + tensor][0] != format_: + raise ValueError("Multiple potential formats " + + str({self.traffic_paths[tensor][0], format_}) + + " for tensor " + + tensor + + " in Einsum " + + einsum) + + if tensor not in self.traffic_paths: + self.traffic_paths[tensor] = (format_, {}) + + self.traffic_paths[tensor][1][rank] = ( + coord_path, payload_path, elem_path) + + def __expand_eager(self): + """ + Expand all eager bindings + """ + einsum = self.program.get_equation().get_output().root_name() - # Check that there are at most three tensors (no danger of multiple - # intersections per rank) - if len(self.program.get_equation().get_tensors()) > 3: - raise NotImplementedError + self.eager_write = False + for tensor_ir in self.program.get_equation().get_tensors(): + tensor = tensor_ir.root_name() + spec = self.format.get_spec(tensor) + + for format_ in self.format_options[tensor]: + types = [] + for rank in spec[format_]["rank-order"]: + types.append([]) + if "layout" in spec[format_][rank] and \ + spec[format_][rank]["layout"] == "interleaved": + types[-1].append("elem") + continue + + if "cbits" in spec[format_][rank] and \ + spec[format_][rank]["cbits"] > 0: + types[-1].append("coord") + + if "pbits" in spec[format_][rank] and \ + spec[format_][rank]["pbits"] > 0: + types[-1].append("payload") + + for component in self.hardware.get_components( + einsum, BuffetComponent): + + if tensor_ir.get_is_output(): + for binding in component.get_bindings()[einsum]: + if binding["style"] == "eager": + self.eager_write = True + + component.expand_eager( + einsum, tensor, format_, spec[format_]["rank-order"], types) diff --git a/teaal/ir/partitioning.py b/teaal/ir/partitioning.py index c091d0b..1033b2c 100644 --- a/teaal/ir/partitioning.py +++ b/teaal/ir/partitioning.py @@ -94,6 +94,8 @@ def get_all_parts(self) -> Set[Tuple[str, ...]]: def get_available(self, rank: str) -> Set[str]: """ Get the tensor ranks that may be available with this rank + + TODO: Cache this information """ avail: Set[str] = set() avail.add(rank) @@ -146,7 +148,7 @@ def get_dyn_parts(self) -> Set[Tuple[str, ...]]: """ return self.dyn_parts - def get_final_rank_id(self, tensor: Tensor, rank: str) -> str: + def get_final_rank_id(self, init_ranks: Iterable[str], rank: str) -> str: """ Get the name of this rank in the final loop order """ @@ -165,7 +167,7 @@ def get_final_rank_id(self, tensor: Tensor, rank: str) -> 
str: # If all flattened ranks do not appear in the tensor, the final # rank ID is the bottom flattened rank for rank in node.get_ranks(): - if self.get_root_name(rank) not in tensor.get_init_ranks(): + if self.get_root_name(rank) not in init_ranks: comp = min else: node = comp( diff --git a/teaal/ir/program.py b/teaal/ir/program.py index 1dca8b4..f04216e 100644 --- a/teaal/ir/program.py +++ b/teaal/ir/program.py @@ -71,6 +71,12 @@ def __init__(self, einsum: Einsum, mapping: Mapping) -> None: self.tensors[tensor.root_name()] = tensor + # Get all einsums + self.einsums = [] + for expr in self.einsum.get_expressions(): + self.einsums.append( + str(next(expr.find_data("output")).children[0])) + self.einsum_ind: Optional[int] = None self.equation: Optional[Equation] = None self.es_tensors: List[Tensor] = [] @@ -175,6 +181,12 @@ def apply_partition_swizzling(self, tensor: Tensor) -> None: tensor.get_ranks()) tensor.update_ranks(new_ranks) + def get_all_einsums(self) -> List[str]: + """ + Get a list of all of the Einsums (as specified by their output tensor) + """ + return self.einsums + def get_equation(self) -> Equation: """ Get the parse tree representation of the einsum diff --git a/teaal/ir/spacetime.py b/teaal/ir/spacetime.py index acacade..374807e 100644 --- a/teaal/ir/spacetime.py +++ b/teaal/ir/spacetime.py @@ -124,7 +124,8 @@ def get_style(self, rank: str) -> str: """ Get the style of display for the given rank """ - return self.styles[rank] + final = self.partitioning.get_final_rank_id([rank], rank) + return self.styles[final] def get_time(self) -> List[str]: """ diff --git a/teaal/ir/tensor.py b/teaal/ir/tensor.py index 7e30536..f59c7ff 100644 --- a/teaal/ir/tensor.py +++ b/teaal/ir/tensor.py @@ -120,6 +120,12 @@ def peek(self) -> Optional[str]: return self.__get_rank() return None + def peek_clean(self) -> str: + """ + Peek at the top rank; should only be called if there is a rank to look at + """ + return self.ranks[self.iter_ptr] + def peek_rest(self) -> List[str]: """ Return the list of ranks that have not yet been iterated over for this diff --git a/teaal/parse/arch.py b/teaal/parse/arch.py index 2dad9c0..aedbd75 100644 --- a/teaal/parse/arch.py +++ b/teaal/parse/arch.py @@ -52,57 +52,60 @@ def __init__(self, yaml: Optional[dict]) -> None: if not isinstance(self.yaml["architecture"], dict): raise ValueError("Bad architecture spec: " + str(self.yaml)) - subtrees = self.yaml["architecture"]["subtree"].copy() + subtrees = {} + for config in self.yaml["architecture"]: + subtrees[config] = self.yaml["architecture"][config].copy() - while subtrees: - tree = subtrees.pop() + for config in subtrees: + while subtrees[config]: + tree = subtrees[config].pop() - if "name" not in tree.keys(): - raise ValueError("Unnamed subtree: " + repr(tree)) + if "name" not in tree.keys(): + raise ValueError("Unnamed subtree: " + repr(tree)) - name_tree = LevelParser.parse(tree["name"]) + name_tree = LevelParser.parse(tree["name"]) - if name_tree.data == "single": - tree["name"] = str(name_tree.children[0]) - tree["num"] = 1 + if name_tree.data == "single": + tree["name"] = str(name_tree.children[0]) + tree["num"] = 1 - elif name_tree.data == "multiple": - tree["name"] = str(name_tree.children[0]) + elif name_tree.data == "multiple": + tree["name"] = str(name_tree.children[0]) - num = name_tree.children[1] - if isinstance(num, Tree): - # This error should be caught by the LevelParser - raise ValueError( - "Unknown num: " + repr(num)) # pragma: no cover + num = name_tree.children[1] + if isinstance(num, 
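[Editor's note] The reworked Architecture constructor above now expects one named configuration per key under "architecture", each holding its own subtree list, rather than a single top-level "subtree" key. A minimal spec it would walk without error might look like this (a sketch; every name and attribute value here is hypothetical):

    arch_yaml = """
    architecture:
      accel:                      # configuration name, selected per Einsum
      - name: System
        local:
        - name: MainMemory
          class: DRAM
          attributes: {bandwidth: 1024}
        subtree:
        - name: PE[0..63]         # parsed as "multiple"; num becomes 64
          local:
          - name: RegFile
            class: Buffet
    """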
Tree): + # This error should be caught by the LevelParser + raise ValueError( + "Unknown num: " + repr(num)) # pragma: no cover - tree["num"] = int(num) + 1 + tree["num"] = int(num) + 1 - else: - # This error should be caught by the LevelParser - raise ValueError( - "Unknown level name: " + - repr(name_tree)) # pragma: no cover + else: + # This error should be caught by the LevelParser + raise ValueError( + "Unknown level name: " + + repr(name_tree)) # pragma: no cover - if "attributes" not in tree.keys(): - tree["attributes"] = {} + if "attributes" not in tree.keys(): + tree["attributes"] = {} - if "local" not in tree.keys(): - tree["local"] = [] + if "local" not in tree.keys(): + tree["local"] = [] - for local in tree["local"]: - if "name" not in local.keys(): - raise ValueError("Unnamed local: " + repr(local)) + for local in tree["local"]: + if "name" not in local.keys(): + raise ValueError("Unnamed local: " + repr(local)) - if "class" not in local.keys(): - raise ValueError("Unclassed local: " + repr(local)) + if "class" not in local.keys(): + raise ValueError("Unclassed local: " + repr(local)) - if "attributes" not in local.keys(): - local["attributes"] = {} + if "attributes" not in local.keys(): + local["attributes"] = {} - if "subtree" not in tree.keys(): - tree["subtree"] = [] + if "subtree" not in tree.keys(): + tree["subtree"] = [] - subtrees.extend(tree["subtree"]) + subtrees[config].extend(tree["subtree"]) @classmethod def from_file(cls, filename: str) -> "Architecture": diff --git a/teaal/parse/bindings.py b/teaal/parse/bindings.py index 3fb33b4..ece3406 100644 --- a/teaal/parse/bindings.py +++ b/teaal/parse/bindings.py @@ -24,7 +24,7 @@ Parse the input YAML for the bindings """ -from typing import List, Optional +from typing import Dict, List, Optional from teaal.parse.yaml import YamlParser @@ -39,12 +39,30 @@ def __init__(self, yaml: Optional[dict]) -> None: Read the YAML input """ - self.components = {} + self.components: Dict[str, Dict[str, List[dict]]] = {} + self.configs = {} + self.prefixes = {} if yaml is None or "bindings" not in yaml.keys(): return - for binding in yaml["bindings"]: - self.components[binding["name"]] = binding["bindings"] + for einsum in yaml["bindings"]: + self.components[einsum] = {} + + configured = False + for binding in yaml["bindings"][einsum]: + if "config" in binding: + self.configs[einsum] = binding["config"] + self.prefixes[einsum] = binding["prefix"] + + configured = True + + else: + self.components[einsum][binding["component"] + ] = binding["bindings"] + + if not configured: + raise ValueError( + "Accelerator config and prefix missing for Einsum " + einsum) @classmethod def from_file(cls, filename: str) -> "Bindings": @@ -60,11 +78,32 @@ def from_str(cls, string: str) -> "Bindings": """ return cls(YamlParser.parse_str(string)) - def get(self, name) -> List[dict]: + def get_component(self, name: str) -> Dict[str, List[dict]]: """ Get the binding information for a component """ - if name not in self.components.keys(): - return [] + info = {} + + for einsum in self.components: + if name in self.components[einsum].keys(): + info[einsum] = self.components[einsum][name] + + return info + + def get_bindings(self) -> Dict[str, Dict[str, List[dict]]]: + """ + Get the binding information for all components + """ + return self.components - return self.components[name] + def get_config(self, einsum: str) -> str: + """ + Get the hardware configuration for a given Einsum + """ + return self.configs[einsum] + + def get_prefix(self, einsum: str) -> str: 
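[Editor's note] Bindings are now grouped per Einsum, and the constructor above raises unless each Einsum carries a config/prefix entry alongside its per-component bindings. A minimal accepted spec might be (a sketch; the component, tensor, and path names are hypothetical):

    bindings_yaml = """
    bindings:
      Z:
      - config: accel             # architecture configuration to use
        prefix: tmp/Z             # file prefix for collected traces
      - component: LLB
        bindings:
        - tensor: A
          rank: K
          type: payload
          format: default
    """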
+ """ + Get the metrics prefix for the given Einsum + """ + return self.prefixes[einsum] diff --git a/teaal/parse/format.py b/teaal/parse/format.py index 7f3ad90..8cb2528 100644 --- a/teaal/parse/format.py +++ b/teaal/parse/format.py @@ -45,6 +45,15 @@ def __init__(self, yaml: Optional[dict]) -> None: self.yaml = yaml["format"] + for tensor, formats in self.yaml.items(): + for format_, spec in formats.items(): + if "rank-order" not in spec.keys(): + raise ValueError( + "Rank order not specified for tensor " + + tensor + + " in format " + + format_) + @classmethod def from_file(cls, filename: str) -> "Format": """ @@ -64,6 +73,6 @@ def get_spec(self, tensor: str) -> dict: Get the specification for a particular tensor """ if tensor not in self.yaml.keys(): - raise ValueError("Format unspecified for tensor " + tensor) + return {} return self.yaml[tensor] diff --git a/teaal/trans/collector.py b/teaal/trans/collector.py index 00f2ee5..6202c98 100644 --- a/teaal/trans/collector.py +++ b/teaal/trans/collector.py @@ -26,6 +26,7 @@ from teaal.hifiber import * from teaal.ir.component import * +from teaal.ir.fusion import Fusion from teaal.ir.metrics import Metrics from teaal.ir.program import Program from teaal.ir.tensor import Tensor @@ -37,12 +38,60 @@ class Collector: Translate the metrics collection """ - def __init__(self, program: Program, metrics: Metrics) -> None: + def __init__( + self, + program: Program, + metrics: Metrics, + fusion: Fusion) -> None: """ Construct a collector object """ self.program = program self.metrics = metrics + self.fusion = fusion + + # tree_traces: Optional[Dict[rank, Dict[is_read, Set[tensor]]]] + self.tree_traces: Optional[Dict[str, Dict[bool, Set[str]]]] = None + + def create_component(self, component: Component, rank: str) -> Statement: + """ + Create a component to track metrics + """ + name = component.get_name() + if isinstance(component, LeaderFollowerComponent): + constructor = "LeaderFollowerIntersector" + elif isinstance(component, SkipAheadComponent): + constructor = "SkipAheadIntersector" + elif isinstance(component, TwoFingerComponent): + constructor = "TwoFingerIntersector" + else: + raise ValueError( + "Unable to create consumable metrics component for " + + name + " of type " + type(component).__name__) + + return SAssign(AVar(name + "_" + rank), EFunc(constructor, [])) + + def consume_traces(self, component: str, rank: str) -> Statement: + """ + Consume the traces to track this component + """ + component_ir = self.metrics.get_hardware().get_component(component) + + if isinstance(component_ir, IntersectorComponent): + tracker_name = EVar(component + "_" + rank) + traces = self.metrics.get_coiter_traces(component, rank) + consume_args = [[AJust(EString(rank)), + AJust(EString(trace))] for trace in traces] + args = [AJust(EMethod(EVar("Metrics"), "consumeTrace", arg)) + for arg in consume_args] + return SExpr(EMethod(tracker_name, "addTraces", args)) + + else: + raise ValueError( + "Unable to consume traces for component " + + component + + " of type " + + type(component_ir).__name__) def dump(self) -> Statement: """ @@ -54,33 +103,35 @@ def dump(self) -> Statement: if self.program.get_einsum_ind() == 0: block.add(SAssign(AVar("metrics"), EDict({}))) - einsum = EString(self.program.get_equation().get_output().root_name()) - block.add(SAssign(AAccess(EVar("metrics"), einsum), EDict({}))) + einsum = self.program.get_equation().get_output().root_name() + block.add( + SAssign( + AAccess( + EVar("metrics"), EString(einsum)), EDict( + {}))) - # Add the 
memory traffic information - for tensor in self.program.get_equation().get_tensors(): - # First revert the output to its loop nest form - if tensor.get_is_output(): - tensor.reset() - tensor.set_is_output(True) - self.program.apply_all_partitioning(tensor) - self.program.get_loop_order().apply(tensor) + # Create the formats + block.add(self.__build_formats()) + + # Track the traffic + block.add(self.__build_traffic()) + + # Track the merges + block.add(self.__build_merges()) - # Add the memory traffic information - block.add(self.__mem_metrics(tensor)) + # Track the compute + block.add(self.__build_compute()) - # Fix the output tensor - if tensor.get_is_output(): - tensor.reset() - tensor.set_is_output(True) + # Track the intersections + block.add(self.__build_intersections()) - # Add the compute information - for compute in self.metrics.get_compute_components(): - block.add(self.__compute_metrics(compute)) + # Track the sequences + block.add(self.__build_sequencers()) - # Add the merger information - for merge, name in self.metrics.get_merger_components(): - block.add(self.__merger_metrics(merge, name)) + # Add the final execution time modeling + num_einsums = len(self.program.get_all_einsums()) + if self.program.get_einsum_ind() + 1 == num_einsums: + block.add(self.__build_time()) return block @@ -91,195 +142,990 @@ def end() -> Statement: """ return SExpr(EMethod(EVar("Metrics"), "endCollect", [])) - def set_collecting(self, tensor_name: str, rank: str) -> Statement: + def make_body(self) -> Statement: + """ + Make the body of the loop + """ + return self.__make_iter_num("body") + + def make_loop_footer(self, rank: str) -> Statement: + """ + Make a footer for the loop + """ + block = SBlock([]) + + if self.tree_traces is None: + raise ValueError( + "Unconfigured collector. Make sure to first call start()") + + # Collect the iteration number if necessary + block.add(self.__make_iter_num(rank)) + + # Consume a trace if necessary + coiter = self.metrics.get_coiter(rank) + if coiter is not None: + block.add(self.consume_traces(coiter.get_name(), rank)) + + # Eagerly store subtrees as necessary + for tensor in self.tree_traces[rank][False]: + block.add(self.trace_tree(tensor, rank, False)) + + return block + + def make_loop_header(self, rank: str) -> Statement: + """ + Make a header for a loop + """ + block = SBlock([]) + + if self.tree_traces is None: + raise ValueError( + "Unconfigured collector. 
Make sure to first call start()") + + loop_ranks = ["root"] + self.program.get_loop_order().get_ranks() + i = loop_ranks.index(rank) + + # Save the set of subtrees already eagerly loaded + eager_evicts = self.metrics.get_eager_evicts(loop_ranks[i - 1]) + for tensor, root in eager_evicts: + tracker = "eager_" + tensor.lower() + "_" + root.lower() + "_read" + block.add(SAssign(AVar(tracker), EFunc("set", ()))) + + # Eagerly load new subtrees as necessary + for tensor in self.tree_traces[rank][True]: + block.add(self.trace_tree(tensor, rank, True)) + + return block + + def register_ranks(self) -> Statement: + """ + Register the given ranks + """ + block = SBlock([]) + for rank in self.program.get_loop_order().get_ranks(): + block.add( + SExpr( + EMethod( + EVar("Metrics"), "registerRank", [ + AJust( + EString(rank))]))) + + return block + + def set_collecting( + self, + tensor: Optional[str], + rank: str, + type_: str, + consumable: bool, + is_read_trace: bool) -> Statement: """ Collect the statistics about a tensor """ - tensor = self.program.get_equation().get_tensor(tensor_name) - args = [AJust(EString(rank)), AJust(EBool(True))] - call = EMethod(EVar(tensor.tensor_name()), "setCollecting", args) + block = SBlock([]) + if tensor is None: + if type_ != "iter": + raise ValueError( + "Tensor must be specified for trace type " + type_) + trace = "iter" + + elif type_ == "fiber": + trace = self.metrics.get_fiber_trace(tensor, rank, is_read_trace) - return SExpr(call) + # Type is an eager rank + else: + trace = "eager_" + tensor.lower() + "_" + type_.lower() + if is_read_trace: + trace += "_read" + else: + trace += "_write" + + # We want to collect the iteration number for the last loop + # rank + output = self.program.get_equation().get_tensor(tensor) + final_tensor = Tensor( + output.root_name(), output.get_init_ranks()) + self.program.apply_all_partitioning(final_tensor) + self.program.get_loop_order().apply(final_tensor) + + iter_var = final_tensor.get_ranks()[-1].lower() + "_iter_num" + # TODO: Add a separate None type + block.add(SAssign(AVar(iter_var), EVar("None"))) + + args: List[Argument] = [ + AJust( + EString(rank)), AParam( + "type_", EString(trace)), AParam( + "consumable", EBool(consumable))] + + block.add(SExpr(EMethod(EVar("Metrics"), "trace", args))) + return block def start(self) -> Statement: """ Start metrics collection """ - loop_order = self.program.get_loop_order() - order = [EString(rank) for rank in loop_order.get_ranks()] - call = EMethod(EVar("Metrics"), "beginCollect", [AJust(EList(order))]) + block = SBlock([]) + + einsum = self.program.get_equation().get_output().root_name() + prefix = EString(self.metrics.get_hardware().get_prefix(einsum)) + call = EMethod(EVar("Metrics"), "beginCollect", [AJust(prefix)]) + + block.add(SExpr(call)) + + block.add(self.__build_components()) + + block.add(self.__build_match_ranks()) + + stmt, register = self.__build_trace_ranks() + block.add(stmt) + + if register: + block.add(self.register_ranks()) + + return block + + def trace_tree( + self, + tensor: str, + rank: str, + is_read_trace: bool) -> Statement: + """ + Trace a subtree under the fiber specified + """ + fiber = tensor.lower() + "_" + rank.lower() + + trace = "eager_" + fiber + if is_read_trace: + trace += "_read" + else: + trace += "_write" + + args: List[Argument] = [AJust(EString(trace))] + if not is_read_trace: + # We want to use the iteration number for the last loop rank + output = self.program.get_equation().get_tensor(tensor) + final_tensor = 
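[Editor's note] Taken together, start(), register_ranks(), and set_collecting() above generate a collection preamble of roughly this shape (a sketch; the prefix, the ranks, and the trace name are illustrative):

    Metrics.beginCollect("tmp/Z")
    Metrics.registerRank("M")    # only emitted when eager loads force
    Metrics.registerRank("K")    # explicit rank registration
    Metrics.trace("K", type_="intersect_0", consumable=True)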
Tensor(output.root_name(), output.get_init_ranks()) + self.program.apply_all_partitioning(final_tensor) + self.program.get_loop_order().apply(final_tensor) + + iter_var = final_tensor.get_ranks()[-1].lower() + "_iter_num" + args.append(AParam("iteration_num", EVar(iter_var))) + + trace_stmt = SExpr(EMethod(EVar(fiber), "trace", args)) + if not is_read_trace: + return trace_stmt - return SExpr(call) + # If read, only read the first time + loop_ranks = self.program.get_loop_order().get_ranks() + tensor_ir = self.program.get_equation().get_tensor(tensor) - def __compute_metrics(self, component: ComputeComponent) -> Statement: + get_final = self.program.get_partitioning().get_final_rank_id + evict_rank = self.metrics.get_eager_evict_on(tensor, rank)[-1] + er_ind = loop_ranks.index(get_final([evict_rank], evict_rank)) + tree_ind = loop_ranks.index(get_final([rank], rank)) + + key = [] + for loop_rank in loop_ranks[er_ind + 1:tree_ind]: + if loop_rank in tensor_ir.get_ranks(): + key.append(EVar(loop_rank.lower())) + key_tuple = ETuple(tuple(key)) + + cond = EBinOp(key_tuple, ONotIn(), EVar(trace)) + add_key = SExpr(EMethod(EVar(trace), "add", [AJust(key_tuple)])) + return SIf((cond, SBlock([add_key, trace_stmt])), [], None) + + def __add_collection(self, + trace: Tuple[Optional[str], + str, + str, + bool, + bool], + traces: Set[Tuple[Optional[str], + str, + str, + bool, + bool]]) -> Statement: """ - Get the compute metrics for this hardware + Add a collection and update the set of traces + """ + if trace not in traces: + traces.add(trace) + return self.set_collecting(*trace) + + return SBlock([]) + + def __get_trace(self, binding: dict, + is_read: bool) -> Tuple[str, Statement]: + """ + Get the (trace, HiFiber to produce the trace) """ einsum = self.program.get_equation().get_output().root_name() - metrics = EAccess(EVar("metrics"), EString(einsum)) + prefix = self.metrics.get_hardware().get_prefix(einsum) + \ + "-" + binding["rank"] + "-" + block = SBlock([]) + if "style" in binding and binding["style"] == "eager": + trace_fn = prefix + "eager_" + \ + binding["tensor"].lower() + "_" + binding["root"].lower() + if is_read: + trace_fn += "_read" + else: + trace_fn += "_write" + trace_fn += ".csv" - for binding in component.get_bindings(einsum): - if isinstance(component, LeaderFollowerComponent): - rank = binding["rank"] - leader = self.__get_leader(rank, binding["leader"]) + # Otherwise binding is lazy + else: + fiber_trace = self.metrics.get_fiber_trace( + binding["tensor"], binding["rank"], is_read) - args = [] - args.append(AJust(EMethod(EVar("Metrics"), "dump", []))) - args.append(AJust(EString(rank))) - args.append(AJust(EInt(leader))) + if binding["type"] == "payload" and fiber_trace != "iter" and \ + fiber_trace[:11] != "get_payload": + input_fn = prefix + fiber_trace + ".csv" + filter_fn = prefix + "iter.csv" + trace_fn = prefix + fiber_trace + "_payload.csv" - access = AAccess(metrics, EString(rank + " intersections")) - count = EMethod(EVar("Compute"), "lfCount", args) - block.add(SAssign(access, count)) + args = [AJust(EString(fn)) + for fn in [input_fn, filter_fn, trace_fn]] + block.add(SExpr(EMethod(EVar("Traffic"), "filterTrace", args))) - elif isinstance(component, SkipAheadComponent): - rank = binding["rank"] + else: + trace_fn = prefix + fiber_trace + ".csv" - args = [] - args.append(AJust(EMethod(EVar("Metrics"), "dump", []))) - args.append(AJust(EString(rank))) + return trace_fn, block + + def __build_components(self) -> Statement: + """ + Build the creation of any necessary 
hardware components + """ + block = SBlock([]) + einsum = self.program.get_equation().get_output().root_name() - access = AAccess(metrics, EString(rank + " intersections")) - count = EMethod(EVar("Compute"), "skipCount", args) - block.add(SAssign(access, count)) + for component in self.metrics.get_hardware().get_components(einsum, + IntersectorComponent): + name = component.get_name() - else: + for binding in component.get_bindings()[einsum]: + block.add(self.create_component(component, binding["rank"])) + + return block + + def __build_compute(self) -> Statement: + """ + Add the code to count compute operations + """ + block = SBlock([]) + einsum = self.program.get_equation().get_output().root_name() + + metrics_einsum = EAccess(EVar("metrics"), EString(einsum)) + metrics_dump = EAccess( + EMethod( + EVar("Metrics"), + "dump", + []), + EString("Compute")) + for fu in self.metrics.get_hardware().get_components(einsum, ComputeComponent): + block.add( + SAssign( + AAccess( + metrics_einsum, EString( + fu.get_name())), EDict( + {}))) + + metrics_fu = EAccess(metrics_einsum, EString(fu.get_name())) + ops = [] + for binding in fu.get_bindings()[einsum]: op = binding["op"] + ops.append(EString(op)) + block.add( + SAssign( + AAccess(metrics_fu, EString(op)), + EAccess(metrics_dump, EString("payload_" + op)))) - args = [] - args.append(AJust(EMethod(EVar("Metrics"), "dump", []))) - args.append(AJust(EString(op))) + # TODO: Handle multi-op functional units + assert len(ops) == 1 - access = AAccess(metrics, EString(op)) - count = EMethod(EVar("Compute"), "opCount", args) - block.add(SAssign(access, count)) + # op_freq = cycles / s * ops / cycle + op_freq = self.metrics.get_hardware().get_frequency(einsum) * \ + fu.get_num_instances() + time = EBinOp(EAccess(metrics_fu, ops[0]), ODiv(), EInt(op_freq)) + + metrics_time = AAccess(metrics_fu, EString("time")) + block.add(SAssign(metrics_time, time)) + self.fusion.add_component(einsum, fu.get_name()) return block - def __get_leader(self, rank: str, leader: str) -> int: + def __build_formats(self) -> Statement: """ - Get the index of the leader + Add the code to build the formats dictionary """ - i = 0 - for tensor in self.program.get_equation().get_tensors(): - if tensor.get_is_output(): + formats_dict: Dict[Expression, Expression] = {} + part_ir = self.program.get_partitioning() + for tensor, format_ in self.metrics.get_loop_formats().items(): + loop_format = self.metrics.get_format().get_spec(tensor)[format_] + rank_order = loop_format["rank-order"] + + # If there is dynamic partitioning applied we cannot use the + # existing tensor + build_new = False + + # TODO: This should be in teaal.ir.partitioning + tensor_ir = self.program.get_equation().get_tensor(tensor) + old_ranks: List[str] = [] + new_ranks = tensor_ir.get_init_ranks() + while old_ranks != new_ranks: + old_ranks = new_ranks + new_ranks = part_ir.partition_ranks( + new_ranks, part_ir.get_static_parts(), False, True) + + for static_rank in new_ranks: + if (static_rank,) in part_ir.get_dyn_parts(): + build_new = True + break + + if part_ir.is_flattened(static_rank): + build_new = True + break + + tensor_expr: Expression + if build_new: + rank_ids = TransUtils.build_expr(rank_order) + + shape: List[Expression] = [] + for rank in rank_order: + if not part_ir.is_flattened(rank): + shape.append(EVar(part_ir.get_root_name(rank))) + continue + + unpacked = part_ir.unpack(rank) + roots = [part_ir.get_root_name(src) for src in unpacked] + rank_shape: Expression = EVar(roots[0]) + for root in 
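[Editor's note] For a functional unit "FU" with a single bound op "add", running at 1 GHz with one instance (all hypothetical values), the compute-tracking block above reduces to generated HiFiber like:

    metrics["Z"]["FU"] = {}
    metrics["Z"]["FU"]["add"] = Metrics.dump()["Compute"]["payload_add"]
    metrics["Z"]["FU"]["time"] = metrics["Z"]["FU"]["add"] / 1000000000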
roots[1:]: + rank_shape = EBinOp(rank_shape, OMul(), EVar(root)) + shape.append(rank_shape) + + args = [ + AParam( + "rank_ids", rank_ids), AParam( + "shape", EList(shape))] + tensor_expr = EFunc("Tensor", args) + + else: + tensor_expr = EVar( + tensor + "_" + "".join(rank_order)) + + format_yaml = TransUtils.build_expr(loop_format) + + formats_dict[EString(tensor)] = EFunc( + "Format", [AJust(tensor_expr), AJust(format_yaml)]) + + return SAssign(AVar("formats"), EDict(formats_dict)) + + def __build_intersections(self) -> Statement: + """ + Add the code to compute the intersection operations + """ + block = SBlock([]) + einsum = self.program.get_equation().get_output().root_name() + + metrics_einsum = EAccess(EVar("metrics"), EString(einsum)) + for intersector in self.metrics.get_hardware().get_components(einsum, + IntersectorComponent): + isect_name = intersector.get_name() + metrics_isect = AAccess(metrics_einsum, EString(isect_name)) + block.add(SAssign(metrics_isect, EInt(0))) + + for binding in intersector.get_bindings()[einsum]: + isects = EMethod( + EVar( + isect_name + + "_" + + binding["rank"]), + "getNumIntersects", + []) + block.add(SIAssign(metrics_isect, OAdd(), isects)) + + # op_freq = cycles / s * ops / cycle + op_freq = self.metrics.get_hardware().get_frequency(einsum) * \ + intersector.get_num_instances() + metrics_isect_expr = EAccess(metrics_einsum, EString(isect_name)) + time = EBinOp(metrics_isect_expr, ODiv(), EInt(op_freq)) + + metrics_time = AAccess(metrics_isect_expr, EString("time")) + block.add(SAssign(metrics_time, time)) + self.fusion.add_component(einsum, intersector.get_name()) + + return block + + def __build_match_ranks(self) -> Statement: + """ + Add the code to match ranks, e.g., if we have flattening + """ + block = SBlock([]) + + part_ir = self.program.get_partitioning() + for rank in self.program.get_loop_order().get_ranks(): + if not part_ir.is_flattened(rank): continue - # TODO: Cover this when we allow more than two tensors - # See test test_dump_leader_follower_not_intersected - if rank not in tensor.get_ranks(): - continue # pragma: no cover + unpacked = part_ir.unpack(rank) + roots = [] + for unpack_rank in unpacked: + if part_ir.get_final_rank_id( + [unpack_rank], unpack_rank) == rank: + args = [AJust(EString(rank)), AJust(EString(unpack_rank))] + block.add( + SExpr( + EMethod( + EVar("Metrics"), + "matchRanks", + args))) - if tensor.root_name() == leader: - return i + roots.append(EVar(part_ir.get_root_name(unpack_rank))) - i += 1 + args = [AJust(EString(rank)), AJust(ETuple(tuple(roots)))] + block.add(SExpr(EMethod(EVar("Metrics"), "associateShape", args))) - raise ValueError("Tensor " + leader + " has no rank " + rank) + return block - def __mem_metrics(self, tensor: Tensor) -> Statement: + def __build_merges(self) -> Statement: """ - Get the memory metrics for a given tensor + Add the code to compute the merge operations """ block = SBlock([]) + einsum = self.program.get_equation().get_output().root_name() - # Dictionary accesses - einsum = EString(self.program.get_equation().get_output().root_name()) - metrics = EAccess(EVar("metrics"), einsum) - fp_access = (metrics, EString(tensor.root_name() + " footprint")) - tf_access = (metrics, EString(tensor.root_name() + " traffic")) - - # No memory traffic if the tensor is not stored in DRAM - if not self.metrics.in_dram(tensor): - block.add(SAssign(AAccess(*fp_access), EInt(0))) - block.add(SAssign(AAccess(*tf_access), EInt(0))) - return block - - # Make a format for this tensor - name = 
tensor.tensor_name() - spec = TransUtils.build_expr(self.metrics.get_format(tensor)) - constr = EFunc("Format", [AJust(EVar(name)), AJust(spec)]) - format_ = name + "_format" - block.add(SAssign(AVar(format_), constr)) - - # Compute its memory footprint - footprint = EMethod(EVar(format_), "getTensor", []) - block.add(SAssign(AAccess(*fp_access), footprint)) - - # If it is stationary, its footprint is its traffic, else compue - # the traffic - if self.metrics.on_chip_stationary(tensor): - block.add(SAssign(AAccess(*tf_access), EAccess(*fp_access))) + metrics_einsum = EAccess(EVar("metrics"), EString(einsum)) + for merger in self.metrics.get_hardware().get_components(einsum, MergerComponent): + merger_name = merger.get_name() + block.add( + SAssign( + AAccess( + metrics_einsum, EString(merger_name)), EDict( + {}))) + metrics_merger = EAccess(metrics_einsum, EString(merger_name)) + tensors = [] + for binding in merger.get_bindings()[einsum]: + init_ranks = binding["init-ranks"] + final_ranks = binding["final-ranks"] - else: - # First compute the traffic from loading the buffered subtrees - traffic = self.__mem_traffic(tensor) + input_ = binding["tensor"] + "_" + "".join(init_ranks) + tensor_name = EVar(input_) + tensors.append(tensor_name) + + # TODO: Way more complicated merges are possible than a single + # swap + depth = EInt([i == f for i, f in zip( + init_ranks, final_ranks)].index(False)) - # TODO: Make this more realistic - # We assume that the other ranks are secretly buffered - # somewhere else - buffer_rank = self.metrics.get_on_chip_rank(tensor) - prefix = tensor.get_prefix(buffer_rank) + # TODO: Need to first update the HiFiber to use new merge + # hardware spec + radix = TransUtils.build_expr(merger.get_comparator_radix()) + next_latency: Expression + if merger.get_inputs() < float("inf"): + next_latency = EInt(1) + else: + next_latency = EString("N") - for rank in prefix: - arg = AJust(EString(rank)) - rank_fp = EMethod(EVar(format_), "getRank", [arg]) + args = [ + AJust(expr) for expr in [ + tensor_name, + depth, + radix, + next_latency]] + swaps_call = EMethod(EVar("Compute"), "numSwaps", args) + block.add( + SAssign( + AAccess( + metrics_merger, + EString(input_)), + swaps_call)) - traffic = EBinOp(traffic, OAdd(), rank_fp) + # Compute the time required + # TODO: Support more than one tensor per merger + assert len(tensors) == 1 - block.add(SAssign(AAccess(*tf_access), traffic)) + # op_freq = cycles / s * ops / cycle + op_freq = self.metrics.get_hardware().get_frequency(einsum) * \ + merger.get_num_instances() + time = EBinOp( + EAccess( + metrics_merger, + tensors[0]), + ODiv(), + EInt(op_freq)) + + metrics_time = AAccess(metrics_merger, EString("time")) + block.add(SAssign(metrics_time, time)) + self.fusion.add_component(einsum, merger.get_name()) return block - def __mem_traffic(self, tensor: Tensor) -> Expression: + def __build_sequencers(self) -> Statement: """ - Get the expression for computing the memory traffic for this tensor + Add a block to track the sequencers """ - buffer_ = self.metrics.get_on_chip_buffer(tensor) + block = SBlock([]) - if isinstance(buffer_, BuffetComponent): - args = [] - args.append(AJust(EVar(tensor.tensor_name()))) - args.append(AJust(EString(self.metrics.get_on_chip_rank(tensor)))) - args.append(AJust(EVar(tensor.tensor_name() + "_format"))) + einsum = self.program.get_equation().get_output().root_name() + metrics_einsum = EAccess(EVar("metrics"), EString(einsum)) - return EMethod(EVar("Traffic"), "buffetTraffic", args) + for seq in 
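[Editor's note] The merge depth above is the index of the first rank that moves between the initial and final orders. A worked example of the depth computation and the resulting generated call (a sketch; the tensor name, the radix of 64, and the unit next-latency are hypothetical):

    init_ranks  = ["M", "K", "N"]
    final_ranks = ["M", "N", "K"]
    # [True, False, False].index(False) == 1
    depth = [i == f for i, f in zip(init_ranks, final_ranks)].index(False)

    metrics["Z"]["Merger0"]["T_MKN"] = Compute.numSwaps(T_MKN, 1, 64, 1)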
self.metrics.get_hardware().get_components(einsum, SequencerComponent): + seq_assn = AAccess(metrics_einsum, EString(seq.get_name())) + block.add(SAssign(seq_assn, EDict({}))) + seq_expr = EAccess(metrics_einsum, EString(seq.get_name())) - elif isinstance(buffer_, CacheComponent): - capacity = buffer_.get_depth() * buffer_.get_width() + ranks = [] + for rank in seq.get_ranks(einsum): + ranks.append(rank) + trace = self.metrics.get_hardware().get_prefix(einsum) + \ + "-" + rank + "-iter.csv" + num_iters = EMethod( + EVar("Compute"), "numIters", [ + AJust( + EString(trace))]) + seq_rank = AAccess(seq_expr, EString(rank)) + block.add(SAssign(seq_rank, num_iters)) - args = [] - args.append(AJust(EVar(tensor.tensor_name()))) - args.append(AJust(EString(self.metrics.get_on_chip_rank(tensor)))) - args.append(AJust(EVar(tensor.tensor_name() + "_format"))) - args.append(AJust(EInt(capacity))) + # Compute time + steps: Optional[Expression] = None + for rank in ranks: + new_steps = EAccess(seq_expr, EString(rank)) + if steps: + steps = EBinOp(steps, OAdd(), new_steps) + else: + steps = new_steps - return EMethod(EVar("Traffic"), "cacheTraffic", args) + assert steps is not None - else: - # This error should be caught by the Hardware constructor - raise ValueError( - "Unknown MemoryComponent " + - repr(buffer_)) # pragma: no cover + op_freq = self.metrics.get_hardware().get_frequency(einsum) * \ + seq.get_num_instances() + time = EBinOp(EParens(steps), ODiv(), EInt(op_freq)) - def __merger_metrics( - self, - component: MergerComponent, - binding: dict) -> Statement: + metrics_time = AAccess(seq_expr, EString("time")) + block.add(SAssign(metrics_time, time)) + self.fusion.add_component(einsum, seq.get_name()) + + return block + + def __build_time(self) -> Statement: + """ + Add the code necessary to compute the final execution time + """ + sblock = SBlock([]) + + # Save the Einsum blocks + metrics = EVar("metrics") + blocks = TransUtils.build_expr(self.fusion.get_blocks()) + sblock.add(SAssign(AAccess(metrics, EString("blocks")), blocks)) + + # Compute the execution time + time: Optional[Expression] = None + for block in self.fusion.get_blocks(): + + # Collect up the statistics for the block + component_time: Dict[str, Expression] = {} + for einsum in block: + metrics_einsum = EAccess(metrics, EString(einsum)) + for comp in self.fusion.get_components(einsum): + new_time = EAccess( + EAccess( + metrics_einsum, + EString(comp)), + EString("time")) + + if comp in component_time: + component_time[comp] = EBinOp( + component_time[comp], OAdd(), new_time) + else: + component_time[comp] = new_time + + # Sort components to enable testing + comps = sorted(component_time.keys()) + + # Compute block time by taking the max + block_time: Expression + if len(comps) == 0: + block_time = EInt(0) + elif len(comps) == 1: + block_time = component_time[comp] + else: + comp_args = [AJust(component_time[comp]) for comp in comps] + block_time = EFunc("max", comp_args) + + # The execution time is the sum of all of the blocks + if time: + time = EBinOp(time, OAdd(), block_time) + else: + time = block_time + + assert time is not None + + sblock.add(SAssign(AAccess(metrics, EString("time")), time)) + + return sblock + + def __build_trace_ranks(self) -> Tuple[Statement, bool]: + """ + Add code to trace all necessary ranks + Returns (new code, need to register ranks explicitly) + + Note: explicit rank registration is necessary if we have eager loading + of fibers + """ + block = SBlock([]) + einsum = 
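[Editor's note] The final timing model in __build_time sums each component's time across the Einsums of a fused block, takes the max over components within each block, and then sums the blocks. For one block fusing two Einsums it generates roughly (a sketch; the Einsum and component names are hypothetical):

    metrics["blocks"] = [["Z0", "Z1"]]
    metrics["time"] = max(
        metrics["Z0"]["FU"]["time"] + metrics["Z1"]["FU"]["time"],
        metrics["Z0"]["Mem"]["time"] + metrics["Z1"]["Mem"]["time"])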
self.program.get_equation().get_output().root_name() + loop_order = self.program.get_loop_order().get_ranks() + + traces: Set[Tuple[Optional[str], str, str, bool, bool]] = set() + trace: Tuple[Optional[str], str, str, bool, bool] + + register = False + self.tree_traces = {rank: {True: set(), False: set()} + for rank in loop_order} + available = [(rank, self.program.get_partitioning().get_available(rank)) + for rank in reversed(loop_order)] + + for sequencer in self.metrics.get_hardware().get_components(einsum, + SequencerComponent): + for rank in sequencer.get_ranks(einsum): + trace = (None, rank, "iter", False, True) + block.add(self.__add_collection(trace, traces)) + + for tensor in self.program.get_equation().get_tensors(): + tensor_name = tensor.root_name() + + # Collect the necessary traces for each tensor + for rank, type_, consumable in self.metrics.get_collected_tensor_info( + tensor_name): + + # If we are collecting the loop's trace + if type_ == "iter": + trace = (None, rank, type_, consumable, True) + block.add(self.__add_collection(trace, traces)) + + # Otherwise, get the fiber's read (and maybe write) + else: + trace = (tensor_name, rank, type_, consumable, True) + block.add(self.__add_collection(trace, traces)) + + if tensor.get_is_output(): + trace = (tensor_name, rank, type_, consumable, False) + block.add(self.__add_collection(trace, traces)) + + # Type is fiber if lazy and root of the eager access if + # lazy + if type_ != "fiber": + + # Register the rank order explicitly + register = True + + # Eagerly load a subtree right before the given loop + loaded = False + for loop_rank, avail in available: + if type_ in avail: + self.tree_traces[loop_rank][True].add( + tensor_name) + loaded = True + break + assert loaded + + # Eagerly store a subtree right before we move onto the + # next subtree + if tensor.get_is_output(): + final_tensor = Tensor( + tensor.root_name(), tensor.get_init_ranks()) + self.program.apply_all_partitioning(final_tensor) + self.program.get_loop_order().apply(final_tensor) + + i = final_tensor.get_ranks().index(type_) + if i == 0: + store_rank = loop_order[0] + else: + one_above_rank = final_tensor.get_ranks()[ + i - 1] + + stored = False + for j, (loop_rank, avail) in enumerate( + available): + if one_above_rank in avail: + stored = True + break + assert stored + + # Unreversed index -> len(loop_order) - j - 1 + # Store rank is one below -> + 1 + store_rank = loop_order[len(loop_order) - j] + + # Trace the eager tree + self.tree_traces[store_rank][False].add( + tensor_name) + + return block, register + + def __build_traffic(self) -> Statement: """ - Get the merge metrics for this component + Add the code to compute traffic """ + block = SBlock([]) einsum = self.program.get_equation().get_output().root_name() - metrics = EAccess(EVar("metrics"), EString(einsum)) - name = EVar(binding["tensor"] + "_" + "".join(binding["init_ranks"])) - depth = EInt(binding["swap_depth"]) - radix = TransUtils.build_expr(component.get_radix()) - next_latency = TransUtils.build_expr(component.get_next_latency()) + active_bindings: Dict[str, List[dict]] = {} + # Filter out the bindings to ignore + for buffer_ in self.metrics.get_hardware().get_components(einsum, BufferComponent): + active_bindings[buffer_.get_name()] = [] + for binding in buffer_.get_bindings()[einsum]: + format_ = self.metrics.get_format().get_spec( + binding["tensor"])[binding["format"]] + rank = binding["rank"] + type_ = binding["type"] + + # First make sure that this binding actually corresponds to + # 
traffic + check_cbits = type_ == "coord" or type_ == "elem" + check_pbits = type_ == "payload" or type_ == "elem" + if check_cbits and ( + "cbits" not in format_[rank] or format_[rank]["cbits"] == 0): + # Inconsequential line to make the coverage test go in here + x = 1 + continue + if check_pbits and ( + "pbits" not in format_[rank] or format_[rank]["pbits"] == 0): + # Inconsequential line to make the coverage test go in here + x = 1 + continue + + active_bindings[buffer_.get_name()].append(binding) + + metrics_einsum = EAccess(EVar("metrics"), EString(einsum)) + traffic_dict: Dict[str, Set[str]] = {} + for buffer_ in self.metrics.get_hardware().get_components(einsum, BufferComponent): + bindings = TransUtils.build_expr( + active_bindings[buffer_.get_name()]) + bindings_var = AVar("bindings") + + block.add(SAssign(bindings_var, bindings)) + + # Create the traces for each buffer + # TODO: What if the binding is for an unswizzled tensor + traces = {} + ranks = set() + for binding in active_bindings[buffer_.get_name()]: + format_ = self.metrics.get_format().get_spec( + binding["tensor"])[binding["format"]] + rank = binding["rank"] + ranks.add(rank) + type_ = binding["type"] + + # Now add the trace + trace, create_trace = self.__get_trace(binding, True) + block.add(create_trace) + traces[(binding["tensor"], rank, type_, "read")] = trace + tensor_ir = self.program.get_equation( + ).get_tensor(binding["tensor"]) + if tensor_ir.get_is_output(): + trace, create_trace = self.__get_trace(binding, False) + block.add(create_trace) + traces[(binding["tensor"], rank, type_, "write")] = trace + + # Also need to add the evict-on rank to the set of ranks if one + # exists + if "evict-on" in binding: + ranks.add(binding["evict-on"]) + + traces_dict = TransUtils.build_expr(traces) + block.add(SAssign(AVar("traces"), traces_dict)) + + args = [ + AJust( + EVar("bindings")), + AJust( + EVar("formats")), + AJust( + EVar("traces")), + AJust( + TransUtils.build_expr( + buffer_.get_width() * + buffer_.get_depth())), + AJust( + TransUtils.build_expr( + buffer_.get_width()))] + + # Match ranks not in the loop order to their corresponding rank in + # the loop order + rank_map = {} + for rank in ranks: + if rank == "root": + continue + + final_rank = self.program.get_partitioning( + ).get_final_rank_id([rank], rank) + if final_rank != rank: + rank_map[rank] = final_rank + + if rank_map: + args.append(AJust(TransUtils.build_expr(rank_map))) + + if isinstance(buffer_, BuffetComponent): + traffic_func = "buffetTraffic" + # Buffer is a cache + else: + traffic_func = "cacheTraffic" + + block.add( + SAssign( + AVar("traffic"), + EMethod( + EVar("Traffic"), + traffic_func, + args))) + + # Now add it to the metrics dictionary + added = set() + for binding in active_bindings[buffer_.get_name()]: + tensor = binding["tensor"] + rank = binding["rank"] + type_ = binding["type"] + + tensor_ir = self.program.get_equation().get_tensor(tensor) + src_component = self.metrics.get_source_memory( + buffer_.get_name(), tensor, rank, type_) + + if src_component is None: + continue + + src = src_component.get_name() + + if src not in traffic_dict: + traffic_dict[src] = set() + block.add( + SAssign( + AAccess( + metrics_einsum, EString(src)), EDict( + {}))) + + metrics_src = EAccess(metrics_einsum, EString(src)) + metrics_tensor = EAccess(metrics_src, EString(tensor)) + if tensor not in traffic_dict[src]: + traffic_dict[src].add(tensor) + block.add( + SAssign( + AAccess( + metrics_src, EString(tensor)), EDict( + {}))) + block.add( + SAssign( 
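[Editor's note] For a buffet of width 64 and depth 32 (hypothetical sizes, giving a capacity argument of 2048 bits), the generated traffic call and the accumulation that follows take roughly this shape (a sketch; "Memory" stands in for whatever source component the binding resolves to, and an optional rank-remapping dict may be appended to the arguments):

    traffic = Traffic.buffetTraffic(bindings, formats, traces, 2048, 64)
    metrics["Z"]["Memory"]["A"] = {}
    metrics["Z"]["Memory"]["A"]["read"] = 0
    metrics["Z"]["Memory"]["A"]["read"] += traffic[0]["A"]["read"]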
+ AAccess( + metrics_tensor, + EString("read")), + EInt(0))) + + if tensor_ir.get_is_output(): + block.add( + SAssign( + AAccess( + metrics_tensor, + EString("write")), + EInt(0))) + + if (src, tensor) not in added: + traffic_access = EAccess( + EAccess( + EVar("traffic"), + EInt(0)), + EString(tensor)) + block.add( + SIAssign( + AAccess( + metrics_tensor, + EString("read")), + OAdd(), + EAccess( + traffic_access, + EString("read")))) + + if tensor_ir.get_is_output(): + block.add( + SIAssign( + AAccess( + metrics_tensor, EString("write")), + OAdd(), + EAccess(traffic_access, EString("write")))) + + added.add((src, tensor)) + + # Compute the time it took to perform this traffic + for src, tensors in traffic_dict.items(): + bits: Optional[Expression] = None + metrics_src = EAccess(metrics_einsum, EString(src)) + + # Note: not technically necessary, just to make the testing + # deterministic + sorted_tensors = sorted(tensors) + + for tensor in sorted_tensors: + metrics_tensor = EAccess(metrics_src, EString(tensor)) + new_bits: Expression = EAccess(metrics_tensor, EString("read")) + + if tensor == einsum: + new_bits = EBinOp( + new_bits, OAdd(), EAccess( + metrics_tensor, EString("write"))) + + if bits: + bits = EBinOp(bits, OAdd(), new_bits) + else: + bits = new_bits + + # Should always have at least some traffic (error above if not) + assert bits is not None + bits = EParens(bits) + + component = self.metrics.get_hardware().get_component(src) + assert isinstance(component, MemoryComponent) + + metrics_time = AAccess(metrics_src, EString("time")) + # Note: the current model assumes perfect load balance + time = EBinOp( + bits, + ODiv(), + EInt( + component.get_bandwidth() * + component.get_num_instances())) + + block.add(SAssign(metrics_time, time)) + self.fusion.add_component(einsum, src) + + return block + + def __make_iter_num(self, rank: str) -> Statement: + """ + Save the iteration number if necessary + """ + # We don't need the iteration number if we are not doing an eager write + if not self.metrics.get_eager_write(): + return SBlock([]) + + loop_order = self.program.get_loop_order().get_ranks() + ["body"] + output = self.program.get_equation().get_output() + + # We want to collect the iteration number for the last loop rank + final_tensor = Tensor(output.root_name(), output.get_init_ranks()) + self.program.apply_all_partitioning(final_tensor) + self.program.get_loop_order().apply(final_tensor) + + # We don't need the iteration number of this rank if it is the top rank + # since we can never eager access a 0-tensor + i = loop_order.index(rank) + if i == 0: + return SBlock([]) + + # We only want the iteration number of the output's bottom rank + if loop_order[i - 1] != final_tensor.get_ranks()[-1]: + return SBlock([]) - args = [AJust(arg) for arg in [name, depth, radix, next_latency]] + iter_var = AVar(final_tensor.get_ranks()[-1].lower() + "_iter_num") + iter_num = EMethod(EMethod(EVar("Metrics"), "getIter", []), "copy", []) - access = AAccess(metrics, EString(name.gen() + " merge ops")) - count = EMethod(EVar("Compute"), "swapCount", args) - return SAssign(access, count) + return SAssign(iter_var, iter_num) diff --git a/teaal/trans/equation.py b/teaal/trans/equation.py index 7ab1235..db0b8e2 100644 --- a/teaal/trans/equation.py +++ b/teaal/trans/equation.py @@ -28,6 +28,8 @@ from typing import cast, Dict, List, Optional, Type from teaal.hifiber import * +from teaal.ir.component import * +from teaal.ir.metrics import Metrics from teaal.ir.program import Program from teaal.ir.tensor 
import Tensor from teaal.parse.utils import ParseUtils @@ -40,11 +42,12 @@ class Equation: equation at the bottom of the loop nest """ - def __init__(self, program: Program) -> None: + def __init__(self, program: Program, metrics: Optional[Metrics]) -> None: """ Construct a new Equation """ self.program = program + self.metrics = metrics def make_eager_inputs(self, rank: str, inputs: List[str]) -> Statement: """ @@ -52,7 +55,8 @@ def make_eager_inputs(self, rank: str, inputs: List[str]) -> Statement: """ tensors = [self.program.get_equation().get_tensor(input_) for input_ in inputs] - iter_expr = self.__make_input_iter_expr(rank, tensors) + _, input_tensors = self.program.get_equation().get_iter(tensors) + iter_expr = self.__make_input_iter_expr(rank, input_tensors) # Use Fiber.fromLazy() to translate # Note: Assume that if we are making eager inputs, then we are @@ -106,29 +110,27 @@ def make_iter_expr(self, rank: str, tensors: List[Tensor]) -> Expression: if not tensors: raise ValueError("Must iterate over at least one tensor") + output, inputs = self.program.get_equation().get_iter(tensors) + # If there are no input tensors, we need to iterRangeShapeRef on the # output - output_tensor = self.__get_output_tensor(tensors) - if len(tensors) == 1 and output_tensor: + if len(tensors) == 1 and output: iter_output = self.__make_output_only_iter_expr(rank) return self.__add_enumerate(rank, iter_output) # Build the expression of the inputs - expr = self.__make_input_iter_expr(rank, tensors) + expr = self.__make_input_iter_expr(rank, inputs) # Finally, add in the output - if output_tensor: - trank = output_tensor.peek() - if trank is not None: - trank = trank.upper() - + if output: + trank = output.peek_clean() if trank != rank: raise ValueError( "Cannot project into the output tensor. 
Replace " + rank + " with " + str(trank) + " in the loop order") expr = Equation.__add_operator( - EVar(output_tensor.fiber_name()), OLtLt(), expr) + EVar(output.fiber_name()), OLtLt(), expr) return self.__add_enumerate(rank, expr) @@ -193,14 +195,13 @@ def make_payload(self, rank: str, tensors: List[Tensor]) -> Payload: "Must have at least one tensor to make the payload") # Separate the tensors into terms - terms = self.__separate_terms(tensors) - output_tensor = self.__get_output_tensor(tensors) + output, inputs = self.program.get_equation().get_iter(tensors) payload: Payload - if terms: + if inputs: # Construct the term payloads term_payloads = [] - for term in terms: + for term in inputs: payload = PVar(term[-1].fiber_name()) for factor in reversed(term[:-1]): payload = PTuple([PVar(factor.fiber_name()), payload]) @@ -212,11 +213,11 @@ def make_payload(self, rank: str, tensors: List[Tensor]) -> Payload: payload = PTuple([PVar("_"), term_payload, payload]) # Put the output on the outside - if output_tensor: - payload = PTuple([PVar(output_tensor.fiber_name()), payload]) + if output: + payload = PTuple([PVar(output.fiber_name()), payload]) - elif output_tensor: - payload = PVar(output_tensor.fiber_name()) + elif output: + payload = PVar(output.fiber_name()) else: # We should never get to this state @@ -317,7 +318,9 @@ def __need_enumerate(self, rank: str) -> bool: spacetime = self.program.get_spacetime() enum_st = spacetime is not None and spacetime.emit_pos(rank) - return enum_int or enum_st + enum_metrics = self.metrics is None + + return (enum_int or enum_st) and enum_metrics @staticmethod def __frac_coords(sexpr: Basic) -> bool: @@ -329,16 +332,6 @@ def __frac_coords(sexpr: Basic) -> bool: return any(Equation.__frac_coords(arg) for arg in sexpr.args) - def __get_output_tensor(self, tensors: List[Tensor]) -> Optional[Tensor]: - """ - Get the output tensor if it exists - """ - output = self.program.get_equation().get_output() - if output in tensors: - return output - else: - return None - def __in_update(self, factor: str) -> bool: """ Returns true if the factor should be included in the update @@ -350,23 +343,18 @@ def __iter_fiber(self, rank: str, tensor: Tensor) -> Expression: """ Get fiber for iteration (may involve projection) """ - trank = tensor.peek() - if trank is None: - raise ValueError( - "Cannot iterate over payload " + - tensor.fiber_name()) + trank = tensor.peek_clean() # If this fiber is already over the correct rank, we can iterate over # it directly - rank = rank.lower() if trank == rank: return EVar(tensor.fiber_name()) # Otherwise, we need to project partitioning = self.program.get_partitioning() - root, suffix = partitioning.split_rank_name(rank.upper()) + root, suffix = partitioning.split_rank_name(rank) root = root.lower() - troot = partitioning.get_root_name(trank.upper()).lower() + troot = partitioning.get_root_name(trank).lower() # If we are going to project, get the iteration rank in terms of the # tensor rank @@ -384,19 +372,20 @@ def __iter_fiber(self, rank: str, tensor: Tensor) -> Expression: # If not, we do not need to translate the halo else: sexpr = CoordAccess.isolate_rank(sexpr, troot) - sexpr = sexpr.subs(troot, trank) + sexpr = sexpr.subs(troot, trank.lower()) - lambda_ = ELambda([trank], CoordAccess.build_expr(sexpr)) + lambda_ = ELambda([trank.lower()], CoordAccess.build_expr(sexpr)) args = [AParam("trans_fn", lambda_)] # Build the interval if we need to make sure that the halo does not # add extra computation if suffix == "": - interval = 
@@ -317,7 +318,9 @@ def __need_enumerate(self, rank: str) -> bool:
         spacetime = self.program.get_spacetime()
         enum_st = spacetime is not None and spacetime.emit_pos(rank)

-        return enum_int or enum_st
+        enum_metrics = self.metrics is None
+
+        return (enum_int or enum_st) and enum_metrics

     @staticmethod
     def __frac_coords(sexpr: Basic) -> bool:
@@ -329,16 +332,6 @@ def __frac_coords(sexpr: Basic) -> bool:

         return any(Equation.__frac_coords(arg) for arg in sexpr.args)

-    def __get_output_tensor(self, tensors: List[Tensor]) -> Optional[Tensor]:
-        """
-        Get the output tensor if it exists
-        """
-        output = self.program.get_equation().get_output()
-        if output in tensors:
-            return output
-        else:
-            return None
-
     def __in_update(self, factor: str) -> bool:
         """
         Returns true if the factor should be included in the update
@@ -350,23 +343,18 @@ def __iter_fiber(self, rank: str, tensor: Tensor) -> Expression:
         """
         Get fiber for iteration (may involve projection)
         """
-        trank = tensor.peek()
-        if trank is None:
-            raise ValueError(
-                "Cannot iterate over payload " +
-                tensor.fiber_name())
+        trank = tensor.peek_clean()

         # If this fiber is already over the correct rank, we can iterate over
         # it directly
-        rank = rank.lower()
         if trank == rank:
             return EVar(tensor.fiber_name())

         # Otherwise, we need to project
         partitioning = self.program.get_partitioning()
-        root, suffix = partitioning.split_rank_name(rank.upper())
+        root, suffix = partitioning.split_rank_name(rank)
         root = root.lower()
-        troot = partitioning.get_root_name(trank.upper()).lower()
+        troot = partitioning.get_root_name(trank).lower()

         # If we are going to project, get the iteration rank in terms of the
         # tensor rank
@@ -384,19 +372,20 @@
         # If not, we do not need to translate the halo
         else:
             sexpr = CoordAccess.isolate_rank(sexpr, troot)
-            sexpr = sexpr.subs(troot, trank)
+            sexpr = sexpr.subs(troot, trank.lower())

-        lambda_ = ELambda([trank], CoordAccess.build_expr(sexpr))
+        lambda_ = ELambda([trank.lower()], CoordAccess.build_expr(sexpr))
         args = [AParam("trans_fn", lambda_)]

         # Build the interval if we need to make sure that the halo does not
         # add extra computation
         if suffix == "":
-            interval = ETuple([EInt(0), EVar(rank.upper())])
+            interval = ETuple([EInt(0), EVar(rank)])
             args.append(AParam("interval", interval))

         elif suffix == "0":
-            interval = ETuple([EVar(rank + "_start"), EVar(rank + "_end")])
+            interval = ETuple([EVar(rank.lower() + "_start"),
+                               EVar(rank.lower() + "_end")])
             args.append(AParam("interval", interval))

         project = EMethod(EVar(tensor.fiber_name()), "project", args)
@@ -416,20 +405,53 @@
     def __make_input_iter_expr(
             self,
             rank: str,
-            tensors: List[Tensor]) -> Expression:
+            tensors: List[List[Tensor]]) -> Expression:
         """
         Make the iteration expression for the inputs
         """
-        # Separate the tensors into terms
-        terms = self.__separate_terms(tensors)
+        leader_follower = False
+        leader = ""
+        if self.metrics is not None:
+            intersector = self.metrics.get_coiter(rank)
+
+            # If this uses leader-follower intersection
+            if isinstance(intersector, LeaderFollowerComponent):
+                leader_follower = True
+
+                einsum = self.program.get_equation().get_output().root_name()
+                for binding in intersector.get_bindings()[einsum]:
+                    if binding["rank"] == rank:
+                        leader = binding["leader"]
+                        break

         # Combine terms with intersections
         intersections = []
-        for term in terms:
-            expr = self.__iter_fiber(rank, term[-1])
-            for factor in reversed(term[:-1]):
-                fiber = self.__iter_fiber(rank, factor)
-                expr = Equation.__add_operator(fiber, OAnd(), expr)
+        for term in tensors:
+            expr: Expression
+            if leader_follower:
+                # If there is more than one term, there is ambiguity we are
+                # not capturing
+                assert len(tensors) == 1
+
+                leader_tensor = self.program.get_equation().get_tensor(leader)
+                fiber_args = [self.__iter_fiber(rank, leader_tensor)]
+
+                for factor in term:
+                    if factor.root_name() == leader:
+                        continue
+
+                    # TODO: Only uncompressed fibers can follow
+                    fiber_args.append(self.__iter_fiber(rank, factor))
+
+                args: List[Argument] = [AJust(fiber) for fiber in fiber_args]
+                args.append(AParam("style", EString("leader-follower")))
+                expr = EMethod(EVar("Fiber"), "intersection", args)
+
+            else:
+                expr = self.__iter_fiber(rank, term[-1])
+                for factor in reversed(term[:-1]):
+                    fiber = self.__iter_fiber(rank, factor)
+                    expr = Equation.__add_operator(fiber, OAnd(), expr)
             intersections.append(expr)

         # Combine intersections with a union
@@ -438,20 +460,3 @@ def __make_input_iter_expr(
             expr = Equation.__add_operator(intersection, OOr(), expr)

         return expr
-
-    def __separate_terms(self, tensors: List[Tensor]) -> List[List[Tensor]]:
-        """
-        Separate a list of tensors according to which term they belong to
-        """
-        # Separate the tensors
-        terms: List[List[Tensor]] = [[]
-                                     for _ in self.program.get_equation().get_term_tensors()]
-        for tensor in tensors:
-            if tensor.get_is_output():
-                continue
-
-            terms[self.program.get_equation().get_factor_order()[
-                tensor.root_name()][0]].append(tensor)
-
-        # Remove any empty lists
-        return [term for term in terms if term]
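# --- Editor's note (hedged sketch, not part of the patch) --------------------
# When the metrics model reports a LeaderFollowerComponent as the co-iterator
# for the current rank, the plain two-finger intersection (a_k & b_k) is
# replaced by an explicit call whose first fiber argument is the leader. For
# a hypothetical binding with leader A at rank K, the emitted HiFiber is
# roughly:
#
#     Fiber.intersection(a_k, b_k, style="leader-follower")
#
# so the follower fibers are only probed at the leader's coordinates.
# ------------------------------------------------------------------------------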
""" self.program = program + self.metrics = metrics self.canvas = Canvas(program) def make_body(self) -> Statement: @@ -49,7 +51,7 @@ def make_body(self) -> Statement: body = SBlock([]) spacetime = self.program.get_spacetime() - if spacetime is not None: + if spacetime is not None and self.metrics is None: # If we are using slip, increment the timestamp if spacetime.get_slip(): @@ -77,7 +79,7 @@ def make_footer(self) -> Statement: Create the loop footer for graphics """ spacetime = self.program.get_spacetime() - if spacetime is not None: + if spacetime is not None and self.metrics is None: return self.canvas.display_canvas() else: return SBlock([]) @@ -90,7 +92,7 @@ def make_header(self) -> Statement: # If displayable, add the graphics information spacetime = self.program.get_spacetime() - if spacetime is not None: + if spacetime is not None and self.metrics is None: header.add(self.canvas.create_canvas()) # Create the timestamp dictionary if we want slip diff --git a/teaal/trans/header.py b/teaal/trans/header.py index 28676f5..70007f0 100644 --- a/teaal/trans/header.py +++ b/teaal/trans/header.py @@ -25,9 +25,10 @@ """ from sympy import Symbol -from typing import Iterable, Set +from typing import Iterable, Optional, Set from teaal.hifiber import * +from teaal.ir.metrics import Metrics from teaal.ir.program import Program from teaal.ir.tensor import Tensor from teaal.parse.utils import ParseUtils @@ -41,15 +42,22 @@ class Header: Generate the HiFiber code for loop headers """ - def __init__(self, program: Program, partitioner: Partitioner) -> None: + def __init__( + self, + program: Program, + metrics: Optional[Metrics], + partitioner: Partitioner) -> None: """ Construct a new Header object """ self.program = program + self.metrics = metrics self.partitioner = partitioner - @staticmethod - def make_get_payload(tensor: Tensor, ranks: Iterable[str]) -> Statement: + def make_get_payload( + self, + tensor: Tensor, + ranks: Iterable[str]) -> Statement: """ Make a call to getPayload() or getPayloadRef() """ @@ -58,8 +66,15 @@ def make_get_payload(tensor: Tensor, ranks: Iterable[str]) -> Statement: else: func = "getPayload" - rank_arg = [AJust(EVar(rank.lower())) for rank in ranks] - call = EMethod(EVar(tensor.fiber_name()), func, rank_arg) + args: List[Argument] = [AJust(EVar(rank.lower())) for rank in ranks] + if self.metrics: + args.append( + AParam( + "trace", + EString( + "get_payload_" + + tensor.root_name()))) + call = EMethod(EVar(tensor.fiber_name()), func, args) for _ in ranks: tensor.pop() @@ -88,7 +103,11 @@ def make_output(self) -> Statement: constr = EFunc("Tensor", args) return SAssign(AVar(tensor.tensor_name()), constr) - def make_swizzle(self, tensor: Tensor, type_: str) -> Statement: + def make_swizzle( + self, + tensor: Tensor, + ranks: List[str], + type_: str) -> Statement: """ Make call to swizzleRanks() (as necessary) """ @@ -99,6 +118,8 @@ def make_swizzle(self, tensor: Tensor, type_: str) -> Statement: self.program.get_loop_order().apply(tensor) elif type_ == "partitioning": self.program.apply_partition_swizzling(tensor) + elif type_ == "metrics": + tensor.swizzle(ranks) else: raise ValueError("Unknown swizzling reason: " + type_) @@ -147,7 +168,7 @@ def __make_shape(self, args: List[Argument]) -> List[Argument]: if loop_order.is_ready( part.get_final_rank_id( - output, ranks[i]), pos): + output.get_init_ranks(), ranks[i]), pos): final_pos[ranks[i]] = pos i += 1 @@ -167,7 +188,10 @@ def __make_shape(self, args: List[Argument]) -> List[Argument]: avail[i] = True # If at 
@@ -88,7 +103,11 @@ def make_output(self) -> Statement:
         constr = EFunc("Tensor", args)
         return SAssign(AVar(tensor.tensor_name()), constr)

-    def make_swizzle(self, tensor: Tensor, type_: str) -> Statement:
+    def make_swizzle(
+            self,
+            tensor: Tensor,
+            ranks: List[str],
+            type_: str) -> Statement:
         """
         Make call to swizzleRanks() (as necessary)
         """
@@ -99,6 +118,8 @@
             self.program.get_loop_order().apply(tensor)
         elif type_ == "partitioning":
             self.program.apply_partition_swizzling(tensor)
+        elif type_ == "metrics":
+            tensor.swizzle(ranks)
         else:
             raise ValueError("Unknown swizzling reason: " + type_)

@@ -147,7 +168,7 @@ def __make_shape(self, args: List[Argument]) -> List[Argument]:
             if loop_order.is_ready(
                 part.get_final_rank_id(
-                    output, ranks[i]), pos):
+                    output.get_init_ranks(), ranks[i]), pos):
                 final_pos[ranks[i]] = pos

             i += 1
@@ -167,7 +188,10 @@
                 avail[i] = True

         # If at least one rank is not available, we need an explicit shape
-        if not all(avail):
-            args.append(TransUtils.build_shape(output))
+        if not all(avail) or self.metrics is not None:
+            # TODO: Test that this removes the partitioning
+            unpart_ranks = [part.get_root_name(
+                rank) for rank in output.get_ranks()]
+            args.append(TransUtils.build_shape(unpart_ranks))

         return args
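# --- Editor's note (hedged sketch, not part of the patch) --------------------
# When metrics are collected, the output tensor is always constructed with an
# explicit shape, expressed in the unpartitioned root rank names so the shape
# stays meaningful however the ranks were split (e.g. M1 and M0 both map back
# to M via get_root_name()). For a hypothetical output Z: [M, N], the emitted
# constructor is roughly:
#
#     Z_MN = Tensor(rank_ids=["M", "N"], shape=[M, N])
#
# rather than leaving the shape to be inferred from the fibers.
# ------------------------------------------------------------------------------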
diff --git a/teaal/trans/hifiber.py b/teaal/trans/hifiber.py
index 2dcc5d1..025e1bb 100644
--- a/teaal/trans/hifiber.py
+++ b/teaal/trans/hifiber.py
@@ -29,6 +29,7 @@
 from teaal.hifiber import *
 from teaal.ir.flow_graph import FlowGraph
 from teaal.ir.flow_nodes import *
+from teaal.ir.fusion import Fusion
 from teaal.ir.hardware import Hardware
 from teaal.ir.iter_graph import IterationGraph
 from teaal.ir.metrics import Metrics
@@ -64,9 +65,10 @@ def __init__(
         self.hardware: Optional[Hardware] = None
         self.format = format_
         if arch and bindings and arch.get_spec():
-            self.hardware = Hardware(arch, bindings)
+            self.hardware = Hardware(arch, bindings, self.program)
+            self.fusion = Fusion(self.hardware)

-        self.trans_utils = TransUtils()
+        self.trans_utils = TransUtils(self.program)

         self.hifiber = SBlock([])
         for i in range(len(einsum.get_expressions())):
@@ -83,30 +85,28 @@ def __translate(self, i: int) -> Statement:
         self.metrics: Optional[Metrics] = None
         if self.hardware and self.format:
             self.metrics = Metrics(self.program, self.hardware, self.format)
+            self.fusion.add_einsum(self.program)

         # Create the flow graph and get the relevant nodes
         flow_graph = FlowGraph(self.program, self.metrics, ["hoist"])
         nodes = flow_graph.get_sorted()

         # Create all relevant translator objects
-        self.graphics = Graphics(self.program)
+        self.graphics = Graphics(self.program, self.metrics)
         self.partitioner = Partitioner(self.program, self.trans_utils)
-        self.header = Header(self.program, self.partitioner)
+        self.header = Header(self.program, self.metrics, self.partitioner)
         self.graph = IterationGraph(self.program)
-        self.eqn = Equation(self.program)
+        self.eqn = Equation(self.program, self.metrics)
         if self.metrics:
-            self.collector = Collector(self.program, self.metrics)
+            self.collector = Collector(self.program, self.metrics, self.fusion)

-        stmt = self.__trans_nodes(nodes, 0)[1]
+        stmt = self.__trans_nodes(nodes)[1]

         self.program.reset()
         return stmt

-    def __trans_nodes(self,
-                      nodes: List[Node],
-                      depth: int) -> Tuple[int,
-                                           Statement]:
+    def __trans_nodes(self, nodes: List[Node]) -> Tuple[int, Statement]:
         """
         Recursive function to generate the actual HiFiber program
         """
@@ -115,10 +115,33 @@
         i = 0
         while i < len(nodes):
             node = nodes[i]
-            if isinstance(node, FromFiberNode):
+
+            if isinstance(node, EagerInputNode):
+                code.add(
+                    self.eqn.make_eager_inputs(
+                        node.get_rank(),
+                        node.get_tensors()))
+
+            elif isinstance(node, EndLoopNode):
+                return i + 1, code
+
+            elif isinstance(node, FromFiberNode):
                 tensor = self.program.get_equation().get_tensor(node.get_tensor())
                 code.add(Header.make_tensor_from_fiber(tensor))

+            elif isinstance(node, GetPayloadNode):
+                tensor = self.program.get_equation().get_tensor(node.get_tensor())
+                code.add(
+                    self.header.make_get_payload(
+                        tensor, node.get_ranks()))
+
+            elif isinstance(node, GetRootNode):
+                tensor = self.program.get_equation().get_tensor(node.get_tensor())
+                code.add(Header.make_get_root(tensor))
+
+            elif isinstance(node, IntervalNode):
+                code.add(self.eqn.make_interval(node.get_rank()))
+
             elif isinstance(node, LoopNode):
                 # Generate the for loop
                 rank, tensors = self.graph.peek_concord()
@@ -127,26 +150,45 @@ def __trans_nodes(
                 payload = self.eqn.make_payload(cast(str, rank), tensors)

                 # Recurse for the for loop body
-                j, body = self.__trans_nodes(nodes[(i + 1):], depth + 1)
+                j, body = self.__trans_nodes(nodes[(i + 1):])
                 code.add(SFor(payload, expr, body))
                 i += j

+            elif isinstance(node, MetricsNode):
+                if node.get_type() == "Body":
+                    code.add(self.collector.make_body())
+
+                elif node.get_type() == "Dump":
+                    code.add(self.collector.dump())
+
+                elif node.get_type() == "End":
+                    code.add(self.collector.end())
+
+                elif node.get_type() == "Start":
+                    code.add(self.collector.start())
+
+                else:
+                    raise ValueError(
+                        "Unknown node: " +
+                        repr(node))  # pragma: no cover
+
+            elif isinstance(node, MetricsFooterNode):
+                code.add(self.collector.make_loop_footer(node.get_rank()))
+
+            elif isinstance(node, MetricsHeaderNode):
+                code.add(self.collector.make_loop_header(node.get_rank()))
+
             elif isinstance(node, OtherNode):
                 if node.get_type() == "Body":
                     code.add(self.eqn.make_update())
                     code.add(self.graphics.make_body())

                 elif node.get_type() == "Footer":
-                    if depth == 0:
-                        code.add(
-                            Footer.make_footer(
-                                self.program,
-                                self.graphics,
-                                self.partitioner))
-
-                    else:
-                        # Pop back up a level and retry this node
-                        return i, code
+                    code.add(
+                        Footer.make_footer(
+                            self.program,
+                            self.graphics,
+                            self.partitioner))

                 elif node.get_type() == "Graphics":
                     code.add(self.graphics.make_header())
@@ -168,49 +210,11 @@

             elif isinstance(node, SwizzleNode):
                 tensor = self.program.get_equation().get_tensor(node.get_tensor())
-                code.add(self.header.make_swizzle(tensor, node.get_type()))
-
-            elif isinstance(node, GetRootNode):
-                tensor = self.program.get_equation().get_tensor(node.get_tensor())
-                code.add(Header.make_get_root(tensor))
-
-            elif isinstance(node, EagerInputNode):
-                code.add(
-                    self.eqn.make_eager_inputs(
-                        node.get_rank(),
-                        node.get_tensors()))
-
-            elif isinstance(node, IntervalNode):
-                code.add(self.eqn.make_interval(node.get_rank()))
-
-            elif isinstance(node, MetricsNode):
-                if node.get_type() == "Dump":
-                    code.add(self.collector.dump())
-
-                elif node.get_type() == "End":
-                    if depth == 0:
-                        code.add(self.collector.end())
-                    else:
-                        # Pop back up a level and retry this node
-                        return i, code
-
-                elif node.get_type() == "Start":
-                    code.add(self.collector.start())
-
-                else:
-                    raise ValueError(
-                        "Unknown node: " +
-                        repr(node))  # pragma: no cover
-
-            elif isinstance(node, CollectingNode):
-                code.add(
-                    self.collector.set_collecting(
-                        node.get_tensor(),
-                        node.get_rank()))
-
-            elif isinstance(node, GetPayloadNode):
-                tensor = self.program.get_equation().get_tensor(node.get_tensor())
-                code.add(Header.make_get_payload(tensor, node.get_ranks()))
+                code.add(
+                    self.header.make_swizzle(
+                        tensor,
+                        node.get_ranks(),
+                        node.get_type()))

             else:
                 raise ValueError(
diff --git a/teaal/trans/partitioner.py b/teaal/trans/partitioner.py
index a00c5c6..88aba61 100644
--- a/teaal/trans/partitioner.py
+++ b/teaal/trans/partitioner.py
@@ -396,9 +396,8 @@ def __split_follower(
         # Make sure there is no translation needed between the leader and
         # follower tensors' ranks
         leader_tensor = self.program.get_equation().get_tensor(leader)
-        leader_rank = leader_tensor.peek()
-        assert leader_rank is not None
-        lroot = self.program.get_partitioning().get_root_name(leader_rank.upper())
+        leader_rank = leader_tensor.peek_clean()
+        lroot = self.program.get_partitioning().get_root_name(leader_rank)
         root = self.program.get_partitioning().get_root_name(rank)
         if root != lroot:
             raise ValueError(
diff --git a/teaal/trans/utils.py
b/teaal/trans/utils.py index 3a81e63..ea4abee 100644 --- a/teaal/trans/utils.py +++ b/teaal/trans/utils.py @@ -29,6 +29,7 @@ from typing import Any from teaal.hifiber import * +from teaal.ir.program import Program from teaal.ir.tensor import Tensor @@ -37,8 +38,9 @@ class TransUtils: Different utilities for generating HiFiber programs """ - def __init__(self) -> None: + def __init__(self, program: Program) -> None: self.count = -1 + self.program = program @staticmethod def build_expr(obj: Any) -> Expression: @@ -63,6 +65,10 @@ def build_expr(obj: Any) -> Expression: for key, val in obj.items()} return EDict(dict_) + elif isinstance(obj, tuple): + tuple_ = [TransUtils.build_expr(elem) for elem in obj] + return ETuple(tuple(tuple_)) + else: raise ValueError("Unable to translate " + str(obj) + " with type " + str(type(obj))) @@ -85,12 +91,12 @@ def build_set_rank_ids(tensor: Tensor, name: str) -> Statement: return SExpr(set_call) @staticmethod - def build_shape(tensor: Tensor) -> Argument: + def build_shape(ranks: Sequence[str]) -> Argument: """ Build the shape argument """ - ranks = [EVar(rank) for rank in tensor.get_ranks()] - return AParam("shape", EList(ranks)) + rank_vars = [EVar(rank) for rank in ranks] + return AParam("shape", EList(rank_vars)) @staticmethod def build_swizzle( diff --git a/tests/hifiber/test_op.py b/tests/hifiber/test_op.py index ad8006e..5cb21ca 100644 --- a/tests/hifiber/test_op.py +++ b/tests/hifiber/test_op.py @@ -51,6 +51,11 @@ def test_omul(): assert mul.gen() == "*" +def test_onotin(): + notin = ONotIn() + assert notin.gen() == "not in" + + def test_oor(): or_ = OOr() assert or_.gen() == "|" diff --git a/tests/integration/demo.yaml b/tests/integration/demo.yaml index 097b804..72e2543 100644 --- a/tests/integration/demo.yaml +++ b/tests/integration/demo.yaml @@ -1,24 +1,66 @@ einsum: declaration: - I: [B, C, H, W] - F: [C, M, R, S] - O: [B, M, P, Q] + SOB: [UA, UB] + T: [UA, UB, K] + I: [K] + OB: [] expressions: - - O[b, m, p, q] = I[b, c, p+r, q+s]*F[c, m, r, s] -mapping: - rank-order: - I: [B, C, H, W] - F: [M, C, R, S] - O: [B, M, P, Q] - partitioning: - O: - M: # filter partitioning - - uniform_shape(32) # 2 filters happening simultaneously per array - - uniform_shape(16) # 16 filters happening in interleaving fashion per PE - B: - - uniform_shape(4) - C: - - uniform_shape(6) # 2 channels running simultaneously per PE - - uniform_shape(3) # 3 channels running per PE - loop-order: - O: [B1, C2, M2, B0, M1, P, C1, R, Q, S, M0, C0] + - SOB[ua, ub] = T[ua, ub, k] * I[k] + - OB[] = SOB[ua, ub] +# einsum: +# declaration: +# A: [S] +# Z: [T] +# expressions: +# - Z[t] = A[2 * t] +# format: +# A: +# default: +# rank-order: [S] +# S: +# format: C +# pbits: 32 +# Z: +# default: +# rank-order: [T] +# T: +# format: C +# pbits: 32 +# architecture: +# accel: +# - name: System +# local: +# - name: MainMemory +# class: DRAM +# subtree: +# - name: Chip +# local: +# - name: LLB +# class: Cache +# attributes: +# width: 32 +# depth: 1024 +# bindings: +# Z: +# - config: accel +# prefix: tmp/demo +# - component: MainMemory +# bindings: +# - tensor: A +# rank: S +# type: payload +# format: default +# - tensor: Z +# rank: T +# type: payload +# format: default +# - component: LLB +# bindings: +# - tensor: A +# rank: S +# type: payload +# format: default +# - tensor: Z +# rank: T +# type: payload +# format: default diff --git a/tests/integration/extensor-energy.yaml b/tests/integration/extensor-energy.yaml new file mode 100644 index 0000000..b37b80f --- /dev/null +++ 
b/tests/integration/extensor-energy.yaml @@ -0,0 +1,320 @@ +einsum: + declaration: + A: [K, M] + B: [K, N] + Z: [M, N] + expressions: + - Z[m,n] = A[k,m] * B[k,n] +mapping: + rank-order: + A: [K, M] + B: [K, N] + Z: [M, N] + partitioning: + Z: + K: [uniform_shape(K1), uniform_shape(K0)] + M: [uniform_shape(M1), uniform_shape(M0)] + N: [uniform_shape(N1), uniform_shape(N0)] + loop-order: + Z: [N2, K2, M2, M1, N1, K1, M0, N0, K0] + spacetime: + Z: + space: [K1] + time: [N2, K2, M2, M1, N1, M0, N0, K0] +format: + A: + default: + rank-order: [K2, M2, M1, K1, M0, K0] + # These formats do not matter because there are no cbits or pbits + K2: + format: C + M2: + format: C + M1: + format: C + # M1 and K1 metadata are stored as a tuple (M1, K1) + # of size (32 bits, 32 bits) + K1: + format: C + cbits: 64 + # Microtiles are in CSF-like format + M0: + format: C + cbits: 32 + pbits: 32 + K0: + format: C + cbits: 32 + pbits: 64 + + B: + default: + rank-order: [N2, K2, N1, K1, N0, K0] + # These formats do not matter because there are no cbits or pbits + N2: + format: C + K2: + format: C + # B is CSF the rest of the way down + N1: + format: C + cbits: 32 + pbits: 32 + K1: + format: C + cbits: 32 + pbits: 32 + N0: + format: C + cbits: 32 + pbits: 32 + K0: + format: C + cbits: 32 + pbits: 64 + Z: + default: + rank-order: [N2, M2, M1, N1, M0, N0] + # These ranks do not matter because there are no cbits or pbits + N2: + format: U + M2: + format: U + # There is never any traffic counted for the M1 and N1 ranks + M1: + format: U + N1: + format: U + M0: + format: U + # The bottom ranks are in a COO-like format (M0, N0) of size + # (32 bits, 32 bits) + N0: + format: C + cbits: 64 + pbits: 64 +architecture: + Accelerator: + - name: System + attributes: + clock_frequency: 1000000000 # 1 GHz = 1000000000 Hz + local: + - name: MainMemory + class: DRAM + attributes: + bandwidth: 586314575512 # BW is 68.256 GB/s * 2^30 B/GB * 8 bits/B + - name: K2Intersect + class: Intersector + attributes: + type: skip-ahead + - name: TopSequencer + class: Sequencer + attributes: + num_ranks: 3 + subtree: + - name: Chip + local: + - name: LLB + class: Buffet + attributes: + width: 64 + depth: 3932160 # 30 MB / 8B/line + bandwidth: 9223372036854775807 # Max int64 (i.e., inf) + - name: K1Intersect + class: Intersector + attributes: + type: skip-ahead + - name: MiddleSequencer + class: Sequencer + attributes: + num_ranks: 3 + subtree: + - name: PE[0..127] # 128 PEs + local: + - name: PEB + class: Buffet + attributes: + width: 64 + depth: 8192 # 64kB / 8B/line + - name: K0Intersection + class: Intersector + attributes: + type: skip-ahead + - name: BottomSequencer + class: Sequencer + attributes: + num_ranks: 3 + - name: FPMul + class: Compute + attributes: + type: mul + - name: FPAdd + class: Compute + attributes: + type: add +bindings: + Z: + - config: Accelerator + prefix: tmp/extensor_energy + - component: MainMemory + bindings: + - tensor: A + rank: K1 + type: coord + format: default + - tensor: A + rank: M0 + type: coord + format: default + - tensor: A + rank: M0 + type: payload + format: default + - tensor: A + rank: K0 + type: coord + format: default + - tensor: A + rank: K0 + type: payload + format: default + - tensor: B + rank: N1 + type: coord + format: default + - tensor: B + rank: N1 + type: payload + format: default + - tensor: B + rank: K1 + type: coord + format: default + - tensor: B + rank: K1 + type: payload + format: default + - tensor: B + rank: N0 + type: coord + format: default + - tensor: B + rank: N0 + type: payload 
+ format: default + - tensor: B + rank: K0 + type: coord + format: default + - tensor: B + rank: K0 + type: payload + format: default + - tensor: Z + rank: N0 + type: coord + format: default + - tensor: Z + rank: N0 + type: payload + format: default + - component: K2Intersect + bindings: + - rank: K2 + - component: TopSequencer + bindings: + - rank: N2 + - rank: K2 + - rank: M2 + - component: LLB + bindings: + - tensor: A + rank: K1 + type: coord + evict-on: M2 + format: default + style: lazy + - tensor: A + rank: M0 + type: coord + evict-on: M2 + format: default + style: eager + - tensor: B + rank: N1 + type: coord + evict-on: K2 + format: default + style: lazy + - tensor: B + rank: N1 + type: payload + evict-on: K2 + format: default + style: lazy + - tensor: B + rank: K1 + type: coord + evict-on: K2 + format: default + style: lazy + - tensor: B + rank: K1 + type: payload + evict-on: K2 + format: default + style: lazy + - tensor: B + rank: N0 + type: coord + evict-on: K2 + format: default + style: eager + - tensor: Z + rank: M0 + type: coord + evict-on: M2 + format: default + style: eager + - component: K1Intersect + bindings: + - rank: K1 + - component: MiddleSequencer + bindings: + - rank: M1 + - rank: N1 + - rank: K1 + - component: PEB + bindings: + - tensor: A + rank: M0 + type: coord + evict-on: K1 + format: default + style: eager + - tensor: B + rank: N0 + type: coord + evict-on: K1 + format: default + style: eager + - tensor: Z + rank: M0 + type: coord + evict-on: N1 + format: default + style: eager + - component: K0Intersection + bindings: + - rank: K0 + - component: BottomSequencer + bindings: + - rank: M0 + - rank: N0 + - rank: K0 + - component: FPMul + bindings: + - op: mul + - component: FPAdd + bindings: + - op: add diff --git a/tests/integration/extensor.yaml b/tests/integration/extensor.yaml new file mode 100644 index 0000000..4a4728f --- /dev/null +++ b/tests/integration/extensor.yaml @@ -0,0 +1,272 @@ +einsum: + declaration: + A: [K, M] + B: [K, N] + Z: [M, N] + expressions: + - Z[m,n] = A[k,m] * B[k,n] +mapping: + rank-order: + A: [K, M] + B: [K, N] + Z: [M, N] + partitioning: + Z: + K: [uniform_shape(K1), uniform_shape(K0)] + M: [uniform_shape(M1), uniform_shape(M0)] + N: [uniform_shape(N1), uniform_shape(N0)] + loop-order: + Z: [N2, K2, M2, M1, N1, K1, M0, N0, K0] + spacetime: + Z: + space: [K1] + time: [N2, K2, M2, M1, N1, M0, N0, K0] +format: + A: + default: + rank-order: [K2, M2, M1, K1, M0, K0] + # These formats do not matter because there are no cbits or pbits + K2: + format: C + M2: + format: C + M1: + format: C + # M1 and K1 metadata are stored as a tuple (M1, K1) + # of size (32 bits, 32 bits) + K1: + format: C + cbits: 64 + # Microtiles are in CSF-like format + M0: + format: C + cbits: 32 + pbits: 32 + K0: + format: C + cbits: 32 + pbits: 64 + + B: + default: + rank-order: [N2, K2, N1, K1, N0, K0] + # These formats do not matter because there are no cbits or pbits + N2: + format: C + K2: + format: C + # B is CSF the rest of the way down + N1: + format: C + cbits: 32 + pbits: 32 + K1: + format: C + cbits: 32 + pbits: 32 + N0: + format: C + cbits: 32 + pbits: 32 + K0: + format: C + cbits: 32 + pbits: 64 + Z: + default: + rank-order: [N2, M2, M1, N1, M0, N0] + # These ranks do not matter because there are no cbits or pbits + N2: + format: U + M2: + format: U + # There is never any traffic counted for the M1 and N1 ranks + M1: + format: U + N1: + format: U + M0: + format: U + # The bottom ranks are in a COO-like format (M0, N0) of size + # (32 bits, 32 bits) + N0: 
+ format: C + cbits: 64 + pbits: 64 +architecture: + Accelerator: + - name: System + attributes: + clock_frequency: 1000000000 # 1 GHz = 1000000000 Hz + local: + - name: MainMemory + class: DRAM + attributes: + bandwidth: 586314575512 # BW is 68.256 GB/s * 2^30 B/GB * 8 bits/B + - name: K2Intersect + class: Intersector + attributes: + type: skip-ahead + subtree: + - name: Chip + local: + - name: LLB + class: Buffet + attributes: + width: 64 + depth: 3932160 # 30 MB / 8B/line + - name: K1Intersect + class: Intersector + attributes: + type: skip-ahead + subtree: + - name: PE[0..127] # 128 PEs + local: + - name: PEB + class: Buffet + attributes: + width: 64 + depth: 8192 # 64kB / 8B/line + - name: K0Intersection + class: Intersector + attributes: + type: skip-ahead + - name: FPMul + class: Compute + attributes: + type: mul + - name: FPAdd + class: Compute + attributes: + type: add +bindings: + Z: + - config: Accelerator + prefix: tmp/extensor + - component: MainMemory + bindings: + - tensor: A + rank: K1 + type: coord + format: default + - tensor: A + rank: M0 + type: coord + format: default + - tensor: A + rank: M0 + type: payload + format: default + - tensor: A + rank: K0 + type: coord + format: default + - tensor: A + rank: K0 + type: payload + format: default + - tensor: B + rank: N1 + type: coord + format: default + - tensor: B + rank: N1 + type: payload + format: default + - tensor: B + rank: K1 + type: coord + format: default + - tensor: B + rank: K1 + type: payload + format: default + - tensor: B + rank: N0 + type: coord + format: default + - tensor: B + rank: N0 + type: payload + format: default + - tensor: B + rank: K0 + type: coord + format: default + - tensor: B + rank: K0 + type: payload + format: default + - tensor: Z + rank: N0 + type: coord + format: default + - tensor: Z + rank: N0 + type: payload + format: default + - component: K2Intersect + bindings: + - rank: K2 + - component: LLB + bindings: + - tensor: A + rank: K1 + type: coord + evict-on: M2 + format: default + style: lazy + - tensor: A + rank: M0 + type: coord + evict-on: M2 + format: default + style: eager + - tensor: B + rank: N1 + type: coord + evict-on: K2 + format: default + style: lazy + - tensor: B + rank: N1 + type: payload + evict-on: K2 + format: default + style: lazy + - tensor: B + rank: K1 + type: coord + evict-on: K2 + format: default + style: lazy + - tensor: B + rank: K1 + type: payload + evict-on: K2 + format: default + style: lazy + - tensor: B + rank: N0 + type: coord + evict-on: K2 + format: default + style: eager + - tensor: Z + rank: M0 + type: coord + evict-on: M2 + format: default + style: eager + - component: K1Intersect + bindings: + - rank: K1 + - component: K0Intersection + bindings: + - rank: K0 + - component: FPMul + bindings: + - op: mul + - component: FPAdd + bindings: + - op: add diff --git a/tests/integration/gamma.yaml b/tests/integration/gamma.yaml index 4f80205..d534b4b 100644 --- a/tests/integration/gamma.yaml +++ b/tests/integration/gamma.yaml @@ -4,155 +4,238 @@ einsum: B: [K, N] T: [K, M, N] Z: [M, N] - expressions: - T[k,m,n] = take(A[k,m], B[k,n], 1) - - Z[m,n] = T[k,m,n] * A[k,m] - + - Z[m,n] = T[k,m,n]*A[k,m] mapping: rank-order: A: [M, K] B: [K, N] T: [M, K, N] Z: [M, N] - loop-order: T: [M, K, N] Z: [M, N, K] - + spacetime: + T: + space: [M] + time: [K, N] + Z: + space: [M] + time: [N, K] +format: + A: + default: + rank-order: [M, K] + M: + format: U + pbits: 32 + K: + format: C + cbits: 32 + pbits: 64 + B: + default: + rank-order: [K, N] + K: + format: U + pbits: 32 + 
N: + format: C + cbits: 32 + pbits: 64 + Z: + default: + rank-order: [M, N] + M: + format: U + pbits: 32 + N: + format: C + cbits: 32 + pbits: 64 architecture: - subtree: + Accelerator: - name: System attributes: - clock_frequency: 1000000000 - + clock_frequency: 1000000000 # 1 GHz = 1 * 10^9 Hz = 1000000000 Hz local: - name: MainMemory class: DRAM attributes: - datawidth: 8 - bandwidth: 128 - + bandwidth: 1099511627776 # 128 GB/s = 128 * 2^30 * 8 bits/s subtree: - name: Chip - local: - - name: FiberCache # 3MB FiberCache + - name: FiberCache class: Cache attributes: - width: 8 - depth: 3145728 - + width: 64 # Block size is not mentioned in the paper; minimum correct value + depth: 393216 # 3 MB / 8B = 393216 lines subtree: - name: PE[0..31] # 32 PEs - + local: + - name: Stage0RegFile + class: Buffet + attributes: + width: 64 + depth: inf # Not specified by Gamma + - name: Stage1RegFile + class: Buffet + attributes: + width: 64 + depth: inf # Not specified by Gamma subtree: - - name: Stage0 - + - name: Stage0[0..31] local: - - name: RegFile0 - class: Buffet - - - name: Intersection - class: LeaderFollower - - - name: Stage0to1 - + - name: Intersect + class: Intersector + attributes: + type: leader-follower + - name: Stage0to1[0..31] local: - name: HighRadixMerger class: Merger attributes: - radix: 64 - next_latency: 1 - - - name: Stage1 - + inputs: 64 + comparator_radix: 64 + outputs: 1 + order: fifo + reduce: False + - name: Stage1[0..31] local: - - name: RegFile1 - class: Buffet - - - name: MAC + - name: FPMul class: compute + attributes: + type: mul + - name: FPAdd + class: compute + attributes: + type: add bindings: -- name: MainMemory - bindings: - - tensor: A - rank: root - - tensor: B - rank: root - - tensor: Z - rank: root - -- name: FiberCache - bindings: - - tensor: B - rank: K - -- name: RegFile0 - bindings: - - tensor: A - rank: M - - tensor: B - rank: N - - tensor: T - rank: M - -- name: Intersection - bindings: - - einsum: T - rank: K - leader: A - -- name: HighRadixMerger - bindings: - # T[M, K, N] -> T[M, N, K] - - tensor: T - init_ranks: [M, K, N] - swap_depth: 1 - -- name: RegFile1 - bindings: - - tensor: A - rank: M - - tensor: T - rank: M - - tensor: Z - rank: N - -- name: MAC - bindings: - - einsum: Z - op: mul - - einsum: Z - op: add - -format: - A: - M: - format: U - rhbits: 32 - pbits: 32 - K: - format: C - cbits: 32 - pbits: 64 - - B: - K: - format: U - rhbits: 32 - pbits: 32 - N: - format: C - cbits: 32 - pbits: 64 - + T: + - config: Accelerator + prefix: tmp/gamma_T + - component: MainMemory + bindings: + - tensor: A + rank: M + type: payload + format: default + - tensor: A + rank: K + type: coord + format: default + - tensor: A + rank: K + type: payload + format: default + - tensor: B + rank: K + type: payload + format: default + - tensor: B + rank: N + type: coord + format: default + - tensor: B + rank: N + type: payload + format: default + - component: FiberCache + bindings: + - tensor: B + rank: K + type: payload + format: default + - tensor: B + rank: N + type: coord + format: default + - tensor: B + rank: N + type: payload + format: default + - component: Stage0RegFile + bindings: + - tensor: A + rank: M + type: payload + format: default + evict-on: root + - tensor: A + rank: K + type: coord + format: default + evict-on: M + - tensor: A + rank: K + type: payload + format: default + evict-on: M + - component: Intersect + bindings: + - rank: K + leader: A Z: - M: - format: U - rhbits: 32 - pbits: 32 - N: - format: C - cbits: 32 - pbits: 64 + - config: 
Accelerator + prefix: tmp/gamma_Z + - component: MainMemory + bindings: + - tensor: Z + rank: M + type: payload + format: default + - tensor: Z + rank: N + type: coord + format: default + - tensor: Z + rank: N + type: payload + format: default + - component: Stage0RegFile + bindings: + - tensor: A + rank: M + type: payload + format: default + evict-on: root + - tensor: A + rank: K + type: coord + format: default + evict-on: M + - tensor: A + rank: K + type: payload + format: default + evict-on: M + - component: Stage1RegFile + bindings: + - tensor: Z + rank: M + type: payload + format: default + evict-on: root + - tensor: Z + rank: N + type: coord + format: default + evict-on: M + - tensor: Z + rank: N + type: payload + format: default + evict-on: M + - component: HighRadixMerger + bindings: + - tensor: T + init-ranks: [M, K, N] + final-ranks: [M, N, K] + - component: FPMul + bindings: + - op: mul + - component: FPAdd + bindings: + - op: add diff --git a/tests/integration/outerspace.yaml b/tests/integration/outerspace.yaml index 69b8a08..bee355e 100644 --- a/tests/integration/outerspace.yaml +++ b/tests/integration/outerspace.yaml @@ -2,142 +2,328 @@ einsum: declaration: A: [K, M] B: [K, N] - T: [K, M, N] + T0: [K, M, N] + T1: [K, M, N] Z: [M, N] expressions: - - T[k, m, n] = A[k, m] * B[k, n] - - Z[m, n] = T[k, m, n] - + - T0[k, m, n] = A[k, m] * B[k, n] + - T1[k, m, n] = T0[k, m, n] + - Z[m, n] = T1[k, m, n] mapping: rank-order: A: [K, M] B: [K, N] - T: [M, K, N] + T0: [M, K, N] + T1: [M, K, N] Z: [M, N] loop-order: - T: [K, M, N] + T0: [K, M, N] + T1: [M, K, N] Z: [M, N, K] - + spacetime: + T0: + space: [M] + time: [K, N] + T1: + space: [M] + time: [K, N] + Z: + space: [M] + time: [N, K] +format: + A: + default: + rank-order: [K, M] + K: + format: U + pbits: 32 + M: + format: C + cbits: 32 + pbits: 64 + B: + default: + rank-order: [K, N] + K: + format: U + pbits: 32 + N: + format: C + cbits: 32 + pbits: 64 + T0: + LinkedLists: + rank-order: [M, K, N] + M: + format: U + pbits: 32 + K: + format: C + pbits: 32 + N: + format: C + layout: interleaved + cbits: 32 + pbits: 64 + T1: + LinkedLists: + rank-order: [M, K, N] + M: + format: U + pbits: 32 + K: + format: C + pbits: 32 + N: + format: C + layout: interleaved + cbits: 32 + pbits: 64 + Z: + default: + rank-order: [M, N] + M: + format: U + pbits: 32 + N: + format: C + cbits: 32 + pbits: 64 architecture: - subtree: + MultiplyPhase: - name: System attributes: - clock_frequency: 1500000000 - + clock_frequency: 1500000000 # 1.5 GHz = 1.5 * 10^9 Hz = 1500000000 Hz local: - name: MainMemory class: DRAM attributes: - datawidth: 8 - bandwidth: 85.333 - + bandwidth: 1099511627776 # 128 GB/s = 128 * 2^30 * 8 bits/s subtree: - name: Chip subtree: - - name: PT[0..15] - + - name: PT[0..15] # 16 PTs local: - - name: CacheSPM + - name: Cache class: Cache attributes: - width: 512 - depth: inf - - subtree: - - name: PE[0..256] # 16 PEs per PT - local: - - name: RegFile - class: Buffet - - - name: Sort - class: Merger - attributes: - radix: inf - next_latency: N - - - name: Compute - class: compute - + width: 64 + depth: 2048 # 16kB / 8B = 2048 lines + subtree: + - name: PE[0..256] # 16 PEs per PT + local: + - name: RegFile + class: Buffet + attributes: + width: 64 # Block size is not mentioned in the paper; minimum correct value + depth: 128 # 1kB / 8B = 128 lines + - name: FPMul + class: compute + attributes: + type: mul + MergePhase: + - name: System + attributes: + clock_frequency: 1500000000 # 1.5 GHz = 1.5 * 10^9 Hz = 1500000000 Hz + local: + - name: 
MainMemory + class: DRAM + attributes: + bandwidth: 1099511627776 # 128 GB/s = 128 * 2^30 * 8 bits/s + subtree: + - name: Chip + subtree: + - name: PT[0..15] # 16 PTs + subtree: + - name: PE[0..128] + local: + - name: SPM + class: Buffet + attributes: + width: 96 + depth: 171 # 2kB / 12B = 170.666 + - name: SortHW + class: Merger + attributes: + inputs: inf + comparator_radix: inf + outputs: 1 + order: fifo + reduce: False + - name: RegFile + class: Buffet + attributes: + width: 64 # Block size is not mentioned in the paper; minimum correct value + depth: 128 # 1kB / 8B = 16 lines + - name: FPAdd + class: compute + attributes: + type: add bindings: - - name: MainMemory + T0: + - config: MultiplyPhase + prefix: tmp/outerspace_T0 + - component: MainMemory bindings: - tensor: A - rank: root + rank: K + type: payload + format: default + - tensor: A + rank: M + type: coord + format: default + - tensor: A + rank: M + type: payload + format: default - tensor: B - rank: root - - tensor: T - rank: root - - tensor: Z - rank: root - - - name: CacheSPM + rank: K + type: payload + format: default + - tensor: B + rank: N + type: coord + format: default + - tensor: B + rank: N + type: payload + format: default + - component: Cache bindings: - tensor: B - ranks: root - - - name: RegFile + rank: N + type: coord + format: default + - tensor: B + rank: N + type: payload + format: default + - component: RegFile bindings: + - tensor: A + rank: K + type: payload + evict-on: root + format: default + - tensor: A + rank: M + type: coord + evict-on: K + format: default - tensor: A rank: M + type: payload + evict-on: K + format: default - tensor: B + rank: K + type: payload + evict-on: root + format: default + - component: FPMul + bindings: + - op: mul + T1: + - config: MergePhase + prefix: tmp/outerspace_T1 + - component: MainMemory + bindings: + - tensor: T0 + rank: M + type: payload + format: LinkedLists + - tensor: T0 + rank: K + type: payload + format: LinkedLists + - tensor: T0 + rank: N + type: elem + format: LinkedLists + - tensor: T1 + rank: M + type: payload + format: LinkedLists + - tensor: T1 + rank: K + type: payload + format: LinkedLists + - tensor: T1 + rank: N + type: elem + format: LinkedLists + - component: SPM + bindings: + - tensor: T0 + rank: M + type: payload + format: LinkedLists + evict-on: root + - tensor: T0 + rank: K + type: payload + format: LinkedLists + evict-on: M + - tensor: T0 + rank: N + type: elem + format: LinkedLists + evict-on: K + - tensor: T1 + rank: M + type: payload + format: LinkedLists + evict-on: root + - tensor: T1 + rank: K + type: payload + format: LinkedLists + evict-on: M + - tensor: T1 rank: N - - tensor: T - rank: root + type: elem + format: LinkedLists + evict-on: K + Z: + - config: MergePhase + prefix: tmp/outerspace_Z + - component: MainMemory + bindings: + - tensor: Z + rank: M + type: payload + format: default - tensor: Z rank: N - - - name: Sort + type: coord + format: default + - tensor: Z + rank: N + type: payload + format: default + - component: SortHW bindings: - - tensor: T - init_ranks: [M, K, N] - swap_depth: 1 - - - name: Compute + - tensor: T1 + init-ranks: [M, K, N] + final-ranks: [M, N, K] + - component: RegFile bindings: - - einsum: T - op: mul - - einsum: Z - op: add - -format: - A: - M: - format: U - rhbits: 32 - pbits: 32 - K: - format: C - cbits: 32 - pbits: 64 - - B: - K: - format: U - rhbits: 32 - pbits: 32 - N: - format: C - cbits: 32 - pbits: 64 - - T: - M: - format: U - pbits: 32 - K: - format: C - pbits: 32 - N: - format: C - cbits: 32 - 
pbits: 64 - - Z: - M: - format: U - rhbits: 32 - pbits: 32 - N: - format: C - cbits: 32 - pbits: 64 + - tensor: Z + rank: M + type: payload + format: default + evict-on: root + - tensor: Z + rank: N + type: coord + format: default + evict-on: M + - tensor: Z + rank: N + type: payload + format: default + evict-on: M + - component: FPAdd + bindings: + - op: add diff --git a/tests/integration/sigma.yaml b/tests/integration/sigma.yaml new file mode 100644 index 0000000..8456032 --- /dev/null +++ b/tests/integration/sigma.yaml @@ -0,0 +1,117 @@ +einsum: + declaration: + A: [K, M] + B: [K, N] + Z: [M, N] + expressions: + - Z[m, n] = A[k, m] * B[k, n] +mapping: + rank-order: + A: [K, M] + B: [K, N] + Z: [M, N] + partitioning: + Z: + K: + - uniform_shape(128) + (M, K0): + - flatten() + MK0: + - uniform_occupancy(A.16384) + loop-order: + Z: [K1, MK01, N, MK00] + spacetime: + Z: + space: [MK00] + time: [K1, MK01, N] +architecture: + Accelerator: + - name: System + attributes: + clock_frequency: 500000000 # 500 MHz = 500000000 Hz + local: + - name: MainMemory + class: DRAM + attributes: + bandwidth: 8796093022208 # 1024 GB/s * 2^30 B/GB * 8 bits/B + subtree: + - name: Chip + local: + - name: DataSRAMBanks + class: Buffet + attributes: + width: 32 # Not specified; minimum possible + depth: 8388608 # 32MB / 4B/line = 8388608 lines + bandwidth: 8246337208320 # 960 GB/s * 2^30 B/GB * 8 bits/B + subtree: + - name: FlexDPE[0..127] # 128 FlexDPEs + subtree: + - name: PE[0..16383] # 128 PEs per FlexDPE + local: + - name: RegFile + class: Buffet + attributes: + width: 4096 # Distribution network width: 128 * 32 bits + depth: 256 # 128 * 128 PEs * 32 bits * 2 inputs / width + - name: Multiplier + class: Compute + attributes: + type: mul +format: + A: + flattened: + rank-order: [K1, MK01, MK00] + K1: + format: U + MK01: + format: U + MK00: + # TODO: Support B format + format: C + pbits: 32 + B: + partitioned: + rank-order: [K1, N, K0] + K1: + format: U + N: + format: U + K0: + format: U + pbits: 32 + +bindings: + Z: + - config: Accelerator + prefix: tmp/sigma + - component: DataSRAMBanks + bindings: + - tensor: A + rank: MK00 + type: payload + evict-on: root + format: flattened + style: eager + - tensor: B + rank: K0 + type: payload + evict-on: root + format: partitioned + style: eager + - component: RegFile + bindings: + - tensor: A + rank: MK00 + format: flattened + type: payload + evict-on: MK01 + style: eager + - tensor: B + rank: K0 + format: partitioned + type: payload + evict-on: N + style: eager + - component: Multiplier + bindings: + - op: mul diff --git a/tests/integration/test_arch.yaml b/tests/integration/test_arch.yaml index 1eead4c..a29d1bb 100644 --- a/tests/integration/test_arch.yaml +++ b/tests/integration/test_arch.yaml @@ -1,22 +1,53 @@ - architecture: - subtree: - - name: System +architecture: + Config0: + - name: System + attributes: + clock_frequency: 1000000000 + + local: + - name: Memory + class: DRAM attributes: - clock_frequency: 1000000000 + datawidth: 8 + bandwidth: 128 + + subtree: + - name: PE[0..7] local: - - name: Memory - class: DRAM + - name: Registers + class: Buffet + + - name: MAC + class: compute attributes: - datawidth: 8 - bandwidth: 128 + type: mul - subtree: - - name: PE[0..7] + Config1: + - name: System + attributes: + clock_frequency: 1000000000 - local: - - name: Registers - class: Buffet + local: + - name: Memory + class: DRAM + attributes: + datawidth: 8 + bandwidth: 128 - - name: MAC - class: compute + subtree: + - name: PE[0..7] + + local: + - name: Registers + 
class: Buffet + + - name: MAC0 + class: compute + attributes: + type: mul + + - name: MAC1 + class: compute + attributes: + type: add diff --git a/tests/integration/test_bindings.yaml b/tests/integration/test_bindings.yaml index abb98c7..6e5a254 100644 --- a/tests/integration/test_bindings.yaml +++ b/tests/integration/test_bindings.yaml @@ -1,19 +1,34 @@ bindings: - - name: Memory + Z: + - config: Config0 + prefix: tmp/Z + - component: Memory bindings: - tensor: A - rank: root + format: A_default + rank: M + type: payload + - tensor: Z - rank: root + format: Z_default + rank: M + type: payload - - name: Registers + - component: Registers bindings: - tensor: A + format: A_default rank: M + type: payload + style: eager + evict-on: M + - tensor: Z + format: Z_default rank: M + type: payload + evict-on: root - - name: MAC + - component: MAC bindings: - - einsum: Z - op: add + - op: add diff --git a/tests/integration/test_integration.py b/tests/integration/test_integration.py index 2ae362f..6059872 100644 --- a/tests/integration/test_integration.py +++ b/tests/integration/test_integration.py @@ -41,4 +41,6 @@ def test_integration(): output = str(HiFiber(einsum, mapping)) hifiber = read_hifiber(filename + ".py") + if output != hifiber: + print(output) assert output == hifiber, test_name + " integration test failed!" \ No newline at end of file diff --git a/tests/ir/test_component.py b/tests/ir/test_component.py index 89eb55b..1ee82a9 100644 --- a/tests/ir/test_component.py +++ b/tests/ir/test_component.py @@ -1,95 +1,566 @@ +import pytest + from teaal.ir.component import * def test_component_get_name(): - component = Component("Test", {}, []) + component = Component("Test", 1, {}, {}) assert component.get_name() == "Test" +def test_component_get_num_instances(): + component = Component("Test", 5, {}, {}) + assert component.get_num_instances() == 5 + + def test_component_eq(): - component0 = Component("Test", {"attr0": 5}, []) - component1 = Component("Test", {"attr0": 5}, []) + component0 = Component("Test", 1, {"attr0": 5}, {}) + component1 = Component("Test", 1, {"attr0": 5}, {}) assert component0 == component1 assert component0 != "foo" +def test_component_hash(): + set_ = set() + set_.add(Component("foo", 1, {}, {"Z": [{"foo": "bar"}]})) + + assert Component("foo", 1, {}, {"Z": [{"foo": "bar"}]}) in set_ + assert "" not in set_ + + def test_component_repr(): - component = Component("Test", {"attrs0": 5}, []) - assert repr(component) == "(Component, Test, {'attrs0': 5}, {})" + component = Component("Test", 1, {"attrs0": 5}, {"Z": [{"foo": "bar"}]}) + assert repr(component) == "(Component, Test, 1, {'Z': [{'foo': 'bar'}]})" -def test_component_subclass_repr(): - bindings = [{"einsum": "Z", "op": "add"}, {"einsum": "Z", "op": "mul"}] - compute = ComputeComponent("MAC", {}, bindings) +def test_component_get_bindings(): + component = Component("Test", 1, {"attrs0": 5}, {"Z": [{"foo": "bar"}]}) - assert repr( - compute) == "(ComputeComponent, MAC, {}, {'Z': [{'op': 'add'}, {'op': 'mul'}]})" + assert component.get_bindings() == {"Z": [{"foo": "bar"}]} -def test_compute_component(): - bindings = [{"einsum": "Z", "op": "add"}, {"einsum": "Z", "op": "mul"}] - compute = ComputeComponent("MAC", {}, bindings) +def test_functional_component(): + bindings = {"Z": [{"op": "add"}], "T": [{"op": "mul"}]} + compute = FunctionalComponent("MAC", 1, {}, bindings) + + assert compute.get_bindings() == bindings + + assert repr(compute) in { + "(FunctionalComponent, MAC, 1, {'T': [{'op': 'mul'}], 'Z': [{'op': 
'add'}]})", + "(FunctionalComponent, MAC, 1, {'Z': [{'op': 'add'}], 'T': [{'op': 'mul'}]})"} + + +def test_memory_attr_errs(): + with pytest.raises(ValueError) as excinfo: + MemoryComponent("Mem", 1, {"bandwidth": "foo"}, {}) + assert str(excinfo.value) == "Bad bandwidth foo for Memory Mem" + + memory = MemoryComponent("Mem", 1, {}, {}) + with pytest.raises(ValueError) as excinfo: + memory.get_bandwidth() + assert str(excinfo.value) == "Bandwidth unspecified for component Mem" + + +def test_memory_binding_errs(): + binding = {"Z": [{"rank": "M", "type": "elem", "format": "default"}]} + with pytest.raises(ValueError) as excinfo: + MemoryComponent("Mem", 1, {"bandwidth": 256}, binding) + assert str( + excinfo.value) == "Tensor not specified for Einsum Z in binding to Mem" + + binding = {"Z": [{"tensor": "A", "type": "elem", "format": "default"}]} + with pytest.raises(ValueError) as excinfo: + MemoryComponent("Mem", 1, {"bandwidth": 256}, binding) + assert str( + excinfo.value) == "Rank not specified for tensor A in Einsum Z in binding to Mem" + + binding = {"Z": [{"tensor": "A", "rank": "M", "format": "default"}]} + with pytest.raises(ValueError) as excinfo: + MemoryComponent("Mem", 1, {"bandwidth": 256}, binding) + assert str( + excinfo.value) == "Type not specified for tensor A in Einsum Z in binding to Mem" - assert compute.get_bindings("Z") == [{"op": "add"}, {"op": "mul"}] - assert compute.get_bindings("T") == [] + binding = {"Z": [{"tensor": "A", "rank": "M", + "type": "foo", "format": "default"}]} + with pytest.raises(ValueError) as excinfo: + MemoryComponent("Mem", 1, {"bandwidth": 256}, binding) + assert str( + excinfo.value) in { + "Type foo for Mem on tensor A in Einsum Z not one of {'coord', 'elem', 'payload'}", + "Type foo for Mem on tensor A in Einsum Z not one of {'coord', 'payload', 'elem'}", + "Type foo for Mem on tensor A in Einsum Z not one of {'payload', 'coord', 'elem'}", + "Type foo for Mem on tensor A in Einsum Z not one of {'payload', 'elem', 'coord'}", + "Type foo for Mem on tensor A in Einsum Z not one of {'elem', 'coord', 'payload'}", + "Type foo for Mem on tensor A in Einsum Z not one of {'elem', 'payload', 'coord'}"} + + binding = {"Z": [{"tensor": "A", "rank": "M", "type": "elem"}]} + with pytest.raises(ValueError) as excinfo: + MemoryComponent("Mem", 1, {"bandwidth": 256}, binding) + assert str( + excinfo.value) == "Format not specified for tensor A in Einsum Z in binding to Mem" + + bindings = {"Z": [{"tensor": "A", "rank": "M", "type": "payload", "format": "default"}, + {"tensor": "A", "rank": "M", "type": "payload", "format": "default"}]} + memory = MemoryComponent("Memory", 1, {"bandwidth": 256}, bindings) + with pytest.raises(ValueError) as excinfo: + memory.get_binding("Z", "A", "M", "payload", "default") + assert str( + excinfo.value) == "Multiple bindings for [('einsum', 'Z'), ('tensor', 'A'), ('rank', 'M'), ('type', 'payload'), ('format', 'default')]" def test_memory_component(): - memory = MemoryComponent("Memory", {}, [{"tensor": "A", "rank": "M"}]) + bindings = {"Z": [{"tensor": "A", "rank": "M", + "type": "payload", "format": "default"}]} + memory = MemoryComponent("Memory", 1, {"bandwidth": 256}, bindings) + + assert memory.get_bandwidth() == 256 + + assert memory.get_binding( + "Z", + "A", + "M", + "payload", + "default") == bindings["Z"][0] + assert memory.get_binding("Z", "B", "M", "payload", "default") is None + assert memory.get_binding("T", "A", "M", "payload", "default") is None + + assert repr( + memory) == "(MemoryComponent, Memory, 1, 
{'Z': [{'tensor': 'A', 'rank': 'M', 'type': 'payload', 'format': 'default'}]}, 256)" + + +def test_buffer_attr_errs(): + buffer_ = BufferComponent("Buf", 1, {"width": 8}, {}) + with pytest.raises(ValueError) as excinfo: + buffer_.get_depth() + assert str(excinfo.value) == "Depth unspecified for component Buf" - assert memory.get_binding("A") == "M" - assert memory.get_binding("B") is None + with pytest.raises(ValueError) as excinfo: + BufferComponent("Buf", 1, {"depth": "foo", "width": 8}, {}) + assert str(excinfo.value) == "Bad depth foo for Buffer Buf" - assert repr(memory) == "(MemoryComponent, Memory, {}, {'A': 'M'})" + buffer_ = BufferComponent("Buf", 1, {"depth": 256}, {}) + with pytest.raises(ValueError) as excinfo: + buffer_.get_width() + assert str(excinfo.value) == "Width unspecified for component Buf" + + with pytest.raises(ValueError) as excinfo: + BufferComponent("Buf", 1, {"depth": 256, "width": "foo"}, {}) + assert str(excinfo.value) == "Bad width foo for Buffer Buf" + + +def test_buffer_component(): + attrs = {"width": 8, "depth": 3 * 2 ** 20} + buffer_ = BufferComponent("Buf", 1, attrs, {}) + + assert buffer_.get_width() == 8 + assert buffer_.get_depth() == 3 * 2 ** 20 + + assert repr(buffer_) == "(BufferComponent, Buf, 1, {}, None, 3145728, 8)" + + +def test_buffet_binding_errs(): + attrs = {"width": 8, "depth": 3 * 2 ** 20} + + bindings = {"Z": [{"tensor": "A", "rank": "M", + "type": "payload", "format": "default", "style": "foo"}]} + with pytest.raises(ValueError) as excinfo: + BuffetComponent("LLB", 1, attrs, bindings) + assert str( + excinfo.value) == "Evict-on not specified for tensor A in Einsum Z in binding to LLB" + + bindings = {"Z": [{"tensor": "A", + "rank": "M", + "type": "payload", + "format": "default", + "style": "foo", + "evict-on": "root"}]} + with pytest.raises(ValueError) as excinfo: + BuffetComponent("LLB", 1, attrs, bindings) + assert str( + excinfo.value) in { + "Style foo for LLB on tensor A in Einsum Z not one of {'eager', 'lazy'}", + "Style foo for LLB on tensor A in Einsum Z not one of {'lazy', 'eager'}"} def test_buffet_component(): - bindings = [{"tensor": "A", "rank": "M"}] - buffet = BuffetComponent("LLB", {}, bindings) + attrs = {"width": 8, "depth": 3 * 2 ** 20} + bindings = {"Z": [{"tensor": "A", + "rank": "M", + "type": "payload", + "format": "default", + "style": "eager", + "evict-on": "root"}]} + buffet = BuffetComponent("LLB", 1, attrs, bindings) + + assert buffet.get_binding( + "Z", + "A", + "M", + "payload", + "default") == bindings["Z"][0] + + bindings = {"Z": [{"tensor": "A", + "rank": "M", + "type": "payload", + "format": "default", + "evict-on": "root"}]} + buffet = BuffetComponent("LLB", 1, attrs, bindings) + + bindings_corr = {"tensor": "A", + "rank": "M", + "type": "payload", + "format": "default", + "style": "lazy", + "evict-on": "root"} + + assert buffet.get_binding( + "Z", + "A", + "M", + "payload", + "default") == bindings_corr + + +def test_buffet_component_expand_eager(): + attrs = {"width": 8, "depth": 3 * 2 ** 20} + bindings = {"Z": [{"tensor": "A", + "rank": "M", + "type": "coord", + "format": "default", + "style": "eager", + "evict-on": "N"}]} + buffet = BuffetComponent("LLB", 1, attrs, bindings) + + ranks = ["J", "M", "K", "O"] + types = [["elem"], ["coord", "payload"], ["coord", "payload"], ["elem"]] + buffet.expand_eager("Z", "A", "default", ranks, types) + + expanded_bindings = {"Z": [{"tensor": "A", + "rank": "M", + "type": "coord", + "format": "default", + "style": "eager", + "evict-on": "N", + "root": "M"}, + 
{"tensor": "A", + "rank": "M", + "type": "payload", + "format": "default", + "style": "eager", + "evict-on": "N", + "root": "M"}, + {"tensor": "A", + "rank": "K", + "type": "coord", + "format": "default", + "style": "eager", + "evict-on": "N", + "root": "M"}, + {"tensor": "A", + "rank": "K", + "type": "payload", + "format": "default", + "style": "eager", + "evict-on": "N", + "root": "M"}, + {"tensor": "A", + "rank": "O", + "type": "elem", + "format": "default", + "style": "eager", + "evict-on": "N", + "root": "M"}]} + assert buffet.get_bindings() == expanded_bindings + + buffet.expand_eager("Z", "B", "default", ["N", "K"], [[], []]) + assert buffet.get_bindings() == expanded_bindings + + ranks = ["J", "M", "K", "O"] + types = [["elem"], ["coord", "payload"], ["coord", "payload"], ["elem"]] + buffet.expand_eager("Z", "A", "foo", ranks, types) + + assert buffet.get_bindings() == expanded_bindings def test_cache_component(): attrs = {"width": 8, "depth": 3 * 2 ** 20} - bindings = [{"tensor": "A", "rank": "M"}] - cache = CacheComponent("FiberCache", attrs, bindings) + bindings = {"Z": [{"tensor": "A", "rank": "M", + "type": "payload", "format": "default"}]} + cache = CacheComponent("FiberCache", 1, attrs, bindings) + + +def test_compute_attr_errs(): + with pytest.raises(ValueError) as excinfo: + ComputeComponent("FU", 1, {}, []) + assert str(excinfo.value) == "Type unspecified for component FU" + + with pytest.raises(ValueError) as excinfo: + ComputeComponent("FU", 1, {"type": None}, []) + assert str(excinfo.value) == "Bad type None for Compute FU" + + with pytest.raises(ValueError) as excinfo: + ComputeComponent("FU", 1, {"type": "foo"}, []) + assert str( + excinfo.value) in { + "foo is not a valid value for attribute type of class Compute. Choose one of {'mul', 'add'}", + "foo is not a valid value for attribute type of class Compute. 
Choose one of {'add', 'mul'}"} - assert cache.get_depth() == 3 * 2 ** 20 - assert cache.get_width() == 8 + +def test_compute_component(): + attrs = {"type": "mul"} + compute = ComputeComponent("FU", 1, attrs, {}) + + assert compute.get_type() == "mul" + + assert repr(compute) == "(ComputeComponent, FU, 1, {}, mul)" def test_dram_component(): - bindings = [{"tensor": "A", "rank": "M"}] - dram = DRAMComponent("DRAM", {"datawidth": 8, "bandwidth": 128}, bindings) + bindings = {"Z": [{"tensor": "A", "rank": "M", + "type": "payload", "format": "default"}]} + dram = DRAMComponent( + "DRAM", 1, { + "datawidth": 8, "bandwidth": 128}, bindings) + + +def test_intersector_component_binding_errs(): + bindings = {"Z": [{"foo": "bar"}]} + with pytest.raises(ValueError) as excinfo: + IntersectorComponent("Intersection", 1, {}, bindings) + assert str( + excinfo.value) == "Rank unspecified in Einsum Z in binding to Intersection" + + +def test_intersector_component(): + bindings = {"Z": [{"rank": "K"}]} + intersector = IntersectorComponent("Intersection", 1, {}, bindings) - assert dram.get_bandwidth() == 128 - assert dram.get_datawidth() == 8 + +def test_leader_follower_component_binding_errs(): + bindings = {"Z": [{"rank": "K"}]} + with pytest.raises(ValueError) as excinfo: + LeaderFollowerComponent("Intersection", 1, {}, bindings) + assert str( + excinfo.value) == "Leader unspecified in Einsum Z in binding to Intersection" def test_leader_follower_component(): - bindings = [{"einsum": "Z", "rank": "K"}] - leader_follower = LeaderFollowerComponent("Intersection", {}, bindings) + bindings = {"Z": [{"rank": "K", "leader": "A"}]} + leader_follower = LeaderFollowerComponent("Intersection", 1, {}, bindings) + + +def test_merger_attr_errs(): + attrs = { + "comparator_radix": 32, + "outputs": 2, + "order": "opt", + "reduce": False} + with pytest.raises(ValueError) as excinfo: + MergerComponent("Merger0", 1, attrs, []) + assert str(excinfo.value) == "Inputs unspecified for component Merger0" + + attrs = { + "inputs": "foo", + "comparator_radix": 32, + "outputs": 2, + "order": "opt", + "reduce": False} + with pytest.raises(ValueError) as excinfo: + MergerComponent("Merger1", 1, attrs, []) + assert str(excinfo.value) == "Bad inputs foo for Merger Merger1" + + attrs = {"inputs": 64, "outputs": 2, "order": "opt", "reduce": False} + with pytest.raises(ValueError) as excinfo: + MergerComponent("Merger0", 1, attrs, []) + assert str( + excinfo.value) == "Comparator radix unspecified for component Merger0" + + attrs = { + "inputs": 64, + "comparator_radix": "foo", + "outputs": 2, + "order": "opt", + "reduce": False} + with pytest.raises(ValueError) as excinfo: + MergerComponent("Merger1", 1, attrs, []) + assert str(excinfo.value) == "Bad comparator_radix foo for Merger Merger1" + + attrs = { + "inputs": 64, + "comparator_radix": 32, + "outputs": "foo", + "order": "opt", + "reduce": False} + with pytest.raises(ValueError) as excinfo: + MergerComponent("Merger1", 1, attrs, []) + assert str(excinfo.value) == "Bad outputs foo for Merger Merger1" + + attrs = { + "inputs": 64, + "comparator_radix": 32, + "outputs": 2, + "order": None, + "reduce": False} + with pytest.raises(ValueError) as excinfo: + MergerComponent("Merger1", 1, attrs, []) + assert str(excinfo.value) == "Bad order None for Merger Merger1" + + attrs = { + "inputs": 64, + "comparator_radix": 32, + "outputs": 2, + "order": "foo", + "reduce": False} + with pytest.raises(ValueError) as excinfo: + MergerComponent("Merger1", 1, attrs, []) + assert str( + excinfo.value) 
in { + "foo is not a valid value for attribute order of class Merger. Choose one of {'opt', 'fifo'}", + "foo is not a valid value for attribute order of class Merger. Choose one of {'fifo', 'opt'}"} + + attrs = { + "inputs": 64, + "comparator_radix": 32, + "outputs": 2, + "order": "opt", + "reduce": 2} + with pytest.raises(ValueError) as excinfo: + MergerComponent("Merger1", 1, attrs, []) + assert str(excinfo.value) == "Bad reduce 2 for Merger Merger1" + + attrs = { + "inputs": 64, + "comparator_radix": 32, + "outputs": 2, + "order": "opt", + "reduce": True} + with pytest.raises(NotImplementedError) as excinfo: + MergerComponent("Merger1", 1, attrs, []) + assert str(excinfo.value) == "Concurrent merge and reduction not supported" + + +def test_merger_binding_errs(): + attrs = { + "inputs": 64, + "comparator_radix": 32, + "outputs": 2, + "order": "opt", + "reduce": False} + binding = { + "Z": [{"init-ranks": ["M", "K", "N"], "final-ranks": ["M", "N", "K"]}]} + with pytest.raises(ValueError) as excinfo: + MergerComponent("Merger1", 1, attrs, binding) + assert str( + excinfo.value) == "Tensor not specified for Einsum Z in binding to Merger1" + + binding = {"Z": [{"tensor": "T", "final-ranks": ["M", "N", "K"]}]} + with pytest.raises(ValueError) as excinfo: + MergerComponent("Merger1", 1, attrs, binding) + assert str( + excinfo.value) == "Initial ranks not specified for tensor T in Einsum Z in binding to Merger1" + + binding = {"Z": [{"tensor": "T", "init-ranks": ["M", "N", "K"]}]} + with pytest.raises(ValueError) as excinfo: + MergerComponent("Merger1", 1, attrs, binding) + assert str( + excinfo.value) == "Final ranks not specified for tensor T in Einsum Z in binding to Merger1" + + attrs = { + "inputs": 64, + "comparator_radix": 32, + "outputs": 2, + "order": "opt", + "reduce": False} + binding = {"Z": [{"tensor": "T", + "init-ranks": ["M", + "K", + "N"], + "final-ranks": ["M", + "N", + "K"]}, + {"tensor": "T", + "init-ranks": ["K", + "M", + "N"], + "final-ranks": ["M", + "N", + "K"]}]} + merger = MergerComponent("Merger1", 1, attrs, binding) + with pytest.raises(ValueError) as excinfo: + merger.get_init_ranks("Z", "T", ["M", "N", "K"]) + assert str( + excinfo.value) == "Merge binding from both ['M', 'K', 'N'] and ['K', 'M', 'N'] to ['M', 'N', 'K']" def test_merger_component(): - attrs = {"radix": 64, "next_latency": 1} - binding = [{"tensor": "T", "init_ranks": ["M", "K", "N"], "swap_depth": 1}] - merger = MergerComponent("HighRadixMerger", attrs, binding) - - bindings = [{"tensor": "T", "init_ranks": [ - "M", "K", "N"], "final_ranks": ["M", "N", "K"], "swap_depth": 1}] + attrs = { + "inputs": 64, + "comparator_radix": 32, + "outputs": 2, + "order": "opt", + "reduce": False} + binding = {"Z": [{"tensor": "T", "init-ranks": [ + "M", "K", "N"], "final-ranks": ["M", "N", "K"]}]} + merger = MergerComponent("Merger0", 1, attrs, binding) + + bindings = {"Z": [{"tensor": "T", "init-ranks": [ + "M", "K", "N"], "final-ranks": ["M", "N", "K"]}]} assert merger.get_bindings() == bindings - assert merger.get_next_latency() == 1 - assert merger.get_radix() == 64 + assert merger.get_inputs() == 64 + assert merger.get_comparator_radix() == 32 + assert merger.get_outputs() == 2 + assert merger.get_order() == "opt" + assert merger.get_reduce() == False + + assert merger.get_init_ranks("Z", "T", ["M", "N", "K"]) == ["M", "K", "N"] + assert merger.get_init_ranks("T", "T", ["M", "K", "N"]) is None + assert merger.get_init_ranks("Z", "A", ["M", "K"]) is None + + assert repr( + merger) == "(MergerComponent, 
Merger0, 1, {'Z': [{'tensor': 'T', 'init-ranks': ['M', 'K', 'N'], 'final-ranks': ['M', 'N', 'K']}]}, 64, 32, 2, opt, False)"
+
+    attrs = {"inputs": 200, "comparator_radix": 2}
+    merger = MergerComponent("Merger1", 1, attrs, binding)
+
+    assert merger.get_inputs() == 200
+    assert merger.get_comparator_radix() == 2
+    assert merger.get_outputs() == 1
+    assert merger.get_order() == "fifo"
+    assert merger.get_reduce() == False
+
 
-    merger = MergerComponent(
-        "Sort", {"radix": "inf", "next_latency": "N"}, binding)
+def test_sequencer_component_no_num_ranks():
+    with pytest.raises(ValueError) as excinfo:
+        SequencerComponent("Seq", 1, {}, {})
 
-    assert merger.get_next_latency() == "N"
-    assert merger.get_radix() == float("inf")
+    assert str(excinfo.value) == "Number of ranks unspecified for sequencer Seq"
+
+
+def test_sequencer_component_too_many_ranks():
+    attrs = {"num_ranks": 1}
+    bindings = {"Z": [{"rank": "K"}, {"rank": "M"}]}
+    with pytest.raises(ValueError) as excinfo:
+        SequencerComponent("Seq", 1, attrs, bindings)
+
+    assert str(
+        excinfo.value) == "Too many ranks bound to sequencer Seq during Einsum Z"
+
+
+def test_sequencer_component():
+    attrs = {"num_ranks": 2}
+    bindings = {"Z": [{"rank": "K"}, {"rank": "M"}]}
+    sequencer = SequencerComponent("Seq", 1, attrs, bindings)
+
+    assert sequencer.get_ranks("Z") == ["K", "M"]
 
 
 def test_skip_ahead_component():
-    bindings = [{"einsum": "Z", "rank": "K2"}]
-    skip_ahead = SkipAheadComponent("K2Intersection", {}, bindings)
+    bindings = {"Z": [{"rank": "K2"}]}
+    skip_ahead = SkipAheadComponent("K2Intersection", 1, {}, bindings)
+
+
+def test_two_finger_intersector():
+    bindings = {"Z": [{"rank": "K2"}]}
+    two_finger = TwoFingerComponent("K2Intersection", 1, {}, bindings)
diff --git a/tests/ir/test_equation.py b/tests/ir/test_equation.py
index a3961fa..d0801c2 100644
--- a/tests/ir/test_equation.py
+++ b/tests/ir/test_equation.py
@@ -70,13 +70,12 @@ def test_repeated_tensor():
 def test_get_factor_order():
     equation = create_complex()
     assert equation.get_factor_order() == {
-        "A": (
-            0, 1), "C": (
-            0, 2), "d": (
-            0, 0), "B": (
-            1, 0), "T": (
-            1, 1), "e": (
-            1, 2)}
+        "A": (0, 1),
+        "C": (0, 2),
+        "d": (0, 0),
+        "B": (1, 0),
+        "T": (1, 1),
+        "e": (1, 2)}
 
 
 def test_get_in_update():
@@ -85,6 +84,19 @@ def test_get_in_update():
         False, False, True]]
 
 
+def test_get_iter():
+    equation = create_complex()
+    A = equation.get_tensor("A")
+    B = equation.get_tensor("B")
+    C = equation.get_tensor("C")
+    T = equation.get_tensor("T")
+    Z = equation.get_tensor("Z")
+
+    assert equation.get_iter([A, B]) == (None, [[A], [B]])
+    assert equation.get_iter([C, T, B, Z]) == (Z, [[C], [B, T]])
+    assert equation.get_iter([A, C]) == (None, [[A, C]])
+
+
 def test_get_output():
     equation = create_matmul()
     tensor = equation.get_output()
diff --git a/tests/ir/test_flow_graph.py b/tests/ir/test_flow_graph.py
index e80d647..c0295a2 100644
--- a/tests/ir/test_flow_graph.py
+++ b/tests/ir/test_flow_graph.py
@@ -10,6 +10,27 @@ from teaal.parse import *
 
 
+def print_errs(graph, corr):
+    """
+    Debugging helper: print the edges by which the built and expected flow graphs differ
+    """
+    print("In Graph")
+    for edge in graph.edges:
+        if edge not in corr.edges:
+            print("    corr.add_edge", end="(")
+            print(type(edge[0]).__name__, end="(")
+            print(str(list(edge[0]._Node__key()))[1:-1], end="), ")
+            print(type(edge[1]).__name__, end="(")
+            print(str(list(edge[1]._Node__key()))[1:-1], end="))\n")
+
+    print("In Corr")
+    for edge in corr.edges:
+        if edge not in graph.edges:
+            print(edge)
+
+    print("---")
+
+
 def build_program_no_loops():
     einsum = 
Einsum.from_file("tests/integration/test_translate_no_loops.yaml") mapping = Mapping.from_file( @@ -65,7 +83,41 @@ def build_gamma(): arch = Architecture.from_str(yaml) bindings = Bindings.from_str(yaml) - hardware = Hardware(arch, bindings) + hardware = Hardware(arch, bindings, program) + + format_ = Format.from_str(yaml) + + return program, hardware, format_ + + +def build_extensor(): + with open("tests/integration/extensor.yaml", "r") as f: + yaml = f.read() + + einsum = Einsum.from_str(yaml) + mapping = Mapping.from_str(yaml) + program = Program(einsum, mapping) + + arch = Architecture.from_str(yaml) + bindings = Bindings.from_str(yaml) + hardware = Hardware(arch, bindings, program) + + format_ = Format.from_str(yaml) + + return program, hardware, format_ + + +def build_extensor_energy(): + with open("tests/integration/extensor-energy.yaml", "r") as f: + yaml = f.read() + + einsum = Einsum.from_str(yaml) + mapping = Mapping.from_str(yaml) + program = Program(einsum, mapping) + + arch = Architecture.from_str(yaml) + bindings = Bindings.from_str(yaml) + hardware = Hardware(arch, bindings, program) format_ = Format.from_str(yaml) @@ -83,6 +135,8 @@ def test_graph_no_loops(): corr.add_edge(GetRootNode("A", []), OtherNode("Body")) corr.add_edge(OtherNode("Body"), OtherNode("Footer")) + print_errs(graph, corr) + assert nx.is_isomorphic(graph, corr) @@ -102,7 +156,10 @@ def test_graph(): corr.add_edge(LoopNode("N"), LoopNode("K")) corr.add_edge(LoopNode("N"), OtherNode("Body")) corr.add_edge(LoopNode("K"), OtherNode("Body")) - corr.add_edge(OtherNode("Body"), OtherNode("Footer")) + corr.add_edge(OtherNode("Body"), EndLoopNode("K")) + corr.add_edge(EndLoopNode("K"), EndLoopNode("N")) + corr.add_edge(EndLoopNode("N"), EndLoopNode("M")) + corr.add_edge(EndLoopNode("M"), OtherNode("Footer")) corr.add_edge(SwizzleNode( "A", ["M", "K"], "loop-order"), GetRootNode("A", ["M", "K"])) corr.add_edge(SwizzleNode( @@ -112,6 +169,8 @@ def test_graph(): corr.add_edge(SwizzleNode( "B", ["N", "K"], "loop-order"), OtherNode("Graphics")) + print_errs(graph, corr) + assert nx.is_isomorphic(graph, corr) @@ -135,7 +194,10 @@ def test_graph_loop_order(): corr.add_edge(LoopNode("M"), LoopNode("N")) corr.add_edge(LoopNode("M"), OtherNode("Body")) corr.add_edge(LoopNode("N"), OtherNode("Body")) - corr.add_edge(OtherNode("Body"), OtherNode("Footer")) + corr.add_edge(OtherNode("Body"), EndLoopNode("N")) + corr.add_edge(EndLoopNode("N"), EndLoopNode("M")) + corr.add_edge(EndLoopNode("M"), EndLoopNode("K")) + corr.add_edge(EndLoopNode("K"), OtherNode("Footer")) corr.add_edge(SwizzleNode( "A", ["K", "M"], "loop-order"), GetRootNode("A", ["K", "M"])) corr.add_edge(SwizzleNode( @@ -145,6 +207,8 @@ def test_graph_loop_order(): corr.add_edge(SwizzleNode( "B", ["K", "N"], "loop-order"), OtherNode("Graphics")) + print_errs(graph, corr) + assert nx.is_isomorphic(graph, corr) @@ -175,7 +239,13 @@ def test_graph_static_parts(): corr.add_edge(OtherNode("Graphics"), LoopNode("K2")) corr.add_edge(OtherNode("Output"), OtherNode("Graphics")) corr.add_edge(OtherNode("Output"), GetRootNode("Z", ['M', 'N1', 'N0'])) - corr.add_edge(OtherNode("Body"), OtherNode("Footer")) + corr.add_edge(OtherNode("Body"), EndLoopNode("K0")) + corr.add_edge(EndLoopNode("K0"), EndLoopNode("N0")) + corr.add_edge(EndLoopNode("N0"), EndLoopNode("K1")) + corr.add_edge(EndLoopNode("K1"), EndLoopNode("N1")) + corr.add_edge(EndLoopNode("N1"), EndLoopNode("M")) + corr.add_edge(EndLoopNode("M"), EndLoopNode("K2")) + corr.add_edge(EndLoopNode("K2"), 
OtherNode("Footer")) corr.add_edge(GetRootNode("Z", ['M', 'N1', 'N0']), LoopNode("M")) corr.add_edge(PartNode("A", ('K',)), OtherNode("Graphics")) corr.add_edge( @@ -220,6 +290,9 @@ def test_graph_static_parts(): SwizzleNode( "B", [ "K2", "N1", "K1", "N0", "K0"], "loop-order"), OtherNode("Graphics")) + + print_errs(graph, corr) + assert nx.is_isomorphic(graph, corr) @@ -255,7 +328,13 @@ def test_graph_dyn_parts(): corr.add_edge(OtherNode("Graphics"), LoopNode("K2")) corr.add_edge(OtherNode("Output"), OtherNode("Graphics")) corr.add_edge(OtherNode("Output"), GetRootNode("Z", ['M', 'N1', 'N0'])) - corr.add_edge(OtherNode("Body"), OtherNode("Footer")) + corr.add_edge(OtherNode("Body"), EndLoopNode("K0")) + corr.add_edge(EndLoopNode("K0"), EndLoopNode("N0")) + corr.add_edge(EndLoopNode("N0"), EndLoopNode("K1")) + corr.add_edge(EndLoopNode("K1"), EndLoopNode("N1")) + corr.add_edge(EndLoopNode("N1"), EndLoopNode("M")) + corr.add_edge(EndLoopNode("M"), EndLoopNode("K2")) + corr.add_edge(EndLoopNode("K2"), OtherNode("Footer")) corr.add_edge(GetRootNode("Z", ['M', 'N1', 'N0']), LoopNode("M")) corr.add_edge( PartNode( @@ -344,6 +423,8 @@ def test_graph_dyn_parts(): "B", [ "K1", "N0", "K0"])) + print_errs(graph, corr) + assert nx.is_isomorphic(graph, corr) @@ -376,7 +457,13 @@ def test_graph_mixed_parts(): corr.add_edge(OtherNode("Graphics"), LoopNode("K3")) corr.add_edge(OtherNode("Output"), OtherNode("Graphics")) corr.add_edge(OtherNode("Output"), GetRootNode("Z", ["M", "N"])) - corr.add_edge(OtherNode("Body"), OtherNode("Footer")) + corr.add_edge(OtherNode("Body"), EndLoopNode("K0")) + corr.add_edge(EndLoopNode("K0"), EndLoopNode("N")) + corr.add_edge(EndLoopNode("N"), EndLoopNode("K1")) + corr.add_edge(EndLoopNode("K1"), EndLoopNode("K2")) + corr.add_edge(EndLoopNode("K2"), EndLoopNode("M")) + corr.add_edge(EndLoopNode("M"), EndLoopNode("K3")) + corr.add_edge(EndLoopNode("K3"), OtherNode("Footer")) corr.add_edge(GetRootNode("Z", ["M", "N"]), LoopNode("M")) corr.add_edge(PartNode("A", ("K",)), OtherNode("Graphics")) corr.add_edge(PartNode("A", ("K",)), PartNode("A", ("K2I",))) @@ -471,6 +558,8 @@ def test_graph_mixed_parts(): "B", [ "K1", "N", "K0"])) + print_errs(graph, corr) + assert nx.is_isomorphic(graph, corr) @@ -503,7 +592,11 @@ def test_graph_static_flattening(): corr.add_edge(OtherNode("Graphics"), LoopNode("K1")) corr.add_edge(OtherNode("Output"), OtherNode("Graphics")) corr.add_edge(OtherNode("Output"), GetRootNode("Z", ['N', 'M'])) - corr.add_edge(OtherNode("Body"), OtherNode("Footer")) + corr.add_edge(OtherNode("Body"), EndLoopNode("MK00")) + corr.add_edge(EndLoopNode("MK00"), EndLoopNode("N")) + corr.add_edge(EndLoopNode("N"), EndLoopNode("MK01")) + corr.add_edge(EndLoopNode("MK01"), EndLoopNode("K1")) + corr.add_edge(EndLoopNode("K1"), OtherNode("Footer")) corr.add_edge(GetRootNode("Z", ['N', 'M']), LoopNode("N")) corr.add_edge(PartNode("A", ('K',)), OtherNode("Graphics")) corr.add_edge( @@ -556,6 +649,8 @@ def test_graph_static_flattening(): corr.add_edge(GetPayloadNode("Z", ['M']), OtherNode("Body")) corr.add_edge(GetPayloadNode("B", ['K0']), OtherNode("Body")) + print_errs(graph, corr) + assert nx.is_isomorphic(graph, corr) @@ -591,7 +686,12 @@ def test_graph_dyn_flattening(): corr.add_edge(OtherNode('Graphics'), LoopNode('M1')) corr.add_edge(OtherNode('Output'), OtherNode('Graphics')) corr.add_edge(OtherNode('Output'), GetRootNode('Z', ['M1', 'N', 'M0'])) - corr.add_edge(OtherNode('Body'), OtherNode('Footer')) + corr.add_edge(OtherNode("Body"), EndLoopNode("M0K00")) + 
corr.add_edge(EndLoopNode("M0K00"), EndLoopNode("N")) + corr.add_edge(EndLoopNode("N"), EndLoopNode("M0K01")) + corr.add_edge(EndLoopNode("M0K01"), EndLoopNode("K1")) + corr.add_edge(EndLoopNode("K1"), EndLoopNode("M1")) + corr.add_edge(EndLoopNode("M1"), OtherNode("Footer")) corr.add_edge(GetRootNode('Z', ['M1', 'N', 'M0']), LoopNode('M1')) corr.add_edge( PartNode( @@ -666,6 +766,8 @@ def test_graph_dyn_flattening(): corr.add_edge(GetPayloadNode('Z', ['M0']), OtherNode('Body')) corr.add_edge(GetPayloadNode('B', ['K0']), OtherNode('Body')) + print_errs(graph, corr) + assert nx.is_isomorphic(graph, corr) @@ -684,7 +786,9 @@ def test_graph_conv(): corr.add_edge(OtherNode("Graphics"), LoopNode("W")) corr.add_edge(OtherNode("Output"), OtherNode("Graphics")) corr.add_edge(OtherNode("Output"), GetRootNode("O", ['Q'])) - corr.add_edge(OtherNode("Body"), OtherNode("Footer")) + corr.add_edge(OtherNode("Body"), EndLoopNode("Q")) + corr.add_edge(EndLoopNode("Q"), EndLoopNode("W")) + corr.add_edge(EndLoopNode("W"), OtherNode("Footer")) corr.add_edge(GetRootNode("O", ['Q']), LoopNode("Q")) corr.add_edge(GetRootNode("I", ['W']), LoopNode("W")) corr.add_edge(GetRootNode("F", ['S']), LoopNode("Q")) @@ -707,6 +811,8 @@ def test_graph_conv(): ["S"])) corr.add_edge(SwizzleNode("F", ["S"], "loop-order"), OtherNode("Graphics")) + print_errs(graph, corr) + assert nx.is_isomorphic(graph, corr) @@ -734,7 +840,11 @@ def test_graph_conv_part(): corr.add_edge(OtherNode("Graphics"), LoopNode("Q2")) corr.add_edge(OtherNode("Output"), OtherNode("Graphics")) corr.add_edge(OtherNode("Output"), GetRootNode("O", ["Q2", "Q1", "Q0"])) - corr.add_edge(OtherNode("Body"), OtherNode("Footer")) + corr.add_edge(OtherNode("Body"), EndLoopNode("Q0")) + corr.add_edge(EndLoopNode("Q0"), EndLoopNode("S")) + corr.add_edge(EndLoopNode("S"), EndLoopNode("Q1")) + corr.add_edge(EndLoopNode("Q1"), EndLoopNode("Q2")) + corr.add_edge(EndLoopNode("Q2"), OtherNode("Footer")) corr.add_edge(GetRootNode("O", ["Q2", "Q1", "Q0"]), LoopNode("Q2")) corr.add_edge(PartNode("I", ("W",)), OtherNode("Graphics")) corr.add_edge(PartNode("I", ("W",)), PartNode("I", ("W1I",))) @@ -786,10 +896,67 @@ def test_graph_conv_part(): "I", [ "W1", "W0"])) + print_errs(graph, corr) + + assert nx.is_isomorphic(graph, corr) + + +def test_graph_metrics_no_loops(): + yaml = """ + einsum: + declaration: + Z: [] + expressions: + - Z[] = a + architecture: + accel: + - name: empty + bindings: + Z: + - config: accel + prefix: tmp/Z + format: + Z: + default: + rank-order: [] + """ + einsum = Einsum.from_str(yaml) + mapping = Mapping.from_str(yaml) + program = Program(einsum, mapping) + + arch = Architecture.from_str(yaml) + bindings = Bindings.from_str(yaml) + hardware = Hardware(arch, bindings, program) + + format_ = Format.from_str(yaml) + + program.add_einsum(0) + metrics = Metrics(program, hardware, format_) + graph = FlowGraph(program, metrics, []).get_graph() + + corr = nx.DiGraph() + + corr.add_edge(OtherNode("Body"), OtherNode("Footer")) + corr.add_edge(OtherNode("Body"), MetricsNode("End")) + corr.add_edge(OtherNode("Graphics"), OtherNode("Body")) + corr.add_edge(OtherNode("Graphics"), MetricsNode("Start")) + corr.add_edge(OtherNode("Output"), OtherNode("Graphics")) + corr.add_edge(OtherNode("Output"), GetRootNode("Z", [])) + corr.add_edge(OtherNode("Footer"), MetricsNode("Dump")) + corr.add_edge(MetricsNode("Start"), OtherNode("Body")) + corr.add_edge(MetricsNode("End"), OtherNode("Footer")) + corr.add_edge(GetRootNode("Z", []), OtherNode("Body")) + 
corr.add_edge(OtherNode('Graphics'), MetricsNode('Body')) + corr.add_edge(MetricsNode('Start'), MetricsNode('Body')) + corr.add_edge(MetricsNode('Body'), OtherNode('Body')) + corr.add_edge(MetricsNode('Body'), MetricsNode('End')) + + print_errs(graph, corr) + assert nx.is_isomorphic(graph, corr) -def test_graph_metrics(): +def test_graph_metrics_T(): program, hardware, format_ = build_gamma() program.add_einsum(0) metrics = Metrics(program, hardware, format_) @@ -801,29 +968,590 @@ def test_graph_metrics(): corr.add_edge(LoopNode("K"), LoopNode("N")) corr.add_edge(LoopNode("K"), OtherNode("Body")) corr.add_edge(LoopNode("N"), OtherNode("Body")) + corr.add_edge(OtherNode("Body"), EndLoopNode("N")) + corr.add_edge(EndLoopNode("N"), EndLoopNode("K")) + corr.add_edge(EndLoopNode("K"), EndLoopNode("M")) + corr.add_edge(EndLoopNode("M"), OtherNode("Footer")) corr.add_edge(OtherNode("Graphics"), LoopNode("M")) corr.add_edge(OtherNode("Graphics"), MetricsNode("Start")) corr.add_edge(OtherNode("Output"), OtherNode("Graphics")) - corr.add_edge(OtherNode("Output"), GetRootNode("T", ["M", "K", "N"])) - corr.add_edge(OtherNode("Body"), OtherNode("Footer")) - corr.add_edge(OtherNode("Body"), MetricsNode("End")) + corr.add_edge(OtherNode("Output"), GetRootNode("T", ['M', 'K', 'N'])) corr.add_edge(OtherNode("Footer"), MetricsNode("Dump")) corr.add_edge(MetricsNode("Start"), LoopNode("M")) corr.add_edge(MetricsNode("End"), OtherNode("Footer")) - corr.add_edge(GetRootNode("T", ["M", "K", "N"]), LoopNode("M")) + corr.add_edge(GetRootNode("T", ['M', 'K', 'N']), LoopNode("M")) + corr.add_edge( + SwizzleNode("A", ['M', 'K'], "loop-order"), + GetRootNode("A", ['M', 'K'])) + corr.add_edge( + SwizzleNode("A", ['M', 'K'], "loop-order"), OtherNode("Graphics")) + corr.add_edge(GetRootNode("A", ['M', 'K']), LoopNode("M")) corr.add_edge(SwizzleNode( - "A", ["M", "K"], "loop-order"), GetRootNode("A", ["M", "K"])) + "B", ['K', 'N'], "loop-order"), GetRootNode("B", ['K', 'N'])) + corr.add_edge( + SwizzleNode("B", ['K', 'N'], "loop-order"), OtherNode("Graphics")) + corr.add_edge(GetRootNode("B", ['K', 'N']), LoopNode("K")) + corr.add_edge(LoopNode('M'), MetricsHeaderNode('K')) + corr.add_edge(LoopNode('K'), MetricsHeaderNode('N')) + corr.add_edge(OtherNode('Graphics'), MetricsHeaderNode('M')) + corr.add_edge(MetricsNode('Start'), MetricsHeaderNode('M')) + corr.add_edge(MetricsHeaderNode('M'), LoopNode('M')) + corr.add_edge(MetricsHeaderNode('K'), LoopNode('K')) + corr.add_edge(MetricsHeaderNode('N'), LoopNode('N')) + corr.add_edge(LoopNode('N'), MetricsNode('Body')) + corr.add_edge(EndLoopNode('N'), MetricsFooterNode('N')) + corr.add_edge(EndLoopNode('K'), MetricsFooterNode('K')) + corr.add_edge(EndLoopNode('M'), MetricsFooterNode('M')) + corr.add_edge(MetricsNode('Body'), OtherNode('Body')) + corr.add_edge(MetricsFooterNode('N'), EndLoopNode('K')) + corr.add_edge(MetricsFooterNode('K'), EndLoopNode('M')) + corr.add_edge(MetricsFooterNode('M'), OtherNode('Footer')) + corr.add_edge(MetricsFooterNode('M'), MetricsNode('End')) + corr.add_edge(EndLoopNode("M"), MetricsNode("End")) + + print_errs(graph, corr) + + assert nx.is_isomorphic(graph, corr) + + +def test_graph_metrics_Z(): + program, hardware, format_ = build_gamma() + program.add_einsum(1) + metrics = Metrics(program, hardware, format_) + graph = FlowGraph(program, metrics, []).get_graph() + + corr = nx.DiGraph() + + corr.add_edge(LoopNode("M"), LoopNode("N")) + corr.add_edge(LoopNode("M"), LoopNode("K")) + corr.add_edge(LoopNode("N"), LoopNode("K")) + 
corr.add_edge(LoopNode("N"), OtherNode("Body")) + corr.add_edge(LoopNode("K"), OtherNode("Body")) + corr.add_edge(OtherNode("Body"), EndLoopNode("K")) + corr.add_edge(EndLoopNode("K"), EndLoopNode("N")) + corr.add_edge(EndLoopNode("N"), EndLoopNode("M")) + corr.add_edge(EndLoopNode("M"), OtherNode("Footer")) + corr.add_edge(OtherNode("Graphics"), LoopNode("M")) + corr.add_edge(OtherNode("Graphics"), MetricsNode("Start")) + corr.add_edge(OtherNode("Output"), OtherNode("Graphics")) + corr.add_edge(OtherNode("Output"), GetRootNode("Z", ['M', 'N'])) + corr.add_edge(OtherNode("Footer"), MetricsNode("Dump")) + corr.add_edge(MetricsNode("Start"), LoopNode("M")) + corr.add_edge(MetricsNode("End"), OtherNode("Footer")) + corr.add_edge(GetRootNode("Z", ['M', 'N']), LoopNode("M")) corr.add_edge(SwizzleNode( - "A", ["M", "K"], "loop-order"), OtherNode("Graphics")) - corr.add_edge(GetRootNode("A", ["M", "K"]), LoopNode("M")) + "T", ['M', 'N', 'K'], "loop-order"), GetRootNode("T", ['M', 'N', 'K'])) + corr.add_edge( + SwizzleNode( + "T", [ + 'M', 'N', 'K'], "loop-order"), OtherNode("Graphics")) + corr.add_edge(GetRootNode("T", ['M', 'N', 'K']), LoopNode("M")) + corr.add_edge( + SwizzleNode( + "T", [ + 'M', 'K', 'N'], "metrics"), SwizzleNode( + "T", [ + 'M', 'N', 'K'], "loop-order")) corr.add_edge(SwizzleNode( - "B", ["K", "N"], "loop-order"), GetRootNode("B", ["K", "N"])) + "A", ['M', 'K'], "loop-order"), GetRootNode("A", ['M', 'K'])) + corr.add_edge( + SwizzleNode( + "A", [ + 'M', 'K'], "loop-order"), OtherNode("Graphics")) + corr.add_edge(GetRootNode("A", ['M', 'K']), LoopNode("M")) + corr.add_edge(LoopNode('M'), MetricsHeaderNode('N')) + corr.add_edge(LoopNode('N'), MetricsHeaderNode('K')) + corr.add_edge(OtherNode('Graphics'), MetricsHeaderNode('M')) + corr.add_edge(MetricsNode('Start'), MetricsHeaderNode('M')) + corr.add_edge(MetricsHeaderNode('M'), LoopNode('M')) + corr.add_edge(MetricsHeaderNode('N'), LoopNode('N')) + corr.add_edge(MetricsHeaderNode('K'), LoopNode('K')) + corr.add_edge(LoopNode('K'), MetricsNode('Body')) + corr.add_edge(EndLoopNode('K'), MetricsFooterNode('K')) + corr.add_edge(EndLoopNode('N'), MetricsFooterNode('N')) + corr.add_edge(EndLoopNode('M'), MetricsFooterNode('M')) + corr.add_edge(MetricsNode('Body'), OtherNode('Body')) + corr.add_edge(MetricsFooterNode('K'), EndLoopNode('N')) + corr.add_edge(MetricsFooterNode('N'), EndLoopNode('M')) + corr.add_edge(MetricsFooterNode('M'), OtherNode('Footer')) + corr.add_edge(MetricsFooterNode('M'), MetricsNode('End')) + corr.add_edge(EndLoopNode("M"), MetricsNode("End")) + + print_errs(graph, corr) + + assert nx.is_isomorphic(graph, corr) + + +def test_graph_metrics_extensor(): + program, hardware, format_ = build_extensor() + program.add_einsum(0) + metrics = Metrics(program, hardware, format_) + graph = FlowGraph(program, metrics, []).get_graph() + + corr = nx.DiGraph() + + corr.add_edge(LoopNode("N2"), LoopNode("K2")) + corr.add_edge(LoopNode("N2"), LoopNode("M2")) + corr.add_edge(LoopNode("K2"), LoopNode("M2")) + corr.add_edge(LoopNode("K2"), LoopNode("N1")) + corr.add_edge(LoopNode("M2"), LoopNode("M1")) + corr.add_edge(LoopNode("M1"), LoopNode("N1")) + corr.add_edge(LoopNode("M1"), LoopNode("K1")) + corr.add_edge(LoopNode("N1"), LoopNode("K1")) + corr.add_edge(LoopNode("N1"), LoopNode("M0")) + corr.add_edge(LoopNode("K1"), LoopNode("M0")) + corr.add_edge(LoopNode("K1"), LoopNode("N0")) + corr.add_edge(LoopNode("M0"), LoopNode("N0")) + corr.add_edge(LoopNode("M0"), LoopNode("K0")) + corr.add_edge(LoopNode("N0"), LoopNode("K0")) + 
corr.add_edge(LoopNode("N0"), OtherNode("Body")) + corr.add_edge(LoopNode("K0"), OtherNode("Body")) + corr.add_edge(OtherNode("Body"), EndLoopNode("K0")) + corr.add_edge(EndLoopNode("K0"), EndLoopNode("N0")) + corr.add_edge(EndLoopNode("N0"), EndLoopNode("M0")) + corr.add_edge(EndLoopNode("M0"), EndLoopNode("K1")) + corr.add_edge(EndLoopNode("K1"), EndLoopNode("N1")) + corr.add_edge(EndLoopNode("N1"), EndLoopNode("M1")) + corr.add_edge(EndLoopNode("M1"), EndLoopNode("M2")) + corr.add_edge(EndLoopNode("M2"), EndLoopNode("K2")) + corr.add_edge(EndLoopNode("K2"), EndLoopNode("N2")) + corr.add_edge(EndLoopNode("N2"), OtherNode("Footer")) + corr.add_edge(OtherNode("Footer"), MetricsNode("Dump")) + corr.add_edge(OtherNode("Graphics"), LoopNode("N2")) + corr.add_edge(OtherNode("Graphics"), MetricsNode("Start")) + corr.add_edge(OtherNode("Output"), OtherNode("Graphics")) + corr.add_edge(OtherNode("Output"), GetRootNode( + "Z", ['N2', 'M2', 'M1', 'N1', 'M0', 'N0'])) + corr.add_edge(MetricsNode("Start"), LoopNode("N2")) + corr.add_edge(MetricsNode("End"), OtherNode("Footer")) + corr.add_edge( + GetRootNode( + "Z", [ + 'N2', 'M2', 'M1', 'N1', 'M0', 'N0']), LoopNode("N2")) + corr.add_edge(PartNode("A", ('M',)), OtherNode("Graphics")) + corr.add_edge( + PartNode( + "A", ('M',)), SwizzleNode( + "A", [ + 'K2', 'M2', 'M1', 'K1', 'M0', 'K0'], "loop-order")) + corr.add_edge(PartNode("A", ('K',)), OtherNode("Graphics")) + corr.add_edge( + PartNode( + "A", ('K',)), SwizzleNode( + "A", [ + 'K2', 'M2', 'M1', 'K1', 'M0', 'K0'], "loop-order")) + corr.add_edge(SwizzleNode("A", + ['K2', 'M2', 'M1', 'K1', 'M0', 'K0'], "loop-order"), + GetRootNode("A", ['K2', 'M2', 'M1', 'K1', 'M0', 'K0'])) + corr.add_edge( + SwizzleNode( + "A", [ + 'K2', 'M2', 'M1', 'K1', 'M0', 'K0'], "loop-order"), OtherNode("Graphics")) + corr.add_edge( + GetRootNode( + "A", [ + 'K2', 'M2', 'M1', 'K1', 'M0', 'K0']), LoopNode("K2")) + corr.add_edge(PartNode("B", ('K',)), OtherNode("Graphics")) + corr.add_edge( + PartNode( + "B", ('K',)), SwizzleNode( + "B", [ + 'N2', 'K2', 'N1', 'K1', 'N0', 'K0'], "loop-order")) + corr.add_edge(PartNode("B", ('N',)), OtherNode("Graphics")) + corr.add_edge( + PartNode( + "B", ('N',)), SwizzleNode( + "B", [ + 'N2', 'K2', 'N1', 'K1', 'N0', 'K0'], "loop-order")) + corr.add_edge(SwizzleNode("B", + ['N2', 'K2', 'N1', 'K1', 'N0', 'K0'], + "loop-order"), + GetRootNode("B", + ['N2', 'K2', 'N1', 'K1', 'N0', 'K0'])) + corr.add_edge( + SwizzleNode( + "B", [ + 'N2', 'K2', 'N1', 'K1', 'N0', 'K0'], "loop-order"), OtherNode("Graphics")) + corr.add_edge( + GetRootNode( + "B", [ + 'N2', 'K2', 'N1', 'K1', 'N0', 'K0']), LoopNode("N2")) + corr.add_edge(LoopNode('N2'), MetricsHeaderNode('K2')) + corr.add_edge(LoopNode('K2'), MetricsHeaderNode('M2')) + corr.add_edge(LoopNode('M2'), MetricsHeaderNode('M1')) + corr.add_edge(LoopNode('M1'), MetricsHeaderNode('N1')) + corr.add_edge(LoopNode('N1'), MetricsHeaderNode('K1')) + corr.add_edge(LoopNode('K1'), MetricsHeaderNode('M0')) + corr.add_edge(LoopNode('M0'), MetricsHeaderNode('N0')) + corr.add_edge(LoopNode('N0'), MetricsHeaderNode('K0')) + corr.add_edge(OtherNode('Graphics'), MetricsHeaderNode('N2')) + corr.add_edge(MetricsNode('Start'), MetricsHeaderNode('N2')) + corr.add_edge(MetricsHeaderNode('N2'), LoopNode('N2')) + corr.add_edge(MetricsHeaderNode('K2'), LoopNode('K2')) + corr.add_edge(MetricsHeaderNode('M2'), LoopNode('M2')) + corr.add_edge(MetricsHeaderNode('M1'), LoopNode('M1')) + corr.add_edge(MetricsHeaderNode('N1'), LoopNode('N1')) + corr.add_edge(MetricsHeaderNode('K1'), 
LoopNode('K1')) + corr.add_edge(MetricsHeaderNode('M0'), LoopNode('M0')) + corr.add_edge(MetricsHeaderNode('N0'), LoopNode('N0')) + corr.add_edge(MetricsHeaderNode('K0'), LoopNode('K0')) + corr.add_edge(LoopNode('K0'), MetricsNode('Body')) + corr.add_edge(EndLoopNode('K0'), MetricsFooterNode('K0')) + corr.add_edge(EndLoopNode('N0'), MetricsFooterNode('N0')) + corr.add_edge(EndLoopNode('M0'), MetricsFooterNode('M0')) + corr.add_edge(EndLoopNode('K1'), MetricsFooterNode('K1')) + corr.add_edge(EndLoopNode('N1'), MetricsFooterNode('N1')) + corr.add_edge(EndLoopNode('M1'), MetricsFooterNode('M1')) + corr.add_edge(EndLoopNode('M2'), MetricsFooterNode('M2')) + corr.add_edge(EndLoopNode('K2'), MetricsFooterNode('K2')) + corr.add_edge(EndLoopNode('N2'), MetricsFooterNode('N2')) + corr.add_edge(MetricsNode('Body'), OtherNode('Body')) + corr.add_edge(MetricsFooterNode('K0'), EndLoopNode('N0')) + corr.add_edge(MetricsFooterNode('N0'), EndLoopNode('M0')) + corr.add_edge(MetricsFooterNode('M0'), EndLoopNode('K1')) + corr.add_edge(MetricsFooterNode('K1'), EndLoopNode('N1')) + corr.add_edge(MetricsFooterNode('N1'), EndLoopNode('M1')) + corr.add_edge(MetricsFooterNode('M1'), EndLoopNode('M2')) + corr.add_edge(MetricsFooterNode('M2'), EndLoopNode('K2')) + corr.add_edge(MetricsFooterNode('K2'), EndLoopNode('N2')) + corr.add_edge(MetricsFooterNode('N2'), OtherNode('Footer')) + corr.add_edge(MetricsFooterNode('N2'), MetricsNode('End')) + corr.add_edge(EndLoopNode("N2"), MetricsNode("End")) + + print_errs(graph, corr) + + assert nx.is_isomorphic(graph, corr) + + +def test_graph_metrics_extensor_energy(): + program, hardware, format_ = build_extensor_energy() + program.add_einsum(0) + metrics = Metrics(program, hardware, format_) + graph = FlowGraph(program, metrics, []).get_graph() + + corr = nx.DiGraph() + + corr.add_edge(LoopNode("N2"), LoopNode("K2")) + corr.add_edge(LoopNode("N2"), LoopNode("M2")) + corr.add_edge(LoopNode("K2"), LoopNode("M2")) + corr.add_edge(LoopNode("K2"), LoopNode("N1")) + corr.add_edge(LoopNode("M2"), LoopNode("M1")) + corr.add_edge(LoopNode("M1"), LoopNode("N1")) + corr.add_edge(LoopNode("M1"), LoopNode("K1")) + corr.add_edge(LoopNode("N1"), LoopNode("K1")) + corr.add_edge(LoopNode("N1"), LoopNode("M0")) + corr.add_edge(LoopNode("K1"), LoopNode("M0")) + corr.add_edge(LoopNode("K1"), LoopNode("N0")) + corr.add_edge(LoopNode("M0"), LoopNode("N0")) + corr.add_edge(LoopNode("M0"), LoopNode("K0")) + corr.add_edge(LoopNode("N0"), LoopNode("K0")) + corr.add_edge(LoopNode("N0"), OtherNode("Body")) + corr.add_edge(LoopNode("K0"), OtherNode("Body")) + corr.add_edge(OtherNode("Body"), EndLoopNode("K0")) + corr.add_edge(EndLoopNode("K0"), EndLoopNode("N0")) + corr.add_edge(EndLoopNode("N0"), EndLoopNode("M0")) + corr.add_edge(EndLoopNode("M0"), EndLoopNode("K1")) + corr.add_edge(EndLoopNode("K1"), EndLoopNode("N1")) + corr.add_edge(EndLoopNode("N1"), EndLoopNode("M1")) + corr.add_edge(EndLoopNode("M1"), EndLoopNode("M2")) + corr.add_edge(EndLoopNode("M2"), EndLoopNode("K2")) + corr.add_edge(EndLoopNode("K2"), EndLoopNode("N2")) + corr.add_edge(EndLoopNode("N2"), OtherNode("Footer")) + corr.add_edge(OtherNode("Footer"), MetricsNode("Dump")) + corr.add_edge(OtherNode("Graphics"), LoopNode("N2")) + corr.add_edge(OtherNode("Graphics"), MetricsNode("Start")) + corr.add_edge(OtherNode("Output"), OtherNode("Graphics")) + corr.add_edge(OtherNode("Output"), GetRootNode( + "Z", ['N2', 'M2', 'M1', 'N1', 'M0', 'N0'])) + corr.add_edge(MetricsNode("Start"), LoopNode("N2")) + corr.add_edge(MetricsNode("End"), 
OtherNode("Footer")) + corr.add_edge( + GetRootNode( + "Z", [ + 'N2', 'M2', 'M1', 'N1', 'M0', 'N0']), LoopNode("N2")) + corr.add_edge(PartNode("A", ('M',)), OtherNode("Graphics")) + corr.add_edge( + PartNode( + "A", ('M',)), SwizzleNode( + "A", [ + 'K2', 'M2', 'M1', 'K1', 'M0', 'K0'], "loop-order")) + corr.add_edge(PartNode("A", ('K',)), OtherNode("Graphics")) + corr.add_edge( + PartNode( + "A", ('K',)), SwizzleNode( + "A", [ + 'K2', 'M2', 'M1', 'K1', 'M0', 'K0'], "loop-order")) + corr.add_edge(SwizzleNode("A", + ['K2', 'M2', 'M1', 'K1', 'M0', 'K0'], "loop-order"), + GetRootNode("A", ['K2', 'M2', 'M1', 'K1', 'M0', 'K0'])) + corr.add_edge( + SwizzleNode( + "A", [ + 'K2', 'M2', 'M1', 'K1', 'M0', 'K0'], "loop-order"), OtherNode("Graphics")) + corr.add_edge( + GetRootNode( + "A", [ + 'K2', 'M2', 'M1', 'K1', 'M0', 'K0']), LoopNode("K2")) + corr.add_edge(PartNode("B", ('K',)), OtherNode("Graphics")) + corr.add_edge( + PartNode( + "B", ('K',)), SwizzleNode( + "B", [ + 'N2', 'K2', 'N1', 'K1', 'N0', 'K0'], "loop-order")) + corr.add_edge(PartNode("B", ('N',)), OtherNode("Graphics")) + corr.add_edge( + PartNode( + "B", ('N',)), SwizzleNode( + "B", [ + 'N2', 'K2', 'N1', 'K1', 'N0', 'K0'], "loop-order")) + corr.add_edge(SwizzleNode("B", + ['N2', 'K2', 'N1', 'K1', 'N0', 'K0'], + "loop-order"), + GetRootNode("B", + ['N2', 'K2', 'N1', 'K1', 'N0', 'K0'])) + corr.add_edge( + SwizzleNode( + "B", [ + 'N2', 'K2', 'N1', 'K1', 'N0', 'K0'], "loop-order"), OtherNode("Graphics")) + corr.add_edge( + GetRootNode( + "B", [ + 'N2', 'K2', 'N1', 'K1', 'N0', 'K0']), LoopNode("N2")) + corr.add_edge(LoopNode('N2'), MetricsHeaderNode('K2')) + corr.add_edge(LoopNode('K2'), MetricsHeaderNode('M2')) + corr.add_edge(LoopNode('M2'), MetricsHeaderNode('M1')) + corr.add_edge(LoopNode('M1'), MetricsHeaderNode('N1')) + corr.add_edge(LoopNode('N1'), MetricsHeaderNode('K1')) + corr.add_edge(LoopNode('K1'), MetricsHeaderNode('M0')) + corr.add_edge(LoopNode('M0'), MetricsHeaderNode('N0')) + corr.add_edge(LoopNode('N0'), MetricsHeaderNode('K0')) + corr.add_edge(OtherNode('Graphics'), MetricsHeaderNode('N2')) + corr.add_edge(MetricsNode('Start'), MetricsHeaderNode('N2')) + corr.add_edge(MetricsHeaderNode('N2'), LoopNode('N2')) + corr.add_edge(MetricsHeaderNode('K2'), LoopNode('K2')) + corr.add_edge(MetricsHeaderNode('M2'), LoopNode('M2')) + corr.add_edge(MetricsHeaderNode('M1'), LoopNode('M1')) + corr.add_edge(MetricsHeaderNode('N1'), LoopNode('N1')) + corr.add_edge(MetricsHeaderNode('K1'), LoopNode('K1')) + corr.add_edge(MetricsHeaderNode('M0'), LoopNode('M0')) + corr.add_edge(MetricsHeaderNode('N0'), LoopNode('N0')) + corr.add_edge(MetricsHeaderNode('K0'), LoopNode('K0')) + corr.add_edge(LoopNode('K0'), MetricsNode('Body')) + corr.add_edge(EndLoopNode('K0'), MetricsFooterNode('K0')) + corr.add_edge(EndLoopNode('N0'), MetricsFooterNode('N0')) + corr.add_edge(EndLoopNode('M0'), MetricsFooterNode('M0')) + corr.add_edge(EndLoopNode('K1'), MetricsFooterNode('K1')) + corr.add_edge(EndLoopNode('N1'), MetricsFooterNode('N1')) + corr.add_edge(EndLoopNode('M1'), MetricsFooterNode('M1')) + corr.add_edge(EndLoopNode('M2'), MetricsFooterNode('M2')) + corr.add_edge(EndLoopNode('K2'), MetricsFooterNode('K2')) + corr.add_edge(EndLoopNode('N2'), MetricsFooterNode('N2')) + corr.add_edge(MetricsNode('Body'), OtherNode('Body')) + corr.add_edge(MetricsFooterNode('K0'), EndLoopNode('N0')) + corr.add_edge(MetricsFooterNode('N0'), EndLoopNode('M0')) + corr.add_edge(MetricsFooterNode('M0'), EndLoopNode('K1')) + corr.add_edge(MetricsFooterNode('K1'), 
EndLoopNode('N1')) + corr.add_edge(MetricsFooterNode('N1'), EndLoopNode('M1')) + corr.add_edge(MetricsFooterNode('M1'), EndLoopNode('M2')) + corr.add_edge(MetricsFooterNode('M2'), EndLoopNode('K2')) + corr.add_edge(MetricsFooterNode('K2'), EndLoopNode('N2')) + corr.add_edge(MetricsFooterNode('N2'), OtherNode('Footer')) + corr.add_edge(MetricsFooterNode('N2'), MetricsNode('End')) + corr.add_edge(EndLoopNode("N2"), MetricsNode("End")) + + print_errs(graph, corr) + + assert nx.is_isomorphic(graph, corr) + + +def test_graph_metrics_swizzle_for_part(): + yaml = """ + einsum: + declaration: + Z: [] + A: [K, M] + expressions: + - Z[] = A[k, m] + mapping: + partitioning: + Z: + (M, K): [flatten()] + architecture: + accel: + - name: level0 + local: + - name: Merger + class: Merger + attributes: + inputs: 16 + comparator_radix: 16 + bindings: + Z: + - config: accel + prefix: tmp/Z + - component: Merger + bindings: + - tensor: A + init-ranks: [K, M] + final-ranks: [M, K] + format: + A: + default: + rank-order: [MK] + MK: + format: C + pbits: 64 + Z: + default: + rank-order: [] + """ + einsum = Einsum.from_str(yaml) + mapping = Mapping.from_str(yaml) + program = Program(einsum, mapping) + + arch = Architecture.from_str(yaml) + bindings = Bindings.from_str(yaml) + hardware = Hardware(arch, bindings, program) + + format_ = Format.from_str(yaml) + + program.add_einsum(0) + metrics = Metrics(program, hardware, format_) + graph = FlowGraph(program, metrics, []).get_graph() + + corr = nx.DiGraph() + + corr.add_edge(LoopNode("MK"), OtherNode("Body")) + corr.add_edge(OtherNode("Body"), EndLoopNode("MK")) + corr.add_edge(EndLoopNode("MK"), OtherNode("Footer")) + corr.add_edge(OtherNode("Graphics"), LoopNode("MK")) + corr.add_edge(OtherNode("Graphics"), MetricsNode("Start")) + corr.add_edge(OtherNode("Output"), OtherNode("Graphics")) + corr.add_edge(OtherNode("Output"), GetRootNode("Z", [])) + corr.add_edge(OtherNode("Footer"), MetricsNode("Dump")) + corr.add_edge(MetricsNode("Start"), LoopNode("MK")) + corr.add_edge(MetricsNode("End"), OtherNode("Footer")) + corr.add_edge(GetRootNode("Z", []), OtherNode("Body")) + corr.add_edge( + SwizzleNode("A", ['M', 'K'], "partitioning"), + PartNode("A", ('M', 'K'))) + corr.add_edge(PartNode("A", ('M', 'K')), OtherNode("Graphics")) + corr.add_edge( + PartNode("A", ('M', 'K')), + SwizzleNode("A", ['MK'], "loop-order")) + corr.add_edge( + SwizzleNode("A", ['K', 'M'], "metrics"), + SwizzleNode("A", ['M', 'K'], "partitioning")) + corr.add_edge( + SwizzleNode("A", ['MK'], "loop-order"), + GetRootNode("A", ['MK'])) + corr.add_edge( + SwizzleNode("A", ['MK'], "loop-order"), + OtherNode("Graphics")) + corr.add_edge(GetRootNode("A", ['MK']), LoopNode("MK")) + corr.add_edge(OtherNode('Graphics'), MetricsHeaderNode('MK')) + corr.add_edge(MetricsNode('Start'), MetricsHeaderNode('MK')) + corr.add_edge(MetricsHeaderNode('MK'), LoopNode('MK')) + corr.add_edge(LoopNode('MK'), MetricsNode('Body')) + corr.add_edge(MetricsNode('Body'), OtherNode('Body')) + corr.add_edge(MetricsFooterNode('MK'), MetricsNode('End')) + corr.add_edge(EndLoopNode('MK'), MetricsFooterNode('MK')) + corr.add_edge(MetricsFooterNode('MK'), OtherNode('Footer')) + corr.add_edge(EndLoopNode("MK"), MetricsNode("End")) + + print_errs(graph, corr) + + assert nx.is_isomorphic(graph, corr) + + +def test_graph_metrics_trace_output(): + yaml = """ + einsum: + declaration: + Z: [K, M] + A: [K, M] + expressions: + - Z[k, m] = A[k, m] + architecture: + accel: + - name: level0 + local: + - name: Buffer + class: Buffet + bindings: + 
Z: + - config: accel + prefix: tmp/Z + - component: Buffer + bindings: + - tensor: Z + rank: K + type: payload + style: eager + evict-on: root + format: default + format: + Z: + default: + rank-order: [K, M] + K: + format: C + pbits: 32 + M: + format: C + cbits: 32 + pbits: 64 + """ + einsum = Einsum.from_str(yaml) + mapping = Mapping.from_str(yaml) + program = Program(einsum, mapping) + + arch = Architecture.from_str(yaml) + bindings = Bindings.from_str(yaml) + hardware = Hardware(arch, bindings, program) + + format_ = Format.from_str(yaml) + + program.add_einsum(0) + metrics = Metrics(program, hardware, format_) + graph = FlowGraph(program, metrics, []).get_graph() + + corr = nx.DiGraph() + + corr.add_edge(LoopNode("K"), LoopNode("M")) + corr.add_edge(LoopNode("M"), OtherNode("Body")) + corr.add_edge(OtherNode("Body"), EndLoopNode("M")) + corr.add_edge(EndLoopNode("M"), EndLoopNode("K")) + corr.add_edge(EndLoopNode("K"), OtherNode("Footer")) + corr.add_edge(OtherNode("Footer"), MetricsNode("Dump")) + corr.add_edge(OtherNode("Graphics"), LoopNode("K")) + corr.add_edge(OtherNode("Graphics"), MetricsNode("Start")) + corr.add_edge(OtherNode("Output"), OtherNode("Graphics")) + corr.add_edge(OtherNode("Output"), GetRootNode("Z", ['K', 'M'])) + corr.add_edge(MetricsNode("Start"), LoopNode("K")) + corr.add_edge(MetricsNode("End"), OtherNode("Footer")) + corr.add_edge(GetRootNode("Z", ['K', 'M']), LoopNode("K")) corr.add_edge(SwizzleNode( - "B", ["K", "N"], "loop-order"), OtherNode("Graphics")) - corr.add_edge(GetRootNode("B", ["K", "N"]), LoopNode("K")) - corr.add_edge(SwizzleNode("B", ['K', 'N'], - "loop-order"), CollectingNode("B", "K")) - corr.add_edge(CollectingNode("B", "K"), MetricsNode("Start")) + "A", ['K', 'M'], "loop-order"), GetRootNode("A", ['K', 'M'])) + corr.add_edge( + SwizzleNode( + "A", ['K', 'M'], "loop-order"), OtherNode("Graphics")) + corr.add_edge(GetRootNode("A", ['K', 'M']), LoopNode("K")) + corr.add_edge(LoopNode('K'), MetricsHeaderNode('M')) + corr.add_edge(OtherNode('Graphics'), MetricsHeaderNode('K')) + corr.add_edge(MetricsNode('Start'), MetricsHeaderNode('K')) + corr.add_edge(MetricsHeaderNode('K'), LoopNode('K')) + corr.add_edge(MetricsHeaderNode('M'), LoopNode('M')) + corr.add_edge(LoopNode('M'), MetricsNode('Body')) + corr.add_edge(MetricsNode('Body'), OtherNode('Body')) + corr.add_edge(MetricsFooterNode('K'), MetricsNode('End')) + corr.add_edge(EndLoopNode('M'), MetricsFooterNode('M')) + corr.add_edge(EndLoopNode('K'), MetricsFooterNode('K')) + corr.add_edge(MetricsFooterNode('M'), EndLoopNode('K')) + corr.add_edge(MetricsFooterNode('K'), OtherNode('Footer')) + corr.add_edge(EndLoopNode("K"), MetricsNode("End")) + + print_errs(graph, corr) assert nx.is_isomorphic(graph, corr) diff --git a/tests/ir/test_flow_nodes.py b/tests/ir/test_flow_nodes.py index a86af77..d6d47f7 100644 --- a/tests/ir/test_flow_nodes.py +++ b/tests/ir/test_flow_nodes.py @@ -1,13 +1,6 @@ from teaal.ir.flow_nodes import * -def test_collecting_node(): - assert repr(CollectingNode("A", "K")) == "(CollectingNode, A, K)" - - assert CollectingNode("A", "K").get_tensor() == "A" - assert CollectingNode("A", "K").get_rank() == "K" - - def test_eager_input_node(): assert repr(EagerInputNode("Q1", ["I", "J"]) ) == "(EagerInputNode, Q1, ['I', 'J'])" @@ -16,6 +9,12 @@ def test_eager_input_node(): assert EagerInputNode("Q1", ["I", "J"]).get_tensors() == ["I", "J"] +def test_end_loop_node(): + assert repr(EndLoopNode("K1")) == "(EndLoopNode, K1)" + + assert EndLoopNode("K1").get_rank() == "K1" + + def 
test_fiber_node():
     assert repr(FiberNode("a_k")) == "(FiberNode, a_k)"
 
@@ -55,6 +54,18 @@ def test_loop_node():
     assert LoopNode("K1").get_rank() == "K1"
 
 
+def test_metrics_footer_node():
+    assert repr(MetricsFooterNode("K1")) == "(MetricsFooterNode, K1)"
+
+    assert MetricsFooterNode("K1").get_rank() == "K1"
+
+
+def test_metrics_header_node():
+    assert repr(MetricsHeaderNode("K1")) == "(MetricsHeaderNode, K1)"
+
+    assert MetricsHeaderNode("K1").get_rank() == "K1"
+
+
 def test_metrics_node():
     assert repr(MetricsNode("Start")) == "(MetricsNode, Start)"
 
diff --git a/tests/ir/test_fusion.py b/tests/ir/test_fusion.py
new file mode 100644
index 0000000..a343624
--- /dev/null
+++ b/tests/ir/test_fusion.py
@@ -0,0 +1,272 @@
+import pytest
+
+from teaal.ir.fusion import Fusion
+from teaal.ir.hardware import Hardware
+from teaal.ir.program import Program
+from teaal.parse import *
+
+
+def make_yaml(spacetime, bindings):
+    yaml = """
+    einsum:
+      declaration:
+        A: [K, M]
+        B: [K, N]
+        T: [K, M, N]
+        C: [M, N]
+        Z: [M, N]
+      expressions:
+      - T[k, m, n] = A[k, m] * B[k, n]
+      - Z[m, n] = T[k, m, n] * C[m, n]
+    mapping:
+      loop-order:
+        T: [M, K, N]
+        Z: [M, K, N]
+      spacetime:""" + spacetime + """
+    format:
+      # TODO: allow empty format
+      Z:
+        default:
+          rank-order: [M, N]
+          M:
+            format: C
+          N:
+            format: C
+            pbits: 32
+    architecture:
+      configA:
+      - name: System
+        local:
+        - name: FPMul0
+          class: compute
+          attributes:
+            type: mul
+        - name: FPMul1
+          class: compute
+          attributes:
+            type: mul
+      configB:
+      - name: System
+        local:
+        - name: FPMul
+          class: compute
+          attributes:
+            type: mul
+    bindings:""" + bindings
+
+    return yaml
+
+
+def parse_yamls(yaml):
+    einsum = Einsum.from_str(yaml)
+    mapping = Mapping.from_str(yaml)
+    program = Program(einsum, mapping)
+    program.add_einsum(0)
+
+    arch = Architecture.from_str(yaml)
+    bindings = Bindings.from_str(yaml)
+    hardware = Hardware(arch, bindings, program)
+
+    format_ = Format.from_str(yaml)
+
+    return program, hardware, format_
+
+
+def test_no_spacetime():
+    spacetime = ""
+    bindings = """
+        T:
+        - config: configA
+          prefix: tmp/T
+        Z:
+        - config: configB
+          prefix: tmp/Z
+    """
+    yaml = make_yaml(spacetime, bindings)
+
+    program, hardware, format_ = parse_yamls(yaml)
+    fusion = Fusion(hardware)
+
+    program.add_einsum(0)
+
+    with pytest.raises(ValueError) as excinfo:
+        fusion.add_einsum(program)
+    assert str(
+        excinfo.value) == "Undefined spacetime for Einsum T"
+
+
+def test_add_einsum_diff_configs():
+    spacetime = """
+        T:
+          space: [N]
+          time: [M, K]
+        Z:
+          space: [N]
+          time: [M, K]
+    """
+
+    bindings = """
+        T:
+        - config: configA
+          prefix: tmp/T
+        Z:
+        - config: configB
+          prefix: tmp/Z
+    """
+    yaml = make_yaml(spacetime, bindings)
+
+    program, hardware, format_ = parse_yamls(yaml)
+    fusion = Fusion(hardware)
+
+    program.add_einsum(0)
+    fusion.add_einsum(program)
+
+    program.add_einsum(1)
+    fusion.add_einsum(program)
+
+    assert fusion.get_blocks() == [["T"], ["Z"]]
+
+
+def test_add_einsum_diff_temporal_ranks():
+    spacetime = """
+        T:
+          space: [N]
+          time: [M, K]
+        Z:
+          space: [K]
+          time: [M, N]
+    """
+
+    bindings = """
+        T:
+        - config: configA
+          prefix: tmp/T
+        Z:
+        - config: configA
+          prefix: tmp/Z
+    """
+    yaml = make_yaml(spacetime, bindings)
+
+    program, hardware, format_ = parse_yamls(yaml)
+    fusion = Fusion(hardware)
+
+    program.add_einsum(0)
+    fusion.add_einsum(program)
+
+    program.add_einsum(1)
+    fusion.add_einsum(program)
+
+    assert fusion.get_blocks() == [["T"], ["Z"]]
+
+
+def test_add_einsum_overlapping_components():
+    spacetime = """
+        T:
+          space: [N]
+          time: [M, K]
+        Z:
+          space: [K]
+          time: [M, N]
+    """
+
+    bindings = """
+        T:
+        - config: configA
+          prefix: tmp/T
+        - component: FPMul0
+          bindings:
+          - op: mul
+        Z:
+        - config: configA
+          prefix: tmp/Z
+        - component: FPMul0
+          bindings:
+          - op: mul
+    """
+    yaml = make_yaml(spacetime, bindings)
+
+    program, hardware, format_ = parse_yamls(yaml)
+    fusion = Fusion(hardware)
+
+    program.add_einsum(0)
+    fusion.add_einsum(program)
+
+    program.add_einsum(1)
+    fusion.add_einsum(program)
+
+    assert fusion.get_blocks() == [["T"], ["Z"]]
+
+
+def test_add_einsum_fused():
+    spacetime = """
+        T:
+          space: [N]
+          time: [M, K]
+        Z:
+          space: [N]
+          time: [M, K]
+    """
+
+    bindings = """
+        T:
+        - config: configA
+          prefix: tmp/T
+        - component: FPMul0
+          bindings:
+          - op: mul
+        Z:
+        - config: configA
+          prefix: tmp/Z
+        - component: FPMul1
+          bindings:
+          - op: mul
+    """
+    yaml = make_yaml(spacetime, bindings)
+
+    program, hardware, format_ = parse_yamls(yaml)
+    fusion = Fusion(hardware)
+
+    program.add_einsum(0)
+    fusion.add_einsum(program)
+
+    program.add_einsum(1)
+    fusion.add_einsum(program)
+
+    assert fusion.get_blocks() == [["T", "Z"]]
+
+
+def test_add_components():
+    spacetime = """
+        T:
+          space: [N]
+          time: [M, K]
+        Z:
+          space: [N]
+          time: [M, K]
+    """
+
+    bindings = """
+        T:
+        - config: configA
+          prefix: tmp/T
+        - component: FPMul0
+          bindings:
+          - op: mul
+        - component: FPMul1
+          bindings:
+          - op: mul
+        Z:
+        - config: configA
+          prefix: tmp/Z
+    """
+    yaml = make_yaml(spacetime, bindings)
+
+    program, hardware, format_ = parse_yamls(yaml)
+    fusion = Fusion(hardware)
+
+    program.add_einsum(0)
+    fusion.add_einsum(program)
+    fusion.add_component("T", "FPMul0")
+    fusion.add_component("T", "FPMul1")
+
+    assert fusion.get_components("T") == ["FPMul0", "FPMul1"]
diff --git a/tests/ir/test_hardware.py b/tests/ir/test_hardware.py
index b88b78d..306e495 100644
--- a/tests/ir/test_hardware.py
+++ b/tests/ir/test_hardware.py
@@ -3,334 +3,521 @@ from teaal.ir.component import *
 from teaal.ir.hardware import Hardware
 from teaal.ir.level import Level
+from teaal.ir.program import Program
 from teaal.parse import *
 
 
+def build_outerspace_yaml():
+    with open("tests/integration/outerspace.yaml", "r") as f:
+        return f.read()
+
+
+def parse_yamls(yaml):
+    einsum = Einsum.from_str(yaml)
+    mapping = Mapping.from_str(yaml)
+    program = Program(einsum, mapping)
+    program.add_einsum(0)
+
+    arch = Architecture.from_str(yaml)
+    bindings = Bindings.from_str(yaml)
+
+    return Hardware(arch, bindings, program)
+
+
 def test_no_arch():
-    arch = Architecture.from_str("")
-    bindings = Bindings.from_str("")
+    yaml = """
+    einsum:
+      declaration:
+        Z: [M]
+      expressions:
+      - Z[m] = a
+    bindings:
+      Z:
+      - config: arch
+        prefix: tmp/Z
+    """
+    arch = Architecture.from_str(yaml)
+    bindings = Bindings.from_str(yaml)
+    program = Program(Einsum.from_str(yaml), Mapping.from_str(yaml))
 
     with pytest.raises(ValueError) as excinfo:
-        Hardware(arch, bindings)
+        Hardware(arch, bindings, program)
     assert str(excinfo.value) == "Empty architecture 
specification" def test_bad_arch(): yaml = """ + einsum: + declaration: + Z: [M] + expressions: + - Z[m] = a architecture: - subtree: - - name: foo + config0: + - name: foo - name: bar + bindings: + Z: + - config: config0 + prefix: tmp/Z """ arch = Architecture.from_str(yaml) bindings = Bindings.from_str(yaml) + program = Program(Einsum.from_str(yaml), Mapping.from_str(yaml)) with pytest.raises(ValueError) as excinfo: - Hardware(arch, bindings) - assert str(excinfo.value) == "Architecture must have a single root level" + Hardware(arch, bindings, program) + assert str( + excinfo.value) == "Configuration config0 must have a single root level" def test_bad_component(): yaml = """ + einsum: + declaration: + Z: [M] + expressions: + - Z[m] = a architecture: - subtree: + accel: - name: System local: - name: BAD class: foo + bindings: + Z: + - config: accel + prefix: tmp/Z """ arch = Architecture.from_str(yaml) bindings = Bindings.from_str(yaml) + program = Program(Einsum.from_str(yaml), Mapping.from_str(yaml)) with pytest.raises(ValueError) as excinfo: - Hardware(arch, bindings) + Hardware(arch, bindings, program) assert str(excinfo.value) == "Unknown class: foo" +def test_bad_intersector(): + yaml = """ + einsum: + declaration: + Z: [M] + expressions: + - Z[m] = a + architecture: + accel: + - name: System + local: + - name: BAD + class: Intersector + attributes: + type: foo + bindings: + Z: + - config: accel + prefix: tmp/Z + """ + arch = Architecture.from_str(yaml) + bindings = Bindings.from_str(yaml) + program = Program(Einsum.from_str(yaml), Mapping.from_str(yaml)) + + with pytest.raises(ValueError) as excinfo: + Hardware(arch, bindings, program) + assert str(excinfo.value) == "Unknown intersection type: foo" + + def test_no_binding(): yaml = """ + einsum: + declaration: + Z: [M] + expressions: + - Z[m] = a architecture: - subtree: + arch: - name: System local: - name: Cache class: Cache + bindings: + Z: + - config: arch + prefix: tmp/Z """ arch = Architecture.from_str(yaml) bindings = Bindings.from_str(yaml) - hardware = Hardware(arch, bindings) + program = Program(Einsum.from_str(yaml), Mapping.from_str(yaml)) + hardware = Hardware(arch, bindings, program) - cache = CacheComponent("Cache", {}, []) + cache = CacheComponent("Cache", 1, {}, {}) assert hardware.get_component("Cache") == cache def test_get_component(): yaml = """ + einsum: + declaration: + A: [K, M] + B: [K, N] + T: [K, M, N] + Z: [M, N] + expressions: + - T[k,m,n] = take(A[k,m], B[k,n], 1) + - Z[m,n] = T[k,m,n] * A[k,m] + mapping: + rank-order: + A: [M, K] + B: [K, N] + T: [M, K, N] + Z: [M, N] + partitioning: + T: + M: [uniform_occupancy(A.32)] + K: [uniform_occupancy(A.64)] + Z: + M: [uniform_occupancy(A.32)] + K: [uniform_occupancy(A.64)] + loop-order: + T: [M1, M0, K1, K0, N] + Z: [M1, M0, K1, N, K0] + spacetime: + T: + space: [M0, K1] + time: [M1, K0, N] + Z: + space: [M0, K1] + time: [M1, N, K0] architecture: - subtree: + Accelerator: - name: Base local: - name: LLB class: Buffet + attributes: + width: 64 + depth: 3145728 - name: FiberCache class: Cache attributes: - width: 8 + width: 64 depth: 3145728 - name: Compute class: Compute + attributes: + type: mul - name: Memory class: DRAM attributes: - datawidth: 8 bandwidth: 128 - name: LFIntersect - class: LeaderFollower + class: Intersector + attributes: + type: leader-follower - name: HighRadixMerger class: Merger attributes: - radix: 64 - next_latency: 1 + inputs: 64 + comparator_radix: 64 + outputs: 1 + order: fifo + reduce: False + + - name: TopSequencer + class: 
Sequencer + attributes: + num_ranks: 3 - name: SAIntersect - class: SkipAhead + class: Intersector + attributes: + type: skip-ahead + + - name: TFIntersect + class: Intersector + attributes: + type: two-finger bindings: - - name: LLB + T: + - config: Accelerator + prefix: tmp/T + - component: LLB bindings: - tensor: A rank: K2 + format: default + type: payload + evict-on: root - tensor: B rank: K2 - - tensor: Z - rank: N2 + format: default + type: payload + evict-on: root - - name: FiberCache + - component: FiberCache bindings: - tensor: B rank: K + format: default + type: payload - - name: Compute - bindings: - - einsum: Z - op: mul - - einsum: Z - op: add - - - name: Memory + - component: Memory bindings: - tensor: A - rank: root + rank: K2 + format: default + type: payload - tensor: B - rank: root - - tensor: Z - rank: root + rank: K2 + format: default + type: payload - - name: LFIntersect + - component: LFIntersect bindings: - - einsum: T - rank: K + - rank: K leader: A - - name: HighRadixMerger + Z: + - config: Accelerator + prefix: tmp/Z + - component: LLB + bindings: + - tensor: Z + rank: N2 + format: default + type: payload + evict-on: root + + - component: Compute + bindings: + - op: mul + + - component: Memory + bindings: + - tensor: Z + rank: N2 + format: default + type: payload + + - component: HighRadixMerger bindings: - tensor: T - init_ranks: [M, K, N] - swap_depth: 1 + init-ranks: [M, K, N] + final-ranks: [M, N, K] - - name: SAIntersect + - component: TopSequencer bindings: - - einsum: Z - rank: K2 + - rank: M2 + - rank: K2 + - rank: N1 + + - component: SAIntersect + bindings: + - rank: K2 + + - component: TFIntersect + bindings: + - rank: K1 """ arch = Architecture.from_str(yaml) bindings = Bindings.from_str(yaml) - hardware = Hardware(arch, bindings) + program = Program(Einsum.from_str(yaml), Mapping.from_str(yaml)) + hardware = Hardware(arch, bindings, program) def assert_component(type_, name, attrs): - binding = bindings.get(name) - component = type_(name, attrs, binding) + binding = bindings.get_component(name) + component = type_(name, 1, attrs, binding) + assert hardware.get_component(name) == component - assert_component(BuffetComponent, "LLB", {}) + attrs = {"width": 64, "depth": 3145728} + assert_component(BuffetComponent, "LLB", attrs) - attrs = {"width": 8, "depth": 3145728} + attrs = {"width": 64, "depth": 3145728} assert_component(CacheComponent, "FiberCache", attrs) - assert_component(ComputeComponent, "Compute", {}) + assert_component(ComputeComponent, "Compute", {"type": "mul"}) attrs = {"datawidth": 8, "bandwidth": 128} assert_component(DRAMComponent, "Memory", attrs) assert_component(LeaderFollowerComponent, "LFIntersect", {}) - attrs = {"radix": 64, "next_latency": 1} + attrs = { + "inputs": 64, + "comparator_radix": 64, + "outputs": 1, + "order": "fifo", + "reduce": False + } assert_component(MergerComponent, "HighRadixMerger", attrs) - assert_component(SkipAheadComponent, "SAIntersect", {}) - + attrs = {"num_ranks": 3} + assert_component(SequencerComponent, "TopSequencer", attrs) -def test_bad_compute_path(): - yaml = """ - architecture: - subtree: - - name: System - - subtree: - - name: Stage0 - local: - - name: BAD0 - class: compute - - - name: Stage1 - local: - - name: BAD1 - class: compute - - bindings: - - name: BAD0 - bindings: - - einsum: Z - op: mul - - name: BAD1 - bindings: - - einsum: Z - op: add - """ - arch = Architecture.from_str(yaml) - bindings = Bindings.from_str(yaml) - hardware = Hardware(arch, bindings) - - with 
pytest.raises(ValueError) as excinfo: - hardware.get_compute_path("Z") - assert str(excinfo.value) == "Only one compute path allowed per einsum" - - -def test_get_compute_path(): - arch = Architecture.from_file("tests/integration/test_arch.yaml") - bindings = Bindings.from_file("tests/integration/test_bindings.yaml") - hardware = Hardware(arch, bindings) - - system = hardware.get_tree() - pe = system.get_subtrees()[0] + assert_component(SkipAheadComponent, "SAIntersect", {}) - assert hardware.get_compute_path("Z") == [system, pe] - assert hardware.get_compute_path("T") == [] + assert_component(TwoFingerComponent, "TFIntersect", {}) -def test_get_compute_components(): +def test_get_components(): yaml = """ + einsum: + declaration: + Z: [M] + X: [M] + A: [K, M] + D: [J, M] + expressions: + - Z[m] = A[k, m] + - X[m] = D[j, m] + architecture: - subtree: + accel: - name: System local: - name: Intersect0 - class: SkipAhead + class: Intersector + attributes: + type: skip-ahead subtree: - name: PE local: - name: Intersect1 - class: SkipAhead + class: Intersector + attributes: + type: skip-ahead - name: MAC class: compute + attributes: + type: add bindings: - - name: Intersect0 + Z: + - config: accel + prefix: tmp/Z + + - component: Intersect0 bindings: - - einsum: Z - rank: K + - rank: K - - name: Intersect1 + - component: MAC bindings: - - einsum: X - rank: J + - op: add - - name: MAC + X: + - config: accel + prefix: tmp/X + - component: Intersect1 bindings: - - einsum: Z - op: add + - rank: J + """ arch = Architecture.from_str(yaml) bindings = Bindings.from_str(yaml) - hardware = Hardware(arch, bindings) + program = Program(Einsum.from_str(yaml), Mapping.from_str(yaml)) + hardware = Hardware(arch, bindings, program) intersect = SkipAheadComponent( - "Intersect0", {}, bindings.get("Intersect0")) - mac = ComputeComponent("MAC", {}, bindings.get("MAC")) + "Intersect0", 1, {}, bindings.get_component("Intersect0")) + mac = ComputeComponent("MAC", 1, + {"type": "add"}, + bindings.get_component("MAC")) + + assert hardware.get_components( + "Z", FunctionalComponent) == [ + intersect, mac] + - assert hardware.get_compute_components("Z") == [intersect, mac] +def test_get_config(): + yaml = build_outerspace_yaml() + hardware = parse_yamls(yaml) + assert hardware.get_config("T0") == "MultiplyPhase" + assert hardware.get_config("T1") == "MergePhase" + assert hardware.get_config("Z") == "MergePhase" -def test_get_merger_components(): + +def test_get_frequency_unspecified(): yaml = """ + einsum: + declaration: + Z: [M] + expressions: + - Z[m] = a architecture: - subtree: + accel: - name: System + bindings: + Z: + - config: accel + prefix: tmp/Z + """ + arch = Architecture.from_str(yaml) + bindings = Bindings.from_str(yaml) + program = Program(Einsum.from_str(yaml), Mapping.from_str(yaml)) + hardware = Hardware(arch, bindings, program) - subtree: - - name: SwapStage0 - local: - - name: Merger0 - class: Merger - attributes: - radix: 64 - next_latency: 1 - - - name: ComputeStage - local: - - name: Compute - class: compute + with pytest.raises(ValueError) as excinfo: + hardware.get_frequency("Z") + assert str(excinfo.value) == "Unspecified clock frequency for config accel" - - name: SwapStage1 - local: - - name: Merger1 - class: Merger - attributes: - radix: 64 - next_latency: 1 +def test_get_frequency_bad(): + yaml = """ + einsum: + declaration: + Z: [M] + expressions: + - Z[m] = a + architecture: + accel: + - name: System + attributes: + clock_frequency: foo bindings: - - name: Merger0 - bindings: - - tensor: T 
- init_ranks: [M, K, N] - swap_depth: 1 - - - name: Compute - bindings: - - einsum: Z - op: add - - - name: Merger1 - bindings: - - tensor: Z - init_ranks: [N, M] - swap_depth: 0 + Z: + - config: accel + prefix: tmp/Z """ arch = Architecture.from_str(yaml) bindings = Bindings.from_str(yaml) - hardware = Hardware(arch, bindings) + program = Program(Einsum.from_str(yaml), Mapping.from_str(yaml)) + hardware = Hardware(arch, bindings, program) + + with pytest.raises(ValueError) as excinfo: + hardware.get_frequency("Z") + assert str(excinfo.value) == "Bad clock frequency for config accel" - attrs = {"radix": 64, "next_latency": 1} - merger0 = MergerComponent("Merger0", attrs, bindings.get("Merger0")) - merger1 = MergerComponent("Merger1", attrs, bindings.get("Merger1")) - assert hardware.get_merger_components() == [merger0, merger1] +def test_get_frequency(): + yaml = build_outerspace_yaml() + hardware = parse_yamls(yaml) + + assert hardware.get_frequency("Z") == 1500000000 def test_get_traffic_path_multiple_bindings(): yaml = """ + einsum: + declaration: + Z: [M] + A: [M] + expressions: + - Z[m] = A[m] + architecture: - subtree: + accel: - name: BAD local: @@ -342,36 +529,56 @@ def test_get_traffic_path_multiple_bindings(): - name: Compute class: compute + attributes: + type: add bindings: - - name: Memory0 + Z: + - config: accel + prefix: tmp/Z + + - component: Memory0 bindings: - tensor: A - rank: root + rank: M + type: payload + format: default - - name: Memory1 + - component: Memory1 bindings: - tensor: A - rank: root + rank: M + type: payload + format: default - - name: Compute + - component: Compute bindings: - - einsum: Z - op: add + - op: add """ arch = Architecture.from_str(yaml) bindings = Bindings.from_str(yaml) - hardware = Hardware(arch, bindings) + program = Program(Einsum.from_str(yaml), Mapping.from_str(yaml)) + program.add_einsum(0) + hardware = Hardware(arch, bindings, program) with pytest.raises(ValueError) as excinfo: - hardware.get_traffic_path("Z", "A") - assert str(excinfo.value) == "Multiple bindings for einsum Z and tensor A" + hardware.get_traffic_path("A", "M", "payload", "default") + assert str(excinfo.value) == "Multiple traffic paths for tensor A in Einsum Z" def test_get_traffic_path(): yaml = """ + einsum: + declaration: + A: [M] + B: [M, K] + X: [M] + Z: [M] + expressions: + - X[m] = A[m] * B[m, k] + - Z[m] = A[m] + B[m] architecture: - subtree: + accel: - name: System local: @@ -382,7 +589,9 @@ def test_get_traffic_path(): - name: Stages local: - name: Intersection - class: SkipAhead + class: Intersector + attributes: + type: skip-ahead - name: LLB class: Buffet @@ -395,6 +604,8 @@ def test_get_traffic_path(): - name: MAC0 class: compute + attributes: + type: mul - name: Stage1 local: @@ -403,6 +614,8 @@ def test_get_traffic_path(): - name: MAC1 class: compute + attributes: + type: mul - name: Stage2 local: @@ -411,76 +624,160 @@ def test_get_traffic_path(): - name: MAC2 class: compute + attributes: + type: mul bindings: - - name: Memory + Z: + - config: accel + prefix: tmp/Z + - component: Memory bindings: - tensor: A - rank: root + rank: M + format: default + type: payload + evict-on: root + - tensor: Z - rank: root + rank: M + format: default + type: payload + evict-on: root - - name: S0B + - component: S0B bindings: - tensor: A rank: M + format: default + type: payload + evict-on: root - tensor: Z rank: M + format: default + type: payload + evict-on: root - - name: MAC0 + - component: MAC0 bindings: - - einsum: A - op: mul + - op: mul - - name: S1B + - 
component: S1B bindings: - tensor: Z rank: M - - - name: MAC1 + format: default + type: coord + evict-on: root + + X: + - config: accel + prefix: tmp/X + - component: MAC1 bindings: - - einsum: X - op: add + - op: add - - name: S2B + - component: S2B bindings: - tensor: A rank: M - - tensor: Z + format: default + type: payload + evict-on: root + - tensor: X rank: M - - - name: MAC2 - bindings: - - einsum: Z - op: add + format: default + type: payload + evict-on: root """ arch = Architecture.from_str(yaml) bindings = Bindings.from_str(yaml) - hardware = Hardware(arch, bindings) + program = Program(Einsum.from_str(yaml), Mapping.from_str(yaml)) + program.add_einsum(1) + hardware = Hardware(arch, bindings, program) + + mem = DRAMComponent("Memory", 1, {}, bindings.get_component("Memory")) + s0b = BuffetComponent("S0B", 1, {}, bindings.get_component("S0B")) + s1b = BuffetComponent("S1B", 1, {}, bindings.get_component("S1B")) + s2b = BuffetComponent("S2B", 1, {}, bindings.get_component("S2B")) + + assert hardware.get_traffic_path( + "A", "M", "payload", "default") == [(mem, "lazy"), (s0b, "lazy")] + assert hardware.get_traffic_path( + "Z", "M", "payload", "default") == [(mem, "lazy"), (s0b, "lazy")] + assert hardware.get_traffic_path( + "Z", "M", "coord", "default") == [(s1b, "lazy")] + + program.add_einsum(0) + assert hardware.get_traffic_path("B", "M", "payload", "default") == [] + - mem = DRAMComponent("Memory", {}, bindings.get("Memory")) - s0b = BuffetComponent("S0B", {}, bindings.get("S0B")) - s1b = BuffetComponent("S1B", {}, bindings.get("S1B")) - s2b = BuffetComponent("S2B", {}, bindings.get("S2B")) +def test_get_traffic_eager(): + extensor = "tests/integration/extensor.yaml" + arch = Architecture.from_file(extensor) + bindings = Bindings.from_file(extensor) + program = Program(Einsum.from_file(extensor), Mapping.from_file(extensor)) + program.add_einsum(0) + hardware = Hardware(arch, bindings, program) - assert hardware.get_traffic_path("A", "A") == [mem, s0b] - assert hardware.get_traffic_path("Z", "A") == [mem, s2b] - assert hardware.get_traffic_path("Z", "Z") == [mem, s2b] - assert hardware.get_traffic_path("X", "B") == [] + dram = hardware.get_component("MainMemory") + llb = hardware.get_component("LLB") + + ranks = ["K2", "M2", "M1", "K1", "M0", "K0"] + types = [[], [], [], ["coord"], ["coord", "payload"], ["coord", "payload"]] + llb.expand_eager("Z", "A", "default", ranks, types) + + assert hardware.get_traffic_path( + "A", "K1", "coord", "default") == [ + (dram, "lazy"), (llb, "lazy")] + assert hardware.get_traffic_path( + "A", "K0", "coord", "default") == [ + (dram, "lazy"), (llb, "M0")] + + +def test_get_prefix(): + gamma = "tests/integration/gamma.yaml" + arch = Architecture.from_file(gamma) + bindings = Bindings.from_file(gamma) + program = Program(Einsum.from_file(gamma), Mapping.from_file(gamma)) + hardware = Hardware(arch, bindings, program) + + assert hardware.get_prefix("T") == "tmp/gamma_T" + assert hardware.get_prefix("Z") == "tmp/gamma_Z" def test_get_tree(): + yaml = """ + einsum: + declaration: + A: [M] + Z: [M] + expressions: + - Z[m] = A[m] + """ arch = Architecture.from_file("tests/integration/test_arch.yaml") bindings = Bindings.from_file("tests/integration/test_bindings.yaml") - hardware = Hardware(arch, bindings) - - regs = BuffetComponent("Registers", {}, bindings.get("Registers")) - mac = ComputeComponent("MAC", {}, bindings.get("MAC")) + program = Program(Einsum.from_str(yaml), Mapping.from_str(yaml)) + hardware = Hardware(arch, bindings, program) + + regs 
= BuffetComponent( + "Registers", 8, + {}, + bindings.get_component("Registers")) + mac = ComputeComponent("MAC", 8, + {"type": "mul"}, + bindings.get_component("MAC")) pe = Level("PE", 8, {}, [regs, mac], []) mem_attrs = {"datawidth": 8, "bandwidth": 128} - mem = DRAMComponent("Memory", mem_attrs, bindings.get("Memory")) + mem = DRAMComponent( + "Memory", + 1, + mem_attrs, + bindings.get_component("Memory")) attrs = {"clock_frequency": 10 ** 9} tree = Level("System", 1, attrs, [mem], [pe]) + + program.add_einsum(0) assert hardware.get_tree() == tree diff --git a/tests/ir/test_level.py b/tests/ir/test_level.py index dac2bd2..a368182 100644 --- a/tests/ir/test_level.py +++ b/tests/ir/test_level.py @@ -4,8 +4,9 @@ def build_local(): attrs = {"datawidth": 8, "bandwidth": 128} - bindings = [{"tensor": "A", "rank": "M"}] - return DRAMComponent("DRAM", attrs, bindings) + bindings = {"Z": [{"tensor": "A", "rank": "M", + "format": "default", "type": "payload"}]} + return DRAMComponent("DRAM", 1, attrs, bindings) def build_level(): @@ -19,7 +20,7 @@ def build_level(): def build_subtree(): - return Level("PE", 8, {}, [ComputeComponent("MAC", {}, [])], []) + return Level("PE", 8, {}, [FunctionalComponent("MAC", 8, {}, {})], []) def test_get_attr(): @@ -53,6 +54,6 @@ def test_eq(): def test_repr(): level = build_level() - repr_ = "(Level, System, 1, {'clock_frequency': 1000000000}, [(DRAMComponent, DRAM, {'datawidth': 8, 'bandwidth': 128}, {'A': 'M'})], [(Level, PE, 8, {}, [(ComputeComponent, MAC, {}, {})], [])])" + repr_ = "(Level, System, 1, {'clock_frequency': 1000000000}, [(DRAMComponent, DRAM, 1, {'Z': [{'tensor': 'A', 'rank': 'M', 'format': 'default', 'type': 'payload'}]}, 128)], [(Level, PE, 8, {}, [(FunctionalComponent, MAC, 8, {})], [])])" assert repr(level) == repr_ diff --git a/tests/ir/test_metrics.py b/tests/ir/test_metrics.py index 9a9ad24..a61fa0c 100644 --- a/tests/ir/test_metrics.py +++ b/tests/ir/test_metrics.py @@ -8,17 +8,27 @@ from teaal.parse import * +def build_extensor_yaml(): + with open("tests/integration/extensor.yaml", "r") as f: + return f.read() + + +def build_extensor_energy_yaml(): + with open("tests/integration/extensor-energy.yaml", "r") as f: + return f.read() + + def build_gamma_yaml(): with open("tests/integration/gamma.yaml", "r") as f: return f.read() -def build_metrics(): - yaml = build_gamma_yaml() - return Metrics(*build_program_hardware(yaml)) +def build_sigma_yaml(): + with open("tests/integration/sigma.yaml", "r") as f: + return f.read() -def build_program_hardware(yaml): +def parse_yamls(yaml): einsum = Einsum.from_str(yaml) mapping = Mapping.from_str(yaml) program = Program(einsum, mapping) @@ -26,398 +36,1019 @@ def build_program_hardware(yaml): arch = Architecture.from_str(yaml) bindings = Bindings.from_str(yaml) - hardware = Hardware(arch, bindings) - format_ = Format.from_str(yaml) - return program, hardware, format_ + return program, arch, bindings, format_ -def test_check_configuration_no_dyn_part(): +def test_used_traffic_paths(): yaml = """ einsum: declaration: - A: [M] - Z: [M] + A: [M, N] + Z: [M, N] expressions: - - Z[m] = A[m] - mapping: - partitioning: - Z: - M: [uniform_occupancy(A.10)] - + - Z[m, n] = A[m, n] architecture: - subtree: + accel: - name: System local: - - name: Compute - class: compute - - binding: - - name: Compute - bindings: - - einsum: Z - op: add + - name: Memory + class: DRAM + subtree: + - name: PE + local: + - name: Registers + class: Buffet + bindings: + Z: + - config: accel + prefix: tmp/Z + - component: Memory + 
bindings: + - tensor: A + rank: N + type: payload + format: default0 + - tensor: A + rank: N + type: payload + format: default1 + - component: Registers + bindings: + - tensor: A + rank: N + type: payload + format: default0 + evict-on: M + - tensor: A + rank: N + type: payload + format: default1 + evict-on: M + format: + A: + default0: + rank-order: [M, N] + M: + format: U + N: + format: U + pbits: 32 + default1: + rank-order: [M, N] + M: + format: U + N: + format: U + pbits: 32 """ - program, hardware, format_ = build_program_hardware(yaml) + program, arch, bindings, format_ = parse_yamls(yaml) + hardware = Hardware(arch, bindings, program) - with pytest.raises(NotImplementedError): + with pytest.raises(ValueError) as excinfo: Metrics(program, hardware, format_) + assert str( + excinfo.value) in { + "Multiple potential formats {'default0', 'default1'} for tensor A in Einsum Z", + "Multiple potential formats {'default1', 'default0'} for tensor A in Einsum Z"} -def test_check_configuration_three_tensors(): +def test_expand_eager(): + program, arch, bindings, format_ = parse_yamls(build_extensor_yaml()) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) + + bindings = {'Z': [ + {'tensor': 'A', 'rank': 'K1', 'type': 'coord', 'evict-on': 'M2', 'format': 'default', 'style': 'lazy'}, + {'tensor': 'A', 'rank': 'M0', 'type': 'coord', 'evict-on': 'M2', 'format': 'default', 'style': 'eager', 'root': 'M0'}, + {'tensor': 'B', 'rank': 'N1', 'type': 'coord', 'evict-on': 'K2', 'format': 'default', 'style': 'lazy'}, + {'tensor': 'B', 'rank': 'N1', 'type': 'payload', 'evict-on': 'K2', 'format': 'default', 'style': 'lazy'}, + {'tensor': 'B', 'rank': 'K1', 'type': 'coord', 'evict-on': 'K2', 'format': 'default', 'style': 'lazy'}, + {'tensor': 'B', 'rank': 'K1', 'type': 'payload', 'evict-on': 'K2', 'format': 'default', 'style': 'lazy'}, + {'tensor': 'B', 'rank': 'N0', 'type': 'coord', 'evict-on': 'K2', 'format': 'default', 'style': 'eager', 'root': 'N0'}, + {'tensor': 'Z', 'rank': 'M0', 'type': 'coord', 'evict-on': 'M2', 'format': 'default', 'style': 'eager', 'root': 'M0'}, + {'tensor': 'Z', 'evict-on': 'M2', 'style': 'eager', 'format': 'default', 'root': 'M0', 'rank': 'N0', 'type': 'coord'}, + {'tensor': 'Z', 'evict-on': 'M2', 'style': 'eager', 'format': 'default', 'root': 'M0', 'rank': 'N0', 'type': 'payload'}, + {'tensor': 'A', 'evict-on': 'M2', 'style': 'eager', 'format': 'default', 'root': 'M0', 'rank': 'M0', 'type': 'payload'}, + {'tensor': 'A', 'evict-on': 'M2', 'style': 'eager', 'format': 'default', 'root': 'M0', 'rank': 'K0', 'type': 'coord'}, + {'tensor': 'A', 'evict-on': 'M2', 'style': 'eager', 'format': 'default', 'root': 'M0', 'rank': 'K0', 'type': 'payload'}, + {'tensor': 'B', 'evict-on': 'K2', 'style': 'eager', 'format': 'default', 'root': 'N0', 'rank': 'N0', 'type': 'payload'}, + {'tensor': 'B', 'evict-on': 'K2', 'style': 'eager', 'format': 'default', 'root': 'N0', 'rank': 'K0', 'type': 'coord'}, + {'tensor': 'B', 'evict-on': 'K2', 'style': 'eager', 'format': 'default', 'root': 'N0', 'rank': 'K0', 'type': 'payload'}]} + + assert hardware.get_component("LLB").get_bindings()["Z"] == bindings["Z"] + + +def test_expand_eager_elem(): yaml = """ einsum: declaration: - A: [M] - B: [M] - C: [M] - Z: [M] + Z: [] + A: [K, M] + B: [K] expressions: - - Z[m] = A[m] * B[m] * C[m] - + - Z[] = A[k, m] * B[k] architecture: - subtree: - - name: System + accel: + - name: level0 local: - - name: Compute - class: compute - - binding: - - name: Compute - bindings: - - 
einsum: Z - op: add + - name: Buffer + class: Buffet + bindings: + Z: + - config: accel + prefix: tmp/Z + - component: Buffer + bindings: + - tensor: A + rank: K + type: payload + evict-on: root + format: default + style: eager + format: + A: + default: + rank-order: [K, M] + K: + format: C + pbits: 32 + M: + format: C + cbits: 32 + pbits: 32 + layout: interleaved """ - program, hardware, format_ = build_program_hardware(yaml) + program, arch, bindings, format_ = parse_yamls(yaml) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) - with pytest.raises(NotImplementedError): - Metrics(program, hardware, format_) + bindings = {'Z': [{'tensor': 'A', + 'rank': 'K', + 'type': 'payload', + 'evict-on': 'root', + 'format': 'default', + 'style': 'eager', + 'root': 'K'}, + {'tensor': 'A', + 'evict-on': 'root', + 'style': 'eager', + 'format': 'default', + 'root': 'K', + 'rank': 'M', + 'type': 'elem'}]} + + assert hardware.get_component("Buffer").get_bindings() == bindings + + +def test_get_coiter(): + program, arch, bindings, format_ = parse_yamls(build_gamma_yaml()) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) + + assert metrics.get_coiter("K") == hardware.get_component("Intersect") -def test_not_loaded_on_chip(): +def test_get_coiter_traces_leader_follower(): + program, arch, bindings, format_ = parse_yamls(build_gamma_yaml()) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) + + assert metrics.get_coiter_traces("Intersect", "K") == ["intersect_2"] + + +def test_get_coiter_traces_two_finger_more_than_two(): yaml = """ einsum: declaration: - Z: [M] + Z: [] + A: [K] + B: [K] + C: [K] expressions: - - - Z[m] = a - + - Z[] = A[k] * B[k] * C[k] architecture: - subtree: - - name: System + accel: + - name: level0 local: - - name: Memory - class: DRAM - - subtree: - - name: PE - local: - - name: MAC - class: compute - + - name: Intersect + class: Intersector + attributes: + type: two-finger bindings: - - name: Memory - bindings: - - tensor: Z - rank: M - - - name: MAC - bindings: - - einsum: Z - op: add + Z: + - config: accel + prefix: tmp/Z + - component: Intersect + bindings: + - rank: K + # TODO: Allow the format to be empty + format: + Z: + default: + rank-order: [] """ - program, hardware, format_ = build_program_hardware(yaml) + program, arch, bindings, format_ = parse_yamls(yaml) + hardware = Hardware(arch, bindings, program) - with pytest.raises(ValueError) as excinfo: + with pytest.raises(NotImplementedError): Metrics(program, hardware, format_) - assert str(excinfo.value) == "Tensor Z never buffered on chip" -def test_not_implemented_root_not_in_dram(): +def test_get_coiter_traces_two_finger(): yaml = """ einsum: declaration: - Z: [M] + Z: [] + A: [K] + B: [K] expressions: - - Z[m] = a - + - Z[] = A[k] * B[k] architecture: - subtree: - - name: System + accel: + - name: level0 local: - - name: Memory - class: DRAM + - name: Intersect + class: Intersector + attributes: + type: two-finger + bindings: + Z: + - config: accel + prefix: tmp/Z + - component: Intersect + bindings: + - rank: K + # TODO: Allow the format to be empty + format: + Z: + default: + rank-order: [] + """ + program, arch, bindings, format_ = parse_yamls(yaml) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) - subtree: - - name: PE - local: - - name: Buffer - class: Buffet + assert metrics.get_coiter_traces("Intersect", "K") == [ + "intersect_0", 
"intersect_1"] + + +def test_get_collected_iter_info(): + program, arch, bindings, format_ = parse_yamls(build_gamma_yaml()) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) + + assert metrics.get_collected_iter_info() == set() + + program, arch, bindings, format_ = parse_yamls( + build_extensor_energy_yaml()) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) + + assert metrics.get_collected_iter_info() == { + "N2", "K2", "M2", "M1", "N1", "K1", "M0", "N0", "K0"} + + +def test_get_collected_tensor_info(): + program, arch, bindings, format_ = parse_yamls(build_gamma_yaml()) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) + + assert metrics.get_collected_tensor_info("A") == {("K", "fiber", False), ( + "M", "iter", False), ("M", "fiber", False), ("K", "iter", False), ("K", "fiber", True)} + assert metrics.get_collected_tensor_info("B") == {( + "N", "iter", False), ("K", "fiber", False), ("N", "fiber", False), ("K", "iter", False)} + assert metrics.get_collected_tensor_info("T") == set() + + program.reset() + program.add_einsum(1) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) + + assert metrics.get_collected_tensor_info("A") == {( + "K", "fiber", False), ("M", "iter", False), ("M", "fiber", False), ("K", "iter", False)} + assert metrics.get_collected_tensor_info("T") == set() + assert metrics.get_collected_tensor_info("Z") == {( + "M", "iter", False), ("N", "iter", False), ("M", "fiber", False), ("N", "fiber", False)} - - name: MAC - class: compute +def test_get_collected_tensor_info_eager(): + program, arch, bindings, format_ = parse_yamls(build_extensor_yaml()) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) + + assert metrics.get_collected_tensor_info("A") == {('M0', 'M0', False), ('K1', 'fiber', True), ( + 'K2', 'fiber', True), ('K1', 'fiber', False), ('K0', 'M0', False), ('K0', 'fiber', True)} + assert metrics.get_collected_tensor_info("B") == { + ('N1', 'fiber', False), + ('K0', 'N0', False), + ('N1', 'iter', False), + ('N0', 'N0', False), + ('K1', 'fiber', True), + ('K2', 'fiber', True), + ('K1', 'fiber', False), + ('K1', 'iter', False), + ('K0', 'fiber', True)} + assert metrics.get_collected_tensor_info( + "Z") == {('N0', 'M0', False), ("M0", "M0", False)} + + +def test_get_collected_tensor_info_extra_intersection_test(): + yaml = """ + einsum: + declaration: + Z: [M, N] + A: [M] + B: [M] + C: [N] + expressions: + - Z[m, n] = A[m] * B[m] * C[n] + architecture: + accel: + - name: level0 + local: + - name: Intersector + class: Intersector + attributes: + type: two-finger bindings: - - name: Memory - bindings: - - tensor: Z - rank: M - - - name: Buffer - bindings: - - tensor: Z - rank: M - - - name: MAC - bindings: - - einsum: Z - op: add + Z: + - config: accel + prefix: tmp/Z + - component: Intersector + bindings: + - rank: M + format: + Z: + default: + rank-order: [M, N] + M: + format: C + N: + format: C + pbits: 64 """ - program, hardware, format_ = build_program_hardware(yaml) + program, arch, bindings, format_ = parse_yamls(yaml) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) - with pytest.raises(NotImplementedError): - Metrics(program, hardware, format_) + assert metrics.get_collected_tensor_info("A") == {("M", "fiber", True)} + assert metrics.get_collected_tensor_info("B") == {("M", "fiber", True)} + 
assert metrics.get_collected_tensor_info("C") == set() + assert metrics.get_collected_tensor_info("Z") == set() -def test_get_compute_components(): - metrics = build_metrics() - bindings = Bindings.from_str(build_gamma_yaml()) +def test_get_collected_tensor_info_flattening(): + program, arch, bindings, format_ = parse_yamls(build_sigma_yaml()) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) - intersect = LeaderFollowerComponent( - "Intersection", {}, bindings.get("Intersection")) + assert metrics.get_collected_tensor_info("A") == {("MK00", "MK00", False)} + assert metrics.get_collected_tensor_info("B") == {("K0", "K0", False)} - assert metrics.get_compute_components() == [intersect] +def test_get_eager_evict_on(): + program, arch, bindings, format_ = parse_yamls(build_extensor_yaml()) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) -def test_get_format(): - metrics = build_metrics() - spec = { - "M": { - "format": "U", - "rhbits": 32, - "pbits": 32}, - "K": { - "format": "C", - "cbits": 32, - "pbits": 64}} - assert metrics.get_format(Tensor("A", ["M", "K"])) == spec + assert metrics.get_eager_evict_on("A", "K2") == [] + assert metrics.get_eager_evict_on("A", "M0") == ["M2"] + assert metrics.get_eager_evict_on("B", "N0") == ["K2"] -def test_get_merger_components(): - yaml = build_gamma_yaml() - program, hardware, format_ = build_program_hardware(yaml) +def test_get_eager_evicts(): + program, arch, bindings, format_ = parse_yamls(build_extensor_yaml()) + hardware = Hardware(arch, bindings, program) metrics = Metrics(program, hardware, format_) - assert metrics.get_merger_components() == [] + assert metrics.get_eager_evicts("N2") == [] + assert metrics.get_eager_evicts("K2") == [("B", "N0")] + assert metrics.get_eager_evicts("M2") == [("A", "M0"), ("Z", "M0")] - bindings = Bindings.from_str(yaml) - attrs = {"radix": 64, "next_latency": 1} - merger = MergerComponent( - "HighRadixMerger", - attrs, - bindings.get("HighRadixMerger")) - binding = bindings.get("HighRadixMerger")[0].copy() - binding["final_ranks"] = ["M", "N", "K"] +def test_get_eager_write(): + program, arch, bindings, format_ = parse_yamls(build_gamma_yaml()) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) - program.reset() - program.add_einsum(1) + assert not metrics.get_eager_write() + + program, arch, bindings, format_ = parse_yamls(build_extensor_yaml()) + hardware = Hardware(arch, bindings, program) metrics = Metrics(program, hardware, format_) - assert metrics.get_merger_components() == [(merger, binding)] + assert metrics.get_eager_write() -def test_get_merger_components_output(): +def test_get_fiber_trace(): yaml = """ einsum: declaration: - Z: [M, N] + Z0: [M] + Z1: [M] + Z2: [M] + Z3: [M] + Z4: [M] + A: [M, K] + B: [M, K] + C: [M, K] + D: [M, K] + E: [M, K] + F: [M, K] + G: [M, K] expressions: - - Z[m, n] = a + - Z0[m] = a + - Z1[m] = A[m, k] + - Z2[m] = A[m, k] * B[m, k] + - Z3[m] = A[m, k] + B[m, k] + - Z4[m] = A[m, k] * B[m, k] * C[m, k] + D[m, k] + E[m, k] * F[m, k] + G[m, k] + architecture: + accel: + - name: empty + bindings: + Z0: + - config: accel + prefix: tmp/Z0 + Z1: + - config: accel + prefix: tmp/Z1 + Z2: + - config: accel + prefix: tmp/Z2 + Z3: + - config: accel + prefix: tmp/Z3 + Z4: + - config: accel + prefix: tmp/Z4 + format: + Z0: + default: + rank-order: [M] + M: + format: C + pbits: 64 + """ + program, arch, bindings, format_ = parse_yamls(yaml) + hardware = 
Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) - mapping: - rank-order: - Z: [M, N] - loop-order: - Z: [N, M] + assert metrics.get_fiber_trace("Z0", "M", True) == "iter" + assert metrics.get_fiber_trace("Z0", "M", False) == "iter" - architecture: - subtree: - - name: System - local: - - name: Merger - class: Merger + program.reset() + program.add_einsum(1) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) + + assert metrics.get_fiber_trace("Z1", "M", True) == "populate_read_0" + assert metrics.get_fiber_trace("Z1", "M", False) == "populate_write_0" + assert metrics.get_fiber_trace("A", "M", True) == "populate_1" + assert metrics.get_fiber_trace("A", "K", True) == "iter" + + program.reset() + program.add_einsum(2) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) + + assert metrics.get_fiber_trace("Z2", "M", True) == "populate_read_0" + assert metrics.get_fiber_trace("Z2", "M", False) == "populate_write_0" + assert metrics.get_fiber_trace("A", "M", True) == "intersect_2" + assert metrics.get_fiber_trace("A", "K", True) == "intersect_0" + assert metrics.get_fiber_trace("B", "M", True) == "intersect_3" + assert metrics.get_fiber_trace("B", "K", True) == "intersect_1" - - name: Compute - class: compute + program.reset() + program.add_einsum(3) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) + assert metrics.get_fiber_trace("Z3", "M", True) == "populate_read_0" + assert metrics.get_fiber_trace("Z3", "M", False) == "populate_write_0" + assert metrics.get_fiber_trace("A", "M", True) == "union_2" + assert metrics.get_fiber_trace("A", "K", True) == "union_0" + assert metrics.get_fiber_trace("B", "M", True) == "union_3" + assert metrics.get_fiber_trace("B", "K", True) == "union_1" + + program.reset() + program.add_einsum(4) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) + + assert metrics.get_fiber_trace("Z4", "M", True) == "populate_read_0" + assert metrics.get_fiber_trace("Z4", "M", False) == "populate_write_0" + assert metrics.get_fiber_trace("A", "M", True) == "intersect_4" + assert metrics.get_fiber_trace("A", "K", True) == "intersect_2" + assert metrics.get_fiber_trace("B", "M", True) == "intersect_6" + assert metrics.get_fiber_trace("B", "K", True) == "intersect_4" + assert metrics.get_fiber_trace("C", "M", True) == "intersect_7" + assert metrics.get_fiber_trace("C", "K", True) == "intersect_5" + assert metrics.get_fiber_trace("D", "M", True) == "union_8" + assert metrics.get_fiber_trace("D", "K", True) == "union_6" + assert metrics.get_fiber_trace("E", "M", True) == "intersect_12" + assert metrics.get_fiber_trace("E", "K", True) == "intersect_10" + assert metrics.get_fiber_trace("F", "M", True) == "intersect_13" + assert metrics.get_fiber_trace("F", "K", True) == "intersect_11" + assert metrics.get_fiber_trace("G", "M", True) == "union_11" + assert metrics.get_fiber_trace("G", "K", True) == "union_9" + + +def test_get_fiber_trace_coord_math(): + yaml = """ + einsum: + declaration: + A: [K] + B: [M] + Z0: [M] + Z1: [M] + Z2: [M] + expressions: + - Z0[m] = A[2 * m] + - Z1[m] = A[2 * m] + B[m] + - Z2[m] = A[2 * m] * B[m] + architecture: + accel: + - name: empty bindings: - - name: Merger - bindings: - - tensor: Z - init_ranks: [N, M] - swap_depth: 0 - - - name: Compute - bindings: - - einsum: Z - op: add + Z0: + - config: accel + prefix: tmp/Z0 + Z1: + - config: accel + prefix: 
tmp/Z1 + Z2: + - config: accel + prefix: tmp/Z2 + format: + Z0: + default: + rank-order: [M] + M: + format: C """ - program, hardware, format_ = build_program_hardware(yaml) + program, arch, bindings, format_ = parse_yamls(yaml) + hardware = Hardware(arch, bindings, program) metrics = Metrics(program, hardware, format_) - bindings = Bindings.from_str(yaml) - merger = MergerComponent("Merger", {}, bindings.get("Merger")) - binding = bindings.get("Merger")[0].copy() - binding["final_ranks"] = ["M", "N"] + assert metrics.get_fiber_trace("A", "K", True) == "populate_1" + assert metrics.get_fiber_trace("Z0", "M", True) == "populate_read_0" + assert metrics.get_fiber_trace("Z0", "M", False) == "populate_write_0" - assert metrics.get_merger_components() == [(merger, binding)] + program.reset() + program.add_einsum(1) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) + + assert metrics.get_fiber_trace("A", "K", True) == "union_2" + assert metrics.get_fiber_trace("B", "M", True) == "union_3" + assert metrics.get_fiber_trace("Z1", "M", True) == "populate_read_0" + assert metrics.get_fiber_trace("Z1", "M", False) == "populate_write_0" + + program.reset() + program.add_einsum(2) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) + + assert metrics.get_fiber_trace("A", "K", True) == "intersect_2" + assert metrics.get_fiber_trace("B", "M", True) == "intersect_3" + assert metrics.get_fiber_trace("Z2", "M", True) == "populate_read_0" + assert metrics.get_fiber_trace("Z2", "M", False) == "populate_write_0" -def test_get_merger_components_part_merge(): +def test_get_fiber_trace_leader_follower_multiple_intersectors(): yaml = """ einsum: declaration: - A: [K, M] Z: [M] + A: [M, K] + B: [M, K] + C: [M, K] + D: [M, K] expressions: - - Z[m] = A[k, m] + - Z[m] = A[m, k] * B[m, k] + C[m, k] * D[m, k] + architecture: + accel: + - name: level0 + local: + - name: LeaderFollower0 + class: Intersector + attributes: + type: leader-follower + - name: LeaderFollower1 + class: Intersector + attributes: + type: leader-follower + bindings: + Z: + - config: accel + prefix: tmp/Z + - component: LeaderFollower0 + bindings: + - rank: M + leader: A + - component: LeaderFollower1 + bindings: + - rank: M + leader: A + format: + Z0: + default: + rank-order: [M] + M: + format: C + pbits: 64 + """ + program, arch, bindings, format_ = parse_yamls(yaml) + hardware = Hardware(arch, bindings, program) - mapping: - rank-order: - A: [M, K] + with pytest.raises(NotImplementedError): + metrics = Metrics(program, hardware, format_) - partitioning: - Z: - M: [uniform_shape(10)] - loop-order: - Z: [M1, K, M0] +def test_get_fiber_trace_leader_follower_multiple_terms(): + yaml = """ + einsum: + declaration: + Z: [M] + A: [M, K] + B: [M, K] + C: [M, K] + D: [M, K] + expressions: + - Z[m] = A[m, k] * B[m, k] + C[m, k] * D[m, k] architecture: - subtree: - - name: System + accel: + - name: level0 local: - - name: Merger - class: Merger + - name: LeaderFollower + class: Intersector + attributes: + type: leader-follower + bindings: + Z: + - config: accel + prefix: tmp/Z + - component: LeaderFollower + bindings: + - rank: M + leader: A + - rank: K + leader: A + format: + Z0: + default: + rank-order: [M] + M: + format: C + pbits: 64 + """ + program, arch, bindings, format_ = parse_yamls(yaml) + hardware = Hardware(arch, bindings, program) + + with pytest.raises(NotImplementedError): + metrics = Metrics(program, hardware, format_) - - name: Compute - class: compute 
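+# A minimal sketch of the trace-naming pattern the asserts below rely on
+# (inferred from this test's expectations, not a documented contract): under
+# leader-follower intersection, the configured leader tensor takes the
+# lowest-numbered intersect trace on its rank, and the followers are numbered
+# after it in expression order, e.g. leader C on M -> intersect_2 with
+# followers A, B, D at 3, 4, 5, and leader B on K -> intersect_0 with
+# followers A, C, D at 1, 2, 3.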
+def test_get_fiber_trace_leader_follower(): + yaml = """ + einsum: + declaration: + Z: [M] + A: [M, K] + B: [M, K] + C: [M, K] + D: [M, K] + expressions: + - Z[m] = A[m, k] * B[m, k] * C[m, k] * D[m, k] + architecture: + accel: + - name: level0 + local: + - name: LeaderFollower + class: Intersector + attributes: + type: leader-follower bindings: - - name: Merger - bindings: - - tensor: A - init_ranks: [M1, M0, K] - swap_depth: 1 - - - name: Compute - bindings: - - einsum: Z - op: add + Z: + - config: accel + prefix: tmp/Z + - component: LeaderFollower + bindings: + - rank: M + leader: C + - rank: K + leader: B + format: + Z0: + default: + rank-order: [M] + M: + format: C + pbits: 64 """ - program, hardware, format_ = build_program_hardware(yaml) + program, arch, bindings, format_ = parse_yamls(yaml) + hardware = Hardware(arch, bindings, program) metrics = Metrics(program, hardware, format_) - bindings = Bindings.from_str(yaml) - merger = MergerComponent("Merger", {}, bindings.get("Merger")) - binding = bindings.get("Merger")[0].copy() - binding["final_ranks"] = ["M1", "K", "M0"] + assert metrics.get_fiber_trace("Z", "M", True) == "populate_read_0" + assert metrics.get_fiber_trace("Z", "M", False) == "populate_write_0" + assert metrics.get_fiber_trace("C", "M", True) == "intersect_2" + assert metrics.get_fiber_trace("A", "M", True) == "intersect_3" + assert metrics.get_fiber_trace("B", "M", True) == "intersect_4" + assert metrics.get_fiber_trace("D", "M", True) == "intersect_5" - assert metrics.get_merger_components() == [(merger, binding)] + assert metrics.get_fiber_trace("B", "K", True) == "intersect_0" + assert metrics.get_fiber_trace("A", "K", True) == "intersect_1" + assert metrics.get_fiber_trace("C", "K", True) == "intersect_2" + assert metrics.get_fiber_trace("D", "K", True) == "intersect_3" -def test_get_on_chip_buffer_not_in_dram(): - metrics = build_metrics() +def test_get_fiber_trace_get_payload(): + program, arch, bindings, format_ = parse_yamls(build_sigma_yaml()) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) - with pytest.raises(ValueError) as excinfo: - metrics.get_on_chip_buffer(Tensor("T", ["M", "K", "N"])) - assert str(excinfo.value) == "Tensor T not stored in DRAM" + assert metrics.get_fiber_trace("B", "K0", True) == "get_payload_B" -def test_get_on_chip_buffer(): - metrics = build_metrics() - bindings = Bindings.from_str(build_gamma_yaml()) +def test_get_format(): + program, arch, bindings, format_ = parse_yamls(build_gamma_yaml()) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) - attrs = {"width": 8, "depth": 3145728} - cache = CacheComponent("FiberCache", attrs, bindings.get("FiberCache")) - regs = BuffetComponent("RegFile0", {}, bindings.get("RegFile0")) + assert metrics.get_format() == format_ - assert metrics.get_on_chip_buffer(Tensor("A", ["M", "K"])) == regs - assert metrics.get_on_chip_buffer(Tensor("B", ["K", "N"])) == cache +def test_get_hardware(): + program, arch, bindings, format_ = parse_yamls(build_gamma_yaml()) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) -def test_get_on_chip_rank_not_in_dram(): - metrics = build_metrics() + assert metrics.get_hardware() == hardware - with pytest.raises(ValueError) as excinfo: - metrics.get_on_chip_rank(Tensor("T", ["M", "K", "N"])) - assert str(excinfo.value) == "Tensor T not stored in DRAM" +def test_get_loop_formats(): + program, arch, bindings, format_ = 
parse_yamls(build_gamma_yaml()) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) -def test_get_on_chip_rank(): - metrics = build_metrics() + assert metrics.get_loop_formats() == {"A": "default", "B": "default"} - assert metrics.get_on_chip_rank(Tensor("A", ["M", "K"])) == "M" - assert metrics.get_on_chip_rank(Tensor("B", ["K", "N"])) == "K" +def test_get_merger_init_ranks_multiple_bindings(): + yaml = """ + einsum: + declaration: + A: [M, N] + Z: [M, N] + expressions: + - Z[m, n] = A[m, n] + architecture: + merger: + - name: mergers + local: + - name: Merger0 + class: Merger + attributes: + inputs: 2 + comparator_radix: 2 + - name: Merger1 + class: Merger + attributes: + inputs: 2 + comparator_radix: 2 + bindings: + Z: + - config: merger + prefix: tmp/Z + - component: Merger0 + bindings: + - tensor: A + init-ranks: [M, N] + final-ranks: [N, M] + - component: Merger1 + bindings: + - tensor: A + init-ranks: [M, N] + final-ranks: [N, M] + format: + A: + default: + rank-order: [N, M] + N: + format: U + M: + format: U + pbits: 32 + """ + program, arch, bindings, format_ = parse_yamls(yaml) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) -def test_in_dram(): - metrics = build_metrics() + with pytest.raises(ValueError) as excinfo: + metrics.get_merger_init_ranks("A", ["N", "M"]) + assert str( + excinfo.value) == "Multiple bindings for merge of tensor A to final rank order ['N', 'M']" - assert metrics.in_dram(Tensor("A", ["M", "K"])) - assert metrics.in_dram(Tensor("B", ["M", "K"])) - assert not metrics.in_dram(Tensor("T", ["M", "K", "N"])) +def test_get_merger_init_ranks(): + program, arch, bindings, format_ = parse_yamls(build_gamma_yaml()) + program.reset() + program.add_einsum(1) -def test_on_chip_stationary(): - metrics = build_metrics() + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) - assert metrics.on_chip_stationary(Tensor("A", ["M", "K"])) - assert not metrics.on_chip_stationary(Tensor("B", ["K", "N"])) + assert metrics.get_merger_init_ranks( + "T", [ + "M", "N", "K"]) == [ + "M", "K", "N"] + assert metrics.get_merger_init_ranks( + "T", ["M1", "M0", "K1", "N", "K0"]) is None + assert metrics.get_merger_init_ranks("Z", ["M", "N"]) is None -def test_on_chip_stationary_root_buffered(): +def test_get_source_memory_not_memory(): yaml = """ einsum: declaration: Z: [M] expressions: - Z[m] = a + architecture: + accel: + - name: level0 + local: + - name: LeaderFollower + class: Intersector + attributes: + type: leader-follower + - name: DRAM + class: DRAM + bindings: + Z: + - config: accel + prefix: tmp/Z + - component: DRAM + bindings: + - tensor: Z + rank: M + type: payload + format: default + format: + Z: + default: + rank-order: [M] + M: + format: C + pbits: 64 + """ + program, arch, bindings, format_ = parse_yamls(yaml) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) + + with pytest.raises(ValueError) as excinfo: + metrics.get_source_memory("LeaderFollower", "Z", "M", "payload") + assert str( + excinfo.value) == "Destination component LeaderFollower not a memory" + + +def test_get_source_memory(): + yaml = """ + einsum: + declaration: + A: [K, M] + B: [K, M] + C: [K] + Z: [M] + expressions: + - Z[m] = A[k, m] * B[k, m] * C[k] architecture: - subtree: - - name: System + accel: + - name: level0 local: - - name: Memory + - name: DRAM class: DRAM + attributes: + bandwidth: 512 subtree: - - name: PE + - name: 
level1 local: - - name: Buffer - class: Buffet - - - name: MAC - class: compute + - name: L2Cache + class: Cache + attributes: + width: 64 + depth: 1024 + bandwidth: 2048 + + subtree: + - name: level2 + local: + - name: L1Cache + class: Cache + attributes: + width: 64 + depth: 128 bindings: - - name: Memory - bindings: - - tensor: Z - rank: root - - - name: Buffer - bindings: - - tensor: Z - rank: root - - - name: MAC - bindings: - - einsum: Z - op: add + Z: + - config: accel + prefix: tmp/Z + - component: DRAM + bindings: + - tensor: A + rank: M + type: payload + format: default + - tensor: A + rank: K + type: coord + format: default + - tensor: A + rank: K + type: payload + format: default + - tensor: Z + rank: M + type: elem + format: default + - component: L2Cache + bindings: + - tensor: A + rank: M + type: payload + format: default + - tensor: A + rank: K + type: coord + format: default + - tensor: A + rank: K + type: payload + format: default + - tensor: Z + rank: M + type: elem + format: default + - component: L1Cache + bindings: + - tensor: A + rank: K + type: coord + format: default + - tensor: A + rank: K + type: payload + format: default + - tensor: B + rank: K + type: payload + format: default + - tensor: Z + rank: M + type: elem + format: default + + format: + A: + default: + rank-order: [M, K] + M: + format: U + pbits: 32 + K: + format: C + cbits: 32 + pbits: 64 + B: + default: + rank-order: [M, K] + M: + format: U + K: + format: U + pbits: 64 + Z: + default: + rank-order: [M] + M: + format: C + cbits: 32 + pbits: 64 """ - metrics = Metrics(*build_program_hardware(yaml)) + program, arch, bindings, format_ = parse_yamls(yaml) + hardware = Hardware(arch, bindings, program) + metrics = Metrics(program, hardware, format_) - assert metrics.on_chip_stationary(Tensor("Z", ["M"])) + assert metrics.get_source_memory("L2Cache", "C", "K", "payload") is None + assert metrics.get_source_memory("L1Cache", "B", "M", "payload") is None + assert metrics.get_source_memory("L2Cache", "B", "K", "payload") is None + assert metrics.get_source_memory("L1Cache", "B", "K", "payload") is None + assert metrics.get_source_memory( + "L2Cache", "A", "M", "payload") == hardware.get_component("DRAM") + assert metrics.get_source_memory( + "L1Cache", "Z", "M", "elem") == hardware.get_component("L2Cache") diff --git a/tests/ir/test_part_node.py b/tests/ir/test_part_nodes.py similarity index 100% rename from tests/ir/test_part_node.py rename to tests/ir/test_part_nodes.py diff --git a/tests/ir/test_partitioning.py b/tests/ir/test_partitioning.py index 02619f7..3e35b11 100644 --- a/tests/ir/test_partitioning.py +++ b/tests/ir/test_partitioning.py @@ -358,12 +358,10 @@ def test_get_final_rank_id(): """ partitioning = build_partitioning(all_parts) - assert partitioning.get_final_rank_id(Tensor("B", ["K", "N"]), "N") == "N2" - assert partitioning.get_final_rank_id( - Tensor("B", ["K", "N"]), "N2") == "N2" - assert partitioning.get_final_rank_id( - Tensor("A", ["K", "M"]), "M1I") == "M1" - assert partitioning.get_final_rank_id(Tensor("B", ["K", "N"]), "K") == "K" + assert partitioning.get_final_rank_id(["K", "N"], "N") == "N2" + assert partitioning.get_final_rank_id(["K", "N"], "N2") == "N2" + assert partitioning.get_final_rank_id(["K", "M"], "M1I") == "M1" + assert partitioning.get_final_rank_id(["K", "N"], "K") == "K" def test_final_rank_id_flattening(): @@ -374,23 +372,15 @@ def test_final_rank_id_flattening(): """ partitioning = build_partitioning(all_parts) - assert partitioning.get_final_rank_id( - Tensor("A", 
["K", "M"]), "MK00") == "MK00" - assert partitioning.get_final_rank_id( - Tensor("A", ["K", "M"]), "MK01") == "MK01" - assert partitioning.get_final_rank_id( - Tensor("A", ["K", "M"]), "MK0") == "MK01" - assert partitioning.get_final_rank_id( - Tensor("A", ["K", "M"]), "M") == "MK01" - assert partitioning.get_final_rank_id( - Tensor("Z", ["M", "N"]), "M") == "MK00" - assert partitioning.get_final_rank_id( - Tensor("A", ["K", "M"]), "K0") == "MK01" - assert partitioning.get_final_rank_id( - Tensor("B", ["K", "N"]), "K0") == "MK00" - assert partitioning.get_final_rank_id( - Tensor("B", ["K", "N"]), "K1") == "K1" - assert partitioning.get_final_rank_id(Tensor("B", ["K", "N"]), "N") == "N" + assert partitioning.get_final_rank_id(["K", "M"], "MK00") == "MK00" + assert partitioning.get_final_rank_id(["K", "M"], "MK01") == "MK01" + assert partitioning.get_final_rank_id(["K", "M"], "MK0") == "MK01" + assert partitioning.get_final_rank_id(["K", "M"], "M") == "MK01" + assert partitioning.get_final_rank_id(["M", "N"], "M") == "MK00" + assert partitioning.get_final_rank_id(["K", "M"], "K0") == "MK01" + assert partitioning.get_final_rank_id(["K", "N"], "K0") == "MK00" + assert partitioning.get_final_rank_id(["K", "N"], "K1") == "K1" + assert partitioning.get_final_rank_id(["K", "N"], "N") == "N" def test_get_final_rank_id_conv(): @@ -400,10 +390,10 @@ def test_get_final_rank_id_conv(): """ partitioning = build_partitioning_conv(all_parts) - assert partitioning.get_final_rank_id(Tensor("I", ["W"]), "W") == "W2" - assert partitioning.get_final_rank_id(Tensor("I", ["W"]), "W1I") == "W1" - assert partitioning.get_final_rank_id(Tensor("I", ["W"]), "W0") == "W0" - assert partitioning.get_final_rank_id(Tensor("F", ["S"]), "S") == "S" + assert partitioning.get_final_rank_id(["W"], "W") == "W2" + assert partitioning.get_final_rank_id(["W"], "W1I") == "W1" + assert partitioning.get_final_rank_id(["W"], "W0") == "W0" + assert partitioning.get_final_rank_id(["S"], "S") == "S" def test_get_intermediates(): diff --git a/tests/ir/test_program.py b/tests/ir/test_program.py index 125833d..b66b8d9 100644 --- a/tests/ir/test_program.py +++ b/tests/ir/test_program.py @@ -30,6 +30,22 @@ def create_default(): return Program(Einsum.from_str(yaml), Mapping.from_str(yaml)) +def create_cascade(): + yaml = """ + einsum: + declaration: + Z: [M, N] + A: [K, M] + B: [K, N] + T: [M, N] + C: [M, N] + expressions: + - T[m, n] = A[k, m] * B[k, n] + - Z[m, n] = T[m, n] + C[m, n] + """ + return Program(Einsum.from_str(yaml), Mapping.from_str(yaml)) + + def create_loop_ordered(): yaml = """ einsum: @@ -215,6 +231,11 @@ def test_apply_partition_swizzling(): assert A.get_ranks() == ["J", "K1", "N", "M", "K0"] +def test_get_all_einsums(): + program = create_cascade() + assert program.get_all_einsums() == ["T", "Z"] + + def test_get_equation_unconfigured(): program = create_default() diff --git a/tests/ir/test_spacetime.py b/tests/ir/test_spacetime.py index 25e559c..ae99d9b 100644 --- a/tests/ir/test_spacetime.py +++ b/tests/ir/test_spacetime.py @@ -1,10 +1,34 @@ import pytest from sympy import symbols +from teaal.ir.coord_math import CoordMath from teaal.ir.partitioning import Partitioning from teaal.ir.spacetime import SpaceTime +from teaal.ir.tensor import Tensor from teaal.parse.mapping import Mapping from teaal.parse.spacetime import SpaceTimeParser +from tests.utils.parse_tree import * + + +def parse_mapping(parts, spacetime): + yaml = """ + mapping: + partitioning: + Z:""" + parts + """ + spacetime: + Z:""" + spacetime + return 
Mapping.from_str(yaml) + + +def build_partitioning(mapping): + dict_ = mapping.get_partitioning()["Z"] + + coord_math = CoordMath() + tensor = Tensor("T", ["J", "M", "N", "K"]) + ranks = make_ranks(["j", "m", "n", "k"]) + coord_math.add(tensor, ranks) + + return Partitioning(dict_, ["J", "M", "N", "K"], coord_math) def create_yaml(space, time, opt=None): @@ -24,8 +48,8 @@ def create_yaml(space, time, opt=None): def create_eqn_exprs(): - k, m, n = symbols("k m n") - return {k: k, m: m, n: n} + j, k, m, n = symbols("j k m n") + return {j: j, k: k, m: m, n: n} def test_bad_space(): @@ -148,6 +172,27 @@ def test_get_style(): assert spacetime.get_style("N") == "coord" +def test_get_style_flattening(): + parts = """ + (M, K): [flatten()] + """ + sptm = """ + space: [MK.pos] + time: [J, N] + """ + mapping = parse_mapping(parts, sptm) + partitioning = build_partitioning(mapping) + eqn_exprs = create_eqn_exprs() + spacetime = SpaceTime( + mapping.get_spacetime()["Z"], + partitioning, + eqn_exprs) + + assert spacetime.get_style("MK") == "pos" + assert spacetime.get_style("M") == "pos" + assert spacetime.get_style("K") == "pos" + + def test_get_time(): yaml = create_yaml(["M"], ["K", "N"]) eqn_exprs = create_eqn_exprs() diff --git a/tests/ir/test_tensor.py b/tests/ir/test_tensor.py index 7f5568c..9b9aac1 100644 --- a/tests/ir/test_tensor.py +++ b/tests/ir/test_tensor.py @@ -91,6 +91,11 @@ def test_peek_empty(): assert tensor.peek() is None +def test_peek_clean(): + tensor = Tensor("A", ["I", "J"]) + assert tensor.peek_clean() == "I" + + def test_peek_rest(): tensor = Tensor("A", ["K1", "M", "K0"]) assert tensor.peek_rest() == ["K1", "M", "K0"] diff --git a/tests/parse/test_arch.py b/tests/parse/test_arch.py index 2707296..59f5cee 100644 --- a/tests/parse/test_arch.py +++ b/tests/parse/test_arch.py @@ -31,7 +31,7 @@ def test_bad_architecture(): def test_unnamed_subtree(): yaml = """ architecture: - subtree: + Config0: - attributes: foo: 1 """ @@ -44,7 +44,7 @@ def test_unnamed_subtree(): def test_unnamed_local(): yaml = """ architecture: - subtree: + Config0: - name: System local: - class: DRAM @@ -58,7 +58,7 @@ def test_unnamed_local(): def test_unclassed_local(): yaml = """ architecture: - subtree: + Config0: - name: System local: - name: Memory @@ -105,14 +105,19 @@ def test_unspecified(): def test_all_spec(): regs = build_local("Registers", "Buffet", {}) - mac = build_local("MAC", "compute", {}) - subtree = build_subtree("PE", 8, {}, [regs, mac], []) + mac = build_local("MAC", "compute", {"type": "mul"}) + subtree0 = build_subtree("PE", 8, {}, [regs, mac], []) + + mac0 = build_local("MAC0", "compute", {"type": "mul"}) + mac1 = build_local("MAC1", "compute", {"type": "add"}) + subtree1 = build_subtree("PE", 8, {}, [regs, mac0, mac1], []) mem = build_local("Memory", "DRAM", {"datawidth": 8, "bandwidth": 128}) attrs = {"clock_frequency": 10 ** 9} - tree = build_subtree("System", 1, attrs, [mem], [subtree]) + tree0 = build_subtree("System", 1, attrs, [mem], [subtree0]) + tree1 = build_subtree("System", 1, attrs, [mem], [subtree1]) arch = Architecture.from_file("tests/integration/test_arch.yaml") - spec = {"architecture": {"subtree": [tree]}} + spec = {"architecture": {"Config0": [tree0], "Config1": [tree1]}} assert arch.get_spec() == spec diff --git a/tests/parse/test_bindings.py b/tests/parse/test_bindings.py index dfd650c..0341bfe 100644 --- a/tests/parse/test_bindings.py +++ b/tests/parse/test_bindings.py @@ -1,9 +1,11 @@ +import pytest + from teaal.parse.bindings import Bindings def test_empty(): 
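+ # Bindings are now grouped per Einsum, so a component with no bindings + # (or an unknown one) yields an empty dict rather than the old empty-list + # default.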
bindings = Bindings.from_str("") - assert bindings.get("BAD") == [] + assert bindings.get_component("BAD") == {} def test_no_bindings(): @@ -12,16 +14,50 @@ - bar - baz """ - assert Bindings.from_str(yaml).get("BAD") == [] + assert Bindings.from_str(yaml).get_component("BAD") == {} + + +def test_no_config(): + yaml = """ + bindings: + Z: + - component: foo + bindings: + - tensor: bar + """ + with pytest.raises(ValueError) as excinfo: + Bindings.from_str(yaml) + assert str( + excinfo.value) == "Accelerator config and prefix missing for Einsum Z" def test_defined(): bindings = Bindings.from_file("tests/integration/test_bindings.yaml") - mem = [{"tensor": "A", "rank": "root"}, {"tensor": "Z", "rank": "root"}] - regs = [{"tensor": "A", "rank": "M"}, {"tensor": "Z", "rank": "M"}] - mac = [{"einsum": "Z", "op": "add"}] - - assert bindings.get("Memory") == mem - assert bindings.get("Registers") == regs - assert bindings.get("MAC") == mac - assert bindings.get("BAD") == [] + mem = {"Z": [{"tensor": "A", "rank": "M", "format": "A_default", "type": "payload"}, { + "tensor": "Z", "rank": "M", "type": "payload", "format": "Z_default"}]} + regs = {"Z": [{"tensor": "A", + "rank": "M", + "format": "A_default", + "type": "payload", + "style": "eager", + "evict-on": "M"}, + {"tensor": "Z", + "rank": "M", + "format": "Z_default", + "type": "payload", + "evict-on": "root"}]} + mac = {"Z": [{"op": "add"}]} + + assert bindings.get_config("Z") == "Config0" + assert bindings.get_prefix("Z") == "tmp/Z" + + assert bindings.get_component("Memory") == mem + assert bindings.get_component("Registers") == regs + assert bindings.get_component("MAC") == mac + assert bindings.get_component("BAD") == {} + + assert bindings.get_bindings() == { + "Z": { + "Memory": mem["Z"], + "Registers": regs["Z"], + "MAC": mac["Z"]}} diff --git a/tests/parse/test_format.py b/tests/parse/test_format.py index 80cf3ba..1e83db2 100644 --- a/tests/parse/test_format.py +++ b/tests/parse/test_format.py @@ -7,14 +7,24 @@ def build_format(): yaml = """ format: A: - M: - format: U - rhbits: 32 - pbits: 32 - K: - format: C - cbits: 32 - pbits: 64 + init: + rank-order: [M, K] + M: + format: U + rhbits: 32 + pbits: 32 + K: + format: C + cbits: 32 + pbits: 64 + loop: + rank-order: [K, M] + K: + format: C + M: + format: C + cbits: 32 + pbits: 64 """ return Format.from_str(yaml) @@ -27,25 +37,42 @@ def test_no_format(): Format.from_file("tests/integration/test_arch.yaml") -def test_missing_tensor(): - format_ = build_format() +def test_missing_rank_order(): + yaml = """ + format: + A: + BAD: + M: + format: C + pbits: 32 + """ with pytest.raises(ValueError) as excinfo: - format_.get_spec("B") + Format.from_str(yaml) assert str( - excinfo.value) == "Format unspecified for tensor B" + excinfo.value) == "Rank order not specified for tensor A in format BAD" def test_format(): format_ = build_format() spec = { - "M": { - "format": "U", - "rhbits": 32, - "pbits": 32}, - "K": { - "format": "C", - "cbits": 32, - "pbits": 64}} + "init": { + "rank-order": ["M", "K"], + "M": { + "format": "U", + "rhbits": 32, + "pbits": 32}, + "K": { + "format": "C", + "cbits": 32, + "pbits": 64}}, + "loop": { + "rank-order": ["K", "M"], + "K": {"format": "C"}, + "M": { + "format": "C", + "cbits": 32, + "pbits": 64}}} assert format_.get_spec("A") == spec + assert format_.get_spec("B") == {} diff --git a/tests/trans/test_collector.py b/tests/trans/test_collector.py index 43d0cc4..42608c2 100644 --- a/tests/trans/test_collector.py +++ 
b/tests/trans/test_collector.py @@ -1,5 +1,6 @@ import pytest +from teaal.ir.fusion import Fusion from teaal.ir.hardware import Hardware from teaal.ir.metrics import Metrics from teaal.ir.program import Program @@ -7,11 +8,31 @@ from teaal.trans.collector import Collector +def build_extensor_yaml(): + with open("tests/integration/extensor.yaml", "r") as f: + return f.read() + + +def build_extensor_energy_yaml(): + with open("tests/integration/extensor-energy.yaml", "r") as f: + return f.read() + + def build_gamma_yaml(): with open("tests/integration/gamma.yaml", "r") as f: return f.read() +def build_outerspace_yaml(): + with open("tests/integration/outerspace.yaml", "r") as f: + return f.read() + + +def build_sigma_yaml(): + with open("tests/integration/sigma.yaml", "r") as f: + return f.read() + + def build_collector(yaml, i): einsum = Einsum.from_str(yaml) mapping = Mapping.from_str(yaml) @@ -19,281 +40,1049 @@ def build_collector(yaml, i): arch = Architecture.from_str(yaml) bindings = Bindings.from_str(yaml) - hardware = Hardware(arch, bindings) + hardware = Hardware(arch, bindings, program) format_ = Format.from_str(yaml) program.add_einsum(i) metrics = Metrics(program, hardware, format_) - return Collector(program, metrics) + fusion = Fusion(hardware) + fusion.add_einsum(program) + return Collector(program, metrics, fusion) -def test_dump(): - yaml = build_gamma_yaml() - collector = build_collector(yaml, 0) - hifiber = "metrics = {}\n" + \ - "metrics[\"T\"] = {}\n" + \ - "metrics[\"T\"][\"T footprint\"] = 0\n" + \ - "metrics[\"T\"][\"T traffic\"] = 0\n" + \ - "A_MK_format = Format(A_MK, {\"M\": {\"format\": \"U\", \"rhbits\": 32, \"pbits\": 32}, \"K\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 64}})\n" + \ - "metrics[\"T\"][\"A footprint\"] = A_MK_format.getTensor()\n" + \ - "metrics[\"T\"][\"A traffic\"] = metrics[\"T\"][\"A footprint\"]\n" + \ - "B_KN_format = Format(B_KN, {\"K\": {\"format\": \"U\", \"rhbits\": 32, \"pbits\": 32}, \"N\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 64}})\n" + \ - "metrics[\"T\"][\"B footprint\"] = B_KN_format.getTensor()\n" + \ - "metrics[\"T\"][\"B traffic\"] = Traffic.cacheTraffic(B_KN, \"K\", B_KN_format, 25165824) + B_KN_format.getRank(\"K\")\n" + \ - "metrics[\"T\"][\"K intersections\"] = Compute.lfCount(Metrics.dump(), \"K\", 0)" +def add_einsum(collector, i): + program = collector.program + hardware = collector.metrics.hardware + format_ = collector.metrics.format + fusion = collector.fusion - assert collector.dump().gen(0) == hifiber + program.reset() + program.add_einsum(i) + metrics = Metrics(program, hardware, format_) + fusion.add_einsum(program) + return Collector(program, metrics, fusion) - collector = build_collector(yaml, 1) - hifiber = "metrics[\"Z\"] = {}\n" + \ - "Z_MN_format = Format(Z_MN, {\"M\": {\"format\": \"U\", \"rhbits\": 32, \"pbits\": 32}, \"N\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 64}})\n" + \ - "metrics[\"Z\"][\"Z footprint\"] = Z_MN_format.getTensor()\n" + \ - "metrics[\"Z\"][\"Z traffic\"] = metrics[\"Z\"][\"Z footprint\"]\n" + \ - "metrics[\"Z\"][\"T footprint\"] = 0\n" + \ - "metrics[\"Z\"][\"T traffic\"] = 0\n" + \ - "A_MK_format = Format(A_MK, {\"M\": {\"format\": \"U\", \"rhbits\": 32, \"pbits\": 32}, \"K\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 64}})\n" + \ - "metrics[\"Z\"][\"A footprint\"] = A_MK_format.getTensor()\n" + \ - "metrics[\"Z\"][\"A traffic\"] = metrics[\"Z\"][\"A footprint\"]\n" + \ - "metrics[\"Z\"][\"mul\"] = Compute.opCount(Metrics.dump(), \"mul\")\n" + \ - 
"metrics[\"Z\"][\"add\"] = Compute.opCount(Metrics.dump(), \"add\")\n" + \ - "metrics[\"Z\"][\"T_MKN merge ops\"] = Compute.swapCount(T_MKN, 1, 64, 1)" - assert collector.dump().gen(0) == hifiber +def check_hifiber_lines(gen_lines, corr_lines): + gen_set = set(gen_lines) + corr_set = set(corr_lines) + print("In generated") + for line in gen_lines: + if line not in corr_set: + print(line) -def test_dump_buffet(): + print("In corr") + for line in corr_lines: + if line not in gen_set: + print(line) + + assert gen_set == corr_set + + +def test_create_component_unknown(): yaml = """ einsum: declaration: - A: [M] - Z: [M, N] + Z: [] + A: [K] + B: [K] expressions: - - Z[m, n] = A[m] - + - Z[] = A[k] * B[k] mapping: - loop-order: - Z: [N, M] - + spacetime: + Z: + space: [] + time: [K] architecture: - subtree: - - name: System + accel: + - name: level0 local: - - name: Memory + - name: DRAM class: DRAM + bindings: + Z: + - config: accel + prefix: tmp/Z + # TODO: Allow the format to be empty + format: + Z: + default: + rank-order: [] + """ + collector = build_collector(yaml, 0) - subtree: - - name: PE + dram = collector.metrics.get_hardware().get_component("DRAM") + with pytest.raises(ValueError) as excinfo: + collector.create_component(dram, "K") + assert str( + excinfo.value) == "Unable to create consumable metrics component for DRAM of type DRAMComponent" - local: - - name: RegFile - class: Buffet - - name: Compute - class: Compute +def test_create_component(): + yaml = """ + einsum: + declaration: + Z: [] + A: [I, J, K] + B: [I, J, K] + expressions: + - Z[] = A[i, j, k] * B[i, j, k] + mapping: + spacetime: + Z: + space: [] + time: [I, J, K] + architecture: + accel: + - name: level0 + local: + - name: LF + class: Intersector + attributes: + type: leader-follower + - name: SA + class: Intersector + attributes: + type: skip-ahead + - name: TF + class: Intersector + attributes: + type: two-finger + bindings: + Z: + - config: accel + prefix: tmp/Z + - component: LF + bindings: + - rank: I + leader: A + - component: SA + bindings: + - rank: J + - component: TF + bindings: + - rank: K + # TODO: Allow the format to be empty + format: + Z: + default: + rank-order: [] + """ + collector = build_collector(yaml, 0) + get_comp = collector.metrics.get_hardware().get_component + + assert collector.create_component(get_comp("LF"), "I").gen( + 0) == "LF_I = LeaderFollowerIntersector()" + assert collector.create_component(get_comp("SA"), "J").gen( + 0) == "SA_J = SkipAheadIntersector()" + assert collector.create_component(get_comp("TF"), "K").gen( + 0) == "TF_K = TwoFingerIntersector()" + +def test_consume_traces_unknown(): + yaml = """ + einsum: + declaration: + Z: [] + A: [K] + B: [K] + expressions: + - Z[] = A[k] * B[k] + mapping: + spacetime: + Z: + space: [] + time: [K] + architecture: + accel: + - name: level0 + local: + - name: DRAM + class: DRAM bindings: - - name: Memory - bindings: - - tensor: A - rank: root + Z: + - config: accel + prefix: tmp/Z + # TODO: Allow the format to be empty + format: + Z: + default: + rank-order: [] + """ + collector = build_collector(yaml, 0) - - name: RegFile - bindings: - - tensor: A - rank: M + with pytest.raises(ValueError) as excinfo: + collector.consume_traces("DRAM", "K") + assert str( + excinfo.value) == "Unable to consume traces for component DRAM of type DRAMComponent" - - name: Compute - bindings: - - einsum: Z - op: add +def test_consume_traces(): + yaml = """ + einsum: + declaration: + Z: [] + A: [I, J, K] + B: [I, J, K] + expressions: + - Z[] = A[i, j, k] * B[i, 
j, k] + mapping: + spacetime: + Z: + space: [] + time: [I, J, K] + architecture: + accel: + - name: level0 + local: + - name: LF + class: Intersector + attributes: + type: leader-follower + - name: SA + class: Intersector + attributes: + type: skip-ahead + - name: TF + class: Intersector + attributes: + type: two-finger + bindings: + Z: + - config: accel + prefix: tmp/Z + - component: LF + bindings: + - rank: I + leader: A + - component: SA + bindings: + - rank: J + - component: TF + bindings: + - rank: K + # TODO: Allow the format to be empty format: - A: - M: - format: C - cbits: 32 - pbits: 64 + Z: + default: + rank-order: [] """ collector = build_collector(yaml, 0) + + assert collector.consume_traces("LF", "I").gen( + 0) == "LF_I.addTraces(Metrics.consumeTrace(\"I\", \"intersect_0\"))" + assert collector.consume_traces("SA", "J").gen( + 0) == "SA_J.addTraces(Metrics.consumeTrace(\"J\", \"intersect_0\"), Metrics.consumeTrace(\"J\", \"intersect_1\"))" + assert collector.consume_traces("TF", "K").gen( + 0) == "TF_K.addTraces(Metrics.consumeTrace(\"K\", \"intersect_0\"), Metrics.consumeTrace(\"K\", \"intersect_1\"))" + + +def test_dump_gamma_T(): + yaml = build_gamma_yaml() + collector = build_collector(yaml, 0) + + hifiber = "metrics = {}\n" + \ + "metrics[\"T\"] = {}\n" + \ + "formats = {\"A\": Format(A_MK, {\"rank-order\": [\"M\", \"K\"], \"M\": {\"format\": \"U\", \"pbits\": 32}, \"K\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 64}}), \"B\": Format(B_KN, {\"rank-order\": [\"K\", \"N\"], \"K\": {\"format\": \"U\", \"pbits\": 32}, \"N\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 64}})}\n" + \ + "bindings = [{\"tensor\": \"B\", \"rank\": \"K\", \"type\": \"payload\", \"format\": \"default\"}, {\"tensor\": \"B\", \"rank\": \"N\", \"type\": \"coord\", \"format\": \"default\"}, {\"tensor\": \"B\", \"rank\": \"N\", \"type\": \"payload\", \"format\": \"default\"}]\n" + \ + "Traffic.filterTrace(\"tmp/gamma_T-K-intersect_3.csv\", \"tmp/gamma_T-K-iter.csv\", \"tmp/gamma_T-K-intersect_3_payload.csv\")\n" + \ + "Traffic.filterTrace(\"tmp/gamma_T-N-populate_1.csv\", \"tmp/gamma_T-N-iter.csv\", \"tmp/gamma_T-N-populate_1_payload.csv\")\n" + \ + "traces = {(\"B\", \"K\", \"payload\", \"read\"): \"tmp/gamma_T-K-intersect_3_payload.csv\", (\"B\", \"N\", \"coord\", \"read\"): \"tmp/gamma_T-N-populate_1.csv\", (\"B\", \"N\", \"payload\", \"read\"): \"tmp/gamma_T-N-populate_1_payload.csv\"}\n" + \ + "traffic = Traffic.cacheTraffic(bindings, formats, traces, 25165824, 64)\n" + \ + "metrics[\"T\"][\"MainMemory\"] = {}\n" + \ + "metrics[\"T\"][\"MainMemory\"][\"B\"] = {}\n" + \ + "metrics[\"T\"][\"MainMemory\"][\"B\"][\"read\"] = 0\n" + \ + "metrics[\"T\"][\"MainMemory\"][\"B\"][\"read\"] += traffic[0][\"B\"][\"read\"]\n" + \ + "bindings = [{\"tensor\": \"A\", \"rank\": \"M\", \"type\": \"payload\", \"format\": \"default\", \"evict-on\": \"root\", \"style\": \"lazy\"}, {\"tensor\": \"A\", \"rank\": \"K\", \"type\": \"coord\", \"format\": \"default\", \"evict-on\": \"M\", \"style\": \"lazy\"}, {\"tensor\": \"A\", \"rank\": \"K\", \"type\": \"payload\", \"format\": \"default\", \"evict-on\": \"M\", \"style\": \"lazy\"}]\n" + \ + "Traffic.filterTrace(\"tmp/gamma_T-M-populate_1.csv\", \"tmp/gamma_T-M-iter.csv\", \"tmp/gamma_T-M-populate_1_payload.csv\")\n" + \ + "Traffic.filterTrace(\"tmp/gamma_T-K-intersect_2.csv\", \"tmp/gamma_T-K-iter.csv\", \"tmp/gamma_T-K-intersect_2_payload.csv\")\n" + \ + "traces = {(\"A\", \"M\", \"payload\", \"read\"): \"tmp/gamma_T-M-populate_1_payload.csv\", (\"A\", \"K\", 
\"coord\", \"read\"): \"tmp/gamma_T-K-intersect_2.csv\", (\"A\", \"K\", \"payload\", \"read\"): \"tmp/gamma_T-K-intersect_2_payload.csv\"}\n" + \ + "traffic = Traffic.buffetTraffic(bindings, formats, traces, float(\"inf\"), 64)\n" + \ + "metrics[\"T\"][\"MainMemory\"][\"A\"] = {}\n" + \ + "metrics[\"T\"][\"MainMemory\"][\"A\"][\"read\"] = 0\n" + \ + "metrics[\"T\"][\"MainMemory\"][\"A\"][\"read\"] += traffic[0][\"A\"][\"read\"]\n" + \ + "metrics[\"T\"][\"MainMemory\"][\"time\"] = (metrics[\"T\"][\"MainMemory\"][\"A\"][\"read\"] + metrics[\"T\"][\"MainMemory\"][\"B\"][\"read\"]) / 1099511627776\n" + \ + "metrics[\"T\"][\"Intersect\"] = 0\n" + \ + "metrics[\"T\"][\"Intersect\"] += Intersect_K.getNumIntersects()\n" + \ + "metrics[\"T\"][\"Intersect\"][\"time\"] = metrics[\"T\"][\"Intersect\"] / 32000000000" + + assert collector.dump().gen(0) == hifiber + + +def test_dump_gamma_Z(): + yaml = build_gamma_yaml() + collector = build_collector(yaml, 0) + collector.dump() + + collector = add_einsum(collector, 1) + + hifiber = "metrics[\"Z\"] = {}\n" + \ + "formats = {\"Z\": Format(Z_MN, {\"rank-order\": [\"M\", \"N\"], \"M\": {\"format\": \"U\", \"pbits\": 32}, \"N\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 64}}), \"A\": Format(A_MK, {\"rank-order\": [\"M\", \"K\"], \"M\": {\"format\": \"U\", \"pbits\": 32}, \"K\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 64}})}\n" + \ + "bindings = [{\"tensor\": \"A\", \"rank\": \"M\", \"type\": \"payload\", \"format\": \"default\", \"evict-on\": \"root\", \"style\": \"lazy\"}, {\"tensor\": \"A\", \"rank\": \"K\", \"type\": \"coord\", \"format\": \"default\", \"evict-on\": \"M\", \"style\": \"lazy\"}, {\"tensor\": \"A\", \"rank\": \"K\", \"type\": \"payload\", \"format\": \"default\", \"evict-on\": \"M\", \"style\": \"lazy\"}]\n" + \ + "Traffic.filterTrace(\"tmp/gamma_Z-M-intersect_3.csv\", \"tmp/gamma_Z-M-iter.csv\", \"tmp/gamma_Z-M-intersect_3_payload.csv\")\n" + \ + "Traffic.filterTrace(\"tmp/gamma_Z-K-intersect_1.csv\", \"tmp/gamma_Z-K-iter.csv\", \"tmp/gamma_Z-K-intersect_1_payload.csv\")\n" + \ + "traces = {(\"A\", \"M\", \"payload\", \"read\"): \"tmp/gamma_Z-M-intersect_3_payload.csv\", (\"A\", \"K\", \"coord\", \"read\"): \"tmp/gamma_Z-K-intersect_1.csv\", (\"A\", \"K\", \"payload\", \"read\"): \"tmp/gamma_Z-K-intersect_1_payload.csv\"}\n" + \ + "traffic = Traffic.buffetTraffic(bindings, formats, traces, float(\"inf\"), 64)\n" + \ + "bindings = [{\"tensor\": \"Z\", \"rank\": \"M\", \"type\": \"payload\", \"format\": \"default\", \"evict-on\": \"root\", \"style\": \"lazy\"}, {\"tensor\": \"Z\", \"rank\": \"N\", \"type\": \"coord\", \"format\": \"default\", \"evict-on\": \"M\", \"style\": \"lazy\"}, {\"tensor\": \"Z\", \"rank\": \"N\", \"type\": \"payload\", \"format\": \"default\", \"evict-on\": \"M\", \"style\": \"lazy\"}]\n" + \ + "Traffic.filterTrace(\"tmp/gamma_Z-M-populate_read_0.csv\", \"tmp/gamma_Z-M-iter.csv\", \"tmp/gamma_Z-M-populate_read_0_payload.csv\")\n" + \ + "Traffic.filterTrace(\"tmp/gamma_Z-M-populate_write_0.csv\", \"tmp/gamma_Z-M-iter.csv\", \"tmp/gamma_Z-M-populate_write_0_payload.csv\")\n" + \ + "Traffic.filterTrace(\"tmp/gamma_Z-N-populate_read_0.csv\", \"tmp/gamma_Z-N-iter.csv\", \"tmp/gamma_Z-N-populate_read_0_payload.csv\")\n" + \ + "Traffic.filterTrace(\"tmp/gamma_Z-N-populate_write_0.csv\", \"tmp/gamma_Z-N-iter.csv\", \"tmp/gamma_Z-N-populate_write_0_payload.csv\")\n" + \ + "traces = {(\"Z\", \"M\", \"payload\", \"read\"): \"tmp/gamma_Z-M-populate_read_0_payload.csv\", (\"Z\", \"M\", \"payload\", \"write\"): 
\"tmp/gamma_Z-M-populate_write_0_payload.csv\", (\"Z\", \"N\", \"coord\", \"read\"): \"tmp/gamma_Z-N-populate_read_0.csv\", (\"Z\", \"N\", \"coord\", \"write\"): \"tmp/gamma_Z-N-populate_write_0.csv\", (\"Z\", \"N\", \"payload\", \"read\"): \"tmp/gamma_Z-N-populate_read_0_payload.csv\", (\"Z\", \"N\", \"payload\", \"write\"): \"tmp/gamma_Z-N-populate_write_0_payload.csv\"}\n" + \ + "traffic = Traffic.buffetTraffic(bindings, formats, traces, float(\"inf\"), 64)\n" + \ + "metrics[\"Z\"][\"MainMemory\"] = {}\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"Z\"] = {}\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"Z\"][\"read\"] = 0\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"Z\"][\"write\"] = 0\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"Z\"][\"read\"] += traffic[0][\"Z\"][\"read\"]\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"Z\"][\"write\"] += traffic[0][\"Z\"][\"write\"]\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"time\"] = (metrics[\"Z\"][\"MainMemory\"][\"Z\"][\"read\"] + metrics[\"Z\"][\"MainMemory\"][\"Z\"][\"write\"]) / 1099511627776\n" + \ + "metrics[\"Z\"][\"HighRadixMerger\"] = {}\n" + \ + "metrics[\"Z\"][\"HighRadixMerger\"][\"T_MKN\"] = Compute.numSwaps(T_MKN, 1, 64, 1)\n" + \ + "metrics[\"Z\"][\"HighRadixMerger\"][\"time\"] = metrics[\"Z\"][\"HighRadixMerger\"][T_MKN] / 32000000000\n" + \ + "metrics[\"Z\"][\"FPMul\"] = {}\n" + \ + "metrics[\"Z\"][\"FPMul\"][\"mul\"] = Metrics.dump()[\"Compute\"][\"payload_mul\"]\n" + \ + "metrics[\"Z\"][\"FPMul\"][\"time\"] = metrics[\"Z\"][\"FPMul\"][\"mul\"] / 32000000000\n" + \ + "metrics[\"Z\"][\"FPAdd\"] = {}\n" + \ + "metrics[\"Z\"][\"FPAdd\"][\"add\"] = Metrics.dump()[\"Compute\"][\"payload_add\"]\n" + \ + "metrics[\"Z\"][\"FPAdd\"][\"time\"] = metrics[\"Z\"][\"FPAdd\"][\"add\"] / 32000000000\n" + \ + "metrics[\"blocks\"] = [[\"T\", \"Z\"]]\n" + \ + "metrics[\"time\"] = max(metrics[\"Z\"][\"FPAdd\"][\"time\"], metrics[\"Z\"][\"FPMul\"][\"time\"], metrics[\"Z\"][\"HighRadixMerger\"][\"time\"], metrics[\"T\"][\"Intersect\"][\"time\"], metrics[\"T\"][\"MainMemory\"][\"time\"] + metrics[\"Z\"][\"MainMemory\"][\"time\"])" + + # print(collector.dump().gen(0)) + # assert False + + assert collector.dump().gen(0) == hifiber + + +def test_dump_outerspace_Z(): + yaml = build_outerspace_yaml() + collector = build_collector(yaml, 0) + collector.dump() + + collector = add_einsum(collector, 1) + collector.dump() + + collector = add_einsum(collector, 2) + + hifiber = "metrics[\"Z\"] = {}\n" + \ + "formats = {\"Z\": Format(Z_MN, {\"rank-order\": [\"M\", \"N\"], \"M\": {\"format\": \"U\", \"pbits\": 32}, \"N\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 64}})}\n" + \ + "bindings = [{\"tensor\": \"Z\", \"rank\": \"M\", \"type\": \"payload\", \"format\": \"default\", \"evict-on\": \"root\", \"style\": \"lazy\"}, {\"tensor\": \"Z\", \"rank\": \"N\", \"type\": \"coord\", \"format\": \"default\", \"evict-on\": \"M\", \"style\": \"lazy\"}, {\"tensor\": \"Z\", \"rank\": \"N\", \"type\": \"payload\", \"format\": \"default\", \"evict-on\": \"M\", \"style\": \"lazy\"}]\n" + \ + "Traffic.filterTrace(\"tmp/outerspace_Z-M-populate_read_0.csv\", \"tmp/outerspace_Z-M-iter.csv\", \"tmp/outerspace_Z-M-populate_read_0_payload.csv\")\n" + \ + "Traffic.filterTrace(\"tmp/outerspace_Z-M-populate_write_0.csv\", \"tmp/outerspace_Z-M-iter.csv\", \"tmp/outerspace_Z-M-populate_write_0_payload.csv\")\n" + \ + "Traffic.filterTrace(\"tmp/outerspace_Z-N-populate_read_0.csv\", \"tmp/outerspace_Z-N-iter.csv\", \"tmp/outerspace_Z-N-populate_read_0_payload.csv\")\n" + \ + 
"Traffic.filterTrace(\"tmp/outerspace_Z-N-populate_write_0.csv\", \"tmp/outerspace_Z-N-iter.csv\", \"tmp/outerspace_Z-N-populate_write_0_payload.csv\")\n" + \ + "traces = {(\"Z\", \"M\", \"payload\", \"read\"): \"tmp/outerspace_Z-M-populate_read_0_payload.csv\", (\"Z\", \"M\", \"payload\", \"write\"): \"tmp/outerspace_Z-M-populate_write_0_payload.csv\", (\"Z\", \"N\", \"coord\", \"read\"): \"tmp/outerspace_Z-N-populate_read_0.csv\", (\"Z\", \"N\", \"coord\", \"write\"): \"tmp/outerspace_Z-N-populate_write_0.csv\", (\"Z\", \"N\", \"payload\", \"read\"): \"tmp/outerspace_Z-N-populate_read_0_payload.csv\", (\"Z\", \"N\", \"payload\", \"write\"): \"tmp/outerspace_Z-N-populate_write_0_payload.csv\"}\n" + \ + "traffic = Traffic.buffetTraffic(bindings, formats, traces, 8192, 64)\n" + \ + "metrics[\"Z\"][\"MainMemory\"] = {}\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"Z\"] = {}\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"Z\"][\"read\"] = 0\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"Z\"][\"write\"] = 0\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"Z\"][\"read\"] += traffic[0][\"Z\"][\"read\"]\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"Z\"][\"write\"] += traffic[0][\"Z\"][\"write\"]\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"time\"] = (metrics[\"Z\"][\"MainMemory\"][\"Z\"][\"read\"] + metrics[\"Z\"][\"MainMemory\"][\"Z\"][\"write\"]) / 1099511627776\n" + \ + "metrics[\"Z\"][\"SortHW\"] = {}\n" + \ + "metrics[\"Z\"][\"SortHW\"][\"T1_MKN\"] = Compute.numSwaps(T1_MKN, 1, float(\"inf\"), \"N\")\n" + \ + "metrics[\"Z\"][\"SortHW\"][\"time\"] = metrics[\"Z\"][\"SortHW\"][T1_MKN] / 193500000000\n" + \ + "metrics[\"Z\"][\"FPAdd\"] = {}\n" + \ + "metrics[\"Z\"][\"FPAdd\"][\"add\"] = Metrics.dump()[\"Compute\"][\"payload_add\"]\n" + \ + "metrics[\"Z\"][\"FPAdd\"][\"time\"] = metrics[\"Z\"][\"FPAdd\"][\"add\"] / 193500000000\n" + \ + "metrics[\"blocks\"] = [[\"T0\"], [\"T1\", \"Z\"]]\n" + \ + "metrics[\"time\"] = max(metrics[\"T0\"][\"FPMul\"][\"time\"], metrics[\"T0\"][\"MainMemory\"][\"time\"]) + max(metrics[\"Z\"][\"FPAdd\"][\"time\"], metrics[\"T1\"][\"MainMemory\"][\"time\"] + metrics[\"Z\"][\"MainMemory\"][\"time\"], metrics[\"Z\"][\"SortHW\"][\"time\"])" + + assert collector.dump().gen(0) == hifiber + + +def test_dump_extensor(): + yaml = build_extensor_yaml() + collector = build_collector(yaml, 0) + + hifiber = "metrics = {}\n" + \ + "metrics[\"Z\"] = {}\n" + \ + "formats = {\"Z\": Format(Z_N2M2M1N1M0N0, {\"rank-order\": [\"N2\", \"M2\", \"M1\", \"N1\", \"M0\", \"N0\"], \"N2\": {\"format\": \"U\"}, \"M2\": {\"format\": \"U\"}, \"M1\": {\"format\": \"U\"}, \"N1\": {\"format\": \"U\"}, \"M0\": {\"format\": \"U\"}, \"N0\": {\"format\": \"C\", \"cbits\": 64, \"pbits\": 64}}), \"A\": Format(A_K2M2M1K1M0K0, {\"rank-order\": [\"K2\", \"M2\", \"M1\", \"K1\", \"M0\", \"K0\"], \"K2\": {\"format\": \"C\"}, \"M2\": {\"format\": \"C\"}, \"M1\": {\"format\": \"C\"}, \"K1\": {\"format\": \"C\", \"cbits\": 64}, \"M0\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 32}, \"K0\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 64}}), \"B\": Format(B_N2K2N1K1N0K0, {\"rank-order\": [\"N2\", \"K2\", \"N1\", \"K1\", \"N0\", \"K0\"], \"N2\": {\"format\": \"C\"}, \"K2\": {\"format\": \"C\"}, \"N1\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 32}, \"K1\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 32}, \"N0\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 32}, \"K0\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 64}})}\n" + \ + "bindings = [{\"tensor\": \"A\", \"rank\": \"K1\", \"type\": \"coord\", \"evict-on\": \"M2\", 
\"format\": \"default\", \"style\": \"lazy\"}, {\"tensor\": \"A\", \"rank\": \"M0\", \"type\": \"coord\", \"evict-on\": \"M2\", \"format\": \"default\", \"style\": \"eager\", \"root\": \"M0\"}, {\"tensor\": \"B\", \"rank\": \"N1\", \"type\": \"coord\", \"evict-on\": \"K2\", \"format\": \"default\", \"style\": \"lazy\"}, {\"tensor\": \"B\", \"rank\": \"N1\", \"type\": \"payload\", \"evict-on\": \"K2\", \"format\": \"default\", \"style\": \"lazy\"}, {\"tensor\": \"B\", \"rank\": \"K1\", \"type\": \"coord\", \"evict-on\": \"K2\", \"format\": \"default\", \"style\": \"lazy\"}, {\"tensor\": \"B\", \"rank\": \"K1\", \"type\": \"payload\", \"evict-on\": \"K2\", \"format\": \"default\", \"style\": \"lazy\"}, {\"tensor\": \"B\", \"rank\": \"N0\", \"type\": \"coord\", \"evict-on\": \"K2\", \"format\": \"default\", \"style\": \"eager\", \"root\": \"N0\"}, {\"tensor\": \"Z\", \"evict-on\": \"M2\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"M0\", \"rank\": \"N0\", \"type\": \"coord\"}, {\"tensor\": \"Z\", \"evict-on\": \"M2\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"M0\", \"rank\": \"N0\", \"type\": \"payload\"}, {\"tensor\": \"A\", \"evict-on\": \"M2\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"M0\", \"rank\": \"M0\", \"type\": \"payload\"}, {\"tensor\": \"A\", \"evict-on\": \"M2\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"M0\", \"rank\": \"K0\", \"type\": \"coord\"}, {\"tensor\": \"A\", \"evict-on\": \"M2\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"M0\", \"rank\": \"K0\", \"type\": \"payload\"}, {\"tensor\": \"B\", \"evict-on\": \"K2\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"N0\", \"rank\": \"N0\", \"type\": \"payload\"}, {\"tensor\": \"B\", \"evict-on\": \"K2\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"N0\", \"rank\": \"K0\", \"type\": \"coord\"}, {\"tensor\": \"B\", \"evict-on\": \"K2\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"N0\", \"rank\": \"K0\", \"type\": \"payload\"}]\n" + \ + "Traffic.filterTrace(\"tmp/extensor-N1-populate_1.csv\", \"tmp/extensor-N1-iter.csv\", \"tmp/extensor-N1-populate_1_payload.csv\")\n" + \ + "Traffic.filterTrace(\"tmp/extensor-K1-intersect_1.csv\", \"tmp/extensor-K1-iter.csv\", \"tmp/extensor-K1-intersect_1_payload.csv\")\n" + \ + "traces = {(\"A\", \"K1\", \"coord\", \"read\"): \"tmp/extensor-K1-intersect_0.csv\", (\"A\", \"M0\", \"coord\", \"read\"): \"tmp/extensor-M0-eager_a_m0_read.csv\", (\"B\", \"N1\", \"coord\", \"read\"): \"tmp/extensor-N1-populate_1.csv\", (\"B\", \"N1\", \"payload\", \"read\"): \"tmp/extensor-N1-populate_1_payload.csv\", (\"B\", \"K1\", \"coord\", \"read\"): \"tmp/extensor-K1-intersect_1.csv\", (\"B\", \"K1\", \"payload\", \"read\"): \"tmp/extensor-K1-intersect_1_payload.csv\", (\"B\", \"N0\", \"coord\", \"read\"): \"tmp/extensor-N0-eager_b_n0_read.csv\", (\"Z\", \"N0\", \"coord\", \"read\"): \"tmp/extensor-N0-eager_z_m0_read.csv\", (\"Z\", \"N0\", \"coord\", \"write\"): \"tmp/extensor-N0-eager_z_m0_write.csv\", (\"Z\", \"N0\", \"payload\", \"read\"): \"tmp/extensor-N0-eager_z_m0_read.csv\", (\"Z\", \"N0\", \"payload\", \"write\"): \"tmp/extensor-N0-eager_z_m0_write.csv\", (\"A\", \"M0\", \"payload\", \"read\"): \"tmp/extensor-M0-eager_a_m0_read.csv\", (\"A\", \"K0\", \"coord\", \"read\"): \"tmp/extensor-K0-eager_a_m0_read.csv\", (\"A\", \"K0\", \"payload\", \"read\"): \"tmp/extensor-K0-eager_a_m0_read.csv\", (\"B\", \"N0\", \"payload\", \"read\"): \"tmp/extensor-N0-eager_b_n0_read.csv\", (\"B\", 
\"K0\", \"coord\", \"read\"): \"tmp/extensor-K0-eager_b_n0_read.csv\", (\"B\", \"K0\", \"payload\", \"read\"): \"tmp/extensor-K0-eager_b_n0_read.csv\"}\n" + \ + "traffic = Traffic.buffetTraffic(bindings, formats, traces, 251658240, 64)\n" + \ + "metrics[\"Z\"][\"MainMemory\"] = {}\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"A\"] = {}\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"A\"][\"read\"] = 0\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"A\"][\"read\"] += traffic[0][\"A\"][\"read\"]\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"B\"] = {}\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"B\"][\"read\"] = 0\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"B\"][\"read\"] += traffic[0][\"B\"][\"read\"]\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"Z\"] = {}\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"Z\"][\"read\"] = 0\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"Z\"][\"write\"] = 0\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"Z\"][\"read\"] += traffic[0][\"Z\"][\"read\"]\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"Z\"][\"write\"] += traffic[0][\"Z\"][\"write\"]\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"time\"] = (metrics[\"Z\"][\"MainMemory\"][\"A\"][\"read\"] + metrics[\"Z\"][\"MainMemory\"][\"B\"][\"read\"] + metrics[\"Z\"][\"MainMemory\"][\"Z\"][\"read\"] + metrics[\"Z\"][\"MainMemory\"][\"Z\"][\"write\"]) / 586314575512\n" + \ + "metrics[\"Z\"][\"FPMul\"] = {}\n" + \ + "metrics[\"Z\"][\"FPMul\"][\"mul\"] = Metrics.dump()[\"Compute\"][\"payload_mul\"]\n" + \ + "metrics[\"Z\"][\"FPMul\"][\"time\"] = metrics[\"Z\"][\"FPMul\"][\"mul\"] / 128000000000\n" + \ + "metrics[\"Z\"][\"FPAdd\"] = {}\n" + \ + "metrics[\"Z\"][\"FPAdd\"][\"add\"] = Metrics.dump()[\"Compute\"][\"payload_add\"]\n" + \ + "metrics[\"Z\"][\"FPAdd\"][\"time\"] = metrics[\"Z\"][\"FPAdd\"][\"add\"] / 128000000000\n" + \ + "metrics[\"Z\"][\"K2Intersect\"] = 0\n" + \ + "metrics[\"Z\"][\"K2Intersect\"] += K2Intersect_K2.getNumIntersects()\n" + \ + "metrics[\"Z\"][\"K2Intersect\"][\"time\"] = metrics[\"Z\"][\"K2Intersect\"] / 1000000000\n" + \ + "metrics[\"Z\"][\"K1Intersect\"] = 0\n" + \ + "metrics[\"Z\"][\"K1Intersect\"] += K1Intersect_K1.getNumIntersects()\n" + \ + "metrics[\"Z\"][\"K1Intersect\"][\"time\"] = metrics[\"Z\"][\"K1Intersect\"] / 1000000000\n" + \ + "metrics[\"Z\"][\"K0Intersection\"] = 0\n" + \ + "metrics[\"Z\"][\"K0Intersection\"] += K0Intersection_K0.getNumIntersects()\n" + \ + "metrics[\"Z\"][\"K0Intersection\"][\"time\"] = metrics[\"Z\"][\"K0Intersection\"] / 128000000000\n" + \ + "metrics[\"blocks\"] = [[\"Z\"]]\n" + \ + "metrics[\"time\"] = max(metrics[\"Z\"][\"FPAdd\"][\"time\"], metrics[\"Z\"][\"FPMul\"][\"time\"], metrics[\"Z\"][\"K0Intersection\"][\"time\"], metrics[\"Z\"][\"K1Intersect\"][\"time\"], metrics[\"Z\"][\"K2Intersect\"][\"time\"], metrics[\"Z\"][\"MainMemory\"][\"time\"])" + + assert collector.dump().gen(0) == hifiber + + +def test_dump_extensor_energy(): + yaml = build_extensor_energy_yaml() + collector = build_collector(yaml, 0) + + hifiber = "metrics = {}\n" + \ + "metrics[\"Z\"] = {}\n" + \ + "formats = {\"Z\": Format(Z_N2M2M1N1M0N0, {\"rank-order\": [\"N2\", \"M2\", \"M1\", \"N1\", \"M0\", \"N0\"], \"N2\": {\"format\": \"U\"}, \"M2\": {\"format\": \"U\"}, \"M1\": {\"format\": \"U\"}, \"N1\": {\"format\": \"U\"}, \"M0\": {\"format\": \"U\"}, \"N0\": {\"format\": \"C\", \"cbits\": 64, \"pbits\": 64}}), \"A\": Format(A_K2M2M1K1M0K0, {\"rank-order\": [\"K2\", \"M2\", \"M1\", \"K1\", \"M0\", \"K0\"], \"K2\": {\"format\": \"C\"}, \"M2\": {\"format\": \"C\"}, \"M1\": {\"format\": \"C\"}, \"K1\": {\"format\": \"C\", 
\"cbits\": 64}, \"M0\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 32}, \"K0\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 64}}), \"B\": Format(B_N2K2N1K1N0K0, {\"rank-order\": [\"N2\", \"K2\", \"N1\", \"K1\", \"N0\", \"K0\"], \"N2\": {\"format\": \"C\"}, \"K2\": {\"format\": \"C\"}, \"N1\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 32}, \"K1\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 32}, \"N0\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 32}, \"K0\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 64}})}\n" + \ + "bindings = [{\"tensor\": \"A\", \"rank\": \"K1\", \"type\": \"coord\", \"evict-on\": \"M2\", \"format\": \"default\", \"style\": \"lazy\"}, {\"tensor\": \"A\", \"rank\": \"M0\", \"type\": \"coord\", \"evict-on\": \"M2\", \"format\": \"default\", \"style\": \"eager\", \"root\": \"M0\"}, {\"tensor\": \"B\", \"rank\": \"N1\", \"type\": \"coord\", \"evict-on\": \"K2\", \"format\": \"default\", \"style\": \"lazy\"}, {\"tensor\": \"B\", \"rank\": \"N1\", \"type\": \"payload\", \"evict-on\": \"K2\", \"format\": \"default\", \"style\": \"lazy\"}, {\"tensor\": \"B\", \"rank\": \"K1\", \"type\": \"coord\", \"evict-on\": \"K2\", \"format\": \"default\", \"style\": \"lazy\"}, {\"tensor\": \"B\", \"rank\": \"K1\", \"type\": \"payload\", \"evict-on\": \"K2\", \"format\": \"default\", \"style\": \"lazy\"}, {\"tensor\": \"B\", \"rank\": \"N0\", \"type\": \"coord\", \"evict-on\": \"K2\", \"format\": \"default\", \"style\": \"eager\", \"root\": \"N0\"}, {\"tensor\": \"Z\", \"evict-on\": \"M2\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"M0\", \"rank\": \"N0\", \"type\": \"coord\"}, {\"tensor\": \"Z\", \"evict-on\": \"M2\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"M0\", \"rank\": \"N0\", \"type\": \"payload\"}, {\"tensor\": \"A\", \"evict-on\": \"M2\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"M0\", \"rank\": \"M0\", \"type\": \"payload\"}, {\"tensor\": \"A\", \"evict-on\": \"M2\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"M0\", \"rank\": \"K0\", \"type\": \"coord\"}, {\"tensor\": \"A\", \"evict-on\": \"M2\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"M0\", \"rank\": \"K0\", \"type\": \"payload\"}, {\"tensor\": \"B\", \"evict-on\": \"K2\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"N0\", \"rank\": \"N0\", \"type\": \"payload\"}, {\"tensor\": \"B\", \"evict-on\": \"K2\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"N0\", \"rank\": \"K0\", \"type\": \"coord\"}, {\"tensor\": \"B\", \"evict-on\": \"K2\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"N0\", \"rank\": \"K0\", \"type\": \"payload\"}]\n" + \ + "Traffic.filterTrace(\"tmp/extensor_energy-N1-populate_1.csv\", \"tmp/extensor_energy-N1-iter.csv\", \"tmp/extensor_energy-N1-populate_1_payload.csv\")\n" + \ + "Traffic.filterTrace(\"tmp/extensor_energy-K1-intersect_1.csv\", \"tmp/extensor_energy-K1-iter.csv\", \"tmp/extensor_energy-K1-intersect_1_payload.csv\")\n" + \ + "traces = {(\"A\", \"K1\", \"coord\", \"read\"): \"tmp/extensor_energy-K1-intersect_0.csv\", (\"A\", \"M0\", \"coord\", \"read\"): \"tmp/extensor_energy-M0-eager_a_m0_read.csv\", (\"B\", \"N1\", \"coord\", \"read\"): \"tmp/extensor_energy-N1-populate_1.csv\", (\"B\", \"N1\", \"payload\", \"read\"): \"tmp/extensor_energy-N1-populate_1_payload.csv\", (\"B\", \"K1\", \"coord\", \"read\"): \"tmp/extensor_energy-K1-intersect_1.csv\", (\"B\", \"K1\", \"payload\", \"read\"): \"tmp/extensor_energy-K1-intersect_1_payload.csv\", (\"B\", \"N0\", 
\"coord\", \"read\"): \"tmp/extensor_energy-N0-eager_b_n0_read.csv\", (\"Z\", \"N0\", \"coord\", \"read\"): \"tmp/extensor_energy-N0-eager_z_m0_read.csv\", (\"Z\", \"N0\", \"coord\", \"write\"): \"tmp/extensor_energy-N0-eager_z_m0_write.csv\", (\"Z\", \"N0\", \"payload\", \"read\"): \"tmp/extensor_energy-N0-eager_z_m0_read.csv\", (\"Z\", \"N0\", \"payload\", \"write\"): \"tmp/extensor_energy-N0-eager_z_m0_write.csv\", (\"A\", \"M0\", \"payload\", \"read\"): \"tmp/extensor_energy-M0-eager_a_m0_read.csv\", (\"A\", \"K0\", \"coord\", \"read\"): \"tmp/extensor_energy-K0-eager_a_m0_read.csv\", (\"A\", \"K0\", \"payload\", \"read\"): \"tmp/extensor_energy-K0-eager_a_m0_read.csv\", (\"B\", \"N0\", \"payload\", \"read\"): \"tmp/extensor_energy-N0-eager_b_n0_read.csv\", (\"B\", \"K0\", \"coord\", \"read\"): \"tmp/extensor_energy-K0-eager_b_n0_read.csv\", (\"B\", \"K0\", \"payload\", \"read\"): \"tmp/extensor_energy-K0-eager_b_n0_read.csv\"}\n" + \ + "traffic = Traffic.buffetTraffic(bindings, formats, traces, 251658240, 64)\n" + \ + "metrics[\"Z\"][\"MainMemory\"] = {}\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"A\"] = {}\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"A\"][\"read\"] = 0\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"A\"][\"read\"] += traffic[0][\"A\"][\"read\"]\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"B\"] = {}\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"B\"][\"read\"] = 0\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"B\"][\"read\"] += traffic[0][\"B\"][\"read\"]\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"Z\"] = {}\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"Z\"][\"read\"] = 0\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"Z\"][\"write\"] = 0\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"Z\"][\"read\"] += traffic[0][\"Z\"][\"read\"]\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"Z\"][\"write\"] += traffic[0][\"Z\"][\"write\"]\n" + \ + "bindings = [{\"tensor\": \"A\", \"rank\": \"M0\", \"type\": \"coord\", \"evict-on\": \"K1\", \"format\": \"default\", \"style\": \"eager\", \"root\": \"M0\"}, {\"tensor\": \"B\", \"rank\": \"N0\", \"type\": \"coord\", \"evict-on\": \"K1\", \"format\": \"default\", \"style\": \"eager\", \"root\": \"N0\"}, {\"tensor\": \"Z\", \"evict-on\": \"N1\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"M0\", \"rank\": \"N0\", \"type\": \"coord\"}, {\"tensor\": \"Z\", \"evict-on\": \"N1\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"M0\", \"rank\": \"N0\", \"type\": \"payload\"}, {\"tensor\": \"A\", \"evict-on\": \"K1\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"M0\", \"rank\": \"M0\", \"type\": \"payload\"}, {\"tensor\": \"A\", \"evict-on\": \"K1\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"M0\", \"rank\": \"K0\", \"type\": \"coord\"}, {\"tensor\": \"A\", \"evict-on\": \"K1\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"M0\", \"rank\": \"K0\", \"type\": \"payload\"}, {\"tensor\": \"B\", \"evict-on\": \"K1\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"N0\", \"rank\": \"N0\", \"type\": \"payload\"}, {\"tensor\": \"B\", \"evict-on\": \"K1\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"N0\", \"rank\": \"K0\", \"type\": \"coord\"}, {\"tensor\": \"B\", \"evict-on\": \"K1\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"N0\", \"rank\": \"K0\", \"type\": \"payload\"}]\n" + \ + "traces = {(\"A\", \"M0\", \"coord\", \"read\"): \"tmp/extensor_energy-M0-eager_a_m0_read.csv\", (\"B\", \"N0\", \"coord\", \"read\"): \"tmp/extensor_energy-N0-eager_b_n0_read.csv\", (\"Z\", \"N0\", 
\"coord\", \"read\"): \"tmp/extensor_energy-N0-eager_z_m0_read.csv\", (\"Z\", \"N0\", \"coord\", \"write\"): \"tmp/extensor_energy-N0-eager_z_m0_write.csv\", (\"Z\", \"N0\", \"payload\", \"read\"): \"tmp/extensor_energy-N0-eager_z_m0_read.csv\", (\"Z\", \"N0\", \"payload\", \"write\"): \"tmp/extensor_energy-N0-eager_z_m0_write.csv\", (\"A\", \"M0\", \"payload\", \"read\"): \"tmp/extensor_energy-M0-eager_a_m0_read.csv\", (\"A\", \"K0\", \"coord\", \"read\"): \"tmp/extensor_energy-K0-eager_a_m0_read.csv\", (\"A\", \"K0\", \"payload\", \"read\"): \"tmp/extensor_energy-K0-eager_a_m0_read.csv\", (\"B\", \"N0\", \"payload\", \"read\"): \"tmp/extensor_energy-N0-eager_b_n0_read.csv\", (\"B\", \"K0\", \"coord\", \"read\"): \"tmp/extensor_energy-K0-eager_b_n0_read.csv\", (\"B\", \"K0\", \"payload\", \"read\"): \"tmp/extensor_energy-K0-eager_b_n0_read.csv\"}\n" + \ + "traffic = Traffic.buffetTraffic(bindings, formats, traces, 524288, 64)\n" + \ + "metrics[\"Z\"][\"LLB\"] = {}\n" + \ + "metrics[\"Z\"][\"LLB\"][\"A\"] = {}\n" + \ + "metrics[\"Z\"][\"LLB\"][\"A\"][\"read\"] = 0\n" + \ + "metrics[\"Z\"][\"LLB\"][\"A\"][\"read\"] += traffic[0][\"A\"][\"read\"]\n" + \ + "metrics[\"Z\"][\"LLB\"][\"B\"] = {}\n" + \ + "metrics[\"Z\"][\"LLB\"][\"B\"][\"read\"] = 0\n" + \ + "metrics[\"Z\"][\"LLB\"][\"B\"][\"read\"] += traffic[0][\"B\"][\"read\"]\n" + \ + "metrics[\"Z\"][\"LLB\"][\"Z\"] = {}\n" + \ + "metrics[\"Z\"][\"LLB\"][\"Z\"][\"read\"] = 0\n" + \ + "metrics[\"Z\"][\"LLB\"][\"Z\"][\"write\"] = 0\n" + \ + "metrics[\"Z\"][\"LLB\"][\"Z\"][\"read\"] += traffic[0][\"Z\"][\"read\"]\n" + \ + "metrics[\"Z\"][\"LLB\"][\"Z\"][\"write\"] += traffic[0][\"Z\"][\"write\"]\n" + \ + "metrics[\"Z\"][\"MainMemory\"][\"time\"] = (metrics[\"Z\"][\"MainMemory\"][\"A\"][\"read\"] + metrics[\"Z\"][\"MainMemory\"][\"B\"][\"read\"] + metrics[\"Z\"][\"MainMemory\"][\"Z\"][\"read\"] + metrics[\"Z\"][\"MainMemory\"][\"Z\"][\"write\"]) / 586314575512\n" + \ + "metrics[\"Z\"][\"LLB\"][\"time\"] = (metrics[\"Z\"][\"LLB\"][\"A\"][\"read\"] + metrics[\"Z\"][\"LLB\"][\"B\"][\"read\"] + metrics[\"Z\"][\"LLB\"][\"Z\"][\"read\"] + metrics[\"Z\"][\"LLB\"][\"Z\"][\"write\"]) / 9223372036854775807\n" + \ + "metrics[\"Z\"][\"FPMul\"] = {}\n" + \ + "metrics[\"Z\"][\"FPMul\"][\"mul\"] = Metrics.dump()[\"Compute\"][\"payload_mul\"]\n" + \ + "metrics[\"Z\"][\"FPMul\"][\"time\"] = metrics[\"Z\"][\"FPMul\"][\"mul\"] / 128000000000\n" + \ + "metrics[\"Z\"][\"FPAdd\"] = {}\n" + \ + "metrics[\"Z\"][\"FPAdd\"][\"add\"] = Metrics.dump()[\"Compute\"][\"payload_add\"]\n" + \ + "metrics[\"Z\"][\"FPAdd\"][\"time\"] = metrics[\"Z\"][\"FPAdd\"][\"add\"] / 128000000000\n" + \ + "metrics[\"Z\"][\"K2Intersect\"] = 0\n" + \ + "metrics[\"Z\"][\"K2Intersect\"] += K2Intersect_K2.getNumIntersects()\n" + \ + "metrics[\"Z\"][\"K2Intersect\"][\"time\"] = metrics[\"Z\"][\"K2Intersect\"] / 1000000000\n" + \ + "metrics[\"Z\"][\"K1Intersect\"] = 0\n" + \ + "metrics[\"Z\"][\"K1Intersect\"] += K1Intersect_K1.getNumIntersects()\n" + \ + "metrics[\"Z\"][\"K1Intersect\"][\"time\"] = metrics[\"Z\"][\"K1Intersect\"] / 1000000000\n" + \ + "metrics[\"Z\"][\"K0Intersection\"] = 0\n" + \ + "metrics[\"Z\"][\"K0Intersection\"] += K0Intersection_K0.getNumIntersects()\n" + \ + "metrics[\"Z\"][\"K0Intersection\"][\"time\"] = metrics[\"Z\"][\"K0Intersection\"] / 128000000000\n" + \ + "metrics[\"Z\"][\"TopSequencer\"] = {}\n" + \ + "metrics[\"Z\"][\"TopSequencer\"][\"N2\"] = Compute.numIters(\"tmp/extensor_energy-N2-iter.csv\")\n" + \ + "metrics[\"Z\"][\"TopSequencer\"][\"K2\"] = 
Compute.numIters(\"tmp/extensor_energy-K2-iter.csv\")\n" + \ + "metrics[\"Z\"][\"TopSequencer\"][\"M2\"] = Compute.numIters(\"tmp/extensor_energy-M2-iter.csv\")\n" + \ + "metrics[\"Z\"][\"TopSequencer\"][\"time\"] = (metrics[\"Z\"][\"TopSequencer\"][\"N2\"] + metrics[\"Z\"][\"TopSequencer\"][\"K2\"] + metrics[\"Z\"][\"TopSequencer\"][\"M2\"]) / 1000000000\n" + \ + "metrics[\"Z\"][\"MiddleSequencer\"] = {}\n" + \ + "metrics[\"Z\"][\"MiddleSequencer\"][\"M1\"] = Compute.numIters(\"tmp/extensor_energy-M1-iter.csv\")\n" + \ + "metrics[\"Z\"][\"MiddleSequencer\"][\"N1\"] = Compute.numIters(\"tmp/extensor_energy-N1-iter.csv\")\n" + \ + "metrics[\"Z\"][\"MiddleSequencer\"][\"K1\"] = Compute.numIters(\"tmp/extensor_energy-K1-iter.csv\")\n" + \ + "metrics[\"Z\"][\"MiddleSequencer\"][\"time\"] = (metrics[\"Z\"][\"MiddleSequencer\"][\"M1\"] + metrics[\"Z\"][\"MiddleSequencer\"][\"N1\"] + metrics[\"Z\"][\"MiddleSequencer\"][\"K1\"]) / 1000000000\n" + \ + "metrics[\"Z\"][\"BottomSequencer\"] = {}\n" + \ + "metrics[\"Z\"][\"BottomSequencer\"][\"M0\"] = Compute.numIters(\"tmp/extensor_energy-M0-iter.csv\")\n" + \ + "metrics[\"Z\"][\"BottomSequencer\"][\"N0\"] = Compute.numIters(\"tmp/extensor_energy-N0-iter.csv\")\n" + \ + "metrics[\"Z\"][\"BottomSequencer\"][\"K0\"] = Compute.numIters(\"tmp/extensor_energy-K0-iter.csv\")\n" + \ + "metrics[\"Z\"][\"BottomSequencer\"][\"time\"] = (metrics[\"Z\"][\"BottomSequencer\"][\"M0\"] + metrics[\"Z\"][\"BottomSequencer\"][\"N0\"] + metrics[\"Z\"][\"BottomSequencer\"][\"K0\"]) / 128000000000\n" + \ + "metrics[\"blocks\"] = [[\"Z\"]]\n" + \ + "metrics[\"time\"] = max(metrics[\"Z\"][\"BottomSequencer\"][\"time\"], metrics[\"Z\"][\"FPAdd\"][\"time\"], metrics[\"Z\"][\"FPMul\"][\"time\"], metrics[\"Z\"][\"K0Intersection\"][\"time\"], metrics[\"Z\"][\"K1Intersect\"][\"time\"], metrics[\"Z\"][\"K2Intersect\"][\"time\"], metrics[\"Z\"][\"LLB\"][\"time\"], metrics[\"Z\"][\"MainMemory\"][\"time\"], metrics[\"Z\"][\"MiddleSequencer\"][\"time\"], metrics[\"Z\"][\"TopSequencer\"][\"time\"])" + + assert collector.dump().gen(0) == hifiber + + +def test_dump_sigma(): + yaml = build_sigma_yaml() + collector = build_collector(yaml, 0) + hifiber = "metrics = {}\n" + \ "metrics[\"Z\"] = {}\n" + \ - "metrics[\"Z\"][\"Z footprint\"] = 0\n" + \ - "metrics[\"Z\"][\"Z traffic\"] = 0\n" + \ - "A_M_format = Format(A_M, {\"M\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 64}})\n" + \ - "metrics[\"Z\"][\"A footprint\"] = A_M_format.getTensor()\n" + \ - "metrics[\"Z\"][\"A traffic\"] = Traffic.buffetTraffic(A_M, \"M\", A_M_format) + A_M_format.getRank(\"M\")\n" + \ - "metrics[\"Z\"][\"add\"] = Compute.opCount(Metrics.dump(), \"add\")" + "formats = {\"A\": Format(Tensor(rank_ids=[\"K1\", \"MK01\", \"MK00\"], shape=[K, M * K, M * K]), {\"rank-order\": [\"K1\", \"MK01\", \"MK00\"], \"K1\": {\"format\": \"U\"}, \"MK01\": {\"format\": \"U\"}, \"MK00\": {\"format\": \"C\", \"pbits\": 32}}), \"B\": Format(B_K1NK0, {\"rank-order\": [\"K1\", \"N\", \"K0\"], \"K1\": {\"format\": \"U\"}, \"N\": {\"format\": \"U\"}, \"K0\": {\"format\": \"U\", \"pbits\": 32}})}\n" + \ + "bindings = [{\"tensor\": \"A\", \"rank\": \"MK00\", \"type\": \"payload\", \"evict-on\": \"root\", \"format\": \"flattened\", \"style\": \"eager\", \"root\": \"MK00\"}, {\"tensor\": \"B\", \"rank\": \"K0\", \"type\": \"payload\", \"evict-on\": \"root\", \"format\": \"partitioned\", \"style\": \"eager\", \"root\": \"K0\"}]\n" + \ + "traces = {(\"A\", \"MK00\", \"payload\", \"read\"): \"tmp/sigma-MK00-eager_a_mk00_read.csv\", (\"B\", 
\"K0\", \"payload\", \"read\"): \"tmp/sigma-K0-eager_b_k0_read.csv\"}\n" + \ + "traffic = Traffic.buffetTraffic(bindings, formats, traces, 268435456, 32, {\"K0\": \"MK00\"})\n" + \ + "bindings = [{\"tensor\": \"A\", \"rank\": \"MK00\", \"format\": \"flattened\", \"type\": \"payload\", \"evict-on\": \"MK01\", \"style\": \"eager\", \"root\": \"MK00\"}, {\"tensor\": \"B\", \"rank\": \"K0\", \"format\": \"partitioned\", \"type\": \"payload\", \"evict-on\": \"N\", \"style\": \"eager\", \"root\": \"K0\"}]\n" + \ + "traces = {(\"A\", \"MK00\", \"payload\", \"read\"): \"tmp/sigma-MK00-eager_a_mk00_read.csv\", (\"B\", \"K0\", \"payload\", \"read\"): \"tmp/sigma-K0-eager_b_k0_read.csv\"}\n" + \ + "traffic = Traffic.buffetTraffic(bindings, formats, traces, 1048576, 4096, {\"K0\": \"MK00\"})\n" + \ + "metrics[\"Z\"][\"DataSRAMBanks\"] = {}\n" + \ + "metrics[\"Z\"][\"DataSRAMBanks\"][\"A\"] = {}\n" + \ + "metrics[\"Z\"][\"DataSRAMBanks\"][\"A\"][\"read\"] = 0\n" + \ + "metrics[\"Z\"][\"DataSRAMBanks\"][\"A\"][\"read\"] += traffic[0][\"A\"][\"read\"]\n" + \ + "metrics[\"Z\"][\"DataSRAMBanks\"][\"B\"] = {}\n" + \ + "metrics[\"Z\"][\"DataSRAMBanks\"][\"B\"][\"read\"] = 0\n" + \ + "metrics[\"Z\"][\"DataSRAMBanks\"][\"B\"][\"read\"] += traffic[0][\"B\"][\"read\"]\n" + \ + "metrics[\"Z\"][\"DataSRAMBanks\"][\"time\"] = (metrics[\"Z\"][\"DataSRAMBanks\"][\"A\"][\"read\"] + metrics[\"Z\"][\"DataSRAMBanks\"][\"B\"][\"read\"]) / 8246337208320\n" + \ + "metrics[\"Z\"][\"Multiplier\"] = {}\n" + \ + "metrics[\"Z\"][\"Multiplier\"][\"mul\"] = Metrics.dump()[\"Compute\"][\"payload_mul\"]\n" + \ + "metrics[\"Z\"][\"Multiplier\"][\"time\"] = metrics[\"Z\"][\"Multiplier\"][\"mul\"] / 8192000000000\n" + \ + "metrics[\"blocks\"] = [[\"Z\"]]\n" + \ + "metrics[\"time\"] = max(metrics[\"Z\"][\"DataSRAMBanks\"][\"time\"], metrics[\"Z\"][\"Multiplier\"][\"time\"])" assert collector.dump().gen(0) == hifiber -def test_dump_leader_follower_bad_rank(): +def test_dump_new_flattened_tensor_for_format(): yaml = """ einsum: declaration: + Z: [K, M] A: [K, M] - B: [K] - Z: [M] expressions: - - Z[m] = A[k, m] * B[m] - + - Z[k, m] = A[k, m] + mapping: + partitioning: + Z: + (K, M): [flatten()] + spacetime: + Z: + space: [] + time: [KM] architecture: - subtree: - - name: System + accel: + - name: level0 local: - - name: Intersect - class: LeaderFollower - + - name: Buffer + class: Buffet + attributes: + width: 64 + depth: 1024 bindings: - - name: Intersect - bindings: - - einsum: Z - rank: P - leader: B + Z: + - config: accel + prefix: tmp/Z + - component: Buffer + bindings: + - tensor: A + rank: KM + type: payload + evict-on: root + format: default + format: + A: + default: + rank-order: [KM] + KM: + format: C + pbits: 32 """ collector = build_collector(yaml, 0) - with pytest.raises(ValueError) as excinfo: - collector.dump() - assert str(excinfo.value) == "Tensor B has no rank P" + hifiber = "metrics = {}\n" + \ + "metrics[\"Z\"] = {}\n" + \ + "formats = {\"A\": Format(Tensor(rank_ids=[\"KM\"], shape=[K * M]), {\"rank-order\": [\"KM\"], \"KM\": {\"format\": \"C\", \"pbits\": 32}})}\n" + \ + "bindings = [{\"tensor\": \"A\", \"rank\": \"KM\", \"type\": \"payload\", \"evict-on\": \"root\", \"format\": \"default\", \"style\": \"lazy\"}]\n" + \ + "Traffic.filterTrace(\"tmp/Z-KM-populate_1.csv\", \"tmp/Z-KM-iter.csv\", \"tmp/Z-KM-populate_1_payload.csv\")\n" + \ + "traces = {(\"A\", \"KM\", \"payload\", \"read\"): \"tmp/Z-KM-populate_1_payload.csv\"}\n" + \ + "traffic = Traffic.buffetTraffic(bindings, formats, traces, 65536, 64)\n" + \ + 
"metrics[\"blocks\"] = [[\"Z\"]]\n" + \ + "metrics[\"time\"] = 0" + + assert collector.dump().gen(0) == hifiber -def test_dump_leader_follower(): +def test_dump_skip_zero_bits(): yaml = """ einsum: declaration: + Z: [K, M] A: [K, M] - B: [K] - Z: [M] expressions: - - Z[m] = A[k, m] * B[m] - + - Z[k, m] = A[k, m] + mapping: + spacetime: + Z: + space: [] + time: [K, M] architecture: - subtree: - - name: System + accel: + - name: level0 local: - - name: Intersect - class: LeaderFollower - + - name: Buffer + class: Buffet + attributes: + width: 64 + depth: 1024 bindings: - - name: Intersect - bindings: - - einsum: Z - rank: K - leader: B + Z: + - config: accel + prefix: tmp/Z + - component: Buffer + bindings: + - tensor: Z + rank: K + type: payload + style: eager + evict-on: root + format: default + - tensor: A + rank: K + type: coord + style: eager + evict-on: root + format: default + format: + Z: + default: + rank-order: [K, M] + K: + format: C + cbits: 0 + M: + format: C + pbits: 32 + A: + default: + rank-order: [K, M] + K: + format: C + pbits: 0 + M: + format: C + pbits: 32 """ collector = build_collector(yaml, 0) hifiber = "metrics = {}\n" + \ "metrics[\"Z\"] = {}\n" + \ - "metrics[\"Z\"][\"Z footprint\"] = 0\n" + \ - "metrics[\"Z\"][\"Z traffic\"] = 0\n" + \ - "metrics[\"Z\"][\"A footprint\"] = 0\n" + \ - "metrics[\"Z\"][\"A traffic\"] = 0\n" + \ - "metrics[\"Z\"][\"B footprint\"] = 0\n" + \ - "metrics[\"Z\"][\"B traffic\"] = 0\n" + \ - "metrics[\"Z\"][\"K intersections\"] = Compute.lfCount(Metrics.dump(), \"K\", 1)" + "formats = {\"Z\": Format(Z_KM, {\"rank-order\": [\"K\", \"M\"], \"K\": {\"format\": \"C\", \"cbits\": 0}, \"M\": {\"format\": \"C\", \"pbits\": 32}}), \"A\": Format(A_KM, {\"rank-order\": [\"K\", \"M\"], \"K\": {\"format\": \"C\", \"pbits\": 0}, \"M\": {\"format\": \"C\", \"pbits\": 32}})}\n" + \ + "bindings = [{\"tensor\": \"Z\", \"evict-on\": \"root\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"K\", \"rank\": \"M\", \"type\": \"payload\"}, {\"tensor\": \"A\", \"evict-on\": \"root\", \"style\": \"eager\", \"format\": \"default\", \"root\": \"K\", \"rank\": \"M\", \"type\": \"payload\"}]\n" + \ + "traces = {(\"Z\", \"M\", \"payload\", \"read\"): \"tmp/Z-M-eager_z_k_read.csv\", (\"Z\", \"M\", \"payload\", \"write\"): \"tmp/Z-M-eager_z_k_write.csv\", (\"A\", \"M\", \"payload\", \"read\"): \"tmp/Z-M-eager_a_k_read.csv\"}\n" + \ + "traffic = Traffic.buffetTraffic(bindings, formats, traces, 65536, 64)\n" + \ + "metrics[\"blocks\"] = [[\"Z\"]]\n" + \ + "metrics[\"time\"] = 0" assert collector.dump().gen(0) == hifiber -def test_dump_skip_ahead(): +def test_end(): + hifiber = "Metrics.endCollect()" + + assert Collector.end().gen(0) == hifiber + + +def test_make_body_none(): + yaml = build_extensor_yaml() + collector = build_collector(yaml, 0) + + hifiber = "" + + assert collector.make_body().gen(0) == hifiber + + +def test_make_body_iter_num(): yaml = """ einsum: declaration: + Z: [K, M] A: [K, M] - B: [K] - Z: [M] expressions: - - Z[m] = A[k, m] * B[m] + - Z[k, m] = A[k, m] + mapping: + spacetime: + Z: + space: [] + time: [K, M] + architecture: + accel: + - name: level0 + local: + - name: Buffer + class: Buffet + bindings: + Z: + - config: accel + prefix: tmp/Z + - component: Buffer + bindings: + - tensor: Z + rank: K + type: payload + style: eager + evict-on: root + format: default + format: + Z: + default: + rank-order: [K, M] + K: + format: C + pbits: 32 + M: + format: C + cbits: 32 + pbits: 64 + """ + collector = build_collector(yaml, 0) + + hifiber = 
"m_iter_num = Metrics.getIter().copy()" + + assert collector.make_body().gen(0) == hifiber + + +def test_make_loop_footer_unconfigured(): + yaml = build_gamma_yaml() + collector = build_collector(yaml, 0) + + with pytest.raises(ValueError) as excinfo: + collector.make_loop_footer("K") + assert str( + excinfo.value) == "Unconfigured collector. Make sure to first call start()" + + +def test_make_loop_footer(): + yaml = build_gamma_yaml() + collector = build_collector(yaml, 0) + collector.start() + + hifiber = "Intersect_K.addTraces(Metrics.consumeTrace(\"K\", \"intersect_2\"))" + assert collector.make_loop_footer("K").gen(0) == hifiber + + yaml = build_extensor_yaml() + collector = build_collector(yaml, 0) + collector.start() + + program = collector.program + part_ir = program.get_partitioning() + for tensor in program.get_equation().get_tensors(): + tensor.update_ranks( + part_ir.partition_ranks( + tensor.get_ranks(), + part_ir.get_all_parts(), + True, + True)) + program.get_loop_order().apply(tensor) + + assert collector.make_loop_footer("M2").gen(0) == "" + + hifiber = "n0_iter_num = Metrics.getIter().copy()\n" + \ + "K0Intersection_K0.addTraces(Metrics.consumeTrace(\"K0\", \"intersect_0\"), Metrics.consumeTrace(\"K0\", \"intersect_1\"))" + + assert collector.make_loop_footer("K0").gen(0) == hifiber + + hifiber = "K1Intersect_K1.addTraces(Metrics.consumeTrace(\"K1\", \"intersect_0\"), Metrics.consumeTrace(\"K1\", \"intersect_1\"))\n" + \ + "z_k1.trace(\"eager_z_k1_write\", iteration_num=n0_iter_num)" + + assert collector.make_loop_footer("K1").gen(0) == hifiber + + +def test_make_loop_header_unconfigured(): + yaml = build_gamma_yaml() + collector = build_collector(yaml, 0) + + with pytest.raises(ValueError) as excinfo: + collector.make_loop_header("K") + assert str( + excinfo.value) == "Unconfigured collector. 
Make sure to first call start()" + + +def test_make_loop_header(): + yaml = build_extensor_yaml() + collector = build_collector(yaml, 0) + collector.start() + + assert collector.make_loop_header("N2").gen(0) == "" + hifiber = "eager_a_m0_read = set()\n" + \ + "eager_z_m0_read = set()" + + assert collector.make_loop_header("M1").gen(0) == hifiber + + hifiber_option1 = "if () not in eager_z_m0_read:\n" + \ + " eager_z_m0_read.add(())\n" + \ + " z_m0.trace(\"eager_z_m0_read\")\n" + \ + "if () not in eager_a_m0_read:\n" + \ + " eager_a_m0_read.add(())\n" + \ + " a_m0.trace(\"eager_a_m0_read\")" + + hifiber_option2 = "if () not in eager_a_m0_read:\n" + \ + " eager_a_m0_read.add(())\n" + \ + " a_m0.trace(\"eager_a_m0_read\")\n" + \ + "if () not in eager_z_m0_read:\n" + \ + " eager_z_m0_read.add(())\n" + \ + " z_m0.trace(\"eager_z_m0_read\")" + + assert collector.make_loop_header("M0").gen( + 0) in {hifiber_option1, hifiber_option2} + +# TODO: Multiply buffer size by number of instances + + +def test_make_loop_header_eager_root(): + yaml = """ + einsum: + declaration: + A: [K] + Z: [M] + expressions: + - Z[m] = A[k] + mapping: + loop-order: + Z: [K, M] + spacetime: + Z: + space: [] + time: [K, M] + format: + Z: + default: + rank-order: [M] + M: + format: U + pbits: 32 architecture: - subtree: + Accelerator: - name: System + attributes: + clock_frequency: 1000 local: - - name: Intersect - class: SkipAhead - + - name: MainMemory + class: DRAM + attributes: + bandwidth: 2048 + subtree: + - name: Chip + local: + - name: Buffer + class: Buffet + attributes: + width: 32 + depth: 128 bindings: - - name: Intersect - bindings: - - einsum: Z - rank: K + Z: + - config: Accelerator + prefix: tmp/eager_root + - component: MainMemory + bindings: + - tensor: Z + rank: M + type: payload + format: default + - component: Buffer + bindings: + - tensor: Z + rank: M + type: payload + format: default + style: eager + evict-on: root """ collector = build_collector(yaml, 0) + collector.start() - hifiber = "metrics = {}\n" + \ - "metrics[\"Z\"] = {}\n" + \ - "metrics[\"Z\"][\"Z footprint\"] = 0\n" + \ - "metrics[\"Z\"][\"Z traffic\"] = 0\n" + \ - "metrics[\"Z\"][\"A footprint\"] = 0\n" + \ - "metrics[\"Z\"][\"A traffic\"] = 0\n" + \ - "metrics[\"Z\"][\"B footprint\"] = 0\n" + \ - "metrics[\"Z\"][\"B traffic\"] = 0\n" + \ - "metrics[\"Z\"][\"K intersections\"] = Compute.skipCount(Metrics.dump(), \"K\")" + hifiber = "z_k.trace(\"eager_z_k_write\", iteration_num=m_iter_num)" + assert collector.make_loop_footer("K").gen(0) == hifiber - assert collector.dump().gen(0) == hifiber -# def test_dump_leader_follower_not_intersected(): -# yaml = """ -# einsum: -# declaration: -# A: [M] -# B: [K] -# C: [K, M] -# Z: [M] -# expressions: -# - Z[m] = A[k, m] * B[m] * C[k, m] -# -# architecture: -# subtree: -# - name: System -# local: -# - name: Intersect -# class: LeaderFollower -# -# bindings: -# - name: Intersect -# bindings: -# - einsum: Z -# rank: K -# leader: C -# """ -# collector = build_collector(yaml, 0) -# -# hifiber = "metrics = {}\n" + \ -# "metrics[\"Z\"] = {}\n" + \ -# "metrics[\"Z\"][\"Z footprint\"] = 0\n" + \ -# "metrics[\"Z\"][\"Z traffic\"] = 0\n" + \ -# "metrics[\"Z\"][\"A footprint\"] = 0\n" + \ -# "metrics[\"Z\"][\"A traffic\"] = 0\n" + \ -# "metrics[\"Z\"][\"B footprint\"] = 0\n" + \ -# "metrics[\"Z\"][\"B traffic\"] = 0\n" + \ -# "metrics[\"Z\"][\"C footprint\"] = 0\n" + \ -# "metrics[\"Z\"][\"C traffic\"] = 0\n" + \ -# "metrics[\"Z\"][\"K intersections\"] = Compute.lfCount(Metrics.dump(), \"K\", 1)" -# -# assert
collector.dump().gen(0) == hifiber +def test_register_ranks(): + yaml = build_gamma_yaml() + collector = build_collector(yaml, 0) + + hifiber = "Metrics.registerRank(\"M\")\n" + \ + "Metrics.registerRank(\"K\")\n" + \ + "Metrics.registerRank(\"N\")" + assert collector.register_ranks().gen(0) == hifiber -def test_end(): - hifiber = "Metrics.endCollect()" - assert Collector.end().gen(0) == hifiber +def test_set_collecting_type_err(): + yaml = build_gamma_yaml() + collector = build_collector(yaml, 0) + + with pytest.raises(ValueError) as excinfo: + collector.set_collecting(None, "K", "fiber", False, True) + assert str( + excinfo.value) == "Tensor must be specified for trace type fiber" def test_set_collecting(): yaml = build_gamma_yaml() collector = build_collector(yaml, 0) - hifiber = "B_KN.setCollecting(\"K\", True)" - assert collector.set_collecting("B", "K").gen(0) == hifiber + hifiber = "Metrics.trace(\"K\", type_=\"intersect_3\", consumable=False)" + assert collector.set_collecting( + "B", "K", "fiber", False, True).gen(0) == hifiber + + hifiber = "Metrics.trace(\"K\", type_=\"iter\", consumable=False)" + assert collector.set_collecting( + None, "K", "iter", False, True).gen(0) == hifiber + + +def test_set_collecting_eager(): + yaml = build_extensor_yaml() + collector = build_collector(yaml, 0) + + program = collector.program + part_ir = program.get_partitioning() + for tensor in program.get_equation().get_tensors(): + tensor.update_ranks( + part_ir.partition_ranks( + tensor.get_ranks(), + part_ir.get_all_parts(), + True, + True)) + program.get_loop_order().apply(tensor) + + hifiber = "Metrics.trace(\"N0\", type_=\"eager_a_m0_read\", consumable=False)" + assert collector.set_collecting( + "A", "N0", "M0", False, True).gen(0) == hifiber + + hifiber = "n0_iter_num = None\n" + \ + "Metrics.trace(\"N0\", type_=\"eager_z_m0_write\", consumable=False)" + assert collector.set_collecting( + "Z", "N0", "M0", False, False).gen(0) == hifiber def test_start(): yaml = build_gamma_yaml() collector = build_collector(yaml, 0) - hifiber = "Metrics.beginCollect([\"M\", \"K\", \"N\"])" - assert collector.start().gen(0) == hifiber + generated = collector.start().gen(0).split("\n") + + corr = ["Metrics.beginCollect(\"tmp/gamma_T\")"] + check_hifiber_lines(generated[:1], corr) + + corr = ["Intersect_K = LeaderFollowerIntersector()"] + check_hifiber_lines(generated[1:2], corr) + + corr = ["Metrics.trace(\"K\", type_=\"iter\", consumable=False)", + "Metrics.trace(\"K\", type_=\"intersect_2\", consumable=True)", + "Metrics.trace(\"M\", type_=\"iter\", consumable=False)", + "Metrics.trace(\"M\", type_=\"populate_1\", consumable=False)", + "Metrics.trace(\"K\", type_=\"intersect_2\", consumable=False)", + "Metrics.trace(\"K\", type_=\"intersect_3\", consumable=False)", + "Metrics.trace(\"N\", type_=\"iter\", consumable=False)", + "Metrics.trace(\"N\", type_=\"populate_1\", consumable=False)"] + check_hifiber_lines(generated[2:], corr) + + +def test_start_sequencer(): + yaml = build_extensor_energy_yaml() + collector = build_collector(yaml, 0) + + generated = collector.start().gen(0).split("\n") + + corr = ['Metrics.beginCollect("tmp/extensor_energy")'] + check_hifiber_lines(generated[:1], corr) + + corr = ["K2Intersect_K2 = SkipAheadIntersector()", + "K1Intersect_K1 = SkipAheadIntersector()", + "K0Intersection_K0 = SkipAheadIntersector()"] + check_hifiber_lines(generated[1:4], corr) + + corr = [ + "Metrics.trace(\"N2\", type_=\"iter\", consumable=False)", + "Metrics.trace(\"K2\", type_=\"iter\", 
consumable=False)", + "Metrics.trace(\"M2\", type_=\"iter\", consumable=False)", + "Metrics.trace(\"M1\", type_=\"iter\", consumable=False)", + "Metrics.trace(\"N1\", type_=\"iter\", consumable=False)", + "Metrics.trace(\"K1\", type_=\"iter\", consumable=False)", + "Metrics.trace(\"M0\", type_=\"iter\", consumable=False)", + "Metrics.trace(\"N0\", type_=\"iter\", consumable=False)", + "Metrics.trace(\"K0\", type_=\"iter\", consumable=False)", + "Metrics.trace(\"N0\", type_=\"eager_z_m0_read\", consumable=False)", + "n0_iter_num = None", + "Metrics.trace(\"N0\", type_=\"eager_z_m0_write\", consumable=False)", + "Metrics.trace(\"M0\", type_=\"eager_z_m0_read\", consumable=False)", + "n0_iter_num = None", + "Metrics.trace(\"M0\", type_=\"eager_z_m0_write\", consumable=False)", + "Metrics.trace(\"K0\", type_=\"eager_a_m0_read\", consumable=False)", + "Metrics.trace(\"K1\", type_=\"intersect_0\", consumable=True)", + "Metrics.trace(\"M0\", type_=\"eager_a_m0_read\", consumable=False)", + "Metrics.trace(\"K0\", type_=\"intersect_0\", consumable=True)", + "Metrics.trace(\"K1\", type_=\"intersect_0\", consumable=False)", + "Metrics.trace(\"K2\", type_=\"intersect_0\", consumable=True)", + "Metrics.trace(\"N1\", type_=\"populate_1\", consumable=False)", + "Metrics.trace(\"K1\", type_=\"intersect_1\", consumable=True)", + "Metrics.trace(\"N0\", type_=\"eager_b_n0_read\", consumable=False)", + "Metrics.trace(\"K0\", type_=\"intersect_1\", consumable=True)", + "Metrics.trace(\"K1\", type_=\"intersect_1\", consumable=False)", + "Metrics.trace(\"K2\", type_=\"intersect_1\", consumable=True)", + "Metrics.trace(\"K0\", type_=\"eager_b_n0_read\", consumable=False)"] + check_hifiber_lines(generated[4:32], corr) + + corr = ["Metrics.registerRank(\"N2\")", + "Metrics.registerRank(\"K2\")", + "Metrics.registerRank(\"M2\")", + "Metrics.registerRank(\"M1\")", + "Metrics.registerRank(\"N1\")", + "Metrics.registerRank(\"K1\")", + "Metrics.registerRank(\"M0\")", + "Metrics.registerRank(\"N0\")", + "Metrics.registerRank(\"K0\")"] + check_hifiber_lines(generated[32:], corr) + + +def test_start_flattening(): + yaml = build_sigma_yaml() + collector = build_collector(yaml, 0) + + generated = collector.start().gen(0).split("\n") + + corr = ["Metrics.beginCollect(\"tmp/sigma\")", + "Metrics.associateShape(\"MK01\", (M, K))", + "Metrics.matchRanks(\"MK00\", \"M\")", + "Metrics.matchRanks(\"MK00\", \"K0\")", + "Metrics.associateShape(\"MK00\", (M, K))"] + check_hifiber_lines(generated[:5], corr) + + corr = [ + "Metrics.trace(\"MK00\", type_=\"eager_a_mk00_read\", consumable=False)", + "Metrics.trace(\"K0\", type_=\"eager_b_k0_read\", consumable=False)"] + check_hifiber_lines(generated[5:7], corr) + + corr = ["Metrics.registerRank(\"K1\")", + "Metrics.registerRank(\"MK01\")", + "Metrics.registerRank(\"N\")", + "Metrics.registerRank(\"MK00\")"] + check_hifiber_lines(generated[7:], corr) + + +def test_trace_tree(): + yaml = build_extensor_yaml() + collector = build_collector(yaml, 0) + + program = collector.program + part_ir = program.get_partitioning() + for tensor in program.get_equation().get_tensors(): + tensor.update_ranks( + part_ir.partition_ranks( + tensor.get_ranks(), + part_ir.get_all_parts(), + True, + True)) + program.get_loop_order().apply(tensor) + + hifiber = "if (m1, k1) not in eager_a_m0_read:\n" + \ + " eager_a_m0_read.add((m1, k1))\n" + \ + " a_m0.trace(\"eager_a_m0_read\")" + assert collector.trace_tree("A", "M0", True).gen(0) == hifiber + + hifiber = "z_m0.trace(\"eager_z_m0_write\", 
iteration_num=n0_iter_num)" + assert collector.trace_tree("Z", "M0", False).gen(0) == hifiber diff --git a/tests/trans/test_equation.py b/tests/trans/test_equation.py index 7be9cfe..d6c1774 100644 --- a/tests/trans/test_equation.py +++ b/tests/trans/test_equation.py @@ -1,10 +1,11 @@ import pytest from sympy import symbols +from teaal.ir.hardware import Hardware from teaal.ir.iter_graph import IterationGraph +from teaal.ir.metrics import Metrics from teaal.ir.program import Program -from teaal.parse.einsum import Einsum -from teaal.parse.mapping import Mapping +from teaal.parse import * from teaal.trans.equation import Equation from tests.utils.parse_tree import make_plus @@ -25,7 +26,7 @@ def make_basic(): program = Program(einsum, mapping) program.add_einsum(0) - return IterationGraph(program), Equation(program) + return IterationGraph(program), Equation(program, None) def make_output(): @@ -44,7 +45,7 @@ def make_output(): program = Program(einsum, mapping) program.add_einsum(0) - return IterationGraph(program), Equation(program) + return IterationGraph(program), Equation(program, None) def make_mult_terms(): @@ -65,7 +66,7 @@ def make_mult_terms(): program = Program(einsum, mapping) program.add_einsum(0) - return IterationGraph(program), Equation(program) + return IterationGraph(program), Equation(program, None) def make_take(): @@ -83,7 +84,7 @@ def make_take(): program = Program(einsum, mapping) program.add_einsum(0) - return IterationGraph(program), Equation(program) + return IterationGraph(program), Equation(program, None) def make_other(einsum, mapping): @@ -130,7 +131,7 @@ def make_display(style, opt): program = Program(einsum, mapping) program.add_einsum(0) - return IterationGraph(program), Equation(program) + return IterationGraph(program), Equation(program, None) def make_matmul(mapping): @@ -155,7 +156,7 @@ def make_matmul(mapping): program.apply_all_partitioning(tensor) program.get_loop_order().apply(tensor) - return IterationGraph(program), Equation(program) + return IterationGraph(program), Equation(program, None) def make_conv(expr, loop_order): @@ -181,7 +182,7 @@ def make_conv(expr, loop_order): program.apply_all_partitioning(tensor) program.get_loop_order().apply(tensor) - return IterationGraph(program), Equation(program) + return IterationGraph(program), Equation(program, None) def make_conv_part(expr, loop_order): @@ -211,7 +212,24 @@ def make_conv_part(expr, loop_order): program.apply_all_partitioning(tensor) program.get_loop_order().apply(tensor) - return IterationGraph(program), Equation(program) + return IterationGraph(program), Equation(program, None) + + +def make_gamma(): + fname = "tests/integration/gamma.yaml" + einsum = Einsum.from_file(fname) + mapping = Mapping.from_file(fname) + arch = Architecture.from_file(fname) + bindings = Bindings.from_file(fname) + format_ = Format.from_file(fname) + + program = Program(einsum, mapping) + hardware = Hardware(arch, bindings, program) + + program.add_einsum(0) + metrics = Metrics(program, hardware, format_) + + return IterationGraph(program), Equation(program, metrics) def test_eager_inputs_one_fiber(): @@ -332,6 +350,15 @@ def test_make_iter_expr_display_slip(): assert eqn.make_iter_expr(rank, tensors).gen() == iter_expr +def test_make_iter_expr_leader_follower(): + graph, eqn = make_gamma() + + graph.pop_concord() + iter_expr = "t_k << Fiber.intersection(a_k, b_k, style=\"leader-follower\")" + + assert eqn.make_iter_expr(*graph.peek_concord()).gen() == iter_expr + + def test_flattened_output_only_bad(): mapping = 
""" partitioning: @@ -352,7 +379,7 @@ def test_flattened_output_only_bad(): def test_make_iter_expr_output_only(): program = make_other("A[i] = b", "") graph = IterationGraph(program) - eqn = Equation(program) + eqn = Equation(program, None) rank, tensors = graph.peek_concord() iter_expr = "a_i.iterRangeShapeRef(0, I, 1)" @@ -370,7 +397,7 @@ def test_make_iter_expr_output_only_display(): program = make_other("A[i] = b", mapping) graph = IterationGraph(program) - eqn = Equation(program) + eqn = Equation(program, None) rank, tensors = graph.peek_concord() iter_expr = "enumerate(a_i.iterRangeShapeRef(0, I, 1))" @@ -387,7 +414,7 @@ def test_make_iter_expr_output_only_partition(): program = make_other("A[i] = b", mapping) graph = IterationGraph(program) - eqn = Equation(program) + eqn = Equation(program, None) rank, tensors = graph.peek_concord() iter_expr = "a_i2.iterRangeShapeRef(0, I, I1)" @@ -472,6 +499,13 @@ def test_make_iter_expr_conv_part(): assert eqn.make_iter_expr(*graph.peek_concord()).gen() == hifiber +def test_make_iter_expr_metrics(): + graph, eqn = make_gamma() + hifiber = "t_m << a_m" + + assert eqn.make_iter_expr(*graph.peek_concord()).gen() == hifiber + + def test_make_payload_no_tensors(): _, eqn = make_basic() with pytest.raises(ValueError) as excinfo: @@ -547,7 +581,7 @@ def test_make_payload_display_slip(): def test_make_payload_output_only(): program = make_other("A[i] = b", "") graph = IterationGraph(program) - eqn = Equation(program) + eqn = Equation(program, None) rank, tensors = graph.pop_concord() iter_expr = "i, a_ref" @@ -585,6 +619,13 @@ def test_make_payload_conv_enum(): assert eqn.make_payload(*graph.pop_concord()).gen(parens=False) == hifiber +def test_make_payload_metrics(): + graph, eqn = make_gamma() + hifiber = "m, (t_k, a_k)" + + assert eqn.make_payload(*graph.pop_concord()).gen(parens=False) == hifiber + + def test_make_update(): _, eqn = make_basic() stmt = "a_ref += b_val * c_val * d_val" @@ -593,14 +634,14 @@ def test_make_update(): def test_make_update_vars(): program = make_other("A[i] = b * c * d", "") - eqn = Equation(program) + eqn = Equation(program, None) stmt = "a_ref += b * c * d" assert eqn.make_update().gen(depth=0) == stmt def test_make_update_mult_terms(): program = make_other("A[i] = b * B[i] + c * C[i] + d * D[i]", "") - eqn = Equation(program) + eqn = Equation(program, None) stmt = "a_ref += b * b_val + c * c_val + d * d_val" assert eqn.make_update().gen(depth=0) == stmt @@ -609,17 +650,3 @@ def test_make_update_take(): _, eqn = make_take() stmt = "z_ref += b" assert eqn.make_update().gen(depth=0) == stmt - - -def test_iter_fiber_not_fiber(): - expr = "O[p, q] = I[q + s] * F[s]" - graph, eqn = make_conv(expr, "[P, W, Q]") - graph.pop_concord() - graph.pop_concord() - graph.pop_concord() - _, tensors = graph.peek_concord() - - with pytest.raises(ValueError) as excinfo: - eqn._Equation__iter_fiber(None, tensors[0]) - - assert str(excinfo.value) == "Cannot iterate over payload o_ref" diff --git a/tests/trans/test_footer.py b/tests/trans/test_footer.py index d507922..1ceb22f 100644 --- a/tests/trans/test_footer.py +++ b/tests/trans/test_footer.py @@ -28,7 +28,7 @@ def assert_make_footer(loop_order, partitioning, display, hifiber_options): program = Program(Einsum.from_str(yaml), Mapping.from_str(yaml)) program.add_einsum(0) - graphics = Graphics(program) + graphics = Graphics(program, None) graphics.make_header() for tensor in program.get_equation().get_tensors(): @@ -37,7 +37,7 @@ def assert_make_footer(loop_order, partitioning, display, 
hifiber_options): hifiber = Footer.make_footer( program, graphics, Partitioner( - program, TransUtils())).gen( + program, TransUtils(program))).gen( depth=0) assert hifiber in hifiber_options diff --git a/tests/trans/test_graphics.py b/tests/trans/test_graphics.py index f40385a..04fdbbd 100644 --- a/tests/trans/test_graphics.py +++ b/tests/trans/test_graphics.py @@ -1,6 +1,7 @@ +from teaal.ir.hardware import Hardware +from teaal.ir.metrics import Metrics from teaal.ir.program import Program -from teaal.parse.einsum import Einsum -from teaal.parse.mapping import Mapping +from teaal.parse import * from teaal.trans.graphics import Graphics @@ -16,7 +17,7 @@ def create_default(): """ program = Program(Einsum.from_str(yaml), Mapping.from_str(yaml)) program.add_einsum(0) - return Graphics(program) + return Graphics(program, None) def create_spacetime(opt): @@ -38,7 +39,24 @@ def create_spacetime(opt): opt: """ + opt program = Program(Einsum.from_str(yaml), Mapping.from_str(yaml)) program.add_einsum(0) - return Graphics(program) + return Graphics(program, None) + + +def create_gamma(): + fname = "tests/integration/gamma.yaml" + einsum = Einsum.from_file(fname) + mapping = Mapping.from_file(fname) + arch = Architecture.from_file(fname) + bindings = Bindings.from_file(fname) + format_ = Format.from_file(fname) + + program = Program(einsum, mapping) + hardware = Hardware(arch, bindings, program) + + program.add_einsum(0) + metrics = Metrics(program, hardware, format_) + + return Graphics(program, metrics) def test_make_body_none(): @@ -64,6 +82,11 @@ def test_make_body_slip(): assert graphics.make_body().gen(0) == hifiber +def test_make_body_metrics(): + graphics = create_gamma() + assert graphics.make_body().gen(0) == "" + + def test_make_footer_none(): graphics = create_default() assert graphics.make_footer().gen(0) == "" @@ -76,6 +99,11 @@ def test_make_footer(): assert graphics.make_footer().gen(0) == hifiber +def test_make_footer_metrics(): + graphics = create_gamma() + assert graphics.make_footer().gen(0) == "" + + def test_make_header_none(): graphics = create_default() assert graphics.make_header().gen(0) == "" @@ -92,3 +120,8 @@ def test_make_header_slip(): hifiber = "canvas = createCanvas(A_KM, B_KN, Z_MN)\n" + \ "timestamps = {}" assert graphics.make_header().gen(0) == hifiber + + +def test_make_header_metrics(): + graphics = create_gamma() + assert graphics.make_header().gen(0) == "" diff --git a/tests/trans/test_header.py b/tests/trans/test_header.py index a1ca39a..6c6da39 100644 --- a/tests/trans/test_header.py +++ b/tests/trans/test_header.py @@ -1,10 +1,11 @@ import pytest +from teaal.ir.hardware import Hardware from teaal.ir.iter_graph import IterationGraph +from teaal.ir.metrics import Metrics from teaal.ir.program import Program from teaal.ir.tensor import Tensor -from teaal.parse.einsum import Einsum -from teaal.parse.mapping import Mapping +from teaal.parse import * from teaal.trans.header import Header from teaal.trans.partitioner import Partitioner from teaal.trans.utils import TransUtils @@ -26,7 +27,7 @@ def build_header(exprs, mapping): program = Program(Einsum.from_str(yaml), Mapping.from_str(yaml)) program.add_einsum(0) - header = Header(program, Partitioner(program, TransUtils())) + header = Header(program, None, Partitioner(program, TransUtils(program))) return header @@ -47,11 +48,34 @@ def build_header_conv(loop_order): program = Program(Einsum.from_str(yaml), Mapping.from_str(yaml)) program.add_einsum(0) - header = Header(program, Partitioner(program, TransUtils())) 
+ header = Header(program, None, Partitioner(program, TransUtils(program))) return header +def build_header_gamma(): + fname = "tests/integration/gamma.yaml" + einsum = Einsum.from_file(fname) + mapping = Mapping.from_file(fname) + arch = Architecture.from_file(fname) + bindings = Bindings.from_file(fname) + format_ = Format.from_file(fname) + + program = Program(einsum, mapping) + hardware = Hardware(arch, bindings, program) + + program.add_einsum(0) + metrics = Metrics(program, hardware, format_) + + header = Header( + program, + metrics, + Partitioner( + program, + TransUtils(program))) + return header + + def build_matmul_header(mapping): exprs = """ - Z[m, n] = A[k, m] * B[k, n] @@ -60,18 +84,28 @@ def build_matmul_header(mapping): def test_make_get_payload(): - hifiber = "a_val = a_m.getPayload(m, k)" - + header = build_matmul_header("") tensor = Tensor("A", ["M", "K"]) - assert Header.make_get_payload(tensor, ["M", "K"]).gen(0) == hifiber + hifiber = "a_val = a_m.getPayload(m, k)" + assert header.make_get_payload(tensor, ["M", "K"]).gen(0) == hifiber -def test_make_get_payload_output(): - hifiber = "z_n = z_m.getPayloadRef(m)" +def test_make_get_payload_output(): + header = build_matmul_header("") tensor = Tensor("Z", ["M", "N"]) tensor.set_is_output(True) - assert Header.make_get_payload(tensor, ["M"]).gen(0) == hifiber + + hifiber = "z_n = z_m.getPayloadRef(m)" + assert header.make_get_payload(tensor, ["M"]).gen(0) == hifiber + + +def test_make_get_payload_metrics(): + header = build_header_gamma() + tensor = Tensor("A", ["M", "K"]) + + hifiber = "a_k = a_m.getPayload(m, trace=\"get_payload_A\")" + assert header.make_get_payload(tensor, ["M"]).gen(0) == hifiber def test_make_get_root(): @@ -137,11 +171,18 @@ def test_make_output_conv_shape(): assert header.make_output().gen(0) == hifiber +def test_make_output_metrics_shape(): + hifiber = "T_MKN = Tensor(rank_ids=[\"M\", \"K\", \"N\"], shape=[M, K, N])" + header = build_header_gamma() + + assert header.make_output().gen(0) == hifiber + + def test_make_swizzle_bad(): header = build_matmul_header("") tensor = Tensor("A", ["K", "M"]) with pytest.raises(ValueError) as excinfo: - header.make_swizzle(tensor, "foo") + header.make_swizzle(tensor, [], "foo") assert str( excinfo.value) == "Unknown swizzling reason: foo" @@ -152,7 +193,22 @@ def test_make_swizzle_loop_order(): header = build_matmul_header("") tensor = Tensor("A", ["K", "M"]) - assert header.make_swizzle(tensor, "loop-order").gen(depth=0) == hifiber + assert header.make_swizzle( + tensor, ["M", "K"], "loop-order").gen(depth=0) == hifiber + + +def test_make_swizzle_none(): + hifiber = "" + + mapping = """ + rank-order: + A: [M, K] + """ + + header = build_matmul_header(mapping) + tensor = Tensor("A", ["M", "K"]) + assert header.make_swizzle( + tensor, ["M", "K"], "loop-order").gen(depth=0) == hifiber def test_make_swizzle_partitioning(): @@ -168,7 +224,21 @@ def test_make_swizzle_partitioning(): header = build_matmul_header(mapping) tensor = Tensor("A", ["K1", "K0", "M"]) - assert header.make_swizzle(tensor, "partitioning").gen(depth=0) == hifiber + assert header.make_swizzle( + tensor, [ + "M", "K0"], "partitioning").gen( + depth=0) == hifiber + + +def test_make_swizzle_metrics(): + hifiber = "A_KM = A_MK.swizzleRanks(rank_ids=[\"K\", \"M\"])" + + header = build_matmul_header("") + tensor = Tensor("A", ["M", "K"]) + assert header.make_swizzle( + tensor, [ + "K", "M"], "metrics").gen( + depth=0) == hifiber def test_make_tensor_from_fiber(): diff --git 
a/tests/trans/test_hifiber.py b/tests/trans/test_hifiber.py index 6d7ad94..5c52e59 100644 --- a/tests/trans/test_hifiber.py +++ b/tests/trans/test_hifiber.py @@ -571,58 +571,193 @@ def test_hifiber_dyn_flattening(): assert str(HiFiber(einsum, mapping)) == hifiber -def test_hifiber_hardware(): - fname = "tests/integration/gamma.yaml" - einsum = Einsum.from_file(fname) - mapping = Mapping.from_file(fname) - arch = Architecture.from_file(fname) - bindings = Bindings.from_file(fname) - format_ = Format.from_file(fname) +def test_hifiber_traffic(): + yaml = """ + einsum: + declaration: + A: [K, M] + B: [K, M] + C: [K] + Z: [M] + expressions: + - Z[m] = A[k, m] * B[k, m] * C[k] + mapping: + spacetime: + Z: + space: [] + time: [M, K] + architecture: + accel: + - name: level0 + attributes: + clock_frequency: 2048 + local: + - name: DRAM + class: DRAM + attributes: + bandwidth: 512 + subtree: + - name: level1 + local: + - name: L2Cache + class: Cache + attributes: + width: 64 + depth: 1024 + bindings: + Z: + - config: accel + prefix: tmp/Z + - component: DRAM + bindings: + - tensor: Z + rank: M + type: elem + format: default + - component: L2Cache + bindings: + - tensor: Z + rank: M + type: elem + format: default + format: + Z: + default: + rank-order: [M] + M: + format: C + cbits: 32 + pbits: 64 + """ + einsum = Einsum.from_str(yaml) + mapping = Mapping.from_str(yaml) + arch = Architecture.from_str(yaml) + bindings = Bindings.from_str(yaml) + format_ = Format.from_str(yaml) - hifiber = "T_MKN = Tensor(rank_ids=[\"M\", \"K\", \"N\"])\n" + \ - "t_m = T_MKN.getRoot()\n" + \ + hifiber = "Z_M = Tensor(rank_ids=[\"M\"], shape=[M])\n" + \ + "A_MK = A_KM.swizzleRanks(rank_ids=[\"M\", \"K\"])\n" + \ + "B_MK = B_KM.swizzleRanks(rank_ids=[\"M\", \"K\"])\n" + \ + "z_m = Z_M.getRoot()\n" + \ "a_m = A_MK.getRoot()\n" + \ - "b_k = B_KN.getRoot()\n" + \ - "B_KN.setCollecting(\"K\", True)\n" + \ - "Metrics.beginCollect([\"M\", \"K\", \"N\"])\n" + \ - "for m, (t_k, a_k) in t_m << a_m:\n" + \ - " for k, (t_n, (a_val, b_n)) in t_k << (a_k & b_k):\n" + \ - " for n, (t_ref, b_val) in t_n << b_n:\n" + \ - " t_ref += b_val\n" + \ + "b_m = B_MK.getRoot()\n" + \ + "c_k = C_K.getRoot()\n" + \ + "Metrics.beginCollect(\"tmp/Z\")\n" + \ + "Metrics.trace(\"M\", type_=\"populate_read_0\", consumable=False)\n" + \ + "Metrics.trace(\"M\", type_=\"populate_write_0\", consumable=False)\n" + \ + "for m, (z_ref, (a_k, b_k)) in z_m << (a_m & b_m):\n" + \ + " for k, (a_val, (b_val, c_val)) in a_k & (b_k & c_k):\n" + \ + " z_ref += a_val * b_val * c_val\n" + \ "Metrics.endCollect()\n" + \ "metrics = {}\n" + \ - "metrics[\"T\"] = {}\n" + \ - "metrics[\"T\"][\"T footprint\"] = 0\n" + \ - "metrics[\"T\"][\"T traffic\"] = 0\n" + \ - "A_MK_format = Format(A_MK, {\"M\": {\"format\": \"U\", \"rhbits\": 32, \"pbits\": 32}, \"K\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 64}})\n" + \ - "metrics[\"T\"][\"A footprint\"] = A_MK_format.getTensor()\n" + \ - "metrics[\"T\"][\"A traffic\"] = metrics[\"T\"][\"A footprint\"]\n" + \ - "B_KN_format = Format(B_KN, {\"K\": {\"format\": \"U\", \"rhbits\": 32, \"pbits\": 32}, \"N\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 64}})\n" + \ - "metrics[\"T\"][\"B footprint\"] = B_KN_format.getTensor()\n" + \ - "metrics[\"T\"][\"B traffic\"] = Traffic.cacheTraffic(B_KN, \"K\", B_KN_format, 25165824) + B_KN_format.getRank(\"K\")\n" + \ - "metrics[\"T\"][\"K intersections\"] = Compute.lfCount(Metrics.dump(), \"K\", 0)\n" + \ - "Z_MN = Tensor(rank_ids=[\"M\", \"N\"])\n" + \ - "T_MNK = 
T_MKN.swizzleRanks(rank_ids=[\"M\", \"N\", \"K\"])\n" + \ - "z_m = Z_MN.getRoot()\n" + \ - "t_m = T_MNK.getRoot()\n" + \ - "a_m = A_MK.getRoot()\n" + \ - "Metrics.beginCollect([\"M\", \"N\", \"K\"])\n" + \ - "for m, (z_n, (t_n, a_k)) in z_m << (t_m & a_m):\n" + \ - " for n, (z_ref, t_k) in z_n << t_n:\n" + \ - " for k, (t_val, a_val) in t_k & a_k:\n" + \ - " z_ref += t_val * a_val\n" + \ + "metrics[\"Z\"] = {}\n" + \ + "formats = {\"Z\": Format(Z_M, {\"rank-order\": [\"M\"], \"M\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 64}})}\n" + \ + "bindings = [{\"tensor\": \"Z\", \"rank\": \"M\", \"type\": \"elem\", \"format\": \"default\"}]\n" + \ + "traces = {(\"Z\", \"M\", \"elem\", \"read\"): \"tmp/Z-M-populate_read_0.csv\", (\"Z\", \"M\", \"elem\", \"write\"): \"tmp/Z-M-populate_write_0.csv\"}\n" + \ + "traffic = Traffic.cacheTraffic(bindings, formats, traces, 65536, 64)\n" + \ + "metrics[\"Z\"][\"DRAM\"] = {}\n" + \ + "metrics[\"Z\"][\"DRAM\"][\"Z\"] = {}\n" + \ + "metrics[\"Z\"][\"DRAM\"][\"Z\"][\"read\"] = 0\n" + \ + "metrics[\"Z\"][\"DRAM\"][\"Z\"][\"write\"] = 0\n" + \ + "metrics[\"Z\"][\"DRAM\"][\"Z\"][\"read\"] += traffic[0][\"Z\"][\"read\"]\n" + \ + "metrics[\"Z\"][\"DRAM\"][\"Z\"][\"write\"] += traffic[0][\"Z\"][\"write\"]\n" + \ + "metrics[\"Z\"][\"DRAM\"][\"time\"] = (metrics[\"Z\"][\"DRAM\"][\"Z\"][\"read\"] + metrics[\"Z\"][\"DRAM\"][\"Z\"][\"write\"]) / 512\n" + \ + "metrics[\"blocks\"] = [[\"Z\"]]\n" + \ + "metrics[\"time\"] = metrics[\"Z\"][\"DRAM\"][\"time\"]" + + assert str(HiFiber(einsum, mapping, arch, bindings, format_)) == hifiber + + +def test_hifiber_intersect(): + yaml = """ + einsum: + declaration: + Z: [] + A: [I, J, K] + B: [I, J, K] + expressions: + - Z[] = A[i, j, k] * B[i, j, k] + mapping: + spacetime: + Z: + space: [] + time: [I, J, K] + architecture: + accel: + - name: level0 + attributes: + clock_frequency: 2048 + local: + - name: TF + class: Intersector + attributes: + type: two-finger + bindings: + Z: + - config: accel + prefix: tmp/Z + - component: TF + bindings: + - rank: K + # TODO: Allow the format to be empty + format: + Z: + default: + rank-order: [] + """ + einsum = Einsum.from_str(yaml) + mapping = Mapping.from_str(yaml) + arch = Architecture.from_str(yaml) + bindings = Bindings.from_str(yaml) + format_ = Format.from_str(yaml) + + hifiber = "Z_ = Tensor(rank_ids=[], shape=[])\n" + \ + "z_ref = Z_.getRoot()\n" + \ + "a_i = A_IJK.getRoot()\n" + \ + "b_i = B_IJK.getRoot()\n" + \ + "Metrics.beginCollect(\"tmp/Z\")\n" + \ + "TF_K = TwoFingerIntersector()\n" + \ + "Metrics.trace(\"K\", type_=\"intersect_0\", consumable=True)\n" + \ + "Metrics.trace(\"K\", type_=\"intersect_1\", consumable=True)\n" + \ + "for i, (a_j, b_j) in a_i & b_i:\n" + \ + " for j, (a_k, b_k) in a_j & b_j:\n" + \ + " for k, (a_val, b_val) in a_k & b_k:\n" + \ + " z_ref += a_val * b_val\n" + \ + " TF_K.addTraces(Metrics.consumeTrace(\"K\", \"intersect_0\"), Metrics.consumeTrace(\"K\", \"intersect_1\"))\n" + \ "Metrics.endCollect()\n" + \ + "metrics = {}\n" + \ "metrics[\"Z\"] = {}\n" + \ - "Z_MN_format = Format(Z_MN, {\"M\": {\"format\": \"U\", \"rhbits\": 32, \"pbits\": 32}, \"N\": {\"format\": \"C\", \"cbits\": 32, \"pbits\": 64}})\n" + \ - "metrics[\"Z\"][\"Z footprint\"] = Z_MN_format.getTensor()\n" + \ - "metrics[\"Z\"][\"Z traffic\"] = metrics[\"Z\"][\"Z footprint\"]\n" + \ - "metrics[\"Z\"][\"T footprint\"] = 0\n" + \ - "metrics[\"Z\"][\"T traffic\"] = 0\n" + \ - "A_MK_format = Format(A_MK, {\"M\": {\"format\": \"U\", \"rhbits\": 32, \"pbits\": 32}, \"K\": {\"format\": 
\"C\", \"cbits\": 32, \"pbits\": 64}})\n" + \ - "metrics[\"Z\"][\"A footprint\"] = A_MK_format.getTensor()\n" + \ - "metrics[\"Z\"][\"A traffic\"] = metrics[\"Z\"][\"A footprint\"]\n" + \ - "metrics[\"Z\"][\"mul\"] = Compute.opCount(Metrics.dump(), \"mul\")\n" + \ - "metrics[\"Z\"][\"add\"] = Compute.opCount(Metrics.dump(), \"add\")\n" + \ - "metrics[\"Z\"][\"T_MKN merge ops\"] = Compute.swapCount(T_MKN, 1, 64, 1)" + "formats = {}\n" + \ + "metrics[\"Z\"][\"TF\"] = 0\n" + \ + "metrics[\"Z\"][\"TF\"] += TF_K.getNumIntersects()\n" + \ + "metrics[\"Z\"][\"TF\"][\"time\"] = metrics[\"Z\"][\"TF\"] / 2048\n" + \ + "metrics[\"blocks\"] = [[\"Z\"]]\n" + \ + "metrics[\"time\"] = metrics[\"Z\"][\"TF\"][\"time\"]" assert str(HiFiber(einsum, mapping, arch, bindings, format_)) == hifiber + + +def test_hifiber_gamma_no_errors(): + # There is too much variation in the Gamma spec to test if the HiFiber + # remains unchanged + fname = "tests/integration/gamma.yaml" + einsum = Einsum.from_file(fname) + mapping = Mapping.from_file(fname) + arch = Architecture.from_file(fname) + bindings = Bindings.from_file(fname) + format_ = Format.from_file(fname) + + print(HiFiber(einsum, mapping, arch, bindings, format_)) + + +def test_hifiber_extensor_no_errors(): + # There is too much variation in the ExTensor spec to test if the HiFiber + # remains unchanged + fname = "tests/integration/extensor.yaml" + einsum = Einsum.from_file(fname) + mapping = Mapping.from_file(fname) + arch = Architecture.from_file(fname) + bindings = Bindings.from_file(fname) + format_ = Format.from_file(fname) + + print(HiFiber(einsum, mapping, arch, bindings, format_)) diff --git a/tests/trans/test_partitioner.py b/tests/trans/test_partitioner.py index bf928b9..9e48f01 100644 --- a/tests/trans/test_partitioner.py +++ b/tests/trans/test_partitioner.py @@ -29,7 +29,7 @@ def build_partitioner(parts): program = Program(Einsum.from_str(yaml), Mapping.from_str(yaml)) program.add_einsum(0) - partitioner = Partitioner(program, TransUtils()) + partitioner = Partitioner(program, TransUtils(program)) return program, partitioner @@ -51,7 +51,7 @@ def build_partitioner_conv(expr, parts): program = Program(Einsum.from_str(yaml), Mapping.from_str(yaml)) program.add_einsum(0) - partitioner = Partitioner(program, TransUtils()) + partitioner = Partitioner(program, TransUtils(program)) return program, partitioner @@ -70,7 +70,7 @@ def build_partitioner_copy(parts): program = Program(Einsum.from_str(yaml), Mapping.from_str(yaml)) program.add_einsum(0) - partitioner = Partitioner(program, TransUtils()) + partitioner = Partitioner(program, TransUtils(program)) return program, partitioner @@ -90,7 +90,7 @@ def build_partitioner_math_no_halo(parts): program = Program(Einsum.from_str(yaml), Mapping.from_str(yaml)) program.add_einsum(0) - partitioner = Partitioner(program, TransUtils()) + partitioner = Partitioner(program, TransUtils(program)) return program, partitioner @@ -566,7 +566,7 @@ def assert_unpartition(spec, hifiber_options): for tensor in program.get_equation().get_tensors(): program.apply_all_partitioning(tensor) - partitioner = Partitioner(program, TransUtils()) + partitioner = Partitioner(program, TransUtils(program)) hifiber = partitioner.unpartition( program.get_equation().get_output()).gen(0) @@ -643,7 +643,7 @@ def test_unpartition_flatten(): program.apply_all_partitioning(program.get_equation().get_output()) - partitioner = Partitioner(program, TransUtils()) + partitioner = Partitioner(program, TransUtils(program)) hifiber = partitioner.unpartition( 
program.get_equation().get_output()).gen(0) corr = "tmp0 = Z_M1NM01NM00\n" + \ diff --git a/tests/trans/test_utils.py b/tests/trans/test_utils.py index d0f4989..6b13e68 100644 --- a/tests/trans/test_utils.py +++ b/tests/trans/test_utils.py @@ -1,10 +1,25 @@ import pytest from teaal.hifiber import * +from teaal.ir.program import Program from teaal.ir.tensor import Tensor +from teaal.parse import * from teaal.trans.utils import TransUtils +def make_program(): + yaml = """ + einsum: + declaration: + A: [K, M] + B: [K, N] + Z: [M, N] + expressions: + - Z[m, n] = A[k, m] * B[k, n] + """ + return Program(Einsum.from_str(yaml), Mapping.from_str(yaml)) + + def test_build_expr_bad(): with pytest.raises(ValueError) as excinfo: TransUtils.build_expr(range(5)) @@ -18,6 +33,7 @@ def test_build_expr(): assert TransUtils.build_expr("foo").gen() == "\"foo\"" assert TransUtils.build_expr([1, 2, 3, 4]).gen() == "[1, 2, 3, 4]" assert TransUtils.build_expr({1: 2, 3: 4}).gen() == "{1: 2, 3: 4}" + assert TransUtils.build_expr((1, 2, 3, 4)).gen() == "(1, 2, 3, 4)" def test_build_rank_ids(): @@ -33,8 +49,7 @@ def test_build_set_rank_ids(): def test_build_shape(): - tensor = Tensor("A", ["I", "J"]) - assert TransUtils.build_shape(tensor).gen() == "shape=[I, J]" + assert TransUtils.build_shape(["I", "J"]).gen() == "shape=[I, J]" def test_build_swizzle(): @@ -44,14 +59,16 @@ def test_build_swizzle(): def test_next_tmp(): - utils = TransUtils() + program = make_program() + utils = TransUtils(program) assert utils.next_tmp() == "tmp0" assert utils.next_tmp() == "tmp1" assert utils.next_tmp() == "tmp2" def test_curr_tmp(): - utils = TransUtils() + program = make_program() + utils = TransUtils(program) with pytest.raises(ValueError) as excinfo: utils.curr_tmp()
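
The three gamma fixtures added above (make_gamma in test_equation.py, create_gamma in test_graphics.py, and build_header_gamma in test_header.py) repeat the same parse-and-construct boilerplate verbatim. A minimal consolidation sketch, not part of this patch: the helper name build_gamma_ir and wherever it would live are hypothetical, and the body uses only the constructors and call order already exercised by those fixtures.

from teaal.ir.hardware import Hardware
from teaal.ir.metrics import Metrics
from teaal.ir.program import Program
from teaal.parse import *


def build_gamma_ir(fname="tests/integration/gamma.yaml"):
    # Parse each section of the spec once, exactly as the three
    # fixtures above do
    einsum = Einsum.from_file(fname)
    mapping = Mapping.from_file(fname)
    arch = Architecture.from_file(fname)
    bindings = Bindings.from_file(fname)
    format_ = Format.from_file(fname)

    # Preserve the construction order used in the fixtures: Hardware is
    # built before add_einsum(0), Metrics after it
    program = Program(einsum, mapping)
    hardware = Hardware(arch, bindings, program)

    program.add_einsum(0)
    metrics = Metrics(program, hardware, format_)

    return program, metrics

Each fixture would then shrink to program, metrics = build_gamma_ir() followed by the object under test: Equation(program, metrics), Graphics(program, metrics), or Header(program, metrics, Partitioner(program, TransUtils(program))).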
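Similarly, the two trailing smoke tests in test_hifiber.py (test_hifiber_gamma_no_errors and test_hifiber_extensor_no_errors) differ only in the YAML file they load. A sketch of a parametrized alternative, again illustrative rather than part of the patch: the HiFiber import path is assumed to match what test_hifiber.py already uses, and print() is swapped for str() since only exception-free generation is being checked.

import pytest

from teaal.parse import *
from teaal.trans.hifiber import HiFiber


@pytest.mark.parametrize("fname", [
    "tests/integration/gamma.yaml",
    "tests/integration/extensor.yaml",
])
def test_hifiber_no_errors(fname):
    # These specs vary too much to assert on the exact HiFiber output,
    # so only check that translation completes without raising
    einsum = Einsum.from_file(fname)
    mapping = Mapping.from_file(fname)
    arch = Architecture.from_file(fname)
    bindings = Bindings.from_file(fname)
    format_ = Format.from_file(fname)

    str(HiFiber(einsum, mapping, arch, bindings, format_))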