Fix parent-child relationship issue in text2chakra_converter.py #153

Open · wants to merge 2 commits into main

96 changes: 47 additions & 49 deletions et_converter/pytorch2chakra_converter.py
@@ -106,7 +106,7 @@ class PyTorch2ChakraConverter:
parent_to_children_map (Dict[int, List[int]]): Map of Chakra parent node
IDs to their child node
IDs. Used to simulate
execution based on data
execution based on control
dependencies.
"""

@@ -187,7 +187,7 @@ def convert(self) -> None:

root_nodes = [node for node in self.chakra_nodes.values() if self.is_root_node(node)]
for root_node in root_nodes:
self.convert_ctrl_dep_to_data_dep(root_node)
self.convert_parent_child_relationship_to_ctrl_dep(root_node)

self.remove_dangling_nodes()

@@ -488,8 +488,6 @@ def convert_to_chakra_node(self, pytorch_node: PyTorchNode) -> ChakraNode:
chakra_node.id = pytorch_node.id
chakra_node.name = pytorch_node.name
chakra_node.type = self.get_chakra_node_type_from_pytorch_node(pytorch_node)
if pytorch_node.parent in self.chakra_nodes:
chakra_node.ctrl_deps.append(pytorch_node.parent)
chakra_node.duration_micros = pytorch_node.exclusive_dur
chakra_node.inputs.values = str(pytorch_node.inputs)
chakra_node.inputs.shapes = str(pytorch_node.input_shapes)
@@ -499,6 +497,7 @@ def convert_to_chakra_node(self, pytorch_node: PyTorchNode) -> ChakraNode:
chakra_node.outputs.types = str(pytorch_node.output_types)
chakra_node.attr.extend([
ChakraAttr(name="rf_id", int64_val=pytorch_node.rf_id),
ChakraAttr(name="parent", int64_val=pytorch_node.parent),
ChakraAttr(name="fw_parent", int64_val=pytorch_node.fw_parent),
ChakraAttr(name="seq_id", int64_val=pytorch_node.seq_id),
ChakraAttr(name="scope", int64_val=pytorch_node.scope),
@@ -580,33 +579,32 @@ def is_root_node(self, node):
"[pytorch|profiler|execution_trace|thread]"]:
return True

def convert_ctrl_dep_to_data_dep(self, chakra_node: ChakraNode) -> None:
"""
Traverses nodes based on control dependencies (parent nodes) and encodes
data dependencies appropriately. This method is crucial for converting the
dependency structure from PyTorch execution traces to Chakra execution
traces. In PyTorch traces, control dependencies are represented by a
parent field in each node, denoting the parent node ID. This structure
indicates which functions (operators) are called by a particular operator.

In contrast, Chakra execution traces, while retaining control dependencies
for compatibility, primarily rely on data dependencies to represent
relationships between nodes. Data dependencies in Chakra are more broadly
defined compared to those in PyTorch, where they are implicitly encoded in
tensor input-output relationships. In Chakra, data dependencies are explicit
and represent a general dependency between nodes.

To convert PyTorch's control dependencies to Chakra's data dependencies, a
Depth-First Search (DFS) is performed. The DFS traversal starts from a given
Chakra node, traversing through its children (based on control
dependencies). During traversal, data dependencies are encoded by linking
nodes that have been visited in sequence. These dependencies form a chain,
mirroring the function call order from the PyTorch trace.
def convert_parent_child_relationship_to_ctrl_dep(self, chakra_node: ChakraNode) -> None:
"""
Traverses nodes based on parent-child relationships in PyTorch execution
traces and encodes control dependencies appropriately. This method is
crucial for converting the dependency structure from PyTorch execution
traces to Chakra execution traces. In PyTorch traces, control dependencies
are represented by a parent field in each node, denoting the parent node ID.
This structure indicates which functions (operators) are called by a particular
operator.

In contrast, Chakra execution traces, while retaining parent-child
relationships for compatibility, primarily rely on control dependencies
to represent relationships between nodes. In Chakra, control dependencies
are explicit and represent a general dependency between nodes.

To convert PyTorch's parent-child relationships to Chakra's control dependencies,
a Depth-First Search (DFS) is performed. The DFS traversal starts from a given
Chakra node, traversing through its children (based on parent-child relationships).
During traversal, control dependencies are encoded by linking nodes that
have been visited in sequence. These dependencies form a chain, mirroring
the function call order from the PyTorch trace.

Special attention is given to the types of nodes involved. CPU and label
nodes (non-GPU) in PyTorch can only depend on other CPU or label nodes.
However, GPU nodes can depend on any type of node. Thus, while traversing,
if a GPU node is encountered, it can establish a data dependency with the
if a GPU node is encountered, it can establish a control dependency with the
last visited node of any type. For CPU and label nodes, the dependency is
only established with the last visited non-GPU node. This distinction
ensures that the converted dependencies accurately reflect the execution
@@ -650,38 +648,38 @@ def convert_ctrl_dep_to_data_dep(self, chakra_node: ChakraNode) -> None:

if node_op_type == PyTorchNodeType.GPU_OP:
if last_visited_any:
if last_visited_any.id not in current_node.data_deps:
current_node.data_deps.append(last_visited_any.id)
if last_visited_any.id not in current_node.ctrl_deps:
current_node.ctrl_deps.append(last_visited_any.id)
self.logger.debug(
f"GPU Node ID {current_node.id} now has a data "
f"GPU Node ID {current_node.id} now has a control "
f"dependency on Node ID {last_visited_any.id}"
)

stream_id = pytorch_node.stream
if stream_id in last_gpu_in_stream:
if last_gpu_in_stream[stream_id].id not in current_node.data_deps:
current_node.data_deps.append(last_gpu_in_stream[stream_id].id)
if last_gpu_in_stream[stream_id].id not in current_node.ctrl_deps:
current_node.ctrl_deps.append(last_gpu_in_stream[stream_id].id)
self.logger.debug(
f"GPU Node ID {current_node.id} in stream {stream_id} now has a data "
f"GPU Node ID {current_node.id} in stream {stream_id} now has a control "
f"dependency on GPU Node ID {last_gpu_in_stream[stream_id].id} in the same stream."
)
last_gpu_in_stream[stream_id] = current_node
last_visited_any = current_node
else:
if pytorch_node.inter_thread_dep:
for id in self.id_assigner.get_assigned_ids(pytorch_node.inter_thread_dep):
if id not in current_node.data_deps:
current_node.data_deps.append(id)
if id not in current_node.ctrl_deps:
current_node.ctrl_deps.append(id)
self.logger.debug(
f"CPU Node ID {current_node.id} now has an inter-thread data "
f"CPU Node ID {current_node.id} now has an inter-thread control "
f"dependency on Node ID {id}"
)

if last_visited_non_gpu:
if last_visited_non_gpu.id not in current_node.data_deps:
current_node.data_deps.append(last_visited_non_gpu.id)
if last_visited_non_gpu.id not in current_node.ctrl_deps:
current_node.ctrl_deps.append(last_visited_non_gpu.id)
self.logger.debug(
f"CPU Node ID {current_node.id} now has a data "
f"CPU Node ID {current_node.id} now has a control "
f"dependency on non-GPU Node ID {last_visited_non_gpu.id}"
)
last_visited_non_gpu = current_node
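For reviewers who want the renamed traversal in one place: the hunk above chains `ctrl_deps` in DFS (call) order, with CPU/label nodes depending on the last visited non-GPU node and GPU nodes depending on the last visited node of any kind plus the previous GPU node on the same stream. Below is a minimal standalone sketch of that chaining; the `Node` class and `encode_ctrl_deps` helper are illustrative stand-ins, not the converter's actual types, and inter-thread dependencies are omitted.

```python
# Minimal sketch of chaining ctrl_deps in DFS (call) order; Node and
# encode_ctrl_deps are illustrative stand-ins, not the converter's types.
from dataclasses import dataclass, field
from typing import Dict, List, Optional


@dataclass
class Node:
    id: int
    is_gpu: bool = False
    stream: int = 0
    children: List["Node"] = field(default_factory=list)
    ctrl_deps: List[int] = field(default_factory=list)


def encode_ctrl_deps(root: Node) -> None:
    """Walk the parent-child tree and link nodes visited in sequence."""
    last_any: Optional[Node] = None           # last visited node of any kind
    last_non_gpu: Optional[Node] = None       # last visited CPU/label node
    last_gpu_in_stream: Dict[int, Node] = {}  # last GPU node seen per stream

    stack = [root]
    while stack:
        node = stack.pop()
        if node.is_gpu:
            if last_any and last_any.id not in node.ctrl_deps:
                node.ctrl_deps.append(last_any.id)
            prev = last_gpu_in_stream.get(node.stream)
            if prev and prev.id not in node.ctrl_deps:
                node.ctrl_deps.append(prev.id)
            last_gpu_in_stream[node.stream] = node
        else:
            if last_non_gpu and last_non_gpu.id not in node.ctrl_deps:
                node.ctrl_deps.append(last_non_gpu.id)
            last_non_gpu = node
        last_any = node
        stack.extend(reversed(node.children))  # preserve call order in the DFS


# Example: a root op calls a CPU op, which launches a GPU kernel on stream 7.
root, op, kernel = Node(1), Node(2), Node(3, is_gpu=True, stream=7)
root.children, op.children = [op], [kernel]
encode_ctrl_deps(root)
print(op.ctrl_deps, kernel.ctrl_deps)  # -> [1] [2]
```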
@@ -701,11 +699,11 @@ def remove_dangling_nodes(self) -> None:
"""
parent_ids = set()
for node in self.chakra_nodes.values():
parent_ids.update(node.data_deps)
parent_ids.update(node.ctrl_deps)

dangling_nodes = []
for node_id, node in list(self.chakra_nodes.items()):
if node_id not in parent_ids and not node.data_deps:
if node_id not in parent_ids and not node.ctrl_deps:
dangling_nodes.append(node)
del self.chakra_nodes[node_id]
del self.pytorch_nodes[node_id]
@@ -721,7 +719,7 @@ def update_parent_to_children_map(self) -> None:
This map is used to efficiently simulate node execution based on data dependencies.
"""
for node_id, node in self.chakra_nodes.items():
for dep_id in node.data_deps:
for dep_id in node.ctrl_deps:
# Ensure the dependency is registered as a parent of the current node
if dep_id not in self.parent_to_children_map:
self.parent_to_children_map[dep_id] = []
@@ -765,7 +763,7 @@ def dfs(node_id: int, path: List[int]) -> bool:
visited.add(node_id)
stack.add(node_id)
path.append(node_id)
for child_id in self.chakra_nodes[node_id].data_deps:
for child_id in self.chakra_nodes[node_id].ctrl_deps:
if dfs(child_id, path.copy()):
return True
stack.remove(node_id)
@@ -841,26 +839,26 @@ def close_chakra_execution_trace(self) -> None:

def simulate_execution(self) -> None:
"""
Simulates the execution of Chakra nodes based on data dependencies.
Simulates the execution of Chakra nodes based on control dependencies.

This method considers both CPU and GPU nodes. Nodes are issued for
execution based on the readiness determined by dependency resolution.
A simplistic global clock is used to model the execution time.
"""
self.logger.info("Simulating execution of Chakra nodes based on data "
self.logger.info("Simulating execution of Chakra nodes based on control "
"dependencies.")

# Initialize queues for ready CPU and GPU nodes
ready_cpu_nodes = [
(node_id, self.chakra_nodes[node_id])
for node_id in self.chakra_nodes
if not self.chakra_nodes[node_id].data_deps and
if not self.chakra_nodes[node_id].ctrl_deps and
not self.pytorch_nodes[node_id].is_gpu_op()
]
ready_gpu_nodes = [
(node_id, self.chakra_nodes[node_id])
for node_id in self.chakra_nodes
if not self.chakra_nodes[node_id].data_deps and
if not self.chakra_nodes[node_id].ctrl_deps and
self.pytorch_nodes[node_id].is_gpu_op()
]
ready_cpu_nodes.sort(key=lambda x: x[1].id)
@@ -910,8 +908,8 @@ def simulate_execution(self) -> None:
children_ids = self.parent_to_children_map.get(node_id, [])
for child_id in children_ids:
child_node = self.chakra_nodes[child_id]
child_node.data_deps.remove(node_id)
if not child_node.data_deps:
child_node.ctrl_deps.remove(node_id)
if not child_node.ctrl_deps:
if not self.pytorch_nodes[child_id].is_gpu_op():
ready_cpu_nodes.append((child_id, child_node))
else:
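The simulation hunks above issue nodes from separate CPU and GPU ready queues once their `ctrl_deps` are resolved. The toy loop below shows the core idea under simplifying assumptions (a single queue, a serial global clock, and hypothetical node ids and durations); it is a sketch, not the converter's actual two-queue scheduler.

```python
# Toy readiness-driven simulation: a node is issued once all of its ctrl_deps
# have finished; a single global clock accumulates durations. Illustrative only.
from typing import Dict, List, Set

durations: Dict[int, int] = {1: 5, 2: 3, 3: 7}               # node id -> duration (us)
ctrl_deps: Dict[int, List[int]] = {1: [], 2: [1], 3: [1, 2]}

remaining: Dict[int, Set[int]] = {nid: set(deps) for nid, deps in ctrl_deps.items()}
ready: List[int] = [nid for nid, deps in remaining.items() if not deps]
finished: Set[int] = set()
clock = 0

while ready:
    nid = ready.pop(0)
    clock += durations[nid]
    finished.add(nid)
    print(f"t={clock}us: node {nid} done")
    for child, deps in remaining.items():
        if child in finished or child in ready:
            continue
        deps.discard(nid)
        if not deps:
            ready.append(child)   # all control dependencies resolved
```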
10 changes: 4 additions & 6 deletions et_converter/text2chakra_converter.py
@@ -141,12 +141,10 @@ def get_comm_coll_node(
uint64_val=comm_size))
return node

def add_parent(
self,
child_node: Any,
parent_node: Any
) -> None:
child_node.data_deps.append(parent_node.id)
def add_parent(self, child_node: Any, parent_node: Any) -> None:
if not hasattr(child_node, 'ctrl_deps'):
child_node.ctrl_deps = []
child_node.ctrl_deps.append(parent_node.id)

def convert(self) -> None:
with open(self.input_filename, "r") as f:
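The reworked `add_parent` now records the parent's id in the child's `ctrl_deps`, creating the list if the child object lacks one. A rough usage picture with hypothetical bare-bones node objects (not the converter's real node type):

```python
# Hypothetical bare-bones node objects to show what the patched helper does;
# the converter itself works on Chakra trace nodes.
from types import SimpleNamespace

def add_parent(child_node, parent_node) -> None:
    # Mirrors the patched helper: lazily create ctrl_deps, then link to the parent.
    if not hasattr(child_node, "ctrl_deps"):
        child_node.ctrl_deps = []
    child_node.ctrl_deps.append(parent_node.id)

parent = SimpleNamespace(id=10)
child = SimpleNamespace(id=11)
add_parent(child, parent)
print(child.ctrl_deps)  # -> [10]
```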
16 changes: 8 additions & 8 deletions et_feeder/et_feeder.cpp
@@ -67,15 +67,15 @@ void ETFeeder::freeChildrenNodes(uint64_t node_id) {
shared_ptr<ETFeederNode> node = dep_graph_[node_id];
for (auto child : node->getChildren()) {
auto child_chakra = child->getChakraNode();
for (auto it = child_chakra->mutable_data_deps()->begin();
it != child_chakra->mutable_data_deps()->end();
for (auto it = child_chakra->mutable_ctrl_deps()->begin();
it != child_chakra->mutable_ctrl_deps()->end();
++it) {
if (*it == node_id) {
child_chakra->mutable_data_deps()->erase(it);
child_chakra->mutable_ctrl_deps()->erase(it);
break;
}
}
if (child_chakra->data_deps().size() == 0) {
if (child_chakra->ctrl_deps().size() == 0) {
dep_free_node_id_set_.emplace(child_chakra->id());
dep_free_node_queue_.emplace(child);
}
@@ -101,13 +101,13 @@ shared_ptr<ETFeederNode> ETFeeder::readNode() {
shared_ptr<ETFeederNode> node = make_shared<ETFeederNode>(pkt_msg);

bool dep_unresolved = false;
for (int i = 0; i < pkt_msg->data_deps_size(); ++i) {
auto parent_node = dep_graph_.find(pkt_msg->data_deps(i));
for (int i = 0; i < pkt_msg->ctrl_deps_size(); ++i) {
auto parent_node = dep_graph_.find(pkt_msg->ctrl_deps(i));
if (parent_node != dep_graph_.end()) {
parent_node->second->addChild(node);
} else {
dep_unresolved = true;
node->addDepUnresolvedParentID(pkt_msg->data_deps(i));
node->addDepUnresolvedParentID(pkt_msg->ctrl_deps(i));
}
}

@@ -166,7 +166,7 @@ void ETFeeder::readNextWindow() {
uint64_t node_id = node_id_node.first;
shared_ptr<ETFeederNode> node = node_id_node.second;
if ((dep_free_node_id_set_.count(node_id) == 0) &&
(node->getChakraNode()->data_deps().size() == 0)) {
(node->getChakraNode()->ctrl_deps().size() == 0)) {
dep_free_node_id_set_.emplace(node_id);
dep_free_node_queue_.emplace(node);
}
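On the feeder side, the C++ changes keep the same release scheme but read `ctrl_deps`: when a node completes, its id is erased from each child's dependency list, and a child whose list becomes empty joins the dependency-free queue. A compact Python analogue of that bookkeeping, with illustrative ids rather than the ETFeeder data structures:

```python
# Python analogue of the freeChildrenNodes-style release bookkeeping
# (illustrative ids; not the real ETFeeder data structures).
from collections import deque
from typing import Dict, List

ctrl_deps: Dict[int, List[int]] = {2: [1], 3: [1, 2]}  # child id -> unresolved parent ids
children: Dict[int, List[int]] = {1: [2, 3], 2: [3]}   # parent id -> child ids
dep_free_queue = deque([1])                            # node 1 starts with no deps

def free_children(node_id: int) -> None:
    """Erase node_id from each child's deps; enqueue children left with none."""
    for child_id in children.get(node_id, []):
        deps = ctrl_deps[child_id]
        if node_id in deps:
            deps.remove(node_id)
        if not deps:
            dep_free_queue.append(child_id)

while dep_free_queue:
    issued = dep_free_queue.popleft()
    print(f"issuing node {issued}")
    free_children(issued)   # issues nodes 1, 2, 3 in dependency order
```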