Address code review comments.

tenstorrent · Nov 22, 2024 · 8baf24c · 8baf24c
1 parent e3b34e7
commit 8baf24c
Show file tree

Hide file tree

Showing 4 changed files with 24 additions and 16 deletions.
diff --git a/include/ttmlir/Dialect/TTIR/Transforms/Passes.td b/include/ttmlir/Dialect/TTIR/Transforms/Passes.td
@@ -115,7 +115,13 @@ def TTIRLoadSystemDesc: Pass<"ttir-load-system-desc", "::mlir::ModuleOp"> {
 def TTIRBroadcastFold: Pass<"ttir-broadcast-fold", "::mlir::ModuleOp"> {
   let summary = "Broadcast operation is folded to all the consumers.";
   let description = [{
-    As TTNN does not support fold operation, this pass folds the broadcast operations to all consumers.
+    This pass walks through the graph and folds all broadcast instructions since broadcast is supported implicitly by backend ops.
+    Example:
+      %1 = "ttir.broadcast"(%arg0) (tensor<1xf32>) -> tensor<512xf32>
+      %2 = "ttir.maximum"(%1, %arg1) (tensor<512xf32>, tensor<512xf32>) -> tensor<512xf32>
+
+    The broadcast above is folded into its consumer:
+      %1 = "ttir.maximum"(%arg0, %arg1) (tensor<1xf32>, tensor<512xf32>) -> tensor<512xf32>
   }];
 }
 

diff --git a/lib/Dialect/TTIR/Transforms/Broadcast.cpp b/lib/Dialect/TTIR/Transforms/Broadcast.cpp
@@ -14,6 +14,22 @@ namespace mlir::tt::ttir {
 
 //===----------------------------------------------------------------------===//
 // Broadcast Folding pass
+// Our backend supports implicit broadcast of operands, so explicit broadcast
+// instructions are folded.
+//
+// For example:
+//
+// %0 = tensor.empty() : tensor<512xf32>
+// %1 = "ttir.broadcast"(%arg0, %0) (tensor<1xf32>, tensor<512xf32>) ->
+//      tensor<512xf32>
+// %2 = tensor.empty() : tensor<512xf32>
+// %3 = "ttir.maximum"(%1, %arg1, %2) (tensor<512xf32>, tensor<512xf32>,
+//      tensor<512xf32>) -> tensor<512xf32>
+//
+// After folding:
+//
+// %0 = tensor.empty() : tensor<512xf32>
+// %1 = "ttir.maximum"(%arg0, %arg1, %0) (tensor<1xf32>, tensor<512xf32>,
+// tensor<512xf32>) -> tensor<512xf32>
 //===----------------------------------------------------------------------===//
 
 class TTIRBroadcastFoldRewriter : public OpRewritePattern<BroadcastOp> {

diff --git a/lib/Dialect/TTNN/Pipelines/TTNNPipelines.cpp b/lib/Dialect/TTNN/Pipelines/TTNNPipelines.cpp
@@ -121,8 +121,8 @@ void createTTNNPipelineTTIRBroadcastFoldPassFromString(OpPassManager &pm,
 
 void createTTIRToTTNNBackendPipeline(
     OpPassManager &pm, const TTIRToTTNNBackendPipelineOptions &options) {
-  createTTNNPipelineTTIRBroadcastFoldPass(pm, options);
   createTTNNPipelineTTIRPasses(pm, options);
+  createTTNNPipelineTTIRBroadcastFoldPass(pm, options);
   createTTNNPipelineLoweringPasses(pm, options);
   createTTNNPipelineAnalysisPasses(pm, options);
   createTTNNPipelineLayoutDecompositionPass(pm, options);

diff --git a/test/ttmlir/Silicon/TTNN/simple_broadcast.mlir b/test/ttmlir/Silicon/TTNN/simple_broadcast.mlir