diff --git a/.gitignore b/.gitignore
index 30672f9bf..8663a2ff0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,7 @@ third_party/tt-metal
 .cache
 *pycache*
 *.egg-info
+ttrt-artifacts/*
+query_results.json
+run_results.json
+ttrt_report.xml
diff --git a/include/ttmlir/Dialect/TT/IR/TTOpsTypes.td b/include/ttmlir/Dialect/TT/IR/TTOpsTypes.td
index 04f2b64af..4ad64de6f 100644
--- a/include/ttmlir/Dialect/TT/IR/TTOpsTypes.td
+++ b/include/ttmlir/Dialect/TT/IR/TTOpsTypes.td
@@ -288,7 +288,9 @@ def TT_LayoutAttr : TT_Attr<"Layout", "layout"> {
     bool isSystemMemorySpace() const { return ::mlir::tt::isSystemMemorySpace(getMemorySpace()); }
     bool isDeviceMemorySpace() const { return ::mlir::tt::isDeviceMemorySpace(getMemorySpace()); }
     bool hasShardedTensorMemoryLayout() const;
+    bool hasInterleavedTensorMemoryLayout() const;
     bool hasShardedL1TensorMemoryLayout() const;
+    bool hasInterleavedL1TensorMemoryLayout() const;
     bool isTiled() const;
     Type getElementType() const;
     Type getScalarElementType() const;
diff --git a/include/ttmlir/Dialect/TT/Utils/MemoryLayoutAnalysisParams.h b/include/ttmlir/Dialect/TT/Utils/MemoryLayoutAnalysisParams.h
new file mode 100644
index 000000000..de6b591d1
--- /dev/null
+++ b/include/ttmlir/Dialect/TT/Utils/MemoryLayoutAnalysisParams.h
@@ -0,0 +1,47 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef TTMLIR_DIALECT_TT_UTILS_MEMORYLAYOUTANALYSISPARAMS_H
+#define TTMLIR_DIALECT_TT_UTILS_MEMORYLAYOUTANALYSISPARAMS_H
+
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/CommandLine.h"
+
+namespace mlir::tt {
+
+enum class MemoryLayoutAnalysisPolicyType { DFSharding, L1Interleaved };
+
+struct MemoryLayoutAnalysisPolicyTypeParser
+    : public llvm::cl::parser<MemoryLayoutAnalysisPolicyType> {
+public:
+  MemoryLayoutAnalysisPolicyTypeParser(llvm::cl::Option &opt)
+      : llvm::cl::parser<MemoryLayoutAnalysisPolicyType>(opt) {}
+
+  bool parse(llvm::cl::Option &opt, llvm::StringRef argName,
+             llvm::StringRef arg, MemoryLayoutAnalysisPolicyType &value) {
+    value = llvm::StringSwitch<MemoryLayoutAnalysisPolicyType>(arg)
+                .Case("DFSharding", MemoryLayoutAnalysisPolicyType::DFSharding)
+                .Case("L1Interleaved",
+                      MemoryLayoutAnalysisPolicyType::L1Interleaved);
+    return false;
+  }
+
+  static void print(llvm::raw_ostream &os,
+                    const MemoryLayoutAnalysisPolicyType &value) {
+    llvm::StringRef policy;
+    switch (value) {
+    case MemoryLayoutAnalysisPolicyType::DFSharding:
+      policy = "DFSharding";
+      break;
+    case MemoryLayoutAnalysisPolicyType::L1Interleaved:
+      policy = "L1Interleaved";
+      break;
+    }
+    os << "memory-layout-analysis-policy=" << policy << "\n";
+  }
+};
+
+} // namespace mlir::tt
+
+#endif // TTMLIR_DIALECT_TT_UTILS_MEMORYLAYOUTANALYSISPARAMS_H
diff --git a/include/ttmlir/Dialect/TT/Utils/OverrideParams.h b/include/ttmlir/Dialect/TT/Utils/OverrideParams.h
index ed7967c07..56cde1c07 100644
--- a/include/ttmlir/Dialect/TT/Utils/OverrideParams.h
+++ b/include/ttmlir/Dialect/TT/Utils/OverrideParams.h
@@ -6,7 +6,6 @@
 #define TTMLIR_DIALECT_TT_UTILS_OVERRIDEPARAMS_H
 
 #include "ttmlir/Dialect/TT/IR/TTOpsTypes.h"
-#include
 #include
 
 namespace mlir::tt {
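The new MemoryLayoutAnalysisPolicyTypeParser is a standard llvm::cl custom parser, so the policy enum can back any LLVM command-line or pass option. A minimal sketch of standalone usage follows; the option name "test-policy" and the variable around it are illustrative only and are not part of this change:

    #include "ttmlir/Dialect/TT/Utils/MemoryLayoutAnalysisParams.h"

    // Hypothetical option wired to the new parser; after
    // llvm::cl::ParseCommandLineOptions, "--test-policy=L1Interleaved"
    // yields MemoryLayoutAnalysisPolicyType::L1Interleaved.
    static llvm::cl::opt<mlir::tt::MemoryLayoutAnalysisPolicyType, false,
                         mlir::tt::MemoryLayoutAnalysisPolicyTypeParser>
        testPolicy("test-policy",
                   llvm::cl::desc("Illustrative option using the new parser."),
                   llvm::cl::init(
                       mlir::tt::MemoryLayoutAnalysisPolicyType::DFSharding));

Note that parse() always returns false (success) and the StringSwitch has no Default case, so an unrecognized policy string is not rejected through the usual cl error path; the pipeline and pass options below rely on the same behavior.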
diff --git a/include/ttmlir/Dialect/TTNN/Analysis/DFShardingPolicy.h b/include/ttmlir/Dialect/TTNN/Analysis/DFShardingPolicy.h
index 6ef8476b0..6223ad429 100644
--- a/include/ttmlir/Dialect/TTNN/Analysis/DFShardingPolicy.h
+++ b/include/ttmlir/Dialect/TTNN/Analysis/DFShardingPolicy.h
@@ -7,19 +7,16 @@
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "ttmlir/Dialect/TTNN/Analysis/L1ChainConfig.h"
+#include "ttmlir/Dialect/TTNN/Analysis/MemoryLayoutAnalysisPolicy.h"
 
 namespace mlir::tt::ttnn {
 
 // Process ops in DFS schedulable order and build shard chain configs.
 // Schedule is also produced as a side effect of sharding.
 //
-class DFShardingPolicy {
+class DFShardingPolicy : public MemoryLayoutAnalysisPolicy {
 private:
-  Operation *rootOp;
-  std::vector<L1ChainConfig> *l1ChainConfigs;
-  llvm::DenseMap<Operation *, std::vector<tt::LayoutAttr>> legalLayouts;
-  llvm::DenseMap<func::FuncOp, llvm::SmallVector<Operation *>> *schedule;
-  unsigned usableL1CacheSize = 0;
+  std::unordered_set<Edge> overrideReshardEdges;
 
 public:
   DFShardingPolicy(
@@ -28,11 +25,15 @@ class DFShardingPolicy {
           &legalLayouts,
       llvm::DenseMap<func::FuncOp, llvm::SmallVector<Operation *>> &schedule,
       unsigned usableL1CacheSize)
-      : rootOp(rootOp), l1ChainConfigs(&l1ChainConfigs),
-        legalLayouts(legalLayouts), schedule(&schedule),
-        usableL1CacheSize(usableL1CacheSize) {}
+      : MemoryLayoutAnalysisPolicy(rootOp, l1ChainConfigs, legalLayouts,
+                                   schedule, usableL1CacheSize),
+        overrideReshardEdges() {}
 
-  void run(const std::unordered_set<Edge> &overrideReshardEdges);
+  void run() final;
+
+  void setOverrideReshardEdges(const std::unordered_set<Edge> &reshardEdges) {
+    overrideReshardEdges = reshardEdges;
+  }
 };
 
 } // namespace mlir::tt::ttnn
diff --git a/include/ttmlir/Dialect/TTNN/Analysis/L1InterleavedPolicy.h b/include/ttmlir/Dialect/TTNN/Analysis/L1InterleavedPolicy.h
new file mode 100644
index 000000000..753c07d80
--- /dev/null
+++ b/include/ttmlir/Dialect/TTNN/Analysis/L1InterleavedPolicy.h
@@ -0,0 +1,30 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef TTMLIR_DIALECT_TTNN_ANALYSIS_L1INTERLEAVEDPOLICY_H
+#define TTMLIR_DIALECT_TTNN_ANALYSIS_L1INTERLEAVEDPOLICY_H
+
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "ttmlir/Dialect/TTNN/Analysis/L1ChainConfig.h"
+#include "ttmlir/Dialect/TTNN/Analysis/MemoryLayoutAnalysisPolicy.h"
+
+namespace mlir::tt::ttnn {
+
+class L1InterleavedPolicy : public MemoryLayoutAnalysisPolicy {
+public:
+  L1InterleavedPolicy(
+      Operation *rootOp, std::vector<L1ChainConfig> &l1ChainConfigs,
+      const llvm::DenseMap<Operation *, std::vector<tt::LayoutAttr>>
+          &legalLayouts,
+      llvm::DenseMap<func::FuncOp, llvm::SmallVector<Operation *>> &schedule,
+      unsigned usableL1CacheSize)
+      : MemoryLayoutAnalysisPolicy(rootOp, l1ChainConfigs, legalLayouts,
+                                   schedule, usableL1CacheSize) {}
+
+  void run() final;
+};
+
+} // namespace mlir::tt::ttnn
+
+#endif // TTMLIR_DIALECT_TTNN_ANALYSIS_L1INTERLEAVEDPOLICY_H
diff --git a/include/ttmlir/Dialect/TTNN/Analysis/MemoryLayoutAnalysis.h b/include/ttmlir/Dialect/TTNN/Analysis/MemoryLayoutAnalysis.h
index b7d864b72..39d059555 100644
--- a/include/ttmlir/Dialect/TTNN/Analysis/MemoryLayoutAnalysis.h
+++ b/include/ttmlir/Dialect/TTNN/Analysis/MemoryLayoutAnalysis.h
@@ -6,20 +6,18 @@
 #define TTMLIR_DIALECT_TTNN_ANALYSIS_MEMORYLAYOUTANALYSIS_H
 
 #include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "ttmlir/Dialect/TT/Utils/MemoryLayoutAnalysisParams.h"
 #include "ttmlir/Dialect/TTNN/Analysis/Edge.h"
 #include "ttmlir/Dialect/TTNN/Analysis/L1ChainConfig.h"
 #include "ttmlir/Dialect/TTNN/Analysis/TTNNAnalysis.h"
 
 namespace mlir::tt::ttnn {
 
-enum class MemoryLayoutAnalysisPolicyType {
-  DFSharding,
-};
-
 struct MemoryLayoutAnalysisInput {
   llvm::DenseMap<Operation *, std::vector<tt::LayoutAttr>> legalLayouts;
   unsigned usableL1CacheSize = 0;
   std::unordered_set<Edge> overrideReshardEdges;
+  MemoryLayoutAnalysisPolicyType policy;
 
   MemoryLayoutAnalysisInput() : legalLayouts() {}
 
@@ -27,9 +25,10 @@ struct MemoryLayoutAnalysisInput {
       const llvm::DenseMap<Operation *, std::vector<tt::LayoutAttr>>
           &legalLayouts,
       unsigned usableL1CacheSize,
-      const std::unordered_set<Edge> &overrideReshardEdges)
+      const std::unordered_set<Edge> &overrideReshardEdges,
+      MemoryLayoutAnalysisPolicyType policy)
       : legalLayouts(legalLayouts), usableL1CacheSize(usableL1CacheSize),
-        overrideReshardEdges(overrideReshardEdges) {}
+        overrideReshardEdges(overrideReshardEdges), policy(policy) {}
 
   bool operator==(const MemoryLayoutAnalysisInput &rhs) const {
     return legalLayouts == rhs.legalLayouts;
diff --git a/include/ttmlir/Dialect/TTNN/Analysis/MemoryLayoutAnalysisPolicy.h b/include/ttmlir/Dialect/TTNN/Analysis/MemoryLayoutAnalysisPolicy.h
new file mode 100644
index 000000000..aecd9c6a4
--- /dev/null
+++ b/include/ttmlir/Dialect/TTNN/Analysis/MemoryLayoutAnalysisPolicy.h
@@ -0,0 +1,39 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef TTMLIR_DIALECT_TTNN_ANALYSIS_MEMORYLAYOUTANALYSISPOLICY_H
+#define TTMLIR_DIALECT_TTNN_ANALYSIS_MEMORYLAYOUTANALYSISPOLICY_H
+
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "ttmlir/Dialect/TTNN/Analysis/L1ChainConfig.h"
+
+namespace mlir::tt::ttnn {
+
+class MemoryLayoutAnalysisPolicy {
+protected:
+  Operation *rootOp;
+  std::vector<L1ChainConfig> *l1ChainConfigs;
+  llvm::DenseMap<Operation *, std::vector<tt::LayoutAttr>> legalLayouts;
+  llvm::DenseMap<func::FuncOp, llvm::SmallVector<Operation *>> *schedule;
+  unsigned usableL1CacheSize = 0;
+
+public:
+  virtual ~MemoryLayoutAnalysisPolicy() {};
+
+  MemoryLayoutAnalysisPolicy(
+      Operation *rootOp, std::vector<L1ChainConfig> &l1ChainConfigs,
+      const llvm::DenseMap<Operation *, std::vector<tt::LayoutAttr>>
+          &legalLayouts,
+      llvm::DenseMap<func::FuncOp, llvm::SmallVector<Operation *>> &schedule,
+      unsigned usableL1CacheSize)
+      : rootOp(rootOp), l1ChainConfigs(&l1ChainConfigs),
+        legalLayouts(legalLayouts), schedule(&schedule),
+        usableL1CacheSize(usableL1CacheSize) {}
+
+  virtual void run() = 0;
+};
+
+} // namespace mlir::tt::ttnn
+
+#endif // TTMLIR_DIALECT_TTNN_ANALYSIS_MEMORYLAYOUTANALYSISPOLICY_H
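Both of the in-tree policies now derive from this base, so adding another policy only needs a subclass that overrides run() plus a new enum case and dispatch branch. A hypothetical sketch under those assumptions (GreedyPolicy is illustrative and not part of this change):

    namespace mlir::tt::ttnn {

    class GreedyPolicy : public MemoryLayoutAnalysisPolicy {
    public:
      // Reuse the base constructor; rootOp, l1ChainConfigs, legalLayouts,
      // schedule and usableL1CacheSize are stored by the base class.
      using MemoryLayoutAnalysisPolicy::MemoryLayoutAnalysisPolicy;

      // Same contract as the in-tree policies: walk rootOp, fill
      // *l1ChainConfigs and *schedule.
      void run() final {}
    };

    } // namespace mlir::tt::ttnn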
diff --git a/include/ttmlir/Dialect/TTNN/Pipelines/TTNNPipelines.h b/include/ttmlir/Dialect/TTNN/Pipelines/TTNNPipelines.h
index 9988bbcc1..a42ec0ea8 100644
--- a/include/ttmlir/Dialect/TTNN/Pipelines/TTNNPipelines.h
+++ b/include/ttmlir/Dialect/TTNN/Pipelines/TTNNPipelines.h
@@ -6,13 +6,11 @@
 #define TTMLIR_DIALECT_TTNN_PIPELINES_TTNNPIPELINES_H
 
 #include "mlir/Pass/PassOptions.h"
+#include "ttmlir/Dialect/TT/Utils/MemoryLayoutAnalysisParams.h"
 #include "ttmlir/Dialect/TT/Utils/OverrideParams.h"
-#include
-#include
-#include
-#include
 
 namespace mlir::tt::ttnn {
+
 // Options for the TTIR to TTNN backend pipeline.
 //
 struct TTIRToTTNNBackendPipelineOptions
@@ -85,6 +83,14 @@ struct TTIRToTTNNBackendPipelineOptions
           "of shard specs."),
       llvm::cl::init(false)};
 
+  // Specify policy for memory layout analysis.
+  //
+  Option<MemoryLayoutAnalysisPolicyType, MemoryLayoutAnalysisPolicyTypeParser>
+      memoryLayoutAnalysisPolicy{
+          *this, "memory-layout-analysis-policy",
+          llvm::cl::desc("Specify policy for memory layout analysis."),
+          llvm::cl::init(MemoryLayoutAnalysisPolicyType::DFSharding)};
+
   // Option to provide a system descriptor flatbuffer file to compile
   // against.
   //
diff --git a/include/ttmlir/Dialect/TTNN/Transforms/Optimizer.h b/include/ttmlir/Dialect/TTNN/Transforms/Optimizer.h
index 064495f31..06074a0a3 100644
--- a/include/ttmlir/Dialect/TTNN/Transforms/Optimizer.h
+++ b/include/ttmlir/Dialect/TTNN/Transforms/Optimizer.h
@@ -19,6 +19,8 @@ struct TTNNOptimizerOptions {
   llvm::StringMap<LayoutOverrideParams> overrideOutputLayout =
       llvm::StringMap<LayoutOverrideParams>();
   bool memoryLayoutAnalysisEnabled = false;
+  MemoryLayoutAnalysisPolicyType memoryLayoutAnalysisPolicy =
+      MemoryLayoutAnalysisPolicyType::DFSharding;
   bool memReconfigEnabled = false;
   int64_t maxLegalLayouts = 64;
 };
@@ -95,6 +97,7 @@ class TTNNOptimizerBase : public ::mlir::OperationPass<::mlir::ModuleOp> {
     memoryLayoutAnalysisEnabled =
         std::move(options.memoryLayoutAnalysisEnabled);
     memReconfigEnabled = std::move(options.memReconfigEnabled);
+    memoryLayoutAnalysisPolicy = std::move(options.memoryLayoutAnalysisPolicy);
     maxLegalLayouts = std::move(options.maxLegalLayouts);
   }
@@ -122,6 +125,12 @@ class TTNNOptimizerBase : public ::mlir::OperationPass<::mlir::ModuleOp> {
           "we support all "
           "types of shard specs."),
       ::llvm::cl::init(false)};
+  ::mlir::Pass::Option<mlir::tt::MemoryLayoutAnalysisPolicyType,
+                       mlir::tt::MemoryLayoutAnalysisPolicyTypeParser>
+      memoryLayoutAnalysisPolicy{
+          *this, "memory-layout-analysis-policy",
+          llvm::cl::desc("Specify policy for memory layout analysis."),
+          llvm::cl::init(MemoryLayoutAnalysisPolicyType::DFSharding)};
   ::mlir::Pass::Option<int64_t> maxLegalLayouts{
       *this, "max-legal-layouts",
       ::llvm::cl::desc(
diff --git a/include/ttmlir/Dialect/TTNN/Transforms/Passes.h b/include/ttmlir/Dialect/TTNN/Transforms/Passes.h
index 8e79db96a..fa05f41de 100644
--- a/include/ttmlir/Dialect/TTNN/Transforms/Passes.h
+++ b/include/ttmlir/Dialect/TTNN/Transforms/Passes.h
@@ -9,6 +9,7 @@
 #include "mlir/Pass/Pass.h"
 #include "mlir/Pass/PassManager.h"
 #include "ttmlir/Dialect/TT/Utils/OverrideParams.h"
+#include "ttmlir/Dialect/TTNN/Analysis/MemoryLayoutAnalysis.h"
 #include "ttmlir/Dialect/TTNN/IR/TTNN.h"
 #include "ttmlir/Dialect/TTNN/IR/TTNNOps.h"
diff --git a/lib/Dialect/TT/IR/TTOpsTypes.cpp b/lib/Dialect/TT/IR/TTOpsTypes.cpp
index 6a20b2ba7..3f541a39d 100644
--- a/lib/Dialect/TT/IR/TTOpsTypes.cpp
+++ b/lib/Dialect/TT/IR/TTOpsTypes.cpp
@@ -579,6 +579,10 @@ bool LayoutAttr::hasShardedTensorMemoryLayout() const {
           getMemLayout() == TensorMemoryLayout::BlockSharded);
 }
 
+bool LayoutAttr::hasInterleavedTensorMemoryLayout() const {
+  return (getMemLayout() == TensorMemoryLayout::Interleaved);
+}
+
 bool LayoutAttr::hasShardedL1TensorMemoryLayout() const {
   return ::mlir::tt::isL1MemorySpace(getMemorySpace()) and
          (getMemLayout() == TensorMemoryLayout::HeightSharded or
@@ -586,6 +590,11 @@ bool LayoutAttr::hasShardedL1TensorMemoryLayout() const {
           getMemLayout() == TensorMemoryLayout::BlockSharded);
 }
 
+bool LayoutAttr::hasInterleavedL1TensorMemoryLayout() const {
+  return ::mlir::tt::isL1MemorySpace(getMemorySpace()) and
+         (getMemLayout() == TensorMemoryLayout::Interleaved);
+}
+
 bool LayoutAttr::isTiled() const {
   return ::mlir::isa<::mlir::tt::TileType>(getElementType());
 }
diff --git a/lib/Dialect/TTNN/Analysis/CMakeLists.txt b/lib/Dialect/TTNN/Analysis/CMakeLists.txt
index 25257db36..996064d79 100644
--- a/lib/Dialect/TTNN/Analysis/CMakeLists.txt
+++ b/lib/Dialect/TTNN/Analysis/CMakeLists.txt
@@ -4,6 +4,7 @@ add_mlir_dialect_library(MLIRTTNNAnalysis
   MemoryLayoutAnalysis.cpp
   L1ChainConfig.cpp
   DFShardingPolicy.cpp
+  L1InterleavedPolicy.cpp
   ShardSolver.cpp
 
   ADDITIONAL_HEADER_DIRS
diff --git a/lib/Dialect/TTNN/Analysis/DFShardingPolicy.cpp b/lib/Dialect/TTNN/Analysis/DFShardingPolicy.cpp
index 7a7470ad3..f5c93ddbf 100644
--- a/lib/Dialect/TTNN/Analysis/DFShardingPolicy.cpp
+++ b/lib/Dialect/TTNN/Analysis/DFShardingPolicy.cpp
@@ -8,8 +8,7 @@
 
 namespace mlir::tt::ttnn {
 
-void DFShardingPolicy::run(
-    const std::unordered_set<Edge> &overrideReshardEdges) {
+void DFShardingPolicy::run() {
   rootOp->walk([&](func::FuncOp func) {
     DeviceAttr deviceAttr = getCurrentScopeDevice(func);
     mlir::tt::scheduler::Scheduler scheduler(&func);
diff --git a/lib/Dialect/TTNN/Analysis/L1InterleavedPolicy.cpp b/lib/Dialect/TTNN/Analysis/L1InterleavedPolicy.cpp
new file mode 100644
index 000000000..bad37d94f
--- /dev/null
+++ b/lib/Dialect/TTNN/Analysis/L1InterleavedPolicy.cpp
@@ -0,0 +1,155 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "ttmlir/Dialect/TTNN/Analysis/L1InterleavedPolicy.h"
+#include "ttmlir/Dialect/TT/IR/TTOpsTypes.h"
+#include "ttmlir/Dialect/TTNN/IR/TTNNOps.h"
+#include "ttmlir/Scheduler/Scheduler.h"
+
+namespace mlir::tt::ttnn {
+
+uint64_t getOpOutputLayoutUsage(
+    Operation *op,
+    llvm::DenseMap<Operation *, std::vector<tt::LayoutAttr>> &legalLayouts,
+    DeviceAttr &deviceAttr) {
+  tt::LayoutAttr opLayout = legalLayouts.lookup(op).front();
+  assert(opLayout.hasInterleavedL1TensorMemoryLayout());
+
+  llvm::ArrayRef<int64_t> opOutputTensorShape =
+      mlir::cast<RankedTensorType>(op->getResult(0).getType()).getShape();
+
+  uint64_t opL1OutputUsage = deviceAttr.getLayoutSizeBytes(
+      opOutputTensorShape, opLayout, opLayout.getMemorySpace());
+  return opL1OutputUsage;
+}
+
+void L1InterleavedPolicy::run() {
+  rootOp->walk([&](func::FuncOp func) {
+    DeviceAttr deviceAttr = getCurrentScopeDevice(func);
+    mlir::tt::scheduler::Scheduler scheduler(&func);
+    llvm::SmallVector<Operation *> scheduleableOps;
+    llvm::DenseMap<Operation *, tt::LayoutAttr> selectedOpLayout;
+    Operation *currentOp = nullptr;
+
+    // TODO(fbajraktari): Add algorithm description. Currently, the algorithm
+    // is the same as for DFSharding policy, but works only for L1 interleaved.
+    //
+    l1ChainConfigs->push_back(L1ChainConfig());
+    while (scheduler.hasUnscheduledOps()) {
+      scheduleableOps = scheduler.getScheduleableOps();
+
+      // Before starting a l1 chain, schedule layout/memory management ops
+      // first until they are exhausted from schedulable ops.
+      //
+      if (l1ChainConfigs->back().isEmpty()) {
+        for (auto *op : scheduleableOps) {
+          if (isa<ToLayoutOp>(op)) {
+            currentOp = op;
+            break;
+          }
+        }
+      }
+
+      if (currentOp == nullptr) {
+        currentOp = scheduleableOps[0];
+      }
+
+      // Schedule currentOp.
+      //
+      scheduler.scheduleOp(currentOp);
+
+      // Skip starting sharding chain if currentOp is a memory management op.
+      //
+      if (l1ChainConfigs->back().isEmpty() && isa<ToLayoutOp>(currentOp)) {
+        currentOp = nullptr;
+        continue;
+      }
+
+      if (scheduler.hasUnscheduledOps()) {
+        scheduleableOps = scheduler.getScheduleableOps();
+
+        // Check if currentOp has a valid successor.
+        //
+        Operation *nextOp = nullptr;
+        for (auto *op : scheduleableOps) {
+          for (auto operand : op->getOperands()) {
+            if (operand.getDefiningOp() == currentOp) {
+              nextOp = op;
+              break;
+            }
+          }
+        }
+
+        if (nextOp) {
+
+          // V1: Check that currentOp is not fork/join op.
+          //
+          bool validForL1Interleaved =
+              currentOp->hasOneUse() &&
+              legalLayouts.lookup(currentOp).size() > 0 &&
+              legalLayouts.lookup(nextOp).size() > 0;
+
+          if (validForL1Interleaved) {
+            // Figure out this const based on exec data, but will be replaced
+            // with API.
+            //
+            constexpr float tensorL1UsageCap = 0.8;
+            uint64_t currentOpL1OutputUsage =
+                getOpOutputLayoutUsage(currentOp, legalLayouts, deviceAttr);
+            uint64_t nextOpL1OutputUsage =
+                getOpOutputLayoutUsage(nextOp, legalLayouts, deviceAttr);
+            bool l1UsageValid = (currentOpL1OutputUsage + nextOpL1OutputUsage) <
+                                tensorL1UsageCap * usableL1CacheSize;
+
+            if (l1UsageValid) {
+              selectedOpLayout[currentOp] =
+                  legalLayouts.lookup(currentOp).front();
+
+              // Add currentOp to l1 chain config.
+              //
+              OpL1MemSpec shardSpec;
+              shardSpec.op = currentOp;
+
+              // Hardcoded tensor split factor for now, until pipeline OP
+              // support is added.
+              //
+              shardSpec.tensorSplitFactor = 1;
+              l1ChainConfigs->back().addOpL1MemSpec(std::move(shardSpec));
+
+              // Update currentOp pointer.
+              //
+              currentOp = nextOp;
+              continue;
+            }
+          }
+        }
+
+        currentOp = nullptr;
+        if (!l1ChainConfigs->back().isEmpty()) {
+          l1ChainConfigs->back().build();
+          l1ChainConfigs->push_back(L1ChainConfig());
+        }
+      }
+    }
+
+    if (l1ChainConfigs->back().isEmpty()) {
+      l1ChainConfigs->pop_back();
+    }
+
+    // Schedule
+    //
+    (*schedule)[func] = scheduler.getSchedule();
+
+    // Resolve l1 chain configs.
+    //
+    for (auto &l1ChainConfig : *l1ChainConfigs) {
+      l1ChainConfig.resolve();
+
+      std::unordered_set<Edge> memReconfigEdges;
+      l1ChainConfig.complete(selectedOpLayout, memReconfigEdges);
+    }
+  });
+}
+
+} // namespace mlir::tt::ttnn
diff --git a/lib/Dialect/TTNN/Analysis/MemoryLayoutAnalysis.cpp b/lib/Dialect/TTNN/Analysis/MemoryLayoutAnalysis.cpp
index e630782f4..302334d1b 100644
--- a/lib/Dialect/TTNN/Analysis/MemoryLayoutAnalysis.cpp
+++ b/lib/Dialect/TTNN/Analysis/MemoryLayoutAnalysis.cpp
@@ -4,6 +4,7 @@
 
 #include "ttmlir/Dialect/TTNN/Analysis/MemoryLayoutAnalysis.h"
 #include "ttmlir/Dialect/TTNN/Analysis/DFShardingPolicy.h"
+#include "ttmlir/Dialect/TTNN/Analysis/L1InterleavedPolicy.h"
 
 namespace mlir::tt::ttnn {
 
@@ -33,18 +34,46 @@ filterShardedOnly(const llvm::DenseMap<Operation *, std::vector<tt::LayoutAttr>>
   return shardedLayouts;
 }
 
-void MemoryLayoutAnalysis::analysisImplementation() {
-  MemoryLayoutAnalysisPolicyType policy =
-      MemoryLayoutAnalysisPolicyType::DFSharding;
+llvm::DenseMap<Operation *, std::vector<tt::LayoutAttr>>
+filterL1InterleavedOnly(
+    const llvm::DenseMap<Operation *, std::vector<tt::LayoutAttr>>
+        &legalLayouts) {
+  llvm::DenseMap<Operation *, std::vector<tt::LayoutAttr>> l1InterleavedLayouts;
+  for (const auto &opLayouts : legalLayouts) {
+    std::vector<tt::LayoutAttr> opL1InterleavedLayouts;
+    for (const auto &layout : opLayouts.second) {
+      if (layout.hasInterleavedL1TensorMemoryLayout()) {
+        opL1InterleavedLayouts.push_back(layout);
+      }
+    }
+
+    l1InterleavedLayouts[opLayouts.first] = opL1InterleavedLayouts;
+  }
 
-  switch (policy) {
-  case MemoryLayoutAnalysisPolicyType::DFSharding:
+  return l1InterleavedLayouts;
+}
+
+void MemoryLayoutAnalysis::analysisImplementation() {
+  // Apply specific memory layout analysis policy.
+  //
+  switch (analysisInput.policy) {
+  case MemoryLayoutAnalysisPolicyType::DFSharding: {
     DFShardingPolicy dfShardingPolicy(
         op, l1ChainConfigs, filterShardedOnly(analysisInput.legalLayouts),
         analysisResult.schedule, analysisInput.usableL1CacheSize);
-    dfShardingPolicy.run(analysisInput.overrideReshardEdges);
+    dfShardingPolicy.setOverrideReshardEdges(
+        analysisInput.overrideReshardEdges);
+    dfShardingPolicy.run();
     break;
   }
+  case MemoryLayoutAnalysisPolicyType::L1Interleaved: {
+    L1InterleavedPolicy l1InterleavedPolicy(
+        op, l1ChainConfigs, filterL1InterleavedOnly(analysisInput.legalLayouts),
+        analysisResult.schedule, analysisInput.usableL1CacheSize);
+    l1InterleavedPolicy.run();
+    break;
+  }
+  }
 
   // Copy over default legal layouts.
   //
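To make the capacity check in L1InterleavedPolicy::run concrete, with illustrative numbers only: if currentOp's output layout needs 256 KiB of L1 and nextOp's needs 384 KiB, the pair is kept in the chain only when 256 KiB + 384 KiB = 640 KiB < 0.8 * usableL1CacheSize, that is, when the usable L1 budget exceeds 800 KiB. Otherwise the chain is cut at that edge and the op keeps its default legal layout, which is why the final relu and softmax outputs in the new Silicon tests are expected in DRAM rather than L1.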
diff --git a/lib/Dialect/TTNN/Pipelines/TTNNPipelines.cpp b/lib/Dialect/TTNN/Pipelines/TTNNPipelines.cpp
index 7f3baaeaf..772b51b04 100644
--- a/lib/Dialect/TTNN/Pipelines/TTNNPipelines.cpp
+++ b/lib/Dialect/TTNN/Pipelines/TTNNPipelines.cpp
@@ -51,6 +51,8 @@ void createTTNNPipelineAnalysisPasses(
   optimizerOptions.memoryLayoutAnalysisEnabled =
       options.memoryLayoutAnalysisEnabled;
   optimizerOptions.memReconfigEnabled = options.memReconfigEnabled;
+  optimizerOptions.memoryLayoutAnalysisPolicy =
+      options.memoryLayoutAnalysisPolicy;
   optimizerOptions.maxLegalLayouts = options.maxLegalLayouts;
   pm.addPass(mlir::tt::ttnn::createTTNNOptimizer(optimizerOptions));
 }
diff --git a/lib/Dialect/TTNN/Transforms/Optimizer.cpp b/lib/Dialect/TTNN/Transforms/Optimizer.cpp
index 37b4cfe64..2af08e9c9 100644
--- a/lib/Dialect/TTNN/Transforms/Optimizer.cpp
+++ b/lib/Dialect/TTNN/Transforms/Optimizer.cpp
@@ -71,7 +71,8 @@ class TTNNOptimizer : public impl::TTNNOptimizerBase<TTNNOptimizer> {
     MemoryLayoutAnalysis memoryLayoutAnalysis =
         getAnalysis<MemoryLayoutAnalysis>();
     memoryLayoutAnalysis.init(MemoryLayoutAnalysisInput(
-        legalLayouts, chipDesc.getUsableL1Size(), overrideReshardEdges));
+        legalLayouts, chipDesc.getUsableL1Size(), overrideReshardEdges,
+        memoryLayoutAnalysisPolicy));
     legalLayouts = memoryLayoutAnalysis.getResult().legalLayouts;
     opSchedule = memoryLayoutAnalysis.getResult().schedule;
     memReconfigEdges = memoryLayoutAnalysis.getResult().memReconfigEdges;
diff --git a/test/ttmlir/Dialect/TTNN/sharding_matmul_override_0.mlir b/test/ttmlir/Dialect/TTNN/sharding_matmul_override_0.mlir
index 9516f96f5..2e07f7f5c 100644
--- a/test/ttmlir/Dialect/TTNN/sharding_matmul_override_0.mlir
+++ b/test/ttmlir/Dialect/TTNN/sharding_matmul_override_0.mlir
@@ -2,7 +2,7 @@
 #any_device_tile = #tt.operand_constraint
 module attributes {} {
   func.func @forward(%arg0: tensor<64x128xbf16>, %arg1: tensor<128x96xbf16>, %arg2: tensor<96x64xbf16>) -> tensor<64x64xbf16> {
-    // CHECK: #[[LAYOUT_7:layout7]] = #tt.layout<{{.*}}, memref<{{.*}}>, #dram>, {{.*}}>
+    // CHECK: #[[LAYOUT_7:layout7]] = #tt.layout<{{.*}}, memref<{{.*}}, #dram>, {{.*}}>
     %0 = tensor.empty() : tensor<64x96xbf16>
     // CHECK: {{.*}} = "ttnn.matmul"{{.*}} -> tensor<64x96xbf16, #[[LAYOUT_7]]>
     %1 = "ttir.matmul"(%arg0, %arg1, %0) <{operand_constraints = [#any_device_tile, #any_device_tile, #any_device_tile]}> : (tensor<64x128xbf16>, tensor<128x96xbf16>, tensor<64x96xbf16>) -> tensor<64x96xbf16>
diff --git a/test/ttmlir/Dialect/TTNN/sharding_matmul_override_32.mlir b/test/ttmlir/Dialect/TTNN/sharding_matmul_override_32.mlir
index 3e26d1490..8e984348f 100644
--- a/test/ttmlir/Dialect/TTNN/sharding_matmul_override_32.mlir
+++ b/test/ttmlir/Dialect/TTNN/sharding_matmul_override_32.mlir
@@ -3,7 +3,7 @@ module attributes {} {
   func.func @forward(%arg0: tensor<64x128xbf16>, %arg1: tensor<128x96xbf16>, %arg2: tensor<96x64xbf16>) -> tensor<64x64xbf16> {
     // CHECK: #[[L1_:.*]] = #tt.memory_space
-    // CHECK: #[[LAYOUT_7:layout7]] = #tt.layout<{{.*}}, memref<{{.*}}>, #l1_>, {{.*}}>
+    // CHECK: #[[LAYOUT_7:layout7]] = #tt.layout<{{.*}}, memref<{{.*}}, #l1_>, {{.*}}>
     %0 = tensor.empty() : tensor<64x96xbf16>
     // CHECK: {{.*}} = "ttnn.matmul"{{.*}} -> tensor<64x96xbf16, #[[LAYOUT_7]]>
     %1 = "ttir.matmul"(%arg0, %arg1, %0) <{operand_constraints = [#any_device_tile, #any_device_tile, #any_device_tile]}> : (tensor<64x128xbf16>, tensor<128x96xbf16>, tensor<64x96xbf16>) -> tensor<64x96xbf16>
diff --git a/test/ttmlir/Silicon/TTNN/all_l1_interleaved_policy.mlir b/test/ttmlir/Silicon/TTNN/all_l1_interleaved_policy.mlir
new file mode 100644
index 000000000..e09552c42
--- /dev/null
+++ b/test/ttmlir/Silicon/TTNN/all_l1_interleaved_policy.mlir
@@ -0,0 +1,31 @@
+// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path% enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s > %t.mlir
+// RUN: FileCheck %s --input-file=%t.mlir
+// RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn
+#any_device = #tt.operand_constraint
+module attributes {} {
+  func.func @forward(%arg0: tensor<64x128xbf16>, %arg1: tensor<128x96xbf16>, %arg2: tensor<64x96xbf16>, %arg3: tensor<96x32xbf16>, %arg4: tensor<64x32xbf16>) -> tensor<64x32xbf16> {
+    // CHECK: #[[L1_:.*]] = #tt.memory_space
+    // CHECK: #[[LAYOUT_6:.*]] = #tt.layout<(d0, d1) -> (d0, d1), undef, <{{.*}}>, memref<{{.*}}, #l1_>, interleaved>
+    // CHECK: #[[LAYOUT_7:.*]] = #tt.layout<(d0, d1) -> (d0, d1), undef, <{{.*}}>, memref<{{.*}}, #l1_>, interleaved>
+    // CHECK: #[[LAYOUT_8:.*]] = #tt.layout<(d0, d1) -> (d0, d1), undef, <{{.*}}>, memref<{{.*}}, #dram>, interleaved>
+    %0 = tensor.empty() : tensor<64x96xbf16>
+    // CHECK: %{{.*}} = "ttnn.matmul"{{.*}} -> tensor<64x96xbf16, #[[LAYOUT_6]]>
+    %1 = "ttir.matmul"(%arg0, %arg1, %0) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<64x128xbf16>, tensor<128x96xbf16>, tensor<64x96xbf16>) -> tensor<64x96xbf16>
+    %2 = tensor.empty() : tensor<64x96xbf16>
+    // CHECK: %{{.*}} = "ttnn.add"{{.*}} -> tensor<64x96xbf16, #[[LAYOUT_6]]>
+    %3 = "ttir.add"(%1, %arg2, %2) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<64x96xbf16>, tensor<64x96xbf16>, tensor<64x96xbf16>) -> tensor<64x96xbf16>
+    %4 = tensor.empty() : tensor<64x96xbf16>
+    // CHECK: %{{.*}} = "ttnn.relu"{{.*}} -> tensor<64x96xbf16, #[[LAYOUT_6]]>
+    %5 = "ttir.relu"(%3, %4) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device]}> : (tensor<64x96xbf16>, tensor<64x96xbf16>) -> tensor<64x96xbf16>
+    %6 = tensor.empty() : tensor<64x32xbf16>
+    // CHECK: %{{.*}} = "ttnn.matmul"{{.*}} -> tensor<64x32xbf16, #[[LAYOUT_7]]>
+    %7 = "ttir.matmul"(%5, %arg3, %6) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<64x96xbf16>, tensor<96x32xbf16>, tensor<64x32xbf16>) -> tensor<64x32xbf16>
+    %8 = tensor.empty() : tensor<64x32xbf16>
+    // CHECK: %{{.*}} = "ttnn.add"{{.*}} -> tensor<64x32xbf16, #[[LAYOUT_7]]>
+    %9 = "ttir.add"(%7, %arg4, %8) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<64x32xbf16>, tensor<64x32xbf16>, tensor<64x32xbf16>) -> tensor<64x32xbf16>
+    %10 = tensor.empty() : tensor<64x32xbf16>
+    // CHECK: %{{.*}} = "ttnn.relu"{{.*}} -> tensor<64x32xbf16, #[[LAYOUT_8]]>
+    %11 = "ttir.relu"(%9, %10) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device]}> : (tensor<64x32xbf16>, tensor<64x32xbf16>) -> tensor<64x32xbf16>
+    return %11 : tensor<64x32xbf16>
+  }
+}
diff --git a/test/ttmlir/Silicon/TTNN/large_tensors.mlir b/test/ttmlir/Silicon/TTNN/large_tensors.mlir
new file mode 100644
index 000000000..b258435db
--- /dev/null
+++ b/test/ttmlir/Silicon/TTNN/large_tensors.mlir
@@ -0,0 +1,19 @@
+// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path% enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s > %t.mlir
+// RUN: FileCheck %s --input-file=%t.mlir
+// RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn
+#any_device = #tt.operand_constraint
+module attributes {} {
+  func.func @forward(%arg0: tensor<8192x8192xbf16>, %arg1: tensor<8192x8192xbf16>, %arg2: tensor<8192x8192xbf16>) -> tensor<8192x8192xbf16> {
+    // CHECK: #[[LAYOUT_2:layout2]] = #tt.layout<{{.*}}, memref<{{.*}}, #dram>, {{.*}}>
+    %0 = tensor.empty() : tensor<8192x8192xbf16>
+    // CHECK: %{{.*}} = "ttnn.add"{{.*}} -> tensor<8192x8192xbf16, #[[LAYOUT_2]]>
+    %1 = "ttir.add"(%arg0, %arg1, %0) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<8192x8192xbf16>, tensor<8192x8192xbf16>, tensor<8192x8192xbf16>) -> tensor<8192x8192xbf16>
+    %2 = tensor.empty() : tensor<8192x8192xbf16>
+    // CHECK: %{{.*}} = "ttnn.add"{{.*}} -> tensor<8192x8192xbf16, #[[LAYOUT_2]]>
+    %3 = "ttir.add"(%1, %arg2, %2) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<8192x8192xbf16>, tensor<8192x8192xbf16>, tensor<8192x8192xbf16>) -> tensor<8192x8192xbf16>
+    %4 = tensor.empty() : tensor<8192x8192xbf16>
+    // CHECK: %{{.*}} = "ttnn.relu"{{.*}} -> tensor<8192x8192xbf16, #[[LAYOUT_2]]>
+    %7 = "ttir.relu"(%3, %4) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device]}> : (tensor<8192x8192xbf16>, tensor<8192x8192xbf16>) -> tensor<8192x8192xbf16>
+    return %7 : tensor<8192x8192xbf16>
+  }
+}
diff --git a/test/ttmlir/Silicon/TTNN/mnist_l1_interleaved.mlir b/test/ttmlir/Silicon/TTNN/mnist_l1_interleaved.mlir
new file mode 100644
index 000000000..bd001a267
--- /dev/null
+++ b/test/ttmlir/Silicon/TTNN/mnist_l1_interleaved.mlir
@@ -0,0 +1,44 @@
+// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path% enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s > %t.mlir
+// RUN: FileCheck %s --input-file=%t.mlir
+// RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn
+#any_device = #tt.operand_constraint
+#loc = loc("MNISTLinear":4294967295:0)
+module @"tt-forge-graph" attributes {} {
+  func.func @main(%arg0: tensor<1x784xf32> loc("MNISTLinear":4294967295:0), %arg1: tensor<1x10xf32> loc("MNISTLinear":4294967295:0), %arg2: tensor<256x10xf32> loc("MNISTLinear":4294967295:0), %arg3: tensor<1x256xf32> loc("MNISTLinear":4294967295:0), %arg4: tensor<784x256xf32> loc("MNISTLinear":4294967295:0)) -> tensor<1x10xf32> {
+    // CHECK: #[[LAYOUT_6:.*]] = #tt.layout<(d0, d1) -> (d0, d1), undef, <{{.*}}>, memref<{{.*}}, #l1_>, interleaved>
+    // CHECK: #[[LAYOUT_7:.*]] = #tt.layout<(d0, d1) -> (d0, d1), undef, <{{.*}}>, memref<{{.*}}, #l1_>, interleaved>
+    // CHECK: #[[LAYOUT_8:.*]] = #tt.layout<(d0, d1) -> (d0, d1), undef, <{{.*}}>, memref<{{.*}}, #dram>, interleaved>
+    %0 = tensor.empty() : tensor<1x256xf32> loc(#loc8)
+    // CHECK: %[[C:.*]] = "ttnn.matmul"[[C:.*]] -> tensor<1x256xf32, #[[LAYOUT_6]]>
+    %1 = "ttir.matmul"(%arg0, %arg4, %0) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x784xf32>, tensor<784x256xf32>, tensor<1x256xf32>) -> tensor<1x256xf32> loc(#loc8)
+    %2 = tensor.empty() : tensor<1x256xf32> loc(#loc9)
+    // CHECK: %[[C:.*]] = "ttnn.add"[[C:.*]] -> tensor<1x256xf32, #[[LAYOUT_6]]>
+    %3 = "ttir.add"(%1, %arg3, %2) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x256xf32>, tensor<1x256xf32>, tensor<1x256xf32>) -> tensor<1x256xf32> loc(#loc9)
+    %4 = tensor.empty() : tensor<1x256xf32> loc(#loc10)
+    // CHECK: %[[C:.*]] = "ttnn.relu"[[C:.*]] -> tensor<1x256xf32, #[[LAYOUT_6]]>
= "ttir.relu"(%3, %4) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device]}> : (tensor<1x256xf32>, tensor<1x256xf32>) -> tensor<1x256xf32> loc(#loc10) + %6 = tensor.empty() : tensor<1x10xf32> loc(#loc11) + // CHECK: %[[C:.*]] = "ttnn.matmul"[[C:.*]] -> tensor<1x10xf32, #[[LAYOUT_7]]> + %7 = "ttir.matmul"(%5, %arg2, %6) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x256xf32>, tensor<256x10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32> loc(#loc11) + %8 = tensor.empty() : tensor<1x10xf32> loc(#loc12) + // CHECK: %[[C:.*]] = "ttnn.add"[[C:.*]] -> tensor<1x10xf32, #[[LAYOUT_7]]> + %9 = "ttir.add"(%7, %arg1, %8) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x10xf32>, tensor<1x10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32> loc(#loc12) + %10 = tensor.empty() : tensor<1x10xf32> loc(#loc13) + // CHECK: %{{.*}} = "ttnn.softmax"{{.*}} -> tensor<1x10xf32, #[[LAYOUT_8]]> + %11 = "ttir.softmax"(%9, %10) <{dimension = 1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32> loc(#loc13) + return %11 : tensor<1x10xf32> loc(#loc7) + } loc(#loc) +} loc(#loc) +#loc1 = loc("MNISTLinear":4294967295:10) +#loc2 = loc("MNISTLinear":4294967295:8) +#loc3 = loc("MNISTLinear":4294967295:6) +#loc4 = loc("MNISTLinear":4294967295:4) +#loc5 = loc("MNISTLinear":4294967295:3) +#loc6 = loc("MNISTLinear":4294967295:2) +#loc7 = loc(unknown) +#loc8 = loc("matmul_1"(#loc1)) +#loc9 = loc("add_2"(#loc2)) +#loc10 = loc("relu_3"(#loc3)) +#loc11 = loc("matmul_5"(#loc4)) +#loc12 = loc("add_6"(#loc5)) +#loc13 = loc("softmax_7"(#loc6)) diff --git a/test/ttmlir/Silicon/TTNN/simple_fork_join.mlir b/test/ttmlir/Silicon/TTNN/simple_fork_join.mlir new file mode 100644 index 000000000..981c26b49 --- /dev/null +++ b/test/ttmlir/Silicon/TTNN/simple_fork_join.mlir @@ -0,0 +1,18 @@ +// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path% enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s > %t.mlir +// RUN: FileCheck %s --input-file=%t.mlir +// RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn +// UNSUPPORTED: true +#any_device = #tt.operand_constraint +module attributes {} { + func.func @forward(%arg0: tensor<64x128xbf16>, %arg1: tensor<64x128xbf16>, %arg2: tensor<64x128xbf16>, %arg3: tensor<64x128xbf16>) -> tensor<64x128xbf16> { + %0 = tensor.empty() : tensor<64x128xbf16> + %1 = "ttir.add"(%arg0, %arg1, %0) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<64x128xbf16>, tensor<64x128xbf16>, tensor<64x128xbf16>) -> tensor<64x128xbf16> + %2 = tensor.empty() : tensor<64x128xbf16> + %3 = "ttir.add"(%arg2, %arg3, %2) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<64x128xbf16>, tensor<64x128xbf16>, tensor<64x128xbf16>) -> tensor<64x128xbf16> + %4 = tensor.empty() : tensor<64x128xbf16> + %5 = "ttir.add"(%1, %3, %4) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<64x128xbf16>, tensor<64x128xbf16>, tensor<64x128xbf16>) -> tensor<64x128xbf16> + %6 = tensor.empty() : tensor<64x128xbf16> + %7 = "ttir.relu"(%5, %6) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device]}> : (tensor<64x128xbf16>, tensor<64x128xbf16>) -> tensor<64x128xbf16> + return %7 : tensor<64x128xbf16> + } +}