diff --git a/include/ttmlir/Dialect/TTNN/Analysis/L1ChainConfig.h b/include/ttmlir/Dialect/TTNN/Analysis/L1ChainConfig.h
index 3c57ca66b..b8aee2e4e 100644
--- a/include/ttmlir/Dialect/TTNN/Analysis/L1ChainConfig.h
+++ b/include/ttmlir/Dialect/TTNN/Analysis/L1ChainConfig.h
@@ -58,7 +58,7 @@ class L1ChainConfig {
            std::unordered_set<Edge> &memReconfigEdges);
 
   bool isEmpty() { return opL1MemSpecs.empty(); }
-  void addOpL1MemSpec(OpL1MemSpec &&spec) {
+  void addOpL1MemSpec(OpL1MemSpec spec) {
     assert(state == L1ChainState::InBuild);
     l1ChainedOps.insert(spec.op);
     opL1MemSpecs.push_back(std::move(spec));
diff --git a/include/ttmlir/Dialect/TTNN/Analysis/L1InterleavedPolicy.h b/include/ttmlir/Dialect/TTNN/Analysis/L1InterleavedPolicy.h
index f453e9a1d..2392cd7c9 100644
--- a/include/ttmlir/Dialect/TTNN/Analysis/L1InterleavedPolicy.h
+++ b/include/ttmlir/Dialect/TTNN/Analysis/L1InterleavedPolicy.h
@@ -8,10 +8,43 @@
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "ttmlir/Dialect/TTNN/Analysis/L1ChainConfig.h"
 #include "ttmlir/Dialect/TTNN/Analysis/MemoryLayoutAnalysisPolicy.h"
+#include "ttmlir/Dialect/TTNN/IR/TTNNOpsAttrs.h"
 
 namespace mlir::tt::ttnn {
 
 class L1InterleavedPolicy : public MemoryLayoutAnalysisPolicy {
+public:
+  struct OpMemSpec {
+    TTNNLayoutAttr layout;
+    // Minimum L1 memory usage required for scheduling the op
+    // given the layouts of all the ops that are already scheduled.
+    //
+    uint64_t requiredL1Usage;
+  };
+
+  // This struct is holding information about the greedily chosen
+  // configuration of the @baseOp: 1) layouts and 2) precedence.
+  //
+  // The @layouts represents the mapping between the op and its chosen
+  // layout. All the ops that are included in the @layouts map must be
+  // either @baseOp or its operand with legal L1 Interleaved output layout
+  // at the moment of analyzing the @baseOp.
+  //
+  // The @precedence represents the order of the op's operands in which they
+  // should be scheduled. Only op's operands that are included in the @layouts
+  // map are included in the @precedence.
+  //
+  struct OpConfig {
+    Operation *baseOp;
+    llvm::DenseMap<Operation *, TTNNLayoutAttr> layouts;
+    llvm::SmallVector<Operation *> precedence;
+  };
+
+  struct L1Usage {
+    size_t outputL1Usage;
+    size_t requiredL1Usage;
+  };
+
 public:
   L1InterleavedPolicy(
       Operation *rootOp, std::vector<L1ChainConfig> &l1ChainConfigs,
@@ -22,7 +55,71 @@ class L1InterleavedPolicy : public MemoryLayoutAnalysisPolicy {
       : MemoryLayoutAnalysisPolicy(rootOp, l1ChainConfigs, legalLayouts,
                                    schedule, usableL1CacheSize) {}
 
+  /**
+   * Retrieve the greedy OpConfig for the given base operation
+   * and its opsL1Usage map.
+   *
+   * @param baseOp     The base operation for which the greedy configuration is
+   * being determined.
+   * @param opsL1Usage A map between the operation and its output L1 usage. All
+   * operations included in the opsL1Usage map must be either the baseOp or its
+   * operand with a legal L1 Interleaved output layout at the time of analyzing
+   * the baseOp.
+   * @return The greedy OpConfig for the baseOp.
+   */
+  OpConfig getGreedyConfig(Operation *baseOp,
+                           llvm::DenseMap<Operation *, L1Usage> &opsL1Usage);
+
   void run() final;
+
+private:
+  // Check if the op is analyzable. Op is analyzable if it has at least one
+  // legal layout.
+  bool isAnalyzable(Operation *op);
+
+  // Fetch op's DRAM layout from legalLayouts.
+  bool hasDRAMBufferType(Operation *op);
+  TTNNLayoutAttr getDRAMLayout(Operation *op);
+
+  // Fetch op's L1 Interleaved layout from legalLayouts.
+  bool hasL1BufferType(Operation *op);
+  TTNNLayoutAttr getL1InterleavedLayout(Operation *op);
+
+  // Share of usableL1CacheSize that tensors may occupy under this policy.
+  size_t getAvailableL1CacheSize() const {
+    // Placeholder cap: should be derived from execution data and eventually
+    // replaced by an API. double keeps the product exact where float cannot.
+    constexpr double tensorL1UsageCap = 0.75;
+    return static_cast<size_t>(tensorL1UsageCap * usableL1CacheSize);
+  }
+
+  // Precedence schedule map for each operation. It contains the order
+  // in which operands need to be executed for each op.
+  llvm::DenseMap<Operation *, llvm::SmallVector<Operation *>> precedenceMap;
+
+  llvm::DenseSet<Operation *> visitedOps;
+  // Depth-first emit: appends `op` to the schedule of `func` after every
+  // not-yet-visited op listed in its precedence has been appended.
+  void buildSchedule(mlir::Operation *op, func::FuncOp &func) {
+    visitedOps.insert(op);
+    // lookup() neither inserts a missing key nor hands out a reference into
+    // the map, so the list stays valid across the recursive calls below.
+    for (Operation *precedent : precedenceMap.lookup(op)) {
+      if (!visitedOps.count(precedent)) {
+        buildSchedule(precedent, func);
+      }
+    }
+    (*schedule)[func].push_back(op);
+  }
+
+  void constructSchedule(func::FuncOp &func) {
+    func->walk([&](Operation *op) {
+      if (!op->hasTrait<mlir::OpTrait::ReturnLike>()) return; // roots only
+      for (Value v : op->getOperands()) // every returned value is a root
+        if (Operation *def = v.getDefiningOp(); def && !visitedOps.count(def))
+          buildSchedule(def, func); // def is null for block arguments
+    });
+  }
 };
 
 } // namespace mlir::tt::ttnn
diff --git a/include/ttmlir/Dialect/TTNN/Analysis/MemoryLayoutAnalysis.h b/include/ttmlir/Dialect/TTNN/Analysis/MemoryLayoutAnalysis.h
index e8b603815..bc6284c3a 100644
--- a/include/ttmlir/Dialect/TTNN/Analysis/MemoryLayoutAnalysis.h
+++ b/include/ttmlir/Dialect/TTNN/Analysis/MemoryLayoutAnalysis.h
@@ -6,10 +6,10 @@
 #define TTMLIR_DIALECT_TTNN_ANALYSIS_MEMORYLAYOUTANALYSIS_H
 
 #include "mlir/Dialect/Func/IR/FuncOps.h"
-#include "ttmlir/Dialect/TT/Utils/MemoryLayoutAnalysisParams.h"
 #include "ttmlir/Dialect/TTNN/Analysis/Edge.h"
 #include "ttmlir/Dialect/TTNN/Analysis/L1ChainConfig.h"
 #include "ttmlir/Dialect/TTNN/Analysis/TTNNAnalysis.h"
+#include "ttmlir/Dialect/TTNN/Utils/MemoryLayoutAnalysisParams.h"
 
 namespace mlir::tt::ttnn {
 
diff --git a/include/ttmlir/Dialect/TTNN/IR/TTNNOpsAttrs.td b/include/ttmlir/Dialect/TTNN/IR/TTNNOpsAttrs.td
index e45fba003..7d5b10abb 100644
--- a/include/ttmlir/Dialect/TTNN/IR/TTNNOpsAttrs.td
+++ b/include/ttmlir/Dialect/TTNN/IR/TTNNOpsAttrs.td
@@ -148,6 +148,9 @@ def TTNN_TTNNLayoutAttr: TTNN_Attr<"TTNNLayout", "ttnn_layout"> {
       bool hasShardedTensorMemoryLayout() const;
       bool hasShardedL1TensorMemoryLayout() const;
       bool hasInterleavedL1TensorMemoryLayout() const;
+      bool hasInterleavedDRAMTensorMemoryLayout() const;
+      bool hasL1BufferType() const;
+      bool hasDRAMBufferType() const;
       bool isTiled() const;
       Layout getLayout() const;
       Type getElementType() const;
diff --git a/include/ttmlir/Dialect/TTNN/Pipelines/TTNNPipelines.h b/include/ttmlir/Dialect/TTNN/Pipelines/TTNNPipelines.h
index 636d5f623..58206039b 100644
--- a/include/ttmlir/Dialect/TTNN/Pipelines/TTNNPipelines.h
+++ b/include/ttmlir/Dialect/TTNN/Pipelines/TTNNPipelines.h
@@ -5,9 +5,8 @@
 #ifndef TTMLIR_DIALECT_TTNN_PIPELINES_TTNNPIPELINES_H
 #define TTMLIR_DIALECT_TTNN_PIPELINES_TTNNPIPELINES_H
 
-#include "ttmlir/Dialect/TT/Utils/MemoryLayoutAnalysisParams.h"
+#include "ttmlir/Dialect/TTNN/Utils/MemoryLayoutAnalysisParams.h"
 #include "ttmlir/Dialect/TTNN/Utils/PassOverrides.h"
-#include "ttmlir/Dialect/TTNN/Utils/Utils.h"
 
 #include "mlir/Pass/PassOptions.h"
 
diff --git a/include/ttmlir/Dialect/TT/Utils/MemoryLayoutAnalysisParams.h b/include/ttmlir/Dialect/TTNN/Utils/MemoryLayoutAnalysisParams.h
similarity index 88%
rename from include/ttmlir/Dialect/TT/Utils/MemoryLayoutAnalysisParams.h
rename to include/ttmlir/Dialect/TTNN/Utils/MemoryLayoutAnalysisParams.h
index 4a44e883d..5275e2340 100644
--- a/include/ttmlir/Dialect/TT/Utils/MemoryLayoutAnalysisParams.h
+++ b/include/ttmlir/Dialect/TTNN/Utils/MemoryLayoutAnalysisParams.h
@@ -2,8 +2,8 @@
 //
 // SPDX-License-Identifier: Apache-2.0
 
-#ifndef TTMLIR_DIALECT_TT_UTILS_MEMORYLAYOUTANALYSISPARAMS_H
-#define TTMLIR_DIALECT_TT_UTILS_MEMORYLAYOUTANALYSISPARAMS_H
+#ifndef TTMLIR_DIALECT_TTNN_UTILS_MEMORYLAYOUTANALYSISPARAMS_H
+#define TTMLIR_DIALECT_TTNN_UTILS_MEMORYLAYOUTANALYSISPARAMS_H
 
 #include <llvm/ADT/StringSwitch.h>
 #include <llvm/Support/CommandLine.h>
@@ -49,4 +49,4 @@ struct MemoryLayoutAnalysisPolicyTypeParser
 
 } // namespace mlir::tt
 
-#endif // TTMLIR_DIALECT_TT_UTILS_MEMORYLAYOUTANALYSISPARAMS_H
+#endif // TTMLIR_DIALECT_TTNN_UTILS_MEMORYLAYOUTANALYSISPARAMS_H
diff --git a/include/ttmlir/Dialect/TTNN/Utils/OptimizerOverrides.h b/include/ttmlir/Dialect/TTNN/Utils/OptimizerOverrides.h
index c474106e3..eccc62f26 100644
--- a/include/ttmlir/Dialect/TTNN/Utils/OptimizerOverrides.h
+++ b/include/ttmlir/Dialect/TTNN/Utils/OptimizerOverrides.h
@@ -5,8 +5,8 @@
 #ifndef TTMLIR_DIALECT_TTNN_UTILS_OPTIMIZEROVERRIDES_H
 #define TTMLIR_DIALECT_TTNN_UTILS_OPTIMIZEROVERRIDES_H
 
-#include "ttmlir/Dialect/TT/Utils/MemoryLayoutAnalysisParams.h"
 #include "ttmlir/Dialect/TTNN/Pipelines/TTNNPipelines.h"
+#include "ttmlir/Dialect/TTNN/Utils/MemoryLayoutAnalysisParams.h"
 #include "ttmlir/Dialect/TTNN/Utils/PassOverrides.h"
 
 namespace mlir::tt::ttnn {
diff --git a/include/ttmlir/Scheduler/Scheduler.h b/include/ttmlir/Scheduler/Scheduler.h
index 817271fdc..5d4116331 100644
--- a/include/ttmlir/Scheduler/Scheduler.h
+++ b/include/ttmlir/Scheduler/Scheduler.h
@@ -23,6 +23,10 @@ class Scheduler {
   // Method to get the next set of schedulable operations
   llvm::SmallVector<mlir::Operation *> getScheduleableOps();
 
+  // Method to check if an operation is either a TTIR op or a
+  // TTNN scheduleable op.
+  bool isTTShedulableOp(mlir::Operation *op);
+
   // Method to check if an operation can be scheduled
   bool canSchedule(mlir::Operation *op);
 
diff --git a/lib/Dialect/TTNN/Analysis/CMakeLists.txt b/lib/Dialect/TTNN/Analysis/CMakeLists.txt
index 996064d79..640702f71 100644
--- a/lib/Dialect/TTNN/Analysis/CMakeLists.txt
+++ b/lib/Dialect/TTNN/Analysis/CMakeLists.txt
@@ -15,6 +15,6 @@ add_mlir_dialect_library(MLIRTTNNAnalysis
         MLIRTTNNPassesIncGen
         MLIRTTOpsIncGen
 
-        LINK_LIBS
+        LINK_LIBS PUBLIC
         MLIRScheduler
         )
diff --git a/lib/Dialect/TTNN/Analysis/L1InterleavedPolicy.cpp b/lib/Dialect/TTNN/Analysis/L1InterleavedPolicy.cpp
index c0b3ff102..23c1b306a 100644
--- a/lib/Dialect/TTNN/Analysis/L1InterleavedPolicy.cpp
+++ b/lib/Dialect/TTNN/Analysis/L1InterleavedPolicy.cpp
@@ -3,19 +3,23 @@
 // SPDX-License-Identifier: Apache-2.0
 
 #include "ttmlir/Dialect/TTNN/Analysis/L1InterleavedPolicy.h"
-#include "ttmlir/Dialect/TT/IR/TTOpsTypes.h"
-#include "ttmlir/Dialect/TTNN/IR/TTNNOps.h"
+#include "ttmlir/Dialect/TTNN/Analysis/L1ChainConfig.h"
 #include "ttmlir/Scheduler/Scheduler.h"
 
 namespace mlir::tt::ttnn {
 
-uint64_t getOpOutputLayoutUsage(
-    Operation *op,
-    llvm::DenseMap<Operation *, std::vector<TTNNLayoutAttr>> &legalLayouts,
-    DeviceAttr &deviceAttr) {
-  TTNNLayoutAttr opLayout = legalLayouts.lookup(op).front();
-  assert(opLayout.hasInterleavedL1TensorMemoryLayout());
+uint64_t getOpOutputL1Usage(Operation *op, TTNNLayoutAttr opLayout,
+                            DeviceAttr &deviceAttr) {
+  // In case the opLayout is not in L1 memory space, L1 memory usage is 0.
+  //
+  if (opLayout.hasDRAMBufferType()) {
+    return 0;
+  }
 
+  // L1 memory usage of the ops without output tensors cannot be calculated.
+  // So far, this is only false for ttnn.get_device op.
+  //
+  assert(mlir::isa<RankedTensorType>(op->getResult(0).getType()));
   llvm::ArrayRef<int64_t> opOutputTensorShape =
       mlir::cast<RankedTensorType>(op->getResult(0).getType()).getShape();
 
@@ -24,132 +28,327 @@ uint64_t getOpOutputLayoutUsage(
   return opL1OutputUsage;
 }
 
-void L1InterleavedPolicy::run() {
-  rootOp->walk([&](func::FuncOp func) {
-    DeviceAttr deviceAttr = getCurrentScopeDevice(func);
-    mlir::tt::scheduler::Scheduler scheduler(&func);
-    llvm::SmallVector<mlir::Operation *> scheduleableOps;
-    llvm::DenseMap<Operation *, TTNNLayoutAttr> selectedOpLayout;
-    Operation *currentOp = nullptr;
+L1InterleavedPolicy::OpConfig L1InterleavedPolicy::getGreedyConfig(
+    Operation *baseOp, llvm::DenseMap<Operation *, L1Usage> &opsL1Usage) {
+  uint64_t numOfOps, bitIndex, currentMask;
+  uint64_t currentL1Usage, optimalL1Usage;
+  llvm::DenseMap<Operation *, TTNNLayoutAttr> optimalLayouts;
+  llvm::SmallVector<Operation *> optimalPrecedence;
+
+  constexpr uint64_t maxNumOfOps = sizeof(numOfOps) * 8;
+  numOfOps = opsL1Usage.size();
+  assert(numOfOps < maxNumOfOps && "1ULL << numOfOps must not overflow");
+
+  optimalL1Usage = 0;
+  for (currentMask = 0; currentMask < (1ULL << numOfOps); currentMask++) {
+    std::bitset<maxNumOfOps> bitset(currentMask);
+    llvm::DenseMap<Operation *, TTNNLayoutAttr> currentLayouts;
+    llvm::SmallVector<Operation *> currentPrecedence, optimalL1Precedence,
+        L1Precedence;
 
-    // TODO(fbajraktari): Add algorithm description. Currently, the algorithm
-    // is the same as for DFSharding policy, but works only for L1 interleaved.
+    // Calculate the L1 usage of the current configuration.
     //
-    l1ChainConfigs->push_back(L1ChainConfig());
-    while (scheduler.hasUnscheduledOps()) {
-      scheduleableOps = scheduler.getScheduleableOps();
+    currentL1Usage = 0;
+    bitIndex = 0;
+    for (const auto &[op, l1Usage] : opsL1Usage) {
+      if (bitset[bitIndex]) {
+        // In case we have an operand with L1 interleaved layout, we need to
+        // figure out its schedule among the other operands with L1 interleaved
+        // layout. Therefore, we insert all of them into L1Precedence, then
+        // compute the optimal L1Precedence and concatenate it with the
+        // currentPrecedence.
+        //
+        currentL1Usage += l1Usage.outputL1Usage;
+        currentLayouts[op] = getL1InterleavedLayout(op);
+
+        // Skip the baseOp.
+        //
+        if (baseOp != op) {
+          L1Precedence.push_back(op);
+        }
+      } else {
+        // It is optimal to first schedule all ops with DRAM output layout.
+        // Therefore, we can directly insert them into the
+        // currentPrecedence.
+        //
+        currentLayouts[op] = getDRAMLayout(op);
 
-      // Before starting a l1 chain, schedule layout/memory management ops
-      // first until they are exhausted from schedulable ops.
+        // Skip the baseOp.
+        //
+        if (baseOp != op) {
+          currentPrecedence.push_back(op);
+        }
+      }
+      bitIndex += 1;
+    }
+
+    // Calculate the optimal L1Precedence.
+    //
+    bool isMaskLegal = false;
+    uint64_t minRequiredL1Usage = getAvailableL1CacheSize();
+
+    std::sort(L1Precedence.begin(), L1Precedence.end());
+    do {
+      // Check if the current order of L1Precedence is legal.
       //
-      if (l1ChainConfigs->back().isEmpty()) {
-        for (auto *op : scheduleableOps) {
-          if (isa<ToLayoutOp>(op)) {
-            currentOp = op;
-            break;
-          }
+      bool isLegal = true;
+      uint64_t intermediateL1Usage = 0;
+      uint64_t intermediateRequiredL1Usage = 0;
+      for (Operation *op : L1Precedence) {
+        if (intermediateL1Usage + opsL1Usage[op].requiredL1Usage >
+            getAvailableL1CacheSize()) {
+          isLegal = false;
+          break;
         }
+
+        intermediateRequiredL1Usage =
+            std::max(intermediateRequiredL1Usage,
+                     intermediateL1Usage + opsL1Usage[op].requiredL1Usage);
+        intermediateL1Usage += opsL1Usage[op].outputL1Usage;
       }
 
-      if (currentOp == nullptr) {
-        currentOp = scheduleableOps[0];
+      // Pick optimal L1Precedence among all legal L1Precedence.
+      // The one that requires the least amount of L1 cache overall is
+      // considered optimal.
+      //
+      if (isLegal && intermediateRequiredL1Usage < minRequiredL1Usage) {
+        isMaskLegal = true;
+        minRequiredL1Usage = intermediateRequiredL1Usage;
+        optimalL1Precedence = L1Precedence;
       }
+    } while (std::next_permutation(L1Precedence.begin(), L1Precedence.end()));
+
+    if (isMaskLegal && optimalL1Usage < currentL1Usage &&
+        currentL1Usage <= getAvailableL1CacheSize()) {
 
-      // Schedule currentOp.
+      // Append the legal L1Precedence to the currentPrecedence and therefore
+      // create a complete precedence for the baseOp and currentMask.
       //
-      scheduler.scheduleOp(currentOp);
+      currentPrecedence.insert(currentPrecedence.end(),
+                               optimalL1Precedence.begin(),
+                               optimalL1Precedence.end());
 
-      // Skip starting sharding chain if currentOp is a memory management op.
+      // Update the optimal configuration.
       //
-      if (l1ChainConfigs->back().isEmpty() && isa<ToLayoutOp>(currentOp)) {
-        currentOp = nullptr;
-        continue;
-      }
+      optimalL1Usage = currentL1Usage;
+      optimalLayouts = std::move(currentLayouts);
+      optimalPrecedence = std::move(currentPrecedence);
+    }
+  }
 
-      if (scheduler.hasUnscheduledOps()) {
-        scheduleableOps = scheduler.getScheduleableOps();
+  // Create the optimal config.
+  //
+  OpConfig optimalConfig;
+  optimalConfig.baseOp = baseOp;
+  optimalConfig.layouts = std::move(optimalLayouts);
+  optimalConfig.precedence = std::move(optimalPrecedence);
 
-        // Check if currentOp has a valid successor.
+  return optimalConfig;
+}
+
+void L1InterleavedPolicy::run() {
+  for (Operation &funcOp : rootOp->getRegion(0).getOps()) {
+    func::FuncOp func = dyn_cast<func::FuncOp>(funcOp);
+    if (!func) { // region 0 may also hold non-function ops
+      continue;
+    }
+    DeviceAttr deviceAttr = getCurrentScopeDevice(func);
+    llvm::DenseMap<Operation *, OpMemSpec> OpMemSpecMap;
+    mlir::tt::scheduler::Scheduler scheduler(&func);
+    llvm::SmallVector<Operation *> scheduleableOps;
+
+    while (scheduler.hasUnscheduledOps()) {
+      scheduleableOps = scheduler.getScheduleableOps();
+
+      for (Operation *op : scheduleableOps) {
+        // Schedule the op.
         //
-        Operation *nextOp = nullptr;
-        for (auto *op : scheduleableOps) {
-          for (auto operand : op->getOperands()) {
-            if (operand.getDefiningOp() == currentOp) {
-              nextOp = op;
-              break;
-            }
+        scheduler.scheduleOp(op);
+
+        // Find optimal configuration for the op.
+        //
+        llvm::DenseMap<Operation *, L1Usage> opsL1Usage;
+        llvm::SmallVector<Operation *> opsPrecedence;
+
+        // Generate optimal configuration for the current op based on the
+        // outputs of its operands and its legal output layouts.
+        //
+        if (isAnalyzable(op)) {
+
+          // Create the OpMemSpec.
+          //
+          OpMemSpec OpMemSpec;
+          assert(hasDRAMBufferType(op));
+          OpMemSpec.layout = getDRAMLayout(op);
+          OpMemSpec.requiredL1Usage = 0;
+          OpMemSpecMap[op] = OpMemSpec;
+
+          if (op->hasOneUse() && hasL1BufferType(op)) {
+            L1Usage l1Usage;
+            l1Usage.outputL1Usage =
+                getOpOutputL1Usage(op, getL1InterleavedLayout(op), deviceAttr);
+            l1Usage.requiredL1Usage = 0;
+            opsL1Usage[op] = l1Usage;
           }
         }
 
-        if (nextOp) {
+        for (auto operand : op->getOperands()) {
+          // Skip block arguments (%arg0, %arg1, ...)
+          //
+          if (::llvm::isa<mlir::BlockArgument>(operand)) {
+            continue;
+          }
 
-          // V1: Check that currentOp is not fork/join op.
+          Operation *operandOp = operand.getDefiningOp();
+
+          // Skip non-analyzable operands.
           //
-          bool validForL1Interleaved =
-              currentOp->hasOneUse() &&
-              legalLayouts.lookup(currentOp).size() > 0 &&
-              legalLayouts.lookup(nextOp).size() > 0;
-
-          if (validForL1Interleaved) {
-            // Figure out this const based on exec data, but will be replaced
-            // with API.
+          if (isAnalyzable(operandOp)) {
+            TTNNLayoutAttr operandOpLayout = OpMemSpecMap[operandOp].layout;
+
+            // Take into consideration only the operands with L1 interleaved
+            // memory space.
             //
-            constexpr float tensorL1UsageCap = 0.8;
-            uint64_t currentOpL1OutputUsage =
-                getOpOutputLayoutUsage(currentOp, legalLayouts, deviceAttr);
-            uint64_t nextOpL1OutputUsage =
-                getOpOutputLayoutUsage(nextOp, legalLayouts, deviceAttr);
-            bool l1UsageValid = (currentOpL1OutputUsage + nextOpL1OutputUsage) <
-                                tensorL1UsageCap * usableL1CacheSize;
-
-            if (l1UsageValid) {
-              selectedOpLayout[currentOp] =
-                  legalLayouts.lookup(currentOp).front();
-
-              // Add currentOp to l1 chain config.
-              //
-              OpL1MemSpec shardSpec;
-              shardSpec.op = currentOp;
-
-              // Hardcoded tensor split factor for now, until pipeline OP
-              // support is added.
-              //
-              shardSpec.tensorSplitFactor = 1;
-              l1ChainConfigs->back().addOpL1MemSpec(std::move(shardSpec));
-
-              // Update currentOp pointer.
-              //
-              currentOp = nextOp;
-              continue;
+            if (operandOpLayout.hasInterleavedL1TensorMemoryLayout()) {
+              L1Usage l1Usage;
+              l1Usage.outputL1Usage =
+                  getOpOutputL1Usage(operandOp, operandOpLayout, deviceAttr);
+              l1Usage.requiredL1Usage = OpMemSpecMap[operandOp].requiredL1Usage;
+              opsL1Usage[operandOp] = l1Usage;
+            }
+            // In case the operand has DRAM layout, we can insert it into the
+            // precedence directly. If the op is analyzable, it means that it
+            // is definitely schedulable.
+            //
+            else {
+              opsPrecedence.push_back(operandOp);
+            }
+          }
+          // In case the operand is not analyzable, i.e. there are no legal
+          // layouts for this operand, we can insert it into the precedence
+          // directly if it is schedulable since it does not use DRAM nor L1
+          // memory.
+          //
+          else {
+            if (scheduler.isTTShedulableOp(operandOp)) {
+              opsPrecedence.push_back(operandOp);
             }
           }
         }
 
-        currentOp = nullptr;
-        if (!l1ChainConfigs->back().isEmpty()) {
-          l1ChainConfigs->back().build();
-          l1ChainConfigs->push_back(L1ChainConfig());
+        // Greedily find the optimal configuration.
+        //
+        OpConfig optimalConfig = getGreedyConfig(op, opsL1Usage);
+        for (const auto &[op, layout] : optimalConfig.layouts) {
+          OpMemSpecMap[op].layout = layout;
+        }
+
+        // Override op's precedence.
+        //
+        opsPrecedence.insert(opsPrecedence.end(),
+                             optimalConfig.precedence.begin(),
+                             optimalConfig.precedence.end());
+        precedenceMap[op] = std::move(opsPrecedence);
+
+        // Update op's requiredL1Usage if the op is analyzable.
+        //
+        if (isAnalyzable(op)) {
+          uint64_t intermediateRequiredL1Usage = 0;
+          uint64_t intermediateL1Usage = 0;
+          for (auto operand : op->getOperands()) {
+            // Skip block arguments (%arg0, %arg1, ...)
+            //
+            if (::llvm::isa<mlir::BlockArgument>(operand)) {
+              continue;
+            }
+
+            Operation *operandOp = operand.getDefiningOp();
+
+            // Skip non-analyzable operands.
+            //
+            if (isAnalyzable(operandOp)) {
+              intermediateRequiredL1Usage =
+                  std::max(intermediateRequiredL1Usage,
+                           intermediateL1Usage +
+                               OpMemSpecMap[operandOp].requiredL1Usage);
+              intermediateL1Usage += getOpOutputL1Usage(
+                  operandOp, OpMemSpecMap[operandOp].layout, deviceAttr);
+            }
+          }
+          OpMemSpecMap[op].requiredL1Usage = std::max(
+              intermediateRequiredL1Usage,
+              intermediateL1Usage +
+                  getOpOutputL1Usage(op, OpMemSpecMap[op].layout, deviceAttr));
         }
       }
     }
 
-    if (l1ChainConfigs->back().isEmpty()) {
-      l1ChainConfigs->pop_back();
-    }
+    // Construct the schedule.
+    //
+    constructSchedule(func);
 
-    // Schedule
+    // Build, Resolve and Complete the L1 chain.
+    // This implementation is only here until we are able to merge
+    // L1ChainConfigs.
+    // TODO(fbajraktari): Fix this hack.
     //
-    (*schedule)[func] = scheduler.getSchedule();
+    l1ChainConfigs->push_back(L1ChainConfig());
+    llvm::DenseMap<Operation *, TTNNLayoutAttr> selectedOpLayout;
+    for (auto &OpMemSpec : OpMemSpecMap) {
+      OpL1MemSpec opL1MemSpec;
+      opL1MemSpec.op = OpMemSpec.first;
+      opL1MemSpec.tensorSplitFactor = 1;
+      selectedOpLayout[OpMemSpec.first] = OpMemSpec.second.layout;
+      l1ChainConfigs->back().addOpL1MemSpec(opL1MemSpec);
+    }
+    l1ChainConfigs->back().build();
+    l1ChainConfigs->back().resolve();
+    std::unordered_set<Edge> memReconfigEdges;
+    l1ChainConfigs->back().complete(selectedOpLayout, memReconfigEdges);
+  }
+}
 
-    // Resolve l1 chain configs.
+bool L1InterleavedPolicy::isAnalyzable(Operation *op) {
+  // Skip operations that are not analyzed by the LegalGridAnalysis.
+  //
+  if (legalLayouts.count(op) > 0) {
+    // Skip operations that are filtered out by the MemoryLayoutAnalysis.
     //
-    for (auto &l1ChainConfig : *l1ChainConfigs) {
-      l1ChainConfig.resolve();
+    return legalLayouts[op].size() > 0;
+  }
+  return false;
+}
 
-      std::unordered_set<Edge> memReconfigEdges;
-      l1ChainConfig.complete(selectedOpLayout, memReconfigEdges);
-    }
-  });
+bool L1InterleavedPolicy::hasDRAMBufferType(Operation *op) {
+  // One DRAM-backed candidate among the op's legal layouts is enough.
+  const auto &candidates = legalLayouts[op];
+  return std::any_of(candidates.begin(), candidates.end(),
+                     [](TTNNLayoutAttr l) { return l.hasDRAMBufferType(); });
+}
+
+TTNNLayoutAttr L1InterleavedPolicy::getDRAMLayout(Operation *op) {
+  assert(hasDRAMBufferType(op));
+  auto dramLayoutIter = std::find_if(
+      legalLayouts[op].begin(), legalLayouts[op].end(),
+      [](TTNNLayoutAttr layout) { return layout.hasDRAMBufferType(); });
+  return *dramLayoutIter;
+}
+
+bool L1InterleavedPolicy::hasL1BufferType(Operation *op) {
+  const auto &candidates = legalLayouts[op]; // only interleaved L1 counts
+  return std::any_of(
+      candidates.begin(), candidates.end(),
+      [](TTNNLayoutAttr l) { return l.hasInterleavedL1TensorMemoryLayout(); });
+}
+
+TTNNLayoutAttr L1InterleavedPolicy::getL1InterleavedLayout(Operation *op) {
+  assert(hasL1BufferType(op));
+  auto l1InterleaveLayoutIter =
+      std::find_if(legalLayouts[op].begin(), legalLayouts[op].end(),
+                   [](TTNNLayoutAttr layout) {
+                     return layout.hasInterleavedL1TensorMemoryLayout();
+                   });
+  return *l1InterleaveLayoutIter;
 }
 
 } // namespace mlir::tt::ttnn
diff --git a/lib/Dialect/TTNN/Analysis/LegalGridAnalysis.cpp b/lib/Dialect/TTNN/Analysis/LegalGridAnalysis.cpp
index b01f4cf38..9bbbccf5e 100644
--- a/lib/Dialect/TTNN/Analysis/LegalGridAnalysis.cpp
+++ b/lib/Dialect/TTNN/Analysis/LegalGridAnalysis.cpp
@@ -115,6 +115,14 @@ void LegalGridAnalysis::analysisImplementation() {
     return;
   }
 
+  if (!isa<RankedTensorType>(op->getResult(0).getType())) {
+    return;
+  }
+
+  if (llvm::isa<ttnn::EmptyOp>(op)) {
+    return;
+  }
+
   // Get output tensor type.
   RankedTensorType tensorType =
       mlir::cast<RankedTensorType>(op->getResult(0).getType());
diff --git a/lib/Dialect/TTNN/Analysis/MemoryLayoutAnalysis.cpp b/lib/Dialect/TTNN/Analysis/MemoryLayoutAnalysis.cpp
index a89c5842b..f3db4ed7b 100644
--- a/lib/Dialect/TTNN/Analysis/MemoryLayoutAnalysis.cpp
+++ b/lib/Dialect/TTNN/Analysis/MemoryLayoutAnalysis.cpp
@@ -5,6 +5,7 @@
 #include "ttmlir/Dialect/TTNN/Analysis/MemoryLayoutAnalysis.h"
 #include "ttmlir/Dialect/TTNN/Analysis/DFShardingPolicy.h"
 #include "ttmlir/Dialect/TTNN/Analysis/L1InterleavedPolicy.h"
+#include "ttmlir/Dialect/TTNN/IR/TTNNOpsAttrs.h"
 
 namespace mlir::tt::ttnn {
 
@@ -35,14 +36,15 @@ filterShardedOnly(const llvm::DenseMap<Operation *, std::vector<TTNNLayoutAttr>>
 }
 
 llvm::DenseMap<Operation *, std::vector<TTNNLayoutAttr>>
-filterL1InterleavedOnly(
+filterDRAMAndL1Interleaved(
     const llvm::DenseMap<Operation *, std::vector<TTNNLayoutAttr>>
         &legalLayouts) {
   llvm::DenseMap<Operation *, std::vector<TTNNLayoutAttr>> l1InterleavedLayouts;
   for (const auto &opLayouts : legalLayouts) {
     std::vector<TTNNLayoutAttr> opL1InterleavedLayouts;
     for (const auto &layout : opLayouts.second) {
-      if (layout.hasInterleavedL1TensorMemoryLayout()) {
+      if (layout.hasDRAMBufferType() ||
+          layout.hasInterleavedL1TensorMemoryLayout()) {
         opL1InterleavedLayouts.push_back(layout);
       }
     }
@@ -68,7 +70,8 @@ void MemoryLayoutAnalysis::analysisImplementation() {
   }
   case MemoryLayoutAnalysisPolicyType::L1Interleaved: {
     L1InterleavedPolicy l1InterleavedPolicy(
-        op, l1ChainConfigs, filterL1InterleavedOnly(analysisInput.legalLayouts),
+        op, l1ChainConfigs,
+        filterDRAMAndL1Interleaved(analysisInput.legalLayouts),
         analysisResult.schedule, analysisInput.usableL1CacheSize);
     l1InterleavedPolicy.run();
     break;
diff --git a/lib/Dialect/TTNN/IR/TTNNOpsAttrs.cpp b/lib/Dialect/TTNN/IR/TTNNOpsAttrs.cpp
index 8aaae1261..10b54f418 100644
--- a/lib/Dialect/TTNN/IR/TTNNOpsAttrs.cpp
+++ b/lib/Dialect/TTNN/IR/TTNNOpsAttrs.cpp
@@ -24,6 +24,11 @@ inline bool isDeviceBufferType(BufferType bufferType) {
   return bufferType == BufferType::DRAM || bufferType == BufferType::L1;
 }
 
+// Check if tensor is in DRAM memory
+inline bool isDRAMBufferType(BufferType bufferType) {
+  return bufferType == BufferType::DRAM;
+}
+
 // Check if tensor is in L1 memory
 inline bool isL1BufferType(BufferType bufferType) {
   return bufferType == BufferType::L1;
@@ -39,6 +44,16 @@ Layout TTNNLayoutAttr::getLayout() const {
   return isTiled() ? Layout::Tile : Layout::RowMajor;
 }
 
+// Check if the tensor memory buffer type is L1
+bool TTNNLayoutAttr::hasL1BufferType() const {
+  return isL1BufferType(getBufferType());
+}
+
+// Check if the tensor memory buffer type is DRAM
+bool TTNNLayoutAttr::hasDRAMBufferType() const {
+  return isDRAMBufferType(getBufferType());
+}
+
 // Check if the tensor memory layout is sharded
 bool TTNNLayoutAttr::hasShardedTensorMemoryLayout() const {
   return (getMemLayout() == TensorMemoryLayout::HeightSharded ||
@@ -48,7 +63,7 @@ bool TTNNLayoutAttr::hasShardedTensorMemoryLayout() const {
 
 // Check if the tensor memory layout is sharded in L1 memory
 bool TTNNLayoutAttr::hasShardedL1TensorMemoryLayout() const {
-  return isL1BufferType(getBufferType()) &&
+  return hasL1BufferType() &&
          (getMemLayout() == TensorMemoryLayout::HeightSharded ||
           getMemLayout() == TensorMemoryLayout::WidthSharded ||
           getMemLayout() == TensorMemoryLayout::BlockSharded);
@@ -56,7 +71,13 @@ bool TTNNLayoutAttr::hasShardedL1TensorMemoryLayout() const {
 
 // Check if the tensor memory layout is interleaved and in L1 memory
 bool TTNNLayoutAttr::hasInterleavedL1TensorMemoryLayout() const {
-  return isL1BufferType(getBufferType()) &&
+  return hasL1BufferType() &&
+         (getMemLayout() == TensorMemoryLayout::Interleaved);
+}
+
+// Check if the tensor memory layout is interleaved and in DRAM memory
+bool TTNNLayoutAttr::hasInterleavedDRAMTensorMemoryLayout() const {
+  return hasDRAMBufferType() &&
          (getMemLayout() == TensorMemoryLayout::Interleaved);
 }
 
diff --git a/lib/Dialect/TTNN/Transforms/Optimizer.cpp b/lib/Dialect/TTNN/Transforms/Optimizer.cpp
index e5d2f86d8..783f3ea07 100644
--- a/lib/Dialect/TTNN/Transforms/Optimizer.cpp
+++ b/lib/Dialect/TTNN/Transforms/Optimizer.cpp
@@ -170,6 +170,10 @@ class TTNNOptimizer : public impl::TTNNOptimizerBase<TTNNOptimizer> {
         return;
       }
 
+      if (llvm::isa<ttnn::EmptyOp>(op)) {
+        return;
+      }
+
       RankedTensorType tensorType =
           mlir::cast<RankedTensorType>(op->getResult(0).getType());
       LegalGridAnalysis legalGridAnalysis =
diff --git a/lib/Scheduler/Scheduler.cpp b/lib/Scheduler/Scheduler.cpp
index 25923fffd..52066c5e8 100644
--- a/lib/Scheduler/Scheduler.cpp
+++ b/lib/Scheduler/Scheduler.cpp
@@ -12,7 +12,8 @@
 
 namespace mlir::tt::scheduler {
 
-bool isTTNNOp(mlir::Operation *op) {
+// Schedulable TTNN op: in the TTNN dialect, has >= 1 result, not an EmptyOp.
+bool isTTNNScheduleableOp(mlir::Operation *op) {
   return isa<ttnn::TTNNDialect>(op->getDialect()) && op->getNumResults() > 0 &&
          !llvm::isa<ttnn::EmptyOp>(op);
 }
@@ -21,8 +22,8 @@ bool isTTIROp(mlir::Operation *op) {
   return isa<ttir::TTIRDialect>(op->getDialect());
 }
 
-bool isTTShedulableOp(mlir::Operation *op) {
-  return isTTNNOp(op) || isTTIROp(op);
+bool Scheduler::isTTShedulableOp(mlir::Operation *op) {
+  return isTTNNScheduleableOp(op) || isTTIROp(op);
 }
 
 // Init the dependencies map of all ops which are TTIR ops
diff --git a/test/ttmlir/Silicon/TTNN/optimizer/all_l1_interleaved_policy.mlir b/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/all_l1_interleaved_policy.mlir
similarity index 79%
rename from test/ttmlir/Silicon/TTNN/optimizer/all_l1_interleaved_policy.mlir
rename to test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/all_l1_interleaved_policy.mlir
index 6fa884d79..11eb41da1 100644
--- a/test/ttmlir/Silicon/TTNN/optimizer/all_l1_interleaved_policy.mlir
+++ b/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/all_l1_interleaved_policy.mlir
@@ -1,30 +1,27 @@
-// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path% enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s > %t.mlir
-// RUN: FileCheck %s --input-file=%t.mlir
-// RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn
+// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s | FileCheck %s
 #any_device = #tt.operand_constraint<dram|l1|scalar|tile|any_device|any_device_tile>
 module attributes {} {
   func.func @forward(%arg0: tensor<64x128xbf16>, %arg1: tensor<128x96xbf16>, %arg2: tensor<64x96xbf16>, %arg3: tensor<96x32xbf16>, %arg4: tensor<64x32xbf16>) -> tensor<64x32xbf16> {
     // CHECK: #[[L1_:.*]] = #ttnn.buffer_type<l1>
-    // CHECK: #[[LAYOUT_6:.*]] = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <{{.*}}>, memref<{{.*}}, #l1_>, interleaved>
     // CHECK: #[[LAYOUT_7:.*]] = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <{{.*}}>, memref<{{.*}}, #l1_>, interleaved>
-    // CHECK: #[[LAYOUT_8:.*]] = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <{{.*}}>, memref<{{.*}}, #dram>, interleaved>
+    // CHECK: #[[LAYOUT_10:.*]] = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <{{.*}}>, memref<{{.*}}, #l1_>, interleaved>
     %0 = tensor.empty() : tensor<64x96xbf16>
-    // CHECK: %{{.*}} = "ttnn.matmul"{{.*}} -> tensor<64x96xbf16, #[[LAYOUT_6]]>
+    // CHECK: %{{.*}} = "ttnn.matmul"{{.*}} -> tensor<64x96xbf16, #[[LAYOUT_7]]>
     %1 = "ttir.matmul"(%arg0, %arg1, %0) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<64x128xbf16>, tensor<128x96xbf16>, tensor<64x96xbf16>) -> tensor<64x96xbf16>
     %2 = tensor.empty() : tensor<64x96xbf16>
-    // CHECK: %{{.*}} = "ttnn.add"{{.*}} -> tensor<64x96xbf16, #[[LAYOUT_6]]>
+    // CHECK: %{{.*}} = "ttnn.add"{{.*}} -> tensor<64x96xbf16, #[[LAYOUT_7]]>
     %3 = "ttir.add"(%1, %arg2, %2) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<64x96xbf16>, tensor<64x96xbf16>, tensor<64x96xbf16>) -> tensor<64x96xbf16>
     %4 = tensor.empty() : tensor<64x96xbf16>
-    // CHECK: %{{.*}} = "ttnn.relu"{{.*}} -> tensor<64x96xbf16, #[[LAYOUT_6]]>
+    // CHECK: %{{.*}} = "ttnn.relu"{{.*}} -> tensor<64x96xbf16, #[[LAYOUT_7]]>
     %5 = "ttir.relu"(%3, %4) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<64x96xbf16>, tensor<64x96xbf16>) -> tensor<64x96xbf16>
     %6 = tensor.empty() : tensor<64x32xbf16>
-    // CHECK: %{{.*}} = "ttnn.matmul"{{.*}} -> tensor<64x32xbf16, #[[LAYOUT_7]]>
+    // CHECK: %{{.*}} = "ttnn.matmul"{{.*}} -> tensor<64x32xbf16, #[[LAYOUT_10]]>
     %7 = "ttir.matmul"(%5, %arg3, %6) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<64x96xbf16>, tensor<96x32xbf16>, tensor<64x32xbf16>) -> tensor<64x32xbf16>
     %8 = tensor.empty() : tensor<64x32xbf16>
-    // CHECK: %{{.*}} = "ttnn.add"{{.*}} -> tensor<64x32xbf16, #[[LAYOUT_7]]>
+    // CHECK: %{{.*}} = "ttnn.add"{{.*}} -> tensor<64x32xbf16, #[[LAYOUT_10]]>
     %9 = "ttir.add"(%7, %arg4, %8) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<64x32xbf16>, tensor<64x32xbf16>, tensor<64x32xbf16>) -> tensor<64x32xbf16>
     %10 = tensor.empty() : tensor<64x32xbf16>
-    // CHECK: %{{.*}} = "ttnn.relu"{{.*}} -> tensor<64x32xbf16, #[[LAYOUT_8]]>
+    // CHECK: %{{.*}} = "ttnn.relu"{{.*}} -> tensor<64x32xbf16, #[[LAYOUT_10]]>
     %11 = "ttir.relu"(%9, %10) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<64x32xbf16>, tensor<64x32xbf16>) -> tensor<64x32xbf16>
     return %11 : tensor<64x32xbf16>
   }
diff --git a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/fork_join.mlir b/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/fork_join.mlir
new file mode 100644
index 000000000..fef8cdd48
--- /dev/null
+++ b/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/fork_join.mlir
@@ -0,0 +1,45 @@
+// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s | FileCheck %s
+//
+//         A
+//         |
+//         B
+//       /   \
+//      C     D
+//      |     |
+//      |     E
+//       \   /
+//         F
+//         |
+//         G
+//
+// This test checks two things:
+//   1. The output of op B (the fork op) should be placed in DRAM.
+//   2. Even though both precedences, [C, E] and [E, C], are legal for op F,
+//      the optimizer should choose the one with the lower requiredL1Usage.
+//      In this case, [E, C] should be chosen.
+//
+#any_device = #tt.operand_constraint<dram|l1|scalar|tile|any_device|any_device_tile>
+module attributes {} {
+  func.func @forward(%arg0: tensor<64x64xbf16>, %arg1: tensor<64x32xbf16>) -> tensor<64x32xbf16> {
+    // CHECK: #[[L1_:.*]] = #ttnn.buffer_type<l1>
+    // CHECK: #[[LAYOUT_3:.*]] = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <8x8>, memref<8x8xbf16, #dram>, interleaved>
+    // CHECK: #[[LAYOUT_5:.*]] = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <8x8>, memref<8x4xbf16, #l1_>, interleaved>
+    // CHECK: #[[LAYOUT_6:.*]] = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <8x8>, memref<8x8xbf16, #l1_>, interleaved>
+    %0 = tensor.empty() : tensor<64x64xbf16>
+    // CHECK: %{{.*}} = "ttnn.relu"{{.*}} -> tensor<64x64xbf16, #[[LAYOUT_3]]>
+    %1 = "ttir.relu"(%arg0, %0) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<64x64xbf16>, tensor<64x64xbf16>) -> tensor<64x64xbf16>
+    %2 = tensor.empty() : tensor<64x64xbf16>
+    %3 = "ttir.relu"(%1, %2) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<64x64xbf16>, tensor<64x64xbf16>) -> tensor<64x64xbf16>
+    %4 = tensor.empty() : tensor<64x32xbf16>
+    %5 = "ttir.matmul"(%1, %arg1, %4) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<64x64xbf16>, tensor<64x32xbf16>, tensor<64x32xbf16>) -> tensor<64x32xbf16>
+    %6 = tensor.empty() : tensor<64x32xbf16>
+    %7 = "ttir.relu"(%5, %6) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<64x32xbf16>, tensor<64x32xbf16>) -> tensor<64x32xbf16>
+    %8 = tensor.empty() : tensor<64x32xbf16>
+    // CHECK: %{{.*}} = "ttnn.matmul"{{.*}} -> tensor<64x32xbf16, #[[LAYOUT_5]]>
+    // CHECK: %{{.*}} = "ttnn.relu"{{.*}} -> tensor<64x32xbf16, #[[LAYOUT_5]]>
+    // CHECK: %{{.*}} = "ttnn.relu"{{.*}} -> tensor<64x64xbf16, #[[LAYOUT_6]]>
+    // CHECK: %{{.*}} = "ttnn.matmul"{{.*}} -> tensor<64x32xbf16, #[[LAYOUT_5]]>
+    %9 = "ttir.matmul"(%3, %7, %8) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<64x64xbf16>, tensor<64x32xbf16>, tensor<64x32xbf16>) -> tensor<64x32xbf16>
+    return %9 : tensor<64x32xbf16>
+  }
+}
diff --git a/test/ttmlir/Silicon/TTNN/optimizer/mnist_l1_interleaved.mlir b/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/mnist_l1_interleaved.mlir
similarity index 88%
rename from test/ttmlir/Silicon/TTNN/optimizer/mnist_l1_interleaved.mlir
rename to test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/mnist_l1_interleaved.mlir
index ef6fae268..93a19ad6e 100644
--- a/test/ttmlir/Silicon/TTNN/optimizer/mnist_l1_interleaved.mlir
+++ b/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/mnist_l1_interleaved.mlir
@@ -1,13 +1,11 @@
-// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path% enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s > %t.mlir
-// RUN: FileCheck %s --input-file=%t.mlir
-// RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn
+// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s | FileCheck %s
 #any_device = #tt.operand_constraint<dram|l1|scalar|tile|any_device|any_device_tile>
 #loc = loc("MNISTLinear":4294967295:0)
 module @"tt-forge-graph" attributes {} {
   func.func @main(%arg0: tensor<1x784xf32> loc("MNISTLinear":4294967295:0), %arg1: tensor<1x10xf32> loc("MNISTLinear":4294967295:0), %arg2: tensor<256x10xf32> loc("MNISTLinear":4294967295:0), %arg3: tensor<1x256xf32> loc("MNISTLinear":4294967295:0), %arg4: tensor<784x256xf32> loc("MNISTLinear":4294967295:0)) -> tensor<1x10xf32> {
+    // CHECK: #[[L1_:.*]] = #ttnn.buffer_type<l1>
     // CHECK: #[[LAYOUT_6:.*]] = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <{{.*}}>, memref<{{.*}}, #l1_>, interleaved>
     // CHECK: #[[LAYOUT_7:.*]] = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <{{.*}}>, memref<{{.*}}, #l1_>, interleaved>
-    // CHECK: #[[LAYOUT_8:.*]] = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <{{.*}}>, memref<{{.*}}, #dram>, interleaved>
     %0 = tensor.empty() : tensor<1x256xf32> loc(#loc8)
     // CHECK: %[[C:.*]] = "ttnn.matmul"[[C:.*]] -> tensor<1x256xf32, #[[LAYOUT_6]]>
     %1 = "ttir.matmul"(%arg0, %arg4, %0) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x784xf32>, tensor<784x256xf32>, tensor<1x256xf32>) -> tensor<1x256xf32> loc(#loc8)
@@ -24,7 +22,7 @@ module @"tt-forge-graph" attributes {} {
     // CHECK: %[[C:.*]] = "ttnn.add"[[C:.*]] -> tensor<1x10xf32, #[[LAYOUT_7]]>
     %9 = "ttir.add"(%7, %arg1, %8) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x10xf32>, tensor<1x10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32> loc(#loc12)
     %10 = tensor.empty() : tensor<1x10xf32> loc(#loc13)
-    // CHECK: %{{.*}} = "ttnn.softmax"{{.*}} -> tensor<1x10xf32, #[[LAYOUT_8]]>
+    // CHECK: %{{.*}} = "ttnn.softmax"{{.*}} -> tensor<1x10xf32, #[[LAYOUT_7]]>
     %11 = "ttir.softmax"(%9, %10) <{dimension = 1 : si32, operand_constraints = [#any_device, #any_device]}> : (tensor<1x10xf32>, tensor<1x10xf32>) -> tensor<1x10xf32> loc(#loc13)
     return %11 : tensor<1x10xf32> loc(#loc7)
   } loc(#loc)
diff --git a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_ABC_l1_None.mlir b/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_ABC_l1_None.mlir
new file mode 100644
index 000000000..acbb8d674
--- /dev/null
+++ b/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_ABC_l1_None.mlir
@@ -0,0 +1,28 @@
+// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s | FileCheck %s
+//
+//       A     B
+//        \   /
+//          C
+//          |
+//          D
+//
+//  (A > L1) AND (B > L1) AND (C > L1)
+//      =>
+//  DRAM: ABC; L1: None
+//
+#any_device = #tt.operand_constraint<dram|l1|scalar|tile|any_device|any_device_tile>
+module attributes {} {
+  func.func @forward(%arg0: tensor<8192x8192xbf16>, %arg1: tensor<8192x8192xbf16>, %arg2: tensor<8192x8192xbf16>, %arg3: tensor<8192x8192xbf16>) -> tensor<8192x8192xbf16> {
+    // CHECK-DAG: #[[LAYOUT_2:.*]] = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <{{.*}}>, memref<1024x1024xbf16, #dram>, interleaved>
+    %0 = tensor.empty() : tensor<8192x8192xbf16>
+    // CHECK-DAG: %{{.*}} = "ttnn.add"{{.*}} -> tensor<8192x8192xbf16, #[[LAYOUT_2]]>
+    %1 = "ttir.add"(%arg0, %arg1, %0) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<8192x8192xbf16>, tensor<8192x8192xbf16>, tensor<8192x8192xbf16>) -> tensor<8192x8192xbf16>
+    %2 = tensor.empty() : tensor<8192x8192xbf16>
+    // CHECK-DAG: %{{.*}} = "ttnn.add"{{.*}} -> tensor<8192x8192xbf16, #[[LAYOUT_2]]>
+    %3 = "ttir.add"(%arg2, %arg3, %2) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<8192x8192xbf16>, tensor<8192x8192xbf16>, tensor<8192x8192xbf16>) -> tensor<8192x8192xbf16>
+    %4 = tensor.empty() : tensor<8192x8192xbf16>
+    // CHECK-DAG: %{{.*}} = "ttnn.matmul"{{.*}} -> tensor<8192x8192xbf16, #[[LAYOUT_2]]>
+    %5 = "ttir.matmul"(%1, %3, %4) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<8192x8192xbf16>, tensor<8192x8192xbf16>, tensor<8192x8192xbf16>) -> tensor<8192x8192xbf16>
+    return %5 : tensor<8192x8192xbf16>
+  }
+}
diff --git a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_AB_l1_C.mlir b/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_AB_l1_C.mlir
new file mode 100644
index 000000000..49aebb6a4
--- /dev/null
+++ b/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_AB_l1_C.mlir
@@ -0,0 +1,31 @@
+// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s | FileCheck %s
+//
+//       A     B
+//        \   /
+//          C
+//          |
+//          D
+//
+//  (A + C > L1) AND (B + C > L1) AND (A + B > L1) AND (A < C) AND (B < C) AND (C <= L1)
+//      =>
+//  DRAM: AB; L1: C
+//
+#any_device = #tt.operand_constraint<dram|l1|scalar|tile|any_device|any_device_tile>
+module attributes {} {
+  func.func @forward(%arg0: tensor<5120x4096xbf16>, %arg1: tensor<5120x4096xbf16>, %arg2: tensor<4096x5120xbf16>, %arg3: tensor<4096x5120xbf16>) -> tensor<5120x5120xbf16> {
+    // CHECK: #[[L1_:.*]] = #ttnn.buffer_type<l1>
+    // CHECK-DAG: #[[LAYOUT_4:.*]] = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <{{.*}}>, memref<512x640xbf16, #dram>, interleaved>
+    // CHECK-DAG: #[[LAYOUT_6:.*]] = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <{{.*}}>, memref<640x512xbf16, #dram>, interleaved>
+    // CHECK-DAG: #[[LAYOUT_7:.*]] = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <{{.*}}>, memref<640x640xbf16, #l1_>, interleaved>
+    %0 = tensor.empty() : tensor<5120x4096xbf16>
+    // CHECK-DAG: %{{.*}} = "ttnn.add"{{.*}} -> tensor<5120x4096xbf16, #[[LAYOUT_6]]>
+    %1 = "ttir.add"(%arg0, %arg1, %0) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<5120x4096xbf16>, tensor<5120x4096xbf16>, tensor<5120x4096xbf16>) -> tensor<5120x4096xbf16>
+    %2 = tensor.empty() : tensor<4096x5120xbf16>
+    // CHECK-DAG: %{{.*}} = "ttnn.add"{{.*}} -> tensor<4096x5120xbf16, #[[LAYOUT_4]]>
+    %3 = "ttir.add"(%arg2, %arg3, %2) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<4096x5120xbf16>, tensor<4096x5120xbf16>, tensor<4096x5120xbf16>) -> tensor<4096x5120xbf16>
+    %4 = tensor.empty() : tensor<5120x5120xbf16>
+    // CHECK: %{{.*}} = "ttnn.matmul"{{.*}} -> tensor<5120x5120xbf16, #[[LAYOUT_7]]>
+    %5 = "ttir.matmul"(%1, %3, %4) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<5120x4096xbf16>, tensor<4096x5120xbf16>, tensor<5120x5120xbf16>) -> tensor<5120x5120xbf16>
+    return %5 : tensor<5120x5120xbf16>
+  }
+}
diff --git a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_AC_l1_B.mlir b/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_AC_l1_B.mlir
new file mode 100644
index 000000000..7f41675cd
--- /dev/null
+++ b/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_AC_l1_B.mlir
@@ -0,0 +1,30 @@
+// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s | FileCheck %s
+//
+//       A     B
+//        \   /
+//          C
+//          |
+//          D
+//
+//  (A + C > L1) AND (B + C > L1) AND (A + B > L1) AND (A < B) AND (C < B) AND (B <= L1)
+//      =>
+//  DRAM: AC; L1: B
+//
+#any_device = #tt.operand_constraint<dram|l1|scalar|tile|any_device|any_device_tile>
+module attributes {} {
+  func.func @forward(%arg0: tensor<4096x5120xbf16>, %arg1: tensor<4096x5120xbf16>, %arg2: tensor<5120x5120xbf16>, %arg3: tensor<5120x5120xbf16>) -> tensor<4096x5120xbf16> {
+    // CHECK: #[[L1_:.*]] = #ttnn.buffer_type<l1>
+    // CHECK-DAG: #[[LAYOUT_3:.*]] = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <{{.*}}>, memref<512x640xbf16, #dram>, interleaved>
+    // CHECK-DAG: #[[LAYOUT_5:.*]] = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <{{.*}}>, memref<640x640xbf16, #l1_>, interleaved>
+    %0 = tensor.empty() : tensor<4096x5120xbf16>
+    // CHECK-DAG: %{{.*}} = "ttnn.add"{{.*}} -> tensor<4096x5120xbf16, #[[LAYOUT_3]]>
+    %1 = "ttir.add"(%arg0, %arg1, %0) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<4096x5120xbf16>, tensor<4096x5120xbf16>, tensor<4096x5120xbf16>) -> tensor<4096x5120xbf16>
+    %2 = tensor.empty() : tensor<5120x5120xbf16>
+    // CHECK-DAG: %{{.*}} = "ttnn.add"{{.*}} -> tensor<5120x5120xbf16, #[[LAYOUT_5]]>
+    %3 = "ttir.add"(%arg2, %arg3, %2) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<5120x5120xbf16>, tensor<5120x5120xbf16>, tensor<5120x5120xbf16>) -> tensor<5120x5120xbf16>
+    %4 = tensor.empty() : tensor<4096x5120xbf16>
+    // CHECK-DAG: %{{.*}} = "ttnn.matmul"{{.*}} -> tensor<4096x5120xbf16, #[[LAYOUT_3]]>
+    %5 = "ttir.matmul"(%1, %3, %4) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<4096x5120xbf16>, tensor<5120x5120xbf16>, tensor<4096x5120xbf16>) -> tensor<4096x5120xbf16>
+    return %5 : tensor<4096x5120xbf16>
+  }
+}
diff --git a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_A_l1_BC.mlir b/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_A_l1_BC.mlir
new file mode 100644
index 000000000..7d4c923b4
--- /dev/null
+++ b/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_A_l1_BC.mlir
@@ -0,0 +1,30 @@
+// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s | FileCheck %s
+//
+//       A     B
+//        \   /
+//          C
+//          |
+//          D
+//
+//  (A + B + C > L1) AND (A + C < B + C) AND (A + B < B + C) AND (B + C <= L1)
+//      =>
+//  DRAM: A; L1: BC
+//
+#any_device = #tt.operand_constraint<dram|l1|scalar|tile|any_device|any_device_tile>
+module attributes {} {
+  func.func @forward(%arg0: tensor<2048x2048xbf16>, %arg1: tensor<2048x2048xbf16>, %arg2: tensor<2048x8192xbf16>, %arg3: tensor<2048x8192xbf16>) -> tensor<2048x8192xbf16> {
+    // CHECK: #[[L1_:.*]] = #ttnn.buffer_type<l1>
+    // CHECK-DAG: #[[LAYOUT_3:.*]] = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <{{.*}}>, memref<256x256xbf16, #dram>, interleaved>
+    // CHECK-DAG: #[[LAYOUT_5:.*]] = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <{{.*}}>, memref<256x1024xbf16, #l1_>, interleaved>
+    %0 = tensor.empty() : tensor<2048x2048xbf16>
+    // CHECK-DAG: %{{.*}} = "ttnn.add"{{.*}} -> tensor<2048x2048xbf16, #[[LAYOUT_3]]>
+    %1 = "ttir.add"(%arg0, %arg1, %0) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<2048x2048xbf16>, tensor<2048x2048xbf16>, tensor<2048x2048xbf16>) -> tensor<2048x2048xbf16>
+    %2 = tensor.empty() : tensor<2048x8192xbf16>
+    // CHECK-DAG: %{{.*}} = "ttnn.add"{{.*}} -> tensor<2048x8192xbf16, #[[LAYOUT_5]]>
+    %3 = "ttir.add"(%arg2, %arg3, %2) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<2048x8192xbf16>, tensor<2048x8192xbf16>, tensor<2048x8192xbf16>) -> tensor<2048x8192xbf16>
+    %4 = tensor.empty() : tensor<2048x8192xbf16>
+    // CHECK-DAG: %{{.*}} = "ttnn.matmul"{{.*}} -> tensor<2048x8192xbf16, #[[LAYOUT_5]]>
+    %5 = "ttir.matmul"(%1, %3, %4) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<2048x2048xbf16>, tensor<2048x8192xbf16>, tensor<2048x8192xbf16>) -> tensor<2048x8192xbf16>
+    return %5 : tensor<2048x8192xbf16>
+  }
+}
diff --git a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_BC_l1_A.mlir b/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_BC_l1_A.mlir
new file mode 100644
index 000000000..c915fadd1
--- /dev/null
+++ b/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_BC_l1_A.mlir
@@ -0,0 +1,30 @@
+// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s | FileCheck %s
+//
+//       A     B
+//        \   /
+//          C
+//          |
+//          D
+//
+//  (A + C > L1) AND (B + C > L1) AND (A + B > L1) AND (B < A) AND (C < A) AND (A <= L1)
+//      =>
+//  DRAM: BC; L1: A
+//
+#any_device = #tt.operand_constraint<dram|l1|scalar|tile|any_device|any_device_tile>
+module attributes {} {
+  func.func @forward(%arg0: tensor<5120x5120xbf16>, %arg1: tensor<5120x5120xbf16>, %arg2: tensor<5120x4096xbf16>, %arg3: tensor<5120x4096xbf16>) -> tensor<5120x4096xbf16> {
+    // CHECK: #[[L1_:.*]] = #ttnn.buffer_type<l1>
+    // CHECK-DAG: #[[LAYOUT_3:.*]] = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <{{.*}}>, memref<640x512xbf16, #dram>, interleaved>
+    // CHECK-DAG: #[[LAYOUT_5:.*]] = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <{{.*}}>, memref<640x640xbf16, #l1_>, interleaved>
+    %0 = tensor.empty() : tensor<5120x5120xbf16>
+    // CHECK-DAG: %{{.*}} = "ttnn.add"{{.*}} -> tensor<5120x5120xbf16, #[[LAYOUT_5]]>
+    %1 = "ttir.add"(%arg0, %arg1, %0) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<5120x5120xbf16>, tensor<5120x5120xbf16>, tensor<5120x5120xbf16>) -> tensor<5120x5120xbf16>
+    %2 = tensor.empty() : tensor<5120x4096xbf16>
+    // CHECK-DAG: %{{.*}} = "ttnn.add"{{.*}} -> tensor<5120x4096xbf16, #[[LAYOUT_3]]>
+    %3 = "ttir.add"(%arg2, %arg3, %2) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<5120x4096xbf16>, tensor<5120x4096xbf16>, tensor<5120x4096xbf16>) -> tensor<5120x4096xbf16>
+    %4 = tensor.empty() : tensor<5120x4096xbf16>
+    // CHECK-DAG: %{{.*}} = "ttnn.matmul"{{.*}} -> tensor<5120x4096xbf16, #[[LAYOUT_3]]>
+    %5 = "ttir.matmul"(%1, %3, %4) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<5120x5120xbf16>, tensor<5120x4096xbf16>, tensor<5120x4096xbf16>) -> tensor<5120x4096xbf16>
+    return %5 : tensor<5120x4096xbf16>
+  }
+}
diff --git a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_B_l1_AC.mlir b/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_B_l1_AC.mlir
new file mode 100644
index 000000000..3d2538e24
--- /dev/null
+++ b/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_B_l1_AC.mlir
@@ -0,0 +1,30 @@
+// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s | FileCheck %s
+//
+//       A     B
+//        \   /
+//          C
+//          |
+//          D
+//
+//  (A + B + C > L1) AND (B + C < A + C) AND (A + B < A + C) AND (A + C <= L1)
+//      =>
+//  DRAM: B; L1: AC
+//
+#any_device = #tt.operand_constraint<dram|l1|scalar|tile|any_device|any_device_tile>
+module attributes {} {
+  func.func @forward(%arg0: tensor<8192x2048xbf16>, %arg1: tensor<8192x2048xbf16>, %arg2: tensor<2048x2048xbf16>, %arg3: tensor<2048x2048xbf16>) -> tensor<8192x2048xbf16> {
+    // CHECK: #[[L1_:.*]] = #ttnn.buffer_type<l1>
+    // CHECK-DAG: #[[LAYOUT_3:.*]] = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <{{.*}}>, memref<256x256xbf16, #dram>, interleaved>
+    // CHECK-DAG: #[[LAYOUT_5:.*]] = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <{{.*}}>, memref<1024x256xbf16, #l1_>, interleaved>
+    %0 = tensor.empty() : tensor<8192x2048xbf16>
+    // CHECK-DAG: %{{.*}} = "ttnn.add"{{.*}} -> tensor<8192x2048xbf16, #[[LAYOUT_5]]>
+    %1 = "ttir.add"(%arg0, %arg1, %0) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<8192x2048xbf16>, tensor<8192x2048xbf16>, tensor<8192x2048xbf16>) -> tensor<8192x2048xbf16>
+    %2 = tensor.empty() : tensor<2048x2048xbf16>
+    // CHECK-DAG: %{{.*}} = "ttnn.add"{{.*}} -> tensor<2048x2048xbf16, #[[LAYOUT_3]]>
+    %3 = "ttir.add"(%arg2, %arg3, %2) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<2048x2048xbf16>, tensor<2048x2048xbf16>, tensor<2048x2048xbf16>) -> tensor<2048x2048xbf16>
+    %4 = tensor.empty() : tensor<8192x2048xbf16>
+    // CHECK-DAG: %{{.*}} = "ttnn.matmul"{{.*}} -> tensor<8192x2048xbf16, #[[LAYOUT_5]]>
+    %5 = "ttir.matmul"(%1, %3, %4) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<8192x2048xbf16>, tensor<2048x2048xbf16>, tensor<8192x2048xbf16>) -> tensor<8192x2048xbf16>
+    return %5 : tensor<8192x2048xbf16>
+  }
+}
diff --git a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_C_l1_AB.mlir b/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_C_l1_AB.mlir
new file mode 100644
index 000000000..320f00ce3
--- /dev/null
+++ b/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_C_l1_AB.mlir
@@ -0,0 +1,31 @@
+// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s | FileCheck %s
+//
+//       A     B
+//        \   /
+//          C
+//          |
+//          D
+//
+//  (A + B + C > L1) AND (A + C < A + B) AND (B + C < A + B) AND (A + B <= L1)
+//      =>
+//  DRAM: C; L1: AB
+//
+#any_device = #tt.operand_constraint<dram|l1|scalar|tile|any_device|any_device_tile>
+module attributes {} {
+  func.func @forward(%arg0: tensor<2048x8192xbf16>, %arg1: tensor<2048x8192xbf16>, %arg2: tensor<8192x2048xbf16>, %arg3: tensor<8192x2048xbf16>) -> tensor<2048x2048xbf16> {
+    // CHECK: #[[L1_:.*]] = #ttnn.buffer_type<l1>
+    // CHECK-DAG: #[[LAYOUT_4:.*]] = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <{{.*}}>, memref<256x1024xbf16, #l1_>, interleaved>
+    // CHECK-DAG: #[[LAYOUT_6:.*]] = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <{{.*}}>, memref<1024x256xbf16, #l1_>, interleaved>
+    // CHECK-DAG: #[[LAYOUT_7:.*]] = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <{{.*}}>, memref<256x256xbf16, #dram>, interleaved>
+    %0 = tensor.empty() : tensor<2048x8192xbf16>
+    // CHECK-DAG: %{{.*}} = "ttnn.add"{{.*}} -> tensor<2048x8192xbf16, #[[LAYOUT_4]]>
+    %1 = "ttir.add"(%arg0, %arg1, %0) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<2048x8192xbf16>, tensor<2048x8192xbf16>, tensor<2048x8192xbf16>) -> tensor<2048x8192xbf16>
+    %2 = tensor.empty() : tensor<8192x2048xbf16>
+    // CHECK-DAG: %{{.*}} = "ttnn.add"{{.*}} -> tensor<8192x2048xbf16, #[[LAYOUT_6]]>
+    %3 = "ttir.add"(%arg2, %arg3, %2) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<8192x2048xbf16>, tensor<8192x2048xbf16>, tensor<8192x2048xbf16>) -> tensor<8192x2048xbf16>
+    %4 = tensor.empty() : tensor<2048x2048xbf16>
+    // CHECK-DAG: %{{.*}} = "ttnn.matmul"{{.*}} -> tensor<2048x2048xbf16, #[[LAYOUT_7]]>
+    %5 = "ttir.matmul"(%1, %3, %4) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<2048x8192xbf16>, tensor<8192x2048xbf16>, tensor<2048x2048xbf16>) -> tensor<2048x2048xbf16>
+    return %5 : tensor<2048x2048xbf16>
+  }
+}
diff --git a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_None_l1_ABC.mlir b/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_None_l1_ABC.mlir
new file mode 100644
index 000000000..a21a11f87
--- /dev/null
+++ b/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/simple_join_tests/dram_None_l1_ABC.mlir
@@ -0,0 +1,29 @@
+// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s | FileCheck %s
+//
+//       A     B
+//        \   /
+//          C
+//          |
+//          D
+//
+//  (A + B + C <= L1)
+//      =>
+//  DRAM: None; L1: ABC
+//
+#any_device = #tt.operand_constraint<dram|l1|scalar|tile|any_device|any_device_tile>
+module attributes {} {
+  func.func @forward(%arg0: tensor<32x32xbf16>, %arg1: tensor<32x32xbf16>, %arg2: tensor<32x32xbf16>, %arg3: tensor<32x32xbf16>) -> tensor<32x32xbf16> {
+    // CHECK: #[[L1_:.*]] = #ttnn.buffer_type<l1>
+    // CHECK-DAG: #[[LAYOUT_2:.*]] = #ttnn.ttnn_layout<(d0, d1) -> (d0, d1), <{{.*}}>, memref<4x4xbf16, #l1_>, interleaved>
+    %0 = tensor.empty() : tensor<32x32xbf16>
+    // CHECK-DAG: %{{.*}} = "ttnn.add"{{.*}} -> tensor<32x32xbf16, #[[LAYOUT_2]]>
+    %1 = "ttir.add"(%arg0, %arg1, %0) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x32xbf16>, tensor<32x32xbf16>, tensor<32x32xbf16>) -> tensor<32x32xbf16>
+    %2 = tensor.empty() : tensor<32x32xbf16>
+    // CHECK-DAG: %{{.*}} = "ttnn.add"{{.*}} -> tensor<32x32xbf16, #[[LAYOUT_2]]>
+    %3 = "ttir.add"(%arg2, %arg3, %2) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x32xbf16>, tensor<32x32xbf16>, tensor<32x32xbf16>) -> tensor<32x32xbf16>
+    %4 = tensor.empty() : tensor<32x32xbf16>
+    // CHECK-DAG: %{{.*}} = "ttnn.add"{{.*}} -> tensor<32x32xbf16, #[[LAYOUT_2]]>
+    %5 = "ttir.add"(%1, %3, %4) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<32x32xbf16>, tensor<32x32xbf16>, tensor<32x32xbf16>) -> tensor<32x32xbf16>
+    return %5 : tensor<32x32xbf16>
+  }
+}
diff --git a/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/single_op.mlir b/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/single_op.mlir
new file mode 100644
index 000000000..482079993
--- /dev/null
+++ b/test/ttmlir/Dialect/TTNN/optimizer/l1_interleaved_policy/single_op.mlir
@@ -0,0 +1,10 @@
+// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s | FileCheck %s
+// UNSUPPORTED: true
+#any_device_tile = #tt.operand_constraint<dram|l1|tile|any_device_tile>
+module attributes {} {
+  func.func @forward(%arg0: tensor<5120x5120xbf16>) -> tensor<5120x5120xbf16> {
+    %out = tensor.empty() : tensor<5120x5120xbf16>
+    %result = "ttir.relu"(%arg0, %out) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device_tile, #any_device_tile]}> : (tensor<5120x5120xbf16>, tensor<5120x5120xbf16>) -> tensor<5120x5120xbf16>
+    return %result : tensor<5120x5120xbf16>
+  }
+}
diff --git a/test/ttmlir/Silicon/TTNN/optimizer/large_tensors.mlir b/test/ttmlir/Silicon/TTNN/optimizer/large_tensors.mlir
deleted file mode 100644
index fb71dae8d..000000000
--- a/test/ttmlir/Silicon/TTNN/optimizer/large_tensors.mlir
+++ /dev/null
@@ -1,19 +0,0 @@
-// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path% enable-optimizer=true memory-layout-analysis-enabled=true memory-layout-analysis-policy=L1Interleaved" %s > %t.mlir
-// RUN: FileCheck %s --input-file=%t.mlir
-// RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn
-#any_device = #tt.operand_constraint<dram|l1|scalar|tile|any_device|any_device_tile>
-module attributes {} {
-  func.func @forward(%arg0: tensor<8192x8192xbf16>, %arg1: tensor<8192x8192xbf16>, %arg2: tensor<8192x8192xbf16>) -> tensor<8192x8192xbf16> {
-    // CHECK: #[[LAYOUT_2:ttnn_layout2]] = #ttnn.ttnn_layout<{{.*}}, memref<{{.*}}, #dram>, {{.*}}>
-    %0 = tensor.empty() : tensor<8192x8192xbf16>
-    // CHECK: %{{.*}} = "ttnn.add"{{.*}} -> tensor<8192x8192xbf16, #[[LAYOUT_2]]>
-    %1 = "ttir.add"(%arg0, %arg1, %0) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<8192x8192xbf16>, tensor<8192x8192xbf16>, tensor<8192x8192xbf16>) -> tensor<8192x8192xbf16>
-    %2 = tensor.empty() : tensor<8192x8192xbf16>
-    // CHECK: %{{.*}} = "ttnn.add"{{.*}} -> tensor<8192x8192xbf16, #[[LAYOUT_2]]>
-    %3 = "ttir.add"(%1, %arg2, %2) <{operandSegmentSizes = array<i32: 2, 1>, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<8192x8192xbf16>, tensor<8192x8192xbf16>, tensor<8192x8192xbf16>) -> tensor<8192x8192xbf16>
-    %4 = tensor.empty() : tensor<8192x8192xbf16>
-    // CHECK: %{{.*}} = "ttnn.relu"{{.*}} -> tensor<8192x8192xbf16, #[[LAYOUT_2]]>
-    %7 = "ttir.relu"(%3, %4) <{operandSegmentSizes = array<i32: 1, 1>, operand_constraints = [#any_device, #any_device]}> : (tensor<8192x8192xbf16>, tensor<8192x8192xbf16>) -> tensor<8192x8192xbf16>
-    return %7 : tensor<8192x8192xbf16>
-  }
-}
diff --git a/test/unittests/Optimizer/CMakeLists.txt b/test/unittests/Optimizer/CMakeLists.txt
index 4e6ee799a..b05c8ae29 100644
--- a/test/unittests/Optimizer/CMakeLists.txt
+++ b/test/unittests/Optimizer/CMakeLists.txt
@@ -1,11 +1,13 @@
 add_mlir_unittest(OptimizerTests
     TestShardSolver.cpp
     TestOptimizerOverrides.cpp
+    TestL1InterleavedPolicy.cpp
 )
 
 target_link_libraries(OptimizerTests
     PRIVATE
     MLIR
     MLIRTTDialect
+    MLIRTTNNAnalysis
     MLIRTTNNPipelines
 )
diff --git a/test/unittests/Optimizer/TestL1InterleavedPolicy.cpp b/test/unittests/Optimizer/TestL1InterleavedPolicy.cpp
new file mode 100644
index 000000000..7d02cef56
--- /dev/null
+++ b/test/unittests/Optimizer/TestL1InterleavedPolicy.cpp
@@ -0,0 +1,193 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include <gtest/gtest.h>
+
+#include "mlir/IR/Value.h"
+#include "mlir/IR/ValueRange.h"
+#include "llvm/ADT/SmallVector.h"
+
+#include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/IR/Builders.h"
+#include "mlir/IR/BuiltinOps.h"
+#include "mlir/IR/MLIRContext.h"
+
+#include "ttmlir/Dialect/TTNN/IR/TTNN.h"
+#include "ttmlir/Dialect/TTNN/IR/TTNNOps.h"
+
+#include "ttmlir/Dialect/TTNN/Analysis/L1InterleavedPolicy.h"
+
+using namespace mlir::tt::ttnn;
+
+constexpr int TensorDimX = 128;
+constexpr int TensorDimY = 128;
+
+// Test fixture: builds a module holding one `test` function with two tensor
+// arguments and offers helpers that register legal layouts and L1 usage
+// figures for ops handed to L1InterleavedPolicy::getGreedyConfig.
+class L1InterleavedPolicyBase : public ::testing::Test {
+public:
+  mlir::MLIRContext context;
+  mlir::OwningOpRef<mlir::ModuleOp> module;
+  mlir::OpBuilder builder = mlir::OpBuilder(&context);
+  mlir::func::FuncOp func;
+  mlir::tt::DeviceAttr deviceAttr;
+
+  using OpMemSpec = L1InterleavedPolicy::OpMemSpec;
+  using OpConfig = L1InterleavedPolicy::OpConfig;
+  using L1Usage = L1InterleavedPolicy::L1Usage;
+
+  // Loads the TTNN dialect, then creates the module and the test function.
+  void SetUp() override {
+    context.loadDialect<TTNNDialect>();
+    module = mlir::ModuleOp::create(builder.getUnknownLoc());
+    builder.setInsertionPointToStart(&module->getBodyRegion().front());
+    createFuncOp();
+    deviceAttr = mlir::tt::getCurrentScopeDevice(func);
+  }
+
+  llvm::SmallVector<int64_t, 2> getTensorShape() {
+    return {TensorDimX, TensorDimY};
+  }
+
+  mlir::RankedTensorType getTensorRankedType() {
+    return mlir::RankedTensorType::get(getTensorShape(), builder.getF32Type());
+  }
+
+  // Creates an EmptyOp at the builder's insertion point and returns its value.
+  mlir::Value createEmptyTensor() {
+    ShapeAttr shapeAttr = ShapeAttr::get(&context, getTensorShape());
+    return builder.create<EmptyOp>(builder.getUnknownLoc(),
+                                   getTensorRankedType(), nullptr, shapeAttr,
+                                   nullptr, nullptr, nullptr);
+  }
+
+  // Creates `test` (two tensor inputs, one tensor result), stores it in
+  // `func` and moves the builder to the start of its entry block.
+  mlir::func::FuncOp createFuncOp() {
+    const mlir::Type tensorType = getTensorRankedType();
+    // The tests read entry-block arguments 0 and 1, so both are declared in
+    // the signature instead of being appended to the block afterwards.
+    mlir::SmallVector<mlir::Type> input{tensorType, tensorType};
+    mlir::SmallVector<mlir::Type> output{tensorType};
+
+    auto funcType = builder.getType<mlir::FunctionType>(
+        mlir::TypeRange(input), mlir::TypeRange(output));
+    func = builder.create<mlir::func::FuncOp>(builder.getUnknownLoc(), "test",
+                                              funcType);
+
+    // addEntryBlock() gives the block one argument per function input.
+    mlir::Block *block = func.addEntryBlock();
+    builder.setInsertionPointToStart(block);
+
+    return func;
+  }
+
+  // Appends one layout (buffer type + memory layout) to `op`'s legal layouts.
+  void addLayoutForOp(mlir::Operation *op,
+                      llvm::DenseMap<mlir::Operation *,
+                                     std::vector<TTNNLayoutAttr>> &legalLayouts,
+                      BufferType memorySpace,
+                      TensorMemoryLayout tensorMemoryLayout) {
+    // operator[] default-constructs the vector on first use, so the first
+    // layout needs no special case.
+    legalLayouts[op].push_back(TTNNLayoutAttr::get(
+        &context, getTensorRankedType().getShape(), builder.getF32Type(),
+        memorySpace, mlir::tt::GridAttr::get(&context, {8, 8}),
+        tensorMemoryLayout));
+  }
+
+  // Registers DRAM and L1 interleaved layouts for `op` and records its
+  // output / required L1 usage in `opsL1Usage`.
+  void prepareOpForGreedyConfigPicker(
+      mlir::Operation *op, uint64_t outputL1Usage, uint64_t requiredL1Usage,
+      llvm::DenseMap<mlir::Operation *, std::vector<TTNNLayoutAttr>>
+          &legalLayouts,
+      llvm::DenseMap<mlir::Operation *, L1Usage> &opsL1Usage) {
+    addLayoutForOp(op, legalLayouts, BufferType::DRAM,
+                   TensorMemoryLayout::Interleaved);
+    addLayoutForOp(op, legalLayouts, BufferType::L1,
+                   TensorMemoryLayout::Interleaved);
+
+    L1Usage l1Usage;
+    l1Usage.outputL1Usage = outputL1Usage;
+    l1Usage.requiredL1Usage = requiredL1Usage;
+    opsL1Usage[op] = l1Usage;
+  }
+
+  void TearDown() override {}
+};
+
+// Exercises L1InterleavedPolicy::getGreedyConfig on base op D together with
+// ops A, B and C. Each op is registered with a DRAM and an L1 interleaved
+// layout plus synthetic L1 usage numbers. The picker is expected to give all
+// four ops an L1 layout and to report the precedence for D as C, A, B.
+//
+TEST_F(L1InterleavedPolicyBase, VerifyGreedyPolicy) {
+  // Policy inputs; the chain configs and the schedule stay empty here.
+  std::vector<L1ChainConfig> l1ChainConfigs;
+  llvm::DenseMap<mlir::Operation *, std::vector<TTNNLayoutAttr>> legalLayouts;
+  llvm::DenseMap<mlir::func::FuncOp, llvm::SmallVector<mlir::Operation *>>
+      schedule;
+  llvm::DenseMap<mlir::Operation *, L1Usage> opsL1Usage;
+  // Synthetic L1 budget handed to the policy.
+  constexpr uint64_t usableL1CacheSize = 15;
+
+  // Every op below adds the same two entry-block arguments, so they are
+  // looked up once.
+  mlir::Block &entryBlock = func.getBody().getBlocks().front();
+  mlir::Value lhs = entryBlock.getArgument(0);
+  mlir::Value rhs = entryBlock.getArgument(1);
+
+  // Creates an AddOp of the two arguments into a fresh destination tensor,
+  // registers its legal layouts and L1 usage, and returns the op. Ops are
+  // created in call order, exactly one destination tensor per op.
+  auto createOp = [&](uint64_t outputL1Usage,
+                      uint64_t requiredL1Usage) -> mlir::Operation * {
+    mlir::Value dest = createEmptyTensor();
+    mlir::Operation *op =
+        builder.create<AddOp>(builder.getUnknownLoc(), lhs, rhs, dest);
+    prepareOpForGreedyConfigPicker(op, outputL1Usage, requiredL1Usage,
+                                   legalLayouts, opsL1Usage);
+    return op;
+  };
+
+  // Create operand A
+  mlir::Operation *opA =
+      createOp(/*outputL1Usage=*/2, /*requiredL1Usage=*/8);
+
+  // Create operand B
+  mlir::Operation *opB =
+      createOp(/*outputL1Usage=*/3, /*requiredL1Usage=*/7);
+
+  // Create operand C
+  mlir::Operation *opC =
+      createOp(/*outputL1Usage=*/1, /*requiredL1Usage=*/9);
+
+  // Create base op D
+  mlir::Operation *opD =
+      createOp(/*outputL1Usage=*/4, /*requiredL1Usage=*/0);
+
+  // Run greedy config picker policy
+  L1InterleavedPolicy l1InterleavedPolicy(nullptr, l1ChainConfigs, legalLayouts,
+                                          schedule, usableL1CacheSize);
+  OpConfig greedyConfig =
+      l1InterleavedPolicy.getGreedyConfig(opD, opsL1Usage);
+
+  // Sanity checks. ASSERT_EQ is used so that a failure reports both the
+  // actual and the expected value.
+  ASSERT_EQ(greedyConfig.baseOp, opD);
+  ASSERT_EQ(greedyConfig.layouts.size(), 4u);
+  ASSERT_EQ(greedyConfig.precedence.size(), 3u);
+
+  // All layouts should be using L1 buffer type
+  for (const auto &[op, layout] : greedyConfig.layouts) {
+    ASSERT_TRUE(layout.hasL1BufferType());
+  }
+
+  // Precedence order for op D should be: C, A, B
+  ASSERT_EQ(greedyConfig.precedence[0], opC);
+  ASSERT_EQ(greedyConfig.precedence[1], opA);
+  ASSERT_EQ(greedyConfig.precedence[2], opB);
+}