From edd50db9e5e00ba7ab837be4cc6d76d87b19091a Mon Sep 17 00:00:00 2001
From: Nicholas Smith
Date: Fri, 22 Nov 2024 16:43:33 -0600
Subject: [PATCH] Rename tt.Layout to tt.MetalLayout

---
 docs/src/dialects-overview.md                 |  2 +-
 docs/src/specs/device.md                      |  6 +-
 docs/src/specs/tensor-layout.md               | 32 +++----
 include/ttmlir-c/TTAttrs.h                    |  6 +-
 include/ttmlir/Dialect/TT/IR/TTOpsTypes.td    | 30 +++----
 include/ttmlir/Dialect/TTIR/IR/TTIROps.td     |  4 +-
 .../ttmlir/Target/Utils/MLIRToFlatbuffer.h    | 11 +--
 lib/CAPI/TTAttrs.cpp                          | 16 ++--
 .../TTIRToTTMetal/TTIRToTTMetal.cpp           | 20 ++---
 lib/Dialect/TT/IR/TTDialect.cpp               |  6 +-
 lib/Dialect/TT/IR/TTOpsTypes.cpp              | 89 ++++++++++---------
 lib/Dialect/TTIR/IR/TTIROps.cpp               |  6 +-
 lib/Dialect/TTIR/Transforms/Allocate.cpp      |  4 +-
 lib/Dialect/TTIR/Transforms/Generic.cpp       | 12 +--
 lib/Dialect/TTIR/Transforms/Layout.cpp        | 45 +++++-----
 lib/Dialect/TTMetal/IR/TTMetalOps.cpp         |  8 +-
 lib/Target/TTMetal/TTMetalToFlatbuffer.cpp    | 20 ++---
 python/TTModule.cpp                           | 61 +++++++------
 test/python/tensor_layout.py                  | 26 +++---
 .../Dialect/TTIR/split_compound_layout.mlir   | 28 +++---
 test/ttmlir/Dialect/TTIR/test_allocate.mlir   |  2 +-
 .../ttmlir/Silicon/TTMetal/simple_reduce.mlir |  8 +-
 .../ttmlir/Silicon/TTMetal/tiled_reblock.mlir | 24 ++---
 test/ttmlir/Silicon/TTMetal/to_layout.mlir    | 18 ++--
 test/ttmlir/Silicon/TTNN/perf_unit/mnist.mlir |  4 +-
 .../tt_adapter/src/tt_adapter/mlir.py         |  2 +-
 26 files changed, 252 insertions(+), 238 deletions(-)

diff --git a/docs/src/dialects-overview.md b/docs/src/dialects-overview.md
index e886fb90c..0dbf5fbed 100644
--- a/docs/src/dialects-overview.md
+++ b/docs/src/dialects-overview.md
@@ -3,7 +3,7 @@
 Here is a brief overview of the dialects in the project; please refer to the
 individual dialect documentation for more details:
 
-- `tt`: Common types such as, `tt.tile`, `tt.layout`, `tt.grid`, etc. and enums such as, data formats, memory spaces, iterator types etc.
+- `tt`: Common types such as `tt.tile`, `tt.metal_layout`, `tt.grid`, etc., and enums such as data formats, memory spaces, iterator types, etc.
 - `ttir`: A high level dialect that models the tensor compute graph on tenstorrent devices. Accepts `tosa` and `linalg` input.
   - `ttir.generic`: Generically describe compute work.
   - `ttir.to_layout`: Convert between different tensor memory layouts and transfer between different memory spaces.
diff --git a/docs/src/specs/device.md b/docs/src/specs/device.md
index ae72fe638..64bc91cfa 100644
--- a/docs/src/specs/device.md
+++ b/docs/src/specs/device.md
@@ -135,7 +135,7 @@ the logical device grid:
 
 ```mlir
 tensor<16x3x64x128xf32,
-  #tt.layout<(d0, d1, d2, d3) -> (d0, d1 * 64 + d2, d3),
+  #tt.metal_layout<(d0, d1, d2, d3) -> (d0, d1 * 64 + d2, d3),
   undef,
   <2x2x4>,
   memref<8x3x1x!tt.tile<32 x 32, bfp_bf8>, #tt.memory_space<l1>>
@@ -170,7 +170,7 @@ the logical device grid:
 
 ```mlir
 tensor<256x1024xf32,
-  #tt.layout<(d0, d1) -> (d0, d1),
+  #tt.metal_layout<(d0, d1) -> (d0, d1),
   undef,
   <4x16>,
   memref<2x2x!tt.tile<32 x 32, bfp_bf8>, #tt.memory_space<l1>>
@@ -205,7 +205,7 @@ We can consider the following tensor to map onto this grid:
 
 ```mlir
 tensor<64x256x1024xf32,
-  #tt.layout<(d0, d1) -> (d0, d1),
+  #tt.metal_layout<(d0, d1) -> (d0, d1),
   undef,
   <2x4x16>,
   memref<32x2x2x!tt.tile<32 x 32, bfp_bf8>, #tt.memory_space<l1>>
diff --git a/docs/src/specs/tensor-layout.md b/docs/src/specs/tensor-layout.md
index d523f51ed..52c693189 100644
--- a/docs/src/specs/tensor-layout.md
+++ b/docs/src/specs/tensor-layout.md
@@ -33,7 +33,7 @@
 been used by the TT dialect to encode the tensor's layout.
This looks like:
 
 ```mlir
 tensor<2x3x64x128xf32,
-  #tt.layout<
+  #tt.metal_layout<
     (d0, d1, d2, d3) -> (d0 * 192 + d1 * 64 + d2, d3),
     undef,
     <1x1>,
@@ -76,7 +76,7 @@ topics:
 
 ### Dimension Collapsing
 
-Probably the most important concept in `tt.layout` is dimension collapsing.
+Probably the most important concept in `tt.metal_layout` is dimension collapsing.
 This is captured by the affine map `linear` property which provides a mapping
 from tensor dim space to a reduced physical dimensional space. This
 single-handedly touches on most of the tensor layout goals mentioned at the
@@ -106,7 +106,7 @@ to get our remapped offset:
 This remapped offset `(262, 100)` corresponds to the row and column index of
 the collapsed physical memory.
 
-By default, the dim range `[0, -1)` is collapsed, but the `tt.layout` contructor
+By default, the dim range `[0, -1)` is collapsed, but the `tt.metal_layout` constructor
 can actually take a programmable range called `collapseIntervals`.
 `collapseIntervals` is a list of pairs, where each pair is a dim range interval,
 left inclusive, right exclusive. Let's consider a few examples:
@@ -137,7 +137,7 @@ Let's consider the original example again, but on a larger grid than `1x1`, say
 
 ```mlir
 tensor<2x3x64x128xf32,
-  #tt.layout<
+  #tt.metal_layout<
     (d0, d1, d2, d3) -> (d0 * 192 + d1 * 64 + d2, d3),
     undef,
     <2x4>,
@@ -173,7 +173,7 @@ Here are a few more example mlir snippets:
 
 ```mlir
 tensor<8x300xf32,
-  #tt.layout<(d0, d1) -> (d0, d1),
+  #tt.metal_layout<(d0, d1) -> (d0, d1),
     undef,
     <1x2>,
     memref<8x150xf32, #tt.memory_space<l1>>
@@ -181,7 +181,7 @@ tensor<8x300xf32,
 >
 
 tensor<8x96x32xf32,
-  #tt.layout<(d0, d1, d2) -> (d0 * 96 + d1, d2),
+  #tt.metal_layout<(d0, d1, d2) -> (d0 * 96 + d1, d2),
     undef,
     <2x1>,
     memref<384x32xf32, #tt.memory_space<l1>>
@@ -189,7 +189,7 @@ tensor<8x96x32xf32,
 >
 
 tensor<8x96x32xf32,
-  #tt.layout<(d0, d1, d2) -> (d0 * 96 + d1, d1, d2),
+  #tt.metal_layout<(d0, d1, d2) -> (d0 * 96 + d1, d1, d2),
     undef,
     <2x1x2>,
     memref<384x96x16xf32, #tt.memory_space<l1>>
@@ -197,7 +197,7 @@ tensor<8x96x32xf32,
 >
 
 tensor<5x3x2x2x7x32x32xf32,
-  #tt.layout<
+  #tt.metal_layout<
     (d0, d1, d2, d3, d4, d5, d6) -> (d0 * 2688 + d1 * 896 + d2 * 448 + d3 * 224 + d4 * 32 + d5, d4, d5, d6),
     undef,
@@ -226,7 +226,7 @@ A tilized tensor is one with a memref that has a tile element type.
 
 Given some tensor with scalar layout:
 ```mlir
 tensor<3x64x128xf32,
-  #tt.layout<
+  #tt.metal_layout<
     (d0, d1, d2) -> (d0 * 64 + d1, d2),
     undef,
     <3x2>,
@@ -238,7 +238,7 @@ tensor<3x64x128xf32,
 
 After tilizing we'll have:
 ```mlir
 tensor<3x64x128xf32,
-  #tt.layout<
+  #tt.metal_layout<
     (d0, d1, d2) -> (d0 * 64 + d1, d2),
     undef,
     <3x2>,
@@ -256,7 +256,7 @@ intact.
 
 Padding can be a bit of an overloaded term, but in this context it refers to an
 out of bounds area in the physical memory allocation that has no real tensor
 data in it. The contents of this area are tracked by `oob_val` and the padding
-area can be automatically derived from the attributes of `tt.layout`.
+area can be automatically derived from the attributes of `tt.metal_layout`.
 
 Padding is a necessary evil that arises when a tensor is not evenly divisible
 by a grid shape or tile shape. It can also arise due to minimum Noc addressing
 requirements.
 
 Example of non-divisible grid:
 ```mlir
 tensor<53x63xf32,
-  #tt.layout<
+  #tt.metal_layout<
     (d0, d1) -> (d0, d1),
     undef,
     <3x2>,
@@ -284,7 +284,7 @@ cores and 1 scalar column of padding on the last column of cores.
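As a quick sanity check of the padding arithmetic above: the per-core shard shape is a ceiling division of the collapsed tensor shape by the grid shape, and the padding is whatever the grid over-allocates beyond the logical extent. A minimal Python sketch follows; the helper names are illustrative only and are not part of the tt dialect API.

```python
import math

# Illustrative helpers only; these names are not part of the tt dialect API.
def shard_shape(tensor_shape, grid_shape):
    # Each collapsed dimension is ceil-divided across the matching grid dimension.
    return [math.ceil(d / g) for d, g in zip(tensor_shape, grid_shape)]

def padding(tensor_shape, grid_shape):
    # Out-of-bounds scalars per dimension: grid extent * shard extent - logical extent.
    shards = shard_shape(tensor_shape, grid_shape)
    return [g * s - d for d, g, s in zip(tensor_shape, grid_shape, shards)]

print(shard_shape([53, 63], [3, 2]))  # [18, 32], i.e. memref<18x32xf32> per core
print(padding([53, 63], [3, 2]))      # [1, 1]: one padded row, one padded column
```

Running this on the `53x63` tensor over a `3x2` grid reproduces the `memref<18x32xf32>` shard and the single padded row and column described above.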
Taking the above example a step further, we could tilize it:
 ```mlir
 tensor<53x63xf32,
-  #tt.layout<
+  #tt.metal_layout<
     (d0, d1) -> (d0, d1),
     undef,
     <3x2>,
@@ -308,7 +308,7 @@ stride between dimensions.
 
 Consider tensor (w/ batch dim `2`):
 ```mlir
 tensor<2x8x32xf32,
-  #tt.layout<
+  #tt.metal_layout<
     (d0, d1, d2) -> (d0 * 8 + d1, d2),
     undef,
     <1x2>,
@@ -356,7 +356,7 @@ consider the following example with a 3d grid and `collapseIntervals=[(1, -1)]`.
 
 ```mlir
 tensor<2x3x64x128xf32,
-  #tt.layout<(d0, d1, d2, d3) -> (d0, d1 * 64 + d2, d3),
+  #tt.metal_layout<(d0, d1, d2, d3) -> (d0, d1 * 64 + d2, d3),
     undef,
     <2x2x4>,
     memref<1x3x1x!tt.tile<32 x 32, bfp_bf8>, #tt.memory_space<l1>>
@@ -387,7 +387,7 @@ under the same grid primitive that also divides tensor rows and columns.
 
 ## Concerns
 
-- `tt.layout` is deliberately flexible and tries to capture as many problematic
+- `tt.metal_layout` is deliberately flexible and tries to capture as many problematic
   use-cases we've run into in the past in a single, succinct representation.
   This flexibility will need to be further constrained by backends to avoid
   unsupported programming of this attribute.
diff --git a/include/ttmlir-c/TTAttrs.h b/include/ttmlir-c/TTAttrs.h
index 2e164ac13..263cd1d8e 100644
--- a/include/ttmlir-c/TTAttrs.h
+++ b/include/ttmlir-c/TTAttrs.h
@@ -50,9 +50,9 @@ MLIR_CAPI_EXPORTED MlirAttribute ttmlirTTSystemDescAttrGet(
     size_t chipCoordsSize, MlirAttribute *chipChannels,
     size_t chipChannelsSize);
 
-MLIR_CAPI_EXPORTED MlirAttribute
-ttmlirTTLayoutAttrGet(MlirContext ctx, MlirAffineMap linear, unsigned oobVal,
-                      MlirAttribute grid, MlirType memref, unsigned memLayout);
+MLIR_CAPI_EXPORTED MlirAttribute ttmlirTTMetalLayoutAttrGet(
+    MlirContext ctx, MlirAffineMap linear, unsigned oobVal, MlirAttribute grid,
+    MlirType memref, unsigned memLayout);
 
 MLIR_CAPI_EXPORTED MlirAttribute ttmlirTTMemorySpaceAttrGet(MlirContext ctx,
                                                             uint32_t memorySpace);
diff --git a/include/ttmlir/Dialect/TT/IR/TTOpsTypes.td b/include/ttmlir/Dialect/TT/IR/TTOpsTypes.td
index 99caac0c2..d5dc22e28 100644
--- a/include/ttmlir/Dialect/TT/IR/TTOpsTypes.td
+++ b/include/ttmlir/Dialect/TT/IR/TTOpsTypes.td
@@ -214,7 +214,7 @@ def TT_SystemDescAttr : TT_Attr<"SystemDesc", "system_desc"> {
   }];
 }
 
-def TT_LayoutAttr : TT_Attr<"Layout", "layout"> {
+def TT_MetalLayoutAttr : TT_Attr<"MetalLayout", "metal_layout"> {
   let summary = "Tensor layout attribute";
   let description = [{
     The tensor layout attribute captures how tensor data is sharded across a grid of devices, cores, and
@@ -241,7 +241,7 @@ def TT_LayoutAttr : TT_Attr<"Layout", "layout"> {
     Examples:
     ```mlir
     tensor<8x300xf32,
-      #tt.layout<(d0, d1) -> (d0, d1),
+      #tt.metal_layout<(d0, d1) -> (d0, d1),
        undef,
        <1x2>,
        memref<8x150xf32, #tt.memory_space<l1>>
@@ -249,7 +249,7 @@ def TT_LayoutAttr : TT_Attr<"Layout", "layout"> {
     >
 
     tensor<8x96x32xf32,
-      #tt.layout<(d0, d1, d2) -> (d0 * 96 + d1, d2),
+      #tt.metal_layout<(d0, d1, d2) -> (d0 * 96 + d1, d2),
        undef,
        <2x1>,
        memref<384x32xf32, #tt.memory_space<l1>>
@@ -257,7 +257,7 @@ def TT_LayoutAttr : TT_Attr<"Layout", "layout"> {
     >
 
     tensor<8x96x32xf32,
-      #tt.layout<(d0, d1, d2) -> (d0 * 96 + d1, d1, d2),
+      #tt.metal_layout<(d0, d1, d2) -> (d0 * 96 + d1, d1, d2),
        undef,
        <2x1x2>,
        memref<384x96x16xf32, #tt.memory_space<l1>>
@@ -265,7 +265,7 @@ def TT_LayoutAttr : TT_Attr<"Layout", "layout"> {
     >
 
     tensor<5x3x2x2x7x32x32xf32,
-      #tt.layout<
+      #tt.metal_layout<
        (d0, d1, d2, d3, d4, d5, d6) -> (d0 * 2688 + d1 * 896 + d2 * 448 + d3 * 224 + d4 * 32 + d5, d4, d5, d6),
        undef,
@@ -284,7 +284,7 @@ def TT_LayoutAttr : TT_Attr<"Layout", "layout"> {
   let assemblyFormat = "`<` $linear`,` $oob_val`,` $grid`,` $memref (`,` $mem_layout^)? `>`";
 
   let extraClassDeclaration = [{
-    static LayoutAttr get(::mlir::MLIRContext *context,
+    static MetalLayoutAttr get(::mlir::MLIRContext *context,
                           ArrayRef<int64_t> tensorShape,
                           Type elementType,
                           MemorySpace memorySpace = MemorySpace::System,
@@ -292,28 +292,28 @@ def TT_LayoutAttr : TT_Attr<"Layout", "layout"> {
                           ArrayRef<std::pair<std::int64_t, std::int64_t>> collapseIntervals = {{0, -1}},
                           OOBVal oobVal = OOBVal::Undef,
                           TensorMemoryLayout memLayout = TensorMemoryLayout::None);
-    static LayoutAttr get(::mlir::MLIRContext *context,
+    static MetalLayoutAttr get(::mlir::MLIRContext *context,
                           RankedTensorType ty,
                           MemorySpace memorySpace = MemorySpace::System,
                           GridAttr grid = {},
                           ArrayRef<std::pair<std::int64_t, std::int64_t>> collapseIntervals = {{0, -1}},
                           OOBVal oobVal = OOBVal::Undef,
                           TensorMemoryLayout memLayout = TensorMemoryLayout::None);
-    static LayoutAttr get(::mlir::MLIRContext *context,
+    static MetalLayoutAttr get(::mlir::MLIRContext *context,
                           RankedTensorType ty,
                           MemorySpace memorySpace,
                           GridAttr grid,
                           Type elementType,
                           TensorMemoryLayout memLayout = TensorMemoryLayout::None);
-    LayoutAttr withGrid(::mlir::MLIRContext *context, ArrayRef<int64_t> tensorShape, GridAttr grid, ArrayRef<std::pair<std::int64_t, std::int64_t>> collapseIntervals = {{0, -1}});
-    LayoutAttr withGrid(::mlir::MLIRContext *context,
+    MetalLayoutAttr withGrid(::mlir::MLIRContext *context, ArrayRef<int64_t> tensorShape, GridAttr grid, ArrayRef<std::pair<std::int64_t, std::int64_t>> collapseIntervals = {{0, -1}});
+    MetalLayoutAttr withGrid(::mlir::MLIRContext *context,
                         RankedTensorType ty,
                         GridAttr grid,
                         ArrayRef<std::pair<std::int64_t, std::int64_t>> collapseIntervals = {{0, -1}});
-    LayoutAttr withElementType(::mlir::MLIRContext *context, Type elementType);
-    LayoutAttr withMemorySpace(::mlir::MLIRContext *context, MemorySpace memorySpace);
-    LayoutAttr withMemoryLayout(::mlir::MLIRContext *context, TensorMemoryLayout memLayout);
-    LayoutAttr withShardShape(::mlir::MLIRContext *context, llvm::SmallVector<int64_t> shardShape);
+    MetalLayoutAttr withElementType(::mlir::MLIRContext *context, Type elementType);
+    MetalLayoutAttr withMemorySpace(::mlir::MLIRContext *context, MemorySpace memorySpace);
+    MetalLayoutAttr withMemoryLayout(::mlir::MLIRContext *context, TensorMemoryLayout memLayout);
+    MetalLayoutAttr withShardShape(::mlir::MLIRContext *context, llvm::SmallVector<int64_t> shardShape);
 
     uint64_t getMemrefSizeBytes() const;
     MemorySpace getMemorySpace() const;
@@ -400,7 +400,7 @@ def TT_DeviceAttr : TT_Attr<"Device", "device", []> {
     //   - DeviceL1: This ends up being exactly the shard size
     //   - DeviceDRAM: Is more nuanced because the whole tensor size gets paged and interleaved between all dram channels,
     //     due to paging and rounding the footprint ends up being close to: the_whole_tensor / num_dram_channels
-    uint64_t getLayoutSizeBytes(ArrayRef<int64_t> tensorShape, LayoutAttr layout, MemorySpace memorySpace) const;
+    uint64_t getLayoutSizeBytes(ArrayRef<int64_t> tensorShape, MetalLayoutAttr layout, MemorySpace memorySpace) const;
 
     // Returns the footprint size in bytes of the tensor distributed across the given memory space.
     // Forwards to getLayoutSizeBytes, see comment there for more info.
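For downstream users of these builders via the Python bindings, the rename is mechanical. A hedged sketch of the call-site change follows, assuming the binding setup (`ctx`, `tensorTy`, `grid`, and the enum values) is prepared as in test/python/tensor_layout.py, which is updated later in this patch; it is not a standalone script.

```python
# Setup (ctx, tensorTy, memorySpace, grid, oobVal, memoryLayout) is assumed to
# exist, as in test/python/tensor_layout.py; imports are not shown in the patch.

# Before this patch:
#   layout = tt.ir.LayoutAttr.get(
#       ctx, tensorTy, memorySpace, grid, [(0, -1)], oobVal, memoryLayout)

# After this patch, the argument order is unchanged; only the class is renamed:
layout = tt.ir.MetalLayoutAttr.get(
    ctx, tensorTy, memorySpace, grid, [(0, -1)], oobVal, memoryLayout
)
```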
diff --git a/include/ttmlir/Dialect/TTIR/IR/TTIROps.td b/include/ttmlir/Dialect/TTIR/IR/TTIROps.td index b3701b22e..6c0debce3 100644 --- a/include/ttmlir/Dialect/TTIR/IR/TTIROps.td +++ b/include/ttmlir/Dialect/TTIR/IR/TTIROps.td @@ -114,8 +114,8 @@ def TTIR_ToLayoutOp : TTIR_Op<"to_layout", [DestinationStyleOpInterface, TTIROpI - Some combination of the above ```llvm - #layout = #tt.layout<8192x128x1, undef, <1x1>, memref<64x128xf32, #system>> - #layout1 = #tt.layout<8192x128x1, undef, <1x1>, memref<64x128xf32, #l1_>> + #layout = #tt.metal_layout<8192x128x1, undef, <1x1>, memref<64x128xf32, #system>> + #layout1 = #tt.metal_layout<8192x128x1, undef, <1x1>, memref<64x128xf32, #l1_>> %1 = "ttir.to_layout"(%arg0, %0) : (tensor<64x128xf32, #layout>, tensor<64x128xf32, #layout1>) -> tensor<64x128xf32, #layout1> ``` }]; diff --git a/include/ttmlir/Target/Utils/MLIRToFlatbuffer.h b/include/ttmlir/Target/Utils/MLIRToFlatbuffer.h index ac23b9bb0..d5be2bb97 100644 --- a/include/ttmlir/Target/Utils/MLIRToFlatbuffer.h +++ b/include/ttmlir/Target/Utils/MLIRToFlatbuffer.h @@ -18,8 +18,9 @@ namespace mlir::tt { flatbuffers::Offset<::tt::target::LayoutDesc> -layoutAttrToFlatbuffer(FlatbufferObjectCache &cache, LayoutAttr attr, - ArrayRef logicalShape, DeviceAttr deviceAttr); +metalLayoutAttrToFlatbuffer(FlatbufferObjectCache &cache, MetalLayoutAttr attr, + ArrayRef logicalShape, + DeviceAttr deviceAttr); flatbuffers::Offset<::tt::target::LayoutDesc> ttnnLayoutAttrToFlatbuffer( FlatbufferObjectCache &cache, ttnn::TTNNLayoutAttr attr, @@ -438,9 +439,9 @@ toFlatbuffer(FlatbufferObjectCache &cache, ElementsAttr elementsAttr) { inline flatbuffers::Offset<::tt::target::LayoutDesc> encodingToFlatbuffer(FlatbufferObjectCache &cache, Attribute attr, ArrayRef logicalShape, DeviceAttr deviceAttr) { - if (isa(attr)) { - return layoutAttrToFlatbuffer(cache, cast(attr), logicalShape, - deviceAttr); + if (isa(attr)) { + return metalLayoutAttrToFlatbuffer(cache, cast(attr), + logicalShape, deviceAttr); } assert(isa(attr) && "unsupported layout attr"); diff --git a/lib/CAPI/TTAttrs.cpp b/lib/CAPI/TTAttrs.cpp index 196dc09f4..c329f41d5 100644 --- a/lib/CAPI/TTAttrs.cpp +++ b/lib/CAPI/TTAttrs.cpp @@ -119,15 +119,15 @@ MlirAttribute ttmlirTTSystemDescAttrGet( chipCapabilitiesUnwrapped, chipCoordsUnwrapped, chipChannelsUnwrapped)); } -MlirAttribute ttmlirTTLayoutAttrGet(MlirContext ctx, MlirAffineMap linear, - unsigned oobVal, MlirAttribute grid, - MlirType memref, unsigned memLayout) { +MlirAttribute ttmlirTTMetalLayoutAttrGet(MlirContext ctx, MlirAffineMap linear, + unsigned oobVal, MlirAttribute grid, + MlirType memref, unsigned memLayout) { mlir::AffineMap affineMap = mlir::AffineMap::getFromOpaquePointer(linear.ptr); - return wrap(LayoutAttr::get(unwrap(ctx), affineMap, - static_cast(oobVal), - mlir::cast(unwrap(grid)), - mlir::cast(unwrap(memref)), - static_cast(memLayout))); + return wrap(MetalLayoutAttr::get(unwrap(ctx), affineMap, + static_cast(oobVal), + mlir::cast(unwrap(grid)), + mlir::cast(unwrap(memref)), + static_cast(memLayout))); } MlirAttribute ttmlirTTMemorySpaceAttrGet(MlirContext ctx, diff --git a/lib/Conversion/TTIRToTTMetal/TTIRToTTMetal.cpp b/lib/Conversion/TTIRToTTMetal/TTIRToTTMetal.cpp index a3bbddc1d..60c032819 100644 --- a/lib/Conversion/TTIRToTTMetal/TTIRToTTMetal.cpp +++ b/lib/Conversion/TTIRToTTMetal/TTIRToTTMetal.cpp @@ -199,8 +199,8 @@ class TTIRToTTMetalLayoutRewriter : public OpRewritePattern { LogicalResult relayout(ttir::ToLayoutOp op, PatternRewriter &rewriter) const { auto inputTy = 
mlir::cast(op.getInput().getType()); auto outputTy = mlir::cast(op.getType()); - auto inputLayout = mlir::cast(inputTy.getEncoding()); - auto outputLayout = mlir::cast(outputTy.getEncoding()); + auto inputLayout = mlir::cast(inputTy.getEncoding()); + auto outputLayout = mlir::cast(outputTy.getEncoding()); tt::DeviceAttr device = op.getDevice(); assert(device); tt::SystemDescAttr systemDesc = op.getSystemDesc(); @@ -342,8 +342,8 @@ class TTIRToTTMetalLayoutRewriter : public OpRewritePattern { LogicalResult reformat(ttir::ToLayoutOp op, PatternRewriter &rewriter) const { auto inputTy = mlir::cast(op.getInput().getType()); auto outputTy = mlir::cast(op.getType()); - auto inputLayout = mlir::cast(inputTy.getEncoding()); - auto outputLayout = mlir::cast(outputTy.getEncoding()); + auto inputLayout = mlir::cast(inputTy.getEncoding()); + auto outputLayout = mlir::cast(outputTy.getEncoding()); bool shouldTilize = not inputLayout.isTiled() && outputLayout.isTiled(); bool shouldUntilize = inputLayout.isTiled() && not outputLayout.isTiled(); assert(shouldTilize ^ shouldUntilize); @@ -448,10 +448,10 @@ class TTIRToTTMetalLayoutRewriter : public OpRewritePattern { return failure(); } assert(inputTy.getShape() == outputTy.getShape()); - assert(mlir::isa(inputTy.getEncoding())); - assert(mlir::isa(outputTy.getEncoding())); - auto inputLayout = mlir::cast(inputTy.getEncoding()); - auto outputLayout = mlir::cast(outputTy.getEncoding()); + assert(mlir::isa(inputTy.getEncoding())); + assert(mlir::isa(outputTy.getEncoding())); + auto inputLayout = mlir::cast(inputTy.getEncoding()); + auto outputLayout = mlir::cast(outputTy.getEncoding()); auto components = op.compoundComponents(); bool isCompound = (static_cast(components.isLayoutChange) + @@ -1308,10 +1308,10 @@ class TTIRToTTMetalDispatchRewriter : public OpRewritePattern { SmallVector> calculateDataMovement(ArrayAttr iteratorTypes, const RankedTensorType &src, const RankedTensorType &dst, DeviceAttr device) const { - auto srcLayout = mlir::cast(src.getEncoding()); + auto srcLayout = mlir::cast(src.getEncoding()); assert(srcLayout.isTiled()); - auto dstLayout = mlir::cast(dst.getEncoding()); + auto dstLayout = mlir::cast(dst.getEncoding()); assert(dstLayout.isTiled()); assert(iteratorTypes.size() >= 2 && "Expected at least 2 iterator types"); diff --git a/lib/Dialect/TT/IR/TTDialect.cpp b/lib/Dialect/TT/IR/TTDialect.cpp index 6f629d697..1ac8a2223 100644 --- a/lib/Dialect/TT/IR/TTDialect.cpp +++ b/lib/Dialect/TT/IR/TTDialect.cpp @@ -13,13 +13,13 @@ using namespace mlir; using namespace mlir::tt; -// This is needed to hoist tt.layout attributes as named attributes declared at -// the module level. +// This is needed to hoist tt.metal_layout attributes as named attributes +// declared at the module level. 
struct TTOpAsmDialectInterface : public OpAsmDialectInterface { using OpAsmDialectInterface::OpAsmDialectInterface; AliasResult getAlias(Attribute attr, raw_ostream &os) const override { - if (llvm::isa(attr)) { + if (llvm::isa(attr)) { os << "layout"; return AliasResult::OverridableAlias; } diff --git a/lib/Dialect/TT/IR/TTOpsTypes.cpp b/lib/Dialect/TT/IR/TTOpsTypes.cpp index bbdd4e259..12166e443 100644 --- a/lib/Dialect/TT/IR/TTOpsTypes.cpp +++ b/lib/Dialect/TT/IR/TTOpsTypes.cpp @@ -466,7 +466,7 @@ calculateLogicalShardShape(mlir::ArrayRef tensorShape, return shardShape; } -LayoutAttr LayoutAttr::get( +MetalLayoutAttr MetalLayoutAttr::get( ::mlir::MLIRContext *context, ArrayRef tensorShape, Type elementType, MemorySpace memorySpace, GridAttr grid, ArrayRef> collapseIntervals, @@ -483,7 +483,7 @@ LayoutAttr LayoutAttr::get( return get(context, linear, oobVal, grid, memref, memLayout); } -LayoutAttr LayoutAttr::get( +MetalLayoutAttr MetalLayoutAttr::get( ::mlir::MLIRContext *context, RankedTensorType ty, MemorySpace memorySpace, GridAttr grid, ArrayRef> collapseIntervals, @@ -493,9 +493,11 @@ LayoutAttr LayoutAttr::get( collapseIntervals, oobVal, memLayout); } -LayoutAttr LayoutAttr::get(::mlir::MLIRContext *context, RankedTensorType ty, - MemorySpace memorySpace, GridAttr grid, - Type elementType, TensorMemoryLayout memLayout) { +MetalLayoutAttr MetalLayoutAttr::get(::mlir::MLIRContext *context, + RankedTensorType ty, + MemorySpace memorySpace, GridAttr grid, + Type elementType, + TensorMemoryLayout memLayout) { assert(ty); assert(grid); return get(context, ty.getShape(), elementType, memorySpace, grid, {{0, -1}}, @@ -506,7 +508,7 @@ LayoutAttr LayoutAttr::get(::mlir::MLIRContext *context, RankedTensorType ty, // compute the physical shape of the tensor, i.e the shape of the tensor // after the dimensions have been collapsed onto a grid. 
llvm::SmallVector -LayoutAttr::getPhysicalShape(ArrayRef logicalShape) const { +MetalLayoutAttr::getPhysicalShape(ArrayRef logicalShape) const { llvm::SmallVector physicalShape(getGrid().getShape().size()); SmallVector logicalShapeExprs( llvm::map_range(logicalShape, [context = getContext()](std::int64_t e) { @@ -525,7 +527,7 @@ LayoutAttr::getPhysicalShape(ArrayRef logicalShape) const { } llvm::SmallVector -LayoutAttr::getStride(ArrayRef logicalShape) const { +MetalLayoutAttr::getStride(ArrayRef logicalShape) const { llvm::SmallVector stride(logicalShape.size()); @@ -574,7 +576,7 @@ LayoutAttr::getStride(ArrayRef logicalShape) const { } llvm::SmallVector -LayoutAttr::getShardShape(bool convertTileToScalar) const { +MetalLayoutAttr::getShardShape(bool convertTileToScalar) const { SmallVector shardShape(getMemref().getShape()); auto elementType = getElementType(); if (mlir::isa(elementType) && convertTileToScalar) { @@ -583,11 +585,11 @@ LayoutAttr::getShardShape(bool convertTileToScalar) const { return shardShape; } -mlir::Type LayoutAttr::getElementType() const { +mlir::Type MetalLayoutAttr::getElementType() const { return getMemref().getElementType(); } -mlir::Type LayoutAttr::getScalarElementType() const { +mlir::Type MetalLayoutAttr::getScalarElementType() const { auto elementType = getElementType(); if (mlir::isa(elementType)) { return mlir::cast(elementType).getElementType(); @@ -595,33 +597,33 @@ mlir::Type LayoutAttr::getScalarElementType() const { return elementType; } -bool LayoutAttr::hasShardedTensorMemoryLayout() const { +bool MetalLayoutAttr::hasShardedTensorMemoryLayout() const { return (getMemLayout() == TensorMemoryLayout::HeightSharded or getMemLayout() == TensorMemoryLayout::WidthSharded or getMemLayout() == TensorMemoryLayout::BlockSharded); } -bool LayoutAttr::hasInterleavedTensorMemoryLayout() const { +bool MetalLayoutAttr::hasInterleavedTensorMemoryLayout() const { return (getMemLayout() == TensorMemoryLayout::Interleaved); } -bool LayoutAttr::hasShardedL1TensorMemoryLayout() const { +bool MetalLayoutAttr::hasShardedL1TensorMemoryLayout() const { return ::mlir::tt::isL1MemorySpace(getMemorySpace()) and (getMemLayout() == TensorMemoryLayout::HeightSharded or getMemLayout() == TensorMemoryLayout::WidthSharded or getMemLayout() == TensorMemoryLayout::BlockSharded); } -bool LayoutAttr::hasInterleavedL1TensorMemoryLayout() const { +bool MetalLayoutAttr::hasInterleavedL1TensorMemoryLayout() const { return ::mlir::tt::isL1MemorySpace(getMemorySpace()) and (getMemLayout() == TensorMemoryLayout::Interleaved); } -bool LayoutAttr::isTiled() const { +bool MetalLayoutAttr::isTiled() const { return ::mlir::isa<::mlir::tt::TileType>(getElementType()); } -uint64_t LayoutAttr::getElementSizeBytes() const { +uint64_t MetalLayoutAttr::getElementSizeBytes() const { mlir::Type elementType = getElementType(); if (mlir::isa(elementType)) { auto tileType = mlir::cast(elementType); @@ -630,7 +632,7 @@ uint64_t LayoutAttr::getElementSizeBytes() const { return elementType.getIntOrFloatBitWidth() / 8; } -uint64_t LayoutAttr::getMemrefSizeBytes() const { +uint64_t MetalLayoutAttr::getMemrefSizeBytes() const { MemRefType ty = getMemref(); auto shape = ty.getShape(); uint64_t size = getElementSizeBytes(); @@ -638,57 +640,60 @@ uint64_t LayoutAttr::getMemrefSizeBytes() const { std::multiplies()); } -LayoutAttr LayoutAttr::withGrid( +MetalLayoutAttr MetalLayoutAttr::withGrid( ::mlir::MLIRContext *context, ArrayRef tensorShape, GridAttr grid, ArrayRef> collapseIntervals) { return get(context, 
tensorShape, getElementType(), getMemorySpace(), grid, collapseIntervals, getOobVal(), getMemLayout()); } -LayoutAttr LayoutAttr::withGrid( +MetalLayoutAttr MetalLayoutAttr::withGrid( ::mlir::MLIRContext *context, RankedTensorType ty, GridAttr grid, ArrayRef> collapseIntervals) { assert(ty); - return LayoutAttr::withGrid(context, ty.getShape(), grid, collapseIntervals); + return MetalLayoutAttr::withGrid(context, ty.getShape(), grid, + collapseIntervals); } -LayoutAttr LayoutAttr::withElementType(::mlir::MLIRContext *context, - Type elementType) { - return LayoutAttr::get( +MetalLayoutAttr MetalLayoutAttr::withElementType(::mlir::MLIRContext *context, + Type elementType) { + return MetalLayoutAttr::get( context, getLinear(), getOobVal(), getGrid(), buildMemRef(context, getShardShape(), elementType, getMemorySpace()), getMemLayout()); } -LayoutAttr LayoutAttr::withMemorySpace(::mlir::MLIRContext *context, - MemorySpace memorySpace) { - return LayoutAttr::get( +MetalLayoutAttr MetalLayoutAttr::withMemorySpace(::mlir::MLIRContext *context, + MemorySpace memorySpace) { + return MetalLayoutAttr::get( context, getLinear(), getOobVal(), getGrid(), buildMemRef(context, getShardShape(), getElementType(), memorySpace), getMemLayout()); } -LayoutAttr LayoutAttr::withMemoryLayout(::mlir::MLIRContext *context, - TensorMemoryLayout memLayout) { - return LayoutAttr::get( +MetalLayoutAttr +MetalLayoutAttr::withMemoryLayout(::mlir::MLIRContext *context, + TensorMemoryLayout memLayout) { + return MetalLayoutAttr::get( context, getLinear(), getOobVal(), getGrid(), buildMemRef( context, getShardShape(), getElementType(), getMemorySpace()), memLayout); } -LayoutAttr LayoutAttr::withShardShape(::mlir::MLIRContext *context, - llvm::SmallVector shardShape) { - return LayoutAttr::get( +MetalLayoutAttr +MetalLayoutAttr::withShardShape(::mlir::MLIRContext *context, + llvm::SmallVector shardShape) { + return MetalLayoutAttr::get( context, getLinear(), getOobVal(), getGrid(), buildMemRef( context, shardShape, getElementType(), getMemorySpace()), getMemLayout()); } -MemorySpace LayoutAttr::getMemorySpace() const { +MemorySpace MetalLayoutAttr::getMemorySpace() const { return mlir::cast(getMemref().getMemorySpace()) .getValue(); } @@ -696,7 +701,7 @@ MemorySpace LayoutAttr::getMemorySpace() const { // Returns shape of the tensor after tilization is applied to the two inner most // dimensions. 
llvm::SmallVector -LayoutAttr::getTiledShape(llvm::ArrayRef tensorShape) const { +MetalLayoutAttr::getTiledShape(llvm::ArrayRef tensorShape) const { assert(isTiled() && "Expected a tiled layout"); mlir::AffineMap linear = getLinear(); @@ -716,7 +721,7 @@ LayoutAttr::getTiledShape(llvm::ArrayRef tensorShape) const { return ttmlir::utils::evalShape(tiled, tensorShape); } -mlir::AffineMap LayoutAttr::getIdentityTileLinearMap() const { +mlir::AffineMap MetalLayoutAttr::getIdentityTileLinearMap() const { assert(isTiled() && "Expected a tiled layout"); return mlir::AffineMap::getMultiDimIdentityMap(getLinear().getNumResults(), @@ -735,7 +740,7 @@ mlir::AffineMap LayoutAttr::getIdentityTileLinearMap() const { // (d0, d1)[2, 3] -> // (0, d0 floordiv 2, d1 floordiv 3, (d0 mod 2) * 3 + d1 mod 3) // -mlir::AffineMap LayoutAttr::replaceMemoryMapSymbolsWithShardShape( +mlir::AffineMap MetalLayoutAttr::replaceMemoryMapSymbolsWithShardShape( AffineMap physicalMemoryMap) const { mlir::SmallVector shardShape = getShardShape(false /*convertTileToScalar*/); @@ -763,8 +768,8 @@ mlir::AffineMap LayoutAttr::replaceMemoryMapSymbolsWithShardShape( // grid. Then it composes the logical grid projection with physical memory // mapping. mlir::AffineMap -LayoutAttr::projectOnto(mlir::AffineMap linearMap, - mlir::AffineMap physicalMemoryMap) const { +MetalLayoutAttr::projectOnto(mlir::AffineMap linearMap, + mlir::AffineMap physicalMemoryMap) const { assert(getGrid().getShape().size() == physicalMemoryMap.getNumDims() && "Layout and device grids must have same number of dimensions"); assert(getLinear().getNumResults() == physicalMemoryMap.getNumDims() && @@ -1013,7 +1018,7 @@ DeviceAttr DeviceAttr::get(::mlir::MLIRContext *context, // Sample the last index in the tensor to get the last addressable element of // the tensor to determine its footprint in memory. uint64_t DeviceAttr::getLayoutSizeBytes(ArrayRef tensorScalarShape, - LayoutAttr layout, + MetalLayoutAttr layout, MemorySpace memorySpace) const { SmallVector shape = layout.isTiled() ? 
layout.getTiledShape(tensorScalarShape) @@ -1035,9 +1040,9 @@ uint64_t DeviceAttr::getLayoutSizeBytes(ArrayRef tensorScalarShape, uint64_t DeviceAttr::getTensorSizeBytes(RankedTensorType tensorType, MemorySpace memorySpace) const { assert(tensorType.getEncoding()); - return getLayoutSizeBytes(tensorType.getShape(), - mlir::cast(tensorType.getEncoding()), - memorySpace); + return getLayoutSizeBytes( + tensorType.getShape(), + mlir::cast(tensorType.getEncoding()), memorySpace); } ::mlir::LogicalResult diff --git a/lib/Dialect/TTIR/IR/TTIROps.cpp b/lib/Dialect/TTIR/IR/TTIROps.cpp index 8f404323e..11cfbb8fb 100644 --- a/lib/Dialect/TTIR/IR/TTIROps.cpp +++ b/lib/Dialect/TTIR/IR/TTIROps.cpp @@ -908,9 +908,9 @@ ::mlir::LogicalResult mlir::tt::ttir::ToLayoutOp::verify() { mlir::tt::ttir::ToLayoutOp::CompoundComponents mlir::tt::ttir::ToLayoutOp::compoundComponents() { auto inputLayout = - mlir::cast(getInput().getType().getEncoding()); + mlir::cast(getInput().getType().getEncoding()); auto outputLayout = - mlir::cast(getOutput().getType().getEncoding()); + mlir::cast(getOutput().getType().getEncoding()); bool isLayoutChange = inputLayout.getLinear() != outputLayout.getLinear(); bool isGridChange = inputLayout.getGrid() != outputLayout.getGrid(); bool isShardChange = @@ -1216,7 +1216,7 @@ ::mlir::LogicalResult mlir::tt::ttir::MatmulOp::verify() { // AllocOp verification ::mlir::LogicalResult mlir::tt::ttir::AllocOp::verify() { - auto layout = mlir::dyn_cast_or_null( + auto layout = mlir::dyn_cast_or_null( getResult().getType().getEncoding()); if (not layout) { return emitOpError("Result type missing layout attribute"); diff --git a/lib/Dialect/TTIR/Transforms/Allocate.cpp b/lib/Dialect/TTIR/Transforms/Allocate.cpp index 37e788385..a643f041c 100644 --- a/lib/Dialect/TTIR/Transforms/Allocate.cpp +++ b/lib/Dialect/TTIR/Transforms/Allocate.cpp @@ -22,13 +22,13 @@ inline MemorySpace getMemorySpace(MemRefType memref) { return mlir::cast(memref.getMemorySpace()).getValue(); } -inline MemorySpace getMemorySpace(LayoutAttr layout) { +inline MemorySpace getMemorySpace(MetalLayoutAttr layout) { return getMemorySpace(layout.getMemref()); } inline MemorySpace getMemorySpace(RankedTensorType ty) { assert(ty.getEncoding()); - auto layout = mlir::cast(ty.getEncoding()); + auto layout = mlir::cast(ty.getEncoding()); return getMemorySpace(layout); } diff --git a/lib/Dialect/TTIR/Transforms/Generic.cpp b/lib/Dialect/TTIR/Transforms/Generic.cpp index 005e12c07..3bf96f3cd 100644 --- a/lib/Dialect/TTIR/Transforms/Generic.cpp +++ b/lib/Dialect/TTIR/Transforms/Generic.cpp @@ -257,7 +257,7 @@ class TTIRGenericRegionRewriter auto resEncoding = mlir::cast(op->getResult(0).getType()).getEncoding(); if (resEncoding) { - auto resLayout = mlir::cast(resEncoding); + auto resLayout = mlir::cast(resEncoding); gridAttr = resLayout.getGrid(); } @@ -339,7 +339,7 @@ struct TTIRGenericOperandsToMemrefRewriter auto matchingOperand = generic.getMatchingOperand(blockArgNumber); auto operandType = matchingOperand.getType(); - auto bufferLayout = mlir::cast( + auto bufferLayout = mlir::cast( mlir::cast(operandType).getEncoding()); auto bufferType = operandType; @@ -349,7 +349,7 @@ struct TTIRGenericOperandsToMemrefRewriter assert(static_cast(cbIndex) < generic.getCbs().size()); auto cb = generic.getCbs()[cbIndex]; auto cbType = cb.getType(); - auto cbLayout = mlir::cast( + auto cbLayout = mlir::cast( mlir::cast(cbType).getEncoding()); bufferLayout = cbLayout; bufferType = cbType; @@ -387,7 +387,7 @@ class TTIRGenericRegionMemrefTypeConverter 
: public TypeConverter { if (mlir::isa(encoding)) { return type; } - auto layout = mlir::cast(type.getEncoding()); + auto layout = mlir::cast(type.getEncoding()); auto buffer = BufferAttr::get(ctx, layout.getMemref(), BufferAccess::Alias); return RankedTensorType::get(buffer.getShape(), type.getElementType(), @@ -451,11 +451,11 @@ class TTIRGenericOpCBsRewriter : public OpRewritePattern { // Enforcing tiled layout as in kernel we always want to work with tiles. auto desiredElementType = rewriter.getType(ty.getElementType()); - auto desiredLayout = rewriter.getAttr( + auto desiredLayout = rewriter.getAttr( ty, MemorySpace::DeviceL1, generic.getGrid(), desiredElementType); auto operandTy = operand.getType(); - auto operandLayout = mlir::cast( + auto operandLayout = mlir::cast( mlir::cast(operandTy).getEncoding()); if (desiredLayout.getGrid() == operandLayout.getGrid()) { diff --git a/lib/Dialect/TTIR/Transforms/Layout.cpp b/lib/Dialect/TTIR/Transforms/Layout.cpp index d7eef6732..c3ccbf1a4 100644 --- a/lib/Dialect/TTIR/Transforms/Layout.cpp +++ b/lib/Dialect/TTIR/Transforms/Layout.cpp @@ -38,20 +38,21 @@ class TTIRLayoutTensorTypeConverter : public TypeConverter { TTIRLayoutTensorTypeConverter(MLIRContext *ctx, MemorySpace initMemorySpace, GridAttr deviceGrid) { addConversion([](Type type) { return type; }); - addConversion([ctx, initMemorySpace, - deviceGrid](RankedTensorType type) -> Type { - auto layout = type.getEncoding(); - if (layout) { - return type; - } - std::int64_t deviceGridRank = deviceGrid.getShape().size(); - // Default to single core grid - auto tensorGrid = GridAttr::get(ctx, deviceGridRank); - // Default to initMemorySpace, the optimizer might decide otherwise - auto newLayout = LayoutAttr::get(ctx, type, initMemorySpace, tensorGrid); - return RankedTensorType::get(type.getShape(), type.getElementType(), - newLayout); - }); + addConversion( + [ctx, initMemorySpace, deviceGrid](RankedTensorType type) -> Type { + auto layout = type.getEncoding(); + if (layout) { + return type; + } + std::int64_t deviceGridRank = deviceGrid.getShape().size(); + // Default to single core grid + auto tensorGrid = GridAttr::get(ctx, deviceGridRank); + // Default to initMemorySpace, the optimizer might decide otherwise + auto newLayout = + MetalLayoutAttr::get(ctx, type, initMemorySpace, tensorGrid); + return RankedTensorType::get(type.getShape(), type.getElementType(), + newLayout); + }); } }; @@ -129,7 +130,7 @@ createToLayoutOp(PatternRewriter &rewriter, Location loc, Value input, TensorMemoryLayout desiredMemLayout, bool tiled) { auto ty = mlir::cast(input.getType()); - auto currLayout = mlir::cast(ty.getEncoding()); + auto currLayout = mlir::cast(ty.getEncoding()); auto currMemorySpace = currLayout.getMemorySpace(); auto currElementType = currLayout.getElementType(); auto currMemLayout = currLayout.getMemLayout(); @@ -142,9 +143,9 @@ createToLayoutOp(PatternRewriter &rewriter, Location loc, Value input, return std::nullopt; } - auto desiredLayout = - rewriter.getAttr(ty, desiredMemorySpace, currLayout.getGrid(), - desiredElementType, desiredMemLayout); + auto desiredLayout = rewriter.getAttr( + ty, desiredMemorySpace, currLayout.getGrid(), desiredElementType, + desiredMemLayout); tensor::EmptyOp existingEmpty = input.getDefiningOp(); if (existingEmpty) { @@ -343,7 +344,7 @@ class TTIRSplitCompoundLayoutRewriter : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; Value createToLayoutOp(PatternRewriter &rewriter, Location loc, Value input, - LayoutAttr desiredLayout) const { + 
MetalLayoutAttr desiredLayout) const { auto ty = mlir::cast(input.getType()); auto output = rewriter.create( loc, ty.getShape(), ty.getElementType(), desiredLayout); @@ -353,7 +354,7 @@ class TTIRSplitCompoundLayoutRewriter : public OpRewritePattern { } Value bounce(PatternRewriter &rewriter, ToLayoutOp op, - LayoutAttr bounceLayout) const { + MetalLayoutAttr bounceLayout) const { auto bounced = createToLayoutOp(rewriter, op.getLoc(), op.getInput(), bounceLayout); return rewriter.replaceOpWithNewOp( @@ -375,8 +376,8 @@ class TTIRSplitCompoundLayoutRewriter : public OpRewritePattern { auto inputType = mlir::cast(op.getInput().getType()); auto outputType = mlir::cast(op.getOutput().getType()); - auto inputLayout = mlir::cast(inputType.getEncoding()); - auto outputLayout = mlir::cast(outputType.getEncoding()); + auto inputLayout = mlir::cast(inputType.getEncoding()); + auto outputLayout = mlir::cast(outputType.getEncoding()); bool inputL1 = inputLayout.getMemorySpace() == MemorySpace::DeviceL1; bool outputL1 = outputLayout.getMemorySpace() == MemorySpace::DeviceL1; diff --git a/lib/Dialect/TTMetal/IR/TTMetalOps.cpp b/lib/Dialect/TTMetal/IR/TTMetalOps.cpp index 49baf51e0..7f78c1afc 100644 --- a/lib/Dialect/TTMetal/IR/TTMetalOps.cpp +++ b/lib/Dialect/TTMetal/IR/TTMetalOps.cpp @@ -17,7 +17,7 @@ namespace mlir::tt::ttmetal { ::mlir::LogicalResult HostWriteOp::verify() { ::mlir::RankedTensorType outputTy = getOutput().getType(); auto outputLayout = - mlir::dyn_cast_or_null(outputTy.getEncoding()); + mlir::dyn_cast_or_null(outputTy.getEncoding()); if (not outputLayout) { return emitOpError("Input tensor missing layout attribute"); } @@ -30,7 +30,7 @@ ::mlir::LogicalResult HostWriteOp::verify() { ::mlir::LogicalResult HostReadOp::verify() { ::mlir::RankedTensorType outputTy = getOutput().getType(); auto outputLayout = - mlir::dyn_cast_or_null(outputTy.getEncoding()); + mlir::dyn_cast_or_null(outputTy.getEncoding()); if (not outputLayout) { return emitOpError("Input tensor missing layout attribute"); } @@ -41,7 +41,7 @@ ::mlir::LogicalResult HostReadOp::verify() { } ::mlir::LogicalResult AllocOp::verify() { - auto layout = mlir::dyn_cast_or_null( + auto layout = mlir::dyn_cast_or_null( getResult().getType().getEncoding()); if (not layout) { return emitOpError("Result type missing layout attribute"); @@ -76,7 +76,7 @@ ::mlir::LogicalResult AllocOp::verify() { ::mlir::LogicalResult DispatchOp::verify() { // Assert inputs/outputs device memspace for (auto operand : getOperands()) { - auto layout = mlir::dyn_cast_or_null( + auto layout = mlir::dyn_cast_or_null( mlir::cast(operand.getType()).getEncoding()); if (not layout) { return emitOpError("Input tensor missing layout attribute"); diff --git a/lib/Target/TTMetal/TTMetalToFlatbuffer.cpp b/lib/Target/TTMetal/TTMetalToFlatbuffer.cpp index 47e15accf..e82deaf63 100644 --- a/lib/Target/TTMetal/TTMetalToFlatbuffer.cpp +++ b/lib/Target/TTMetal/TTMetalToFlatbuffer.cpp @@ -62,18 +62,18 @@ memrefAttrToFlatbuffer(FlatbufferObjectCache &cache, MemRefType memref, toFlatbuffer(cache, memLayout), size); } -flatbuffers::Offset<::tt::target::LayoutDesc> -layoutAttrToFlatbuffer(FlatbufferObjectCache &cache, LayoutAttr layoutAttr, - ArrayRef logicalShape, DeviceAttr deviceAttr) { - auto strideInt64 = layoutAttr.getStride(logicalShape); +flatbuffers::Offset<::tt::target::LayoutDesc> metalLayoutAttrToFlatbuffer( + FlatbufferObjectCache &cache, MetalLayoutAttr metalLayoutAttr, + ArrayRef logicalShape, DeviceAttr deviceAttr) { + auto strideInt64 = 
metalLayoutAttr.getStride(logicalShape); std::vector stride(strideInt64.begin(), strideInt64.end()); - auto coreRangeSet = - toFlatbuffer(cache, layoutAttr.getGrid(), deviceAttr.getWorkerGrid()); + auto coreRangeSet = toFlatbuffer(cache, metalLayoutAttr.getGrid(), + deviceAttr.getWorkerGrid()); return ::tt::target::CreateLayoutDescDirect( - *cache.fbb, &stride, toFlatbuffer(cache, layoutAttr.getOobVal()), + *cache.fbb, &stride, toFlatbuffer(cache, metalLayoutAttr.getOobVal()), &coreRangeSet, - cache.getOrCreate(layoutAttr.getMemref(), memrefAttrToFlatbuffer, - layoutAttr.getMemLayout())); + cache.getOrCreate(metalLayoutAttr.getMemref(), memrefAttrToFlatbuffer, + metalLayoutAttr.getMemLayout())); } } // namespace mlir::tt @@ -277,7 +277,7 @@ static std::shared_ptr translateModuleToFlatbuffer( argumentAllocations[input.getArgNumber()]); assert( argAlloc.getMemorySpace() == - mlir::cast( + mlir::cast( mlir::cast(input.getType()).getEncoding()) .getMemorySpace() && "argument allocation memory space does not match tensor type " diff --git a/python/TTModule.cpp b/python/TTModule.cpp index c70d7df97..b8d543410 100644 --- a/python/TTModule.cpp +++ b/python/TTModule.cpp @@ -16,14 +16,14 @@ namespace mlir::ttmlir::python { void populateTTModule(py::module &m) { - tt_attribute_class(m, "LayoutAttr") + tt_attribute_class(m, "MetalLayoutAttr") .def_static("get", [](MlirContext ctx, MlirType rankedTensorType, uint32_t memorySpaceValue, MlirAttribute grid, std::vector> collapseIntervals, uint32_t oobValValue, uint32_t memLayoutValue) { - return wrap(tt::LayoutAttr::get( + return wrap(tt::MetalLayoutAttr::get( unwrap(ctx), mlir::cast(unwrap(rankedTensorType)), static_cast(memorySpaceValue), @@ -37,7 +37,7 @@ void populateTTModule(py::module &m) { std::vector> collapseIntervals) { return wrap( - mlir::cast(unwrap(self)) + mlir::cast(unwrap(self)) .withGrid(unwrap(ctx), tensorShape, mlir::cast(unwrap(grid)), collapseIntervals)); @@ -47,7 +47,7 @@ void populateTTModule(py::module &m) { std::vector tensorShape, MlirAttribute grid, std::vector> collapseIntervals) { - return mlir::cast(unwrap(self)) + return mlir::cast(unwrap(self)) .withGrid(unwrap(ctx), tensorShape, mlir::cast(unwrap(grid)), collapseIntervals); @@ -55,13 +55,13 @@ void populateTTModule(py::module &m) { .def_static( "with_element_type", [](MlirContext ctx, MlirAttribute self, MlirType elementType) { - return wrap(mlir::cast(unwrap(self)) + return wrap(mlir::cast(unwrap(self)) .withElementType(unwrap(ctx), unwrap(elementType))); }) .def_static( "with_element_type_", [](MlirContext ctx, MlirAttribute self, MlirType elementType) { - return mlir::cast(unwrap(self)) + return mlir::cast(unwrap(self)) .withElementType(unwrap(ctx), unwrap(elementType)); }) .def("getLayout", @@ -73,38 +73,45 @@ void populateTTModule(py::module &m) { mlir::cast(unwrap(type)); assert(tensor.getEncoding()); // Make sure that this Tensor has an // encoding value - tt::LayoutAttr layout = - mlir::cast(tensor.getEncoding()); + tt::MetalLayoutAttr layout = + mlir::cast(tensor.getEncoding()); return layout; }) - .def("wrapped", [](tt::LayoutAttr const &self) { return wrap(self); }) - .def_property_readonly( - "stride", - [](tt::LayoutAttr const &self, std::vector logicalShape) { - auto stride = self.getStride(logicalShape); - return std::vector(stride.begin(), stride.end()); - }) - .def_property_readonly("oobval", &tt::LayoutAttr::getOobVal) + .def("wrapped", + [](tt::MetalLayoutAttr const &self) { return wrap(self); }) + .def_property_readonly("stride", + 
[](tt::MetalLayoutAttr const &self, + std::vector logicalShape) { + auto stride = self.getStride(logicalShape); + return std::vector(stride.begin(), + stride.end()); + }) + .def_property_readonly("oobval", &tt::MetalLayoutAttr::getOobVal) .def_property_readonly("oobval_as_int", - [](tt::LayoutAttr la) { + [](tt::MetalLayoutAttr la) { return static_cast(la.getOobVal()); }) - .def_property_readonly("grid_attr", &tt::LayoutAttr::getGrid) + .def_property_readonly("grid_attr", &tt::MetalLayoutAttr::getGrid) .def_property_readonly( - "memref", [](tt::LayoutAttr self) { return wrap(self.getMemref()); }) - .def_property_readonly("memory_space", &tt::LayoutAttr::getMemorySpace) + "memref", + [](tt::MetalLayoutAttr self) { return wrap(self.getMemref()); }) + .def_property_readonly("memory_space", + &tt::MetalLayoutAttr::getMemorySpace) .def_property_readonly("memory_space_as_int", - [](tt::LayoutAttr la) { + [](tt::MetalLayoutAttr la) { return static_cast( la.getMemorySpace()); }) - .def_property_readonly("shard_shape", &tt::LayoutAttr::getShardShape) - .def_property_readonly("memory_layout", &tt::LayoutAttr::getMemLayout) + .def_property_readonly("shard_shape", &tt::MetalLayoutAttr::getShardShape) + .def_property_readonly("memory_layout", + &tt::MetalLayoutAttr::getMemLayout) .def_property_readonly( - "linear", [](tt::LayoutAttr self) { return wrap(self.getLinear()); }) - .def_property_readonly("memory_layout_as_int", [](tt::LayoutAttr la) { - return static_cast(la.getMemLayout()); - }); + "linear", + [](tt::MetalLayoutAttr self) { return wrap(self.getLinear()); }) + .def_property_readonly("memory_layout_as_int", + [](tt::MetalLayoutAttr la) { + return static_cast(la.getMemLayout()); + }); tt_attribute_class(m, "GridAttr") .def_static("get", diff --git a/test/python/tensor_layout.py b/test/python/tensor_layout.py index 39a9a728b..2dbf249e9 100644 --- a/test/python/tensor_layout.py +++ b/test/python/tensor_layout.py @@ -34,7 +34,7 @@ def createTensorLayout( shape, F32Type.get(ctx), None, Location.unknown(ctx) ) memoryLayout = getTensorMemoryLayout(memorySpace) - layout = tt.ir.LayoutAttr.get( + layout = tt.ir.MetalLayoutAttr.get( ctx, tensorTy, memorySpace, grid, collapseIntervals, oobVal, memoryLayout ) return RankedTensorType.get(shape, F32Type.get(ctx), layout, Location.unknown(ctx)) @@ -42,7 +42,7 @@ def createTensorLayout( def tilize(tensor, dataType, tileShape=[32, 32]): assert len(tileShape) == 2 - return tt.ir.LayoutAttr.with_element_type_( + return tt.ir.MetalLayoutAttr.with_element_type_( ctx, tensor.encoding, tt.ir.TileType.get(ctx, tileShape[0], tileShape[1], dataType), @@ -52,15 +52,15 @@ def tilize(tensor, dataType, tileShape=[32, 32]): def parallelize(tensor, grid, collapseIntervals=[(0, -1)]): if isinstance(grid, list) or isinstance(grid, tuple): grid = tt.ir.GridAttr.get(ctx, list(grid)) - return tt.ir.LayoutAttr.with_grid_( + return tt.ir.MetalLayoutAttr.with_grid_( ctx, tensor.encoding, tensor.shape, grid, collapseIntervals ) t0 = createTensorLayout([2, 3, 64, 128], [2, 4]) -# CHECK: tensor<2x3x64x128xf32, #tt.layout<(d0, d1, d2, d3) -> (d0 * 192 + d1 * 64 + d2, d3), undef, <2x4>, memref<192x32xf32, #tt.memory_space>, interleaved>> +# CHECK: tensor<2x3x64x128xf32, #tt.metal_layout<(d0, d1, d2, d3) -> (d0 * 192 + d1 * 64 + d2, d3), undef, <2x4>, memref<192x32xf32, #tt.memory_space>, interleaved>> print(t0) -# CHECK: #tt.layout<(d0, d1, d2, d3) -> (d0 * 192 + d1 * 64 + d2, d3), undef, <2x4>, memref<6x1x!tt.tile<32x32, bfp_bf8>, #tt.memory_space>, interleaved> +# CHECK: 
#tt.metal_layout<(d0, d1, d2, d3) -> (d0 * 192 + d1 * 64 + d2, d3), undef, <2x4>, memref<6x1x!tt.tile<32x32, bfp_bf8>, #tt.memory_space>, interleaved> print(tilize(t0, tt.DataType.BFP_BFloat8).wrapped()) print(parallelize(t0, [3, 2]).wrapped()) @@ -69,24 +69,24 @@ def parallelize(tensor, grid, collapseIntervals=[(0, -1)]): print(parallelize(t1, [3, 2]).wrapped()) t2 = createTensorLayout([128], [4], collapseIntervals=[(0, -1)]) -# CHECK: tensor<128xf32, #tt.layout<(d0) -> (d0), undef, <4>, memref<32xf32, #tt.memory_space>, interleaved>> +# CHECK: tensor<128xf32, #tt.metal_layout<(d0) -> (d0), undef, <4>, memref<32xf32, #tt.memory_space>, interleaved>> print(t2) -# CHECK: #tt.layout<(d0) -> (d0), undef, <2>, memref<64xf32, #tt.memory_space>, interleaved> +# CHECK: #tt.metal_layout<(d0) -> (d0), undef, <2>, memref<64xf32, #tt.memory_space>, interleaved> print(parallelize(t2, [2]).wrapped()) -# CHECK: #tt.layout<(d0) -> (0, d0), undef, <1x2>, memref<1x64xf32, #tt.memory_space>, interleaved> +# CHECK: #tt.metal_layout<(d0) -> (0, d0), undef, <1x2>, memref<1x64xf32, #tt.memory_space>, interleaved> print(parallelize(t2, [1, 2]).wrapped()) t3 = createTensorLayout([128], [1, 4], collapseIntervals=[(0, -1)]) -# CHECK: tensor<128xf32, #tt.layout<(d0) -> (0, d0), undef, <1x4>, memref<1x32xf32, #tt.memory_space>, interleaved>> +# CHECK: tensor<128xf32, #tt.metal_layout<(d0) -> (0, d0), undef, <1x4>, memref<1x32xf32, #tt.memory_space>, interleaved>> print(t3) -# CHECK: #tt.layout<(d0) -> (0, d0), undef, <1x4>, memref<1x1x!tt.tile<32x32, bfp_bf8>, #tt.memory_space>, interleaved> +# CHECK: #tt.metal_layout<(d0) -> (0, d0), undef, <1x4>, memref<1x1x!tt.tile<32x32, bfp_bf8>, #tt.memory_space>, interleaved> print(tilize(t3, tt.DataType.BFP_BFloat8).wrapped()) t4 = createTensorLayout([128], [1, 2, 4], collapseIntervals=[(0, -1)]) -# CHECK: tensor<128xf32, #tt.layout<(d0) -> (0, 0, d0), undef, <1x2x4>, memref<1x1x32xf32, #tt.memory_space>, interleaved>> +# CHECK: tensor<128xf32, #tt.metal_layout<(d0) -> (0, 0, d0), undef, <1x2x4>, memref<1x1x32xf32, #tt.memory_space>, interleaved>> print(t4) -# CHECK: #tt.layout<(d0) -> (0, 0, d0), undef, <1x2x4>, memref<1x1x1x!tt.tile<32x32, bfp_bf8>, #tt.memory_space>, interleaved> +# CHECK: #tt.metal_layout<(d0) -> (0, 0, d0), undef, <1x2x4>, memref<1x1x1x!tt.tile<32x32, bfp_bf8>, #tt.memory_space>, interleaved> print(tilize(t4, tt.DataType.BFP_BFloat8).wrapped()) -# CHECK: #tt.layout<(d0) -> (0, d0), undef, <1x2>, memref<1x64xf32, #tt.memory_space>, interleaved> +# CHECK: #tt.metal_layout<(d0) -> (0, d0), undef, <1x2>, memref<1x64xf32, #tt.memory_space>, interleaved> print(parallelize(t4, [1, 2]).wrapped()) diff --git a/test/ttmlir/Dialect/TTIR/split_compound_layout.mlir b/test/ttmlir/Dialect/TTIR/split_compound_layout.mlir index 2335fb0df..42cab3d1f 100644 --- a/test/ttmlir/Dialect/TTIR/split_compound_layout.mlir +++ b/test/ttmlir/Dialect/TTIR/split_compound_layout.mlir @@ -3,21 +3,21 @@ #dram = #tt.memory_space #l1_ = #tt.memory_space -// CHECK-DAG: #[[row_major1x1:.*]] = #tt.layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<64x128xf32, #l1_>, interleaved> -// CHECK-DAG: #[[row_major1x1_T:.*]] = #tt.layout<(d0, d1) -> (d1, d0), undef, <1x1>, memref<64x128xf32, #l1_>, interleaved> -// CHECK-DAG: #[[row_major2x2:.*]] = #tt.layout<(d0, d1) -> (d0, d1), undef, <2x2>, memref<32x64xf32, #l1_>, interleaved> -// CHECK-DAG: #[[tile1x1_f32:.*]] = #tt.layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<2x4x!tt.tile<32x32, f32>, #l1_>, interleaved> -// CHECK-DAG: #[[tile1x1_bf16:.*]] 
= #tt.layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<2x4x!tt.tile<32x32, bf16>, #l1_>, interleaved> -// CHECK-DAG: #[[tile1x1_f32_dram:.*]] = #tt.layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<2x4x!tt.tile<32x32, f32>, #dram>, interleaved> -// CHECK-DAG: #[[tile2x2_f32:.*]] = #tt.layout<(d0, d1) -> (d0, d1), undef, <2x2>, memref<1x2x!tt.tile<32x32, f32>, #l1_>, interleaved> +// CHECK-DAG: #[[row_major1x1:.*]] = #tt.metal_layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<64x128xf32, #l1_>, interleaved> +// CHECK-DAG: #[[row_major1x1_T:.*]] = #tt.metal_layout<(d0, d1) -> (d1, d0), undef, <1x1>, memref<64x128xf32, #l1_>, interleaved> +// CHECK-DAG: #[[row_major2x2:.*]] = #tt.metal_layout<(d0, d1) -> (d0, d1), undef, <2x2>, memref<32x64xf32, #l1_>, interleaved> +// CHECK-DAG: #[[tile1x1_f32:.*]] = #tt.metal_layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<2x4x!tt.tile<32x32, f32>, #l1_>, interleaved> +// CHECK-DAG: #[[tile1x1_bf16:.*]] = #tt.metal_layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<2x4x!tt.tile<32x32, bf16>, #l1_>, interleaved> +// CHECK-DAG: #[[tile1x1_f32_dram:.*]] = #tt.metal_layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<2x4x!tt.tile<32x32, f32>, #dram>, interleaved> +// CHECK-DAG: #[[tile2x2_f32:.*]] = #tt.metal_layout<(d0, d1) -> (d0, d1), undef, <2x2>, memref<1x2x!tt.tile<32x32, f32>, #l1_>, interleaved> -#row_major1x1 = #tt.layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<64x128xf32, #l1_>, interleaved> -#row_major1x1_T = #tt.layout<(d0, d1) -> (d1, d0), undef, <1x1>, memref<64x128xf32, #l1_>, interleaved> -#row_major2x2 = #tt.layout<(d0, d1) -> (d0, d1), undef, <2x2>, memref<32x64xf32, #l1_>, interleaved> -#tile1x1_f32 = #tt.layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<2x4x!tt.tile<32x32, f32>, #l1_>, interleaved> -#tile1x1_bf16 = #tt.layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<2x4x!tt.tile<32x32, bf16>, #l1_>, interleaved> -#tile1x1_f32_dram = #tt.layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<2x4x!tt.tile<32x32, f32>, #dram>, interleaved> -#tile2x2_f32 = #tt.layout<(d0, d1) -> (d0, d1), undef, <2x2>, memref<1x2x!tt.tile<32x32, f32>, #l1_>, interleaved> +#row_major1x1 = #tt.metal_layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<64x128xf32, #l1_>, interleaved> +#row_major1x1_T = #tt.metal_layout<(d0, d1) -> (d1, d0), undef, <1x1>, memref<64x128xf32, #l1_>, interleaved> +#row_major2x2 = #tt.metal_layout<(d0, d1) -> (d0, d1), undef, <2x2>, memref<32x64xf32, #l1_>, interleaved> +#tile1x1_f32 = #tt.metal_layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<2x4x!tt.tile<32x32, f32>, #l1_>, interleaved> +#tile1x1_bf16 = #tt.metal_layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<2x4x!tt.tile<32x32, bf16>, #l1_>, interleaved> +#tile1x1_f32_dram = #tt.metal_layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<2x4x!tt.tile<32x32, f32>, #dram>, interleaved> +#tile2x2_f32 = #tt.metal_layout<(d0, d1) -> (d0, d1), undef, <2x2>, memref<1x2x!tt.tile<32x32, f32>, #l1_>, interleaved> func.func @noncompound_linear(%in: tensor<64x128xf32, #row_major1x1>) -> tensor<64x128xf32, #row_major1x1_T> { %out = tensor.empty() : tensor<64x128xf32, #row_major1x1_T> diff --git a/test/ttmlir/Dialect/TTIR/test_allocate.mlir b/test/ttmlir/Dialect/TTIR/test_allocate.mlir index a80a8c1c9..5888cf3f6 100644 --- a/test/ttmlir/Dialect/TTIR/test_allocate.mlir +++ b/test/ttmlir/Dialect/TTIR/test_allocate.mlir @@ -1,7 +1,7 @@ // RUN: ttmlir-opt --ttir-load-system-desc --ttir-implicit-device --ttir-allocate %s | FileCheck %s #any_device = #tt.operand_constraint #l1_ = #tt.memory_space -#layout 
= #tt.layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<64x128xf32, #l1_>, interleaved>
+#layout = #tt.metal_layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<64x128xf32, #l1_>, interleaved>
 module attributes {} {
   func.func @forward(%arg0: tensor<64x128xf32, #layout>, %arg1: tensor<64x128xf32, #layout>) -> tensor<64x128xf32, #layout> {
     // CHECK: %[[C:.*]] = "ttir.alloc"[[C:.*]]
diff --git a/test/ttmlir/Silicon/TTMetal/simple_reduce.mlir b/test/ttmlir/Silicon/TTMetal/simple_reduce.mlir
index 1674ae0d3..cdde621c2 100644
--- a/test/ttmlir/Silicon/TTMetal/simple_reduce.mlir
+++ b/test/ttmlir/Silicon/TTMetal/simple_reduce.mlir
@@ -1,8 +1,8 @@
 // RUN: ttmlir-opt --ttir-to-ttmetal-backend-pipeline="system-desc-path=%system_desc_path%" %s | FileCheck %s
 #any_device = #tt.operand_constraint
 #l1_ = #tt.memory_space
-#layout1 = #tt.layout<(d0, d1) -> (d0, d1), undef, <4x4>, memref<64x96xf32, #l1_>>
-#layout2 = #tt.layout<(d0, d1) -> (d0, d1), undef, <4x1>, memref<64x32xf32, #l1_>>
+#layout1 = #tt.metal_layout<(d0, d1) -> (d0, d1), undef, <4x4>, memref<64x96xf32, #l1_>>
+#layout2 = #tt.metal_layout<(d0, d1) -> (d0, d1), undef, <4x1>, memref<64x32xf32, #l1_>>

 func.func @reduceW(%arg0: tensor<256x384xf32, #layout1>) -> tensor<256x32xf32, #layout2> {
   %0 = tensor.empty() : tensor<256x32xf32, #layout2>
@@ -15,7 +15,7 @@ func.func @reduceW(%arg0: tensor<256x384xf32, #layout1>) -> tensor<256x32xf32, #
   return %1 : tensor<256x32xf32, #layout2>
 }

-#layout3 = #tt.layout<(d0, d1) -> (d0, d1), undef, <1x4>, memref<32x96xf32, #l1_>>
+#layout3 = #tt.metal_layout<(d0, d1) -> (d0, d1), undef, <1x4>, memref<32x96xf32, #l1_>>
 func.func @reduceH(%arg0: tensor<256x384xf32, #layout1>) -> tensor<32x384xf32, #layout3> {
   %0 = tensor.empty() : tensor<32x384xf32, #layout3>
   // CHECK: %[[C:.*]] = "ttmetal.dispatch"[[C:.*]]
@@ -27,7 +27,7 @@ func.func @reduceH(%arg0: tensor<256x384xf32, #layout1>) -> tensor<32x384xf32, #
   return %1 : tensor<32x384xf32, #layout3>
 }

-#layout4 = #tt.layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<32x32xf32, #l1_>>
+#layout4 = #tt.metal_layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<32x32xf32, #l1_>>
 func.func @reduceWH(%arg0: tensor<256x384xf32, #layout1>) -> tensor<32x32xf32, #layout4> {
   %0 = tensor.empty() : tensor<32x32xf32, #layout4>
   // CHECK: %[[C:.*]] = "ttmetal.dispatch"[[C:.*]]
diff --git a/test/ttmlir/Silicon/TTMetal/tiled_reblock.mlir b/test/ttmlir/Silicon/TTMetal/tiled_reblock.mlir
index 64cf5f57a..d7d3cea1d 100644
--- a/test/ttmlir/Silicon/TTMetal/tiled_reblock.mlir
+++ b/test/ttmlir/Silicon/TTMetal/tiled_reblock.mlir
@@ -4,10 +4,10 @@

 #l1_ = #tt.memory_space

-#untilized = #tt.layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<64x128xf32, #l1_>>
-#tilized = #tt.layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<2x4x!tt.tile<32 x 32, f32>, #l1_>>
-#tilized2x2 = #tt.layout<(d0, d1) -> (d0, d1), undef, <2x2>, memref<1x2x!tt.tile<32 x 32, f32>, #l1_>>
-#untilized2x2 = #tt.layout<(d0, d1) -> (d0, d1), undef, <2x2>, memref<32x64xf32, #l1_>>
+#untilized = #tt.metal_layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<64x128xf32, #l1_>>
+#tilized = #tt.metal_layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<2x4x!tt.tile<32 x 32, f32>, #l1_>>
+#tilized2x2 = #tt.metal_layout<(d0, d1) -> (d0, d1), undef, <2x2>, memref<1x2x!tt.tile<32 x 32, f32>, #l1_>>
+#untilized2x2 = #tt.metal_layout<(d0, d1) -> (d0, d1), undef, <2x2>, memref<32x64xf32, #l1_>>
 func.func @tilize_reblock_2D(%arg0: tensor<64x128xf32, #untilized>) -> tensor<64x128xf32, #untilized2x2> {
   // CHECK: %[[C:.*]] = "ttmetal.alloc"[[C:.*]]
   %0 = tensor.empty() : tensor<64x128xf32, #tilized>
@@ -25,10 +25,10 @@ func.func @tilize_reblock_2D(%arg0: tensor<64x128xf32, #untilized>) -> tensor<64
 }

-#untilized4D = #tt.layout<(d0, d1, d2, d3) -> (d0 * 192 + d1 * 64 + d2, d3), undef, <1x1>, memref<384x128xf32, #l1_>>
-#tilized4D = #tt.layout<(d0, d1, d2, d3) -> (d0 * 192 + d1 * 64 + d2, d3), undef, <1x1>, memref<12x4x!tt.tile<32 x 32, f32>, #l1_>>
-#tilized4D_2x2 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 192 + d1 * 64 + d2, d3), undef, <2x2>, memref<6x2x!tt.tile<32 x 32, f32>, #l1_>>
-#untilized4D_2x2 = #tt.layout<(d0, d1, d2, d3) -> (d0 * 192 + d1 * 64 + d2, d3), undef, <2x2>, memref<192x64xf32, #l1_>>
+#untilized4D = #tt.metal_layout<(d0, d1, d2, d3) -> (d0 * 192 + d1 * 64 + d2, d3), undef, <1x1>, memref<384x128xf32, #l1_>>
+#tilized4D = #tt.metal_layout<(d0, d1, d2, d3) -> (d0 * 192 + d1 * 64 + d2, d3), undef, <1x1>, memref<12x4x!tt.tile<32 x 32, f32>, #l1_>>
+#tilized4D_2x2 = #tt.metal_layout<(d0, d1, d2, d3) -> (d0 * 192 + d1 * 64 + d2, d3), undef, <2x2>, memref<6x2x!tt.tile<32 x 32, f32>, #l1_>>
+#untilized4D_2x2 = #tt.metal_layout<(d0, d1, d2, d3) -> (d0 * 192 + d1 * 64 + d2, d3), undef, <2x2>, memref<192x64xf32, #l1_>>
 func.func @tilize_reblock_4D(%arg0: tensor<2x3x64x128xf32, #untilized4D>) -> tensor<2x3x64x128xf32, #untilized4D_2x2> {
   // CHECK: %[[C:.*]] = "ttmetal.alloc"[[C:.*]]
   %0 = tensor.empty() : tensor<2x3x64x128xf32, #tilized4D>
@@ -48,10 +48,10 @@ func.func @tilize_reblock_4D(%arg0: tensor<2x3x64x128xf32, #untilized4D>) -> ten
   return %5 : tensor<2x3x64x128xf32, #untilized4D_2x2>
 }

-#untilized_big = #tt.layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<96x192xf32, #l1_>>
-#tilized_big = #tt.layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<3x6x!tt.tile<32 x 32, f32>, #l1_>>
-#tilized_big_3x2 = #tt.layout<(d0, d1) -> (d0, d1), undef, <3x2>, memref<1x3x!tt.tile<32 x 32, f32>, #l1_>>
-#tilized_big_3x6 = #tt.layout<(d0, d1) -> (d0, d1), undef, <3x6>, memref<1x1x!tt.tile<32 x 32, f32>, #l1_>>
+#untilized_big = #tt.metal_layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<96x192xf32, #l1_>>
+#tilized_big = #tt.metal_layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<3x6x!tt.tile<32 x 32, f32>, #l1_>>
+#tilized_big_3x2 = #tt.metal_layout<(d0, d1) -> (d0, d1), undef, <3x2>, memref<1x3x!tt.tile<32 x 32, f32>, #l1_>>
+#tilized_big_3x6 = #tt.metal_layout<(d0, d1) -> (d0, d1), undef, <3x6>, memref<1x1x!tt.tile<32 x 32, f32>, #l1_>>
 func.func @tilize_reblock_big(%arg0: tensor<96x192xf32, #untilized_big>) -> tensor<96x192xf32, #untilized_big> {
   // move to tilized 1x1
   // CHECK: %[[C:.*]] = "ttmetal.alloc"[[C:.*]]
diff --git a/test/ttmlir/Silicon/TTMetal/to_layout.mlir b/test/ttmlir/Silicon/TTMetal/to_layout.mlir
index 015e65175..e5318c6c1 100644
--- a/test/ttmlir/Silicon/TTMetal/to_layout.mlir
+++ b/test/ttmlir/Silicon/TTMetal/to_layout.mlir
@@ -5,8 +5,8 @@
 #l1_ = #tt.memory_space
 #dram = #tt.memory_space

-#layout = #tt.layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<4x16xf32, #l1_>>
-#layout1 = #tt.layout<(d0, d1) -> (d0, d1), undef, <2x2>, memref<2x8xf32, #l1_>>
+#layout = #tt.metal_layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<4x16xf32, #l1_>>
+#layout1 = #tt.metal_layout<(d0, d1) -> (d0, d1), undef, <2x2>, memref<2x8xf32, #l1_>>
 func.func @simple(%arg0: tensor<4x16xf32, #layout>) -> tensor<4x16xf32, #layout1> {
   %0 = tensor.empty() : tensor<4x16xf32, #layout1>
   // CHECK: %[[C:.*]] = "ttmetal.dispatch"[[C:.*]]
@@ -14,8 +14,8 @@ func.func @simple(%arg0: tensor<4x16xf32, #layout>) -> tensor<4x16xf32, #layout1
   return %1 : tensor<4x16xf32, #layout1>
 }

-#untilized = #tt.layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<64x128xf32, #l1_>>
-#tilized = #tt.layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<2x4x!tt.tile<32 x 32, f32>, #l1_>>
+#untilized = #tt.metal_layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<64x128xf32, #l1_>>
+#tilized = #tt.metal_layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<2x4x!tt.tile<32 x 32, f32>, #l1_>>
 func.func @tilize(%arg0: tensor<64x128xf32, #untilized>) -> tensor<64x128xf32, #untilized> {
   %0 = tensor.empty() : tensor<64x128xf32, #tilized>
   // CHECK: %[[C:.*]] = "ttmetal.dispatch"[[C:.*]]
@@ -26,11 +26,11 @@ func.func @tilize(%arg0: tensor<64x128xf32, #untilized>) -> tensor<64x128xf32, #
   return %3 : tensor<64x128xf32, #untilized>
 }

-#untilized_dram = #tt.layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<16x64xf32, #dram>>
-#untilized_l1 = #tt.layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<16x64xf32, #l1_>>
-#untilized2x2_dram = #tt.layout<(d0, d1) -> (d0, d1), undef, <2x2>, memref<8x32xf32, #dram>>
-#untilized2x2_l1 = #tt.layout<(d0, d1) -> (d0, d1), undef, <2x2>, memref<8x32xf32, #l1_>>
-#untilized1x4_l1 = #tt.layout<(d0, d1) -> (d0, d1), undef, <1x4>, memref<16x16xf32, #l1_>>
+#untilized_dram = #tt.metal_layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<16x64xf32, #dram>>
+#untilized_l1 = #tt.metal_layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<16x64xf32, #l1_>>
+#untilized2x2_dram = #tt.metal_layout<(d0, d1) -> (d0, d1), undef, <2x2>, memref<8x32xf32, #dram>>
+#untilized2x2_l1 = #tt.metal_layout<(d0, d1) -> (d0, d1), undef, <2x2>, memref<8x32xf32, #l1_>>
+#untilized1x4_l1 = #tt.metal_layout<(d0, d1) -> (d0, d1), undef, <1x4>, memref<16x16xf32, #l1_>>
 func.func @dram_to_l1(%arg0: tensor<16x64xf32, #untilized_dram>) -> tensor<16x64xf32, #untilized_l1> {
   %0 = tensor.empty() : tensor<16x64xf32, #untilized_l1>
   // CHECK: %[[C:.*]] = "ttmetal.dispatch"[[C:.*]]
diff --git a/test/ttmlir/Silicon/TTNN/perf_unit/mnist.mlir b/test/ttmlir/Silicon/TTNN/perf_unit/mnist.mlir
index ba995925d..0193ec36b 100644
--- a/test/ttmlir/Silicon/TTNN/perf_unit/mnist.mlir
+++ b/test/ttmlir/Silicon/TTNN/perf_unit/mnist.mlir
@@ -4,8 +4,8 @@
 #loc = loc("MNISTLinear":4294967295:0)
 module @"tt-forge-graph" attributes {} {
   func.func @main(%arg0: tensor<1x784xf32> loc("MNISTLinear":4294967295:0), %arg1: tensor<1x10xf32> loc("MNISTLinear":4294967295:0), %arg2: tensor<256x10xf32> loc("MNISTLinear":4294967295:0), %arg3: tensor<1x256xf32> loc("MNISTLinear":4294967295:0), %arg4: tensor<784x256xf32> loc("MNISTLinear":4294967295:0)) -> tensor<1x10xf32> {
-    // CHECK: #[[LAYOUT_10:.*]] = #tt.layout<(d0, d1) -> (d0, d1), undef, <1x8>, memref<1x32xf32, #l1_>, block_sharded>
-    // CHECK: #[[LAYOUT_11:.*]] = #tt.layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<1x10xf32, #l1_>, block_sharded>
+    // CHECK: #[[LAYOUT_10:.*]] = #tt.metal_layout<(d0, d1) -> (d0, d1), undef, <1x8>, memref<1x32xf32, #l1_>, block_sharded>
+    // CHECK: #[[LAYOUT_11:.*]] = #tt.metal_layout<(d0, d1) -> (d0, d1), undef, <1x1>, memref<1x10xf32, #l1_>, block_sharded>
     %0 = tensor.empty() : tensor<1x256xf32> loc(#loc8)
     // CHECK: %[[C:.*]] = "ttnn.matmul"[[C:.*]] -> tensor<1x256xf32, #[[LAYOUT_10]]>
     %1 = "ttir.matmul"(%arg0, %arg4, %0) <{operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x784xf32>, tensor<784x256xf32>, tensor<1x256xf32>) -> tensor<1x256xf32> loc(#loc8)
diff --git a/tools/explorer/tt_adapter/src/tt_adapter/mlir.py b/tools/explorer/tt_adapter/src/tt_adapter/mlir.py
index 5233e844c..b9ae471ca 100644
--- a/tools/explorer/tt_adapter/src/tt_adapter/mlir.py
+++ b/tools/explorer/tt_adapter/src/tt_adapter/mlir.py
@@ -348,7 +348,7 @@ def parse_dimension(attr):

 @AttrHandler.register_handler("tt.layout")
 def parse_tt_layout(attr):
-    layout = tt.ir.LayoutAttr.maybe_downcast(attr)
+    layout = tt.ir.MetalLayoutAttr.maybe_downcast(attr)
     result = []
     result.append(graph_builder.KeyValue(key="linear", value=str(layout.linear)))
     result.append(