diff --git a/include/ttmlir/Dialect/TTIR/IR/TTIROps.td b/include/ttmlir/Dialect/TTIR/IR/TTIROps.td index 8782f63ae1..54fc4c0970 100644 --- a/include/ttmlir/Dialect/TTIR/IR/TTIROps.td +++ b/include/ttmlir/Dialect/TTIR/IR/TTIROps.td @@ -1048,6 +1048,48 @@ def TTIR_ClampOp : TTIR_DPSOp<"clamp"> { let hasVerifier = 1; } +def TTIR_ArangeOp : TTIR_Op<"arange"> { + let summary = "Arange operation."; + let description = [{ + Tensor arange operation. + + Produces a tensor with values from `start` to `end` (exclusive) with a step size of `step`, along the dimension specified by `arange_dimension`. + + Examples: + %0 = "ttir.arange"() {start = 0 : i64, end = 5 : i64 step = 1 : i64, arange_dimension = 0 : i64} : () -> tensor<5xi64> + // %0: [0, 1, 2, 3, 4] + + %1 = "ttir.arange"() {start = 0 : i64, end = 10 : i64, step = 2 : i64, arange_dimension = 0 : i64} : () -> tensor<5xf32> + // %1: [0.0, 2.0, 4.0, 6.0, 8.0] + + %2 = "ttir.arange"() {start = 0 : i64, end = 5 : i64, step = 1 : i64, arange_dimension = 0 : i64} : () -> tensor<5x3xi64> + // %2: [ + [0, 0, 0], + [1, 1, 1], + [2, 2, 2], + [3, 3, 3], + [4, 4, 4] + ] + + %3 = "ttir.arange"() {start = 0 : i64, end = 3 : i64, step = 1 : i64, arange_dimension = 1 : i64} : () -> tensor<5x3xi64> + // %3: [ + [0, 1, 2], + [0, 1, 2], + [0, 1, 2], + [0, 1, 2], + [0, 1, 2] + ] + }]; + + let arguments = (ins SI64Attr:$start, + SI64Attr:$end, + SI64Attr:$step, + I64Attr:$arange_dimension); + + let results = (outs AnyRankedTensor:$result); + let hasVerifier = 1; +} + def TTIR_ConstantOp : TTIR_Op<"constant", [ConstantLike, AllShapesMatch<["value", "result"]>]> { let summary = "Constant op."; diff --git a/include/ttmlir/Dialect/TTNN/IR/TTNNOps.td b/include/ttmlir/Dialect/TTNN/IR/TTNNOps.td index 910ed7dfd9..5cfaa6bac2 100644 --- a/include/ttmlir/Dialect/TTNN/IR/TTNNOps.td +++ b/include/ttmlir/Dialect/TTNN/IR/TTNNOps.td @@ -759,6 +759,30 @@ def TTNN_EmptyOp : TTNN_Op<"empty", [NoMemoryEffect]> { let hasVerifier = 1; } +def TTNN_ArangeOp : TTNN_Op<"arange"> { + let summary = "Arange operation."; + let description = [{ + Tensor arange operation. + + Produces a (1, 1, 1, N)-shaped tensor with values from `start` to `end` (exclusive) with a step size of `step`. 
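+
+    Here N = (end - start) / step; the verifier rejects ranges where N is not
+    positive or where the result shape differs from (1, 1, 1, N). Unlike
+    `ttir.arange`, this op also takes a `device` operand, and the element type
+    of the result determines the data type of the generated values.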
+ + Examples: + %0 = "ttnn.arange"() {start = 0 : i64, end = 5 : i64 step = 1 : i64} : () -> tensor<1x1x1x5xi64> + // %0: [[[[0, 1, 2, 3, 4]]]] + + %1 = "ttnn.arange"() {start = 0 : i64, end = 10 : i64, step = 2 : i64} : () -> tensor<1x1x1x5xf32> + // %1: [[[[0.0, 2.0, 4.0, 6.0, 8.0]]]] + }]; + + let arguments = (ins I64Attr:$start, + I64Attr:$end, + I64Attr:$step, + TT_Device:$device); + + let results = (outs AnyRankedTensor:$result); + let hasVerifier = 1; +} + def TTNN_FullOp : TTNN_Op<"full"> { let summary = "Full op."; let description = [{ diff --git a/include/ttmlir/Target/TTNN/program.fbs b/include/ttmlir/Target/TTNN/program.fbs index ec493e6496..2d01a9a68b 100644 --- a/include/ttmlir/Target/TTNN/program.fbs +++ b/include/ttmlir/Target/TTNN/program.fbs @@ -61,6 +61,15 @@ table FullOp { out: tt.target.TensorRef; } +table ArangeOp { + start: float; + end: float; + step: float; + dtype: DataType; + out: tt.target.TensorRef; + device: tt.target.DeviceRef; +} + enum EltwiseOpType: uint32 { Add = 0, Multiply = 1, @@ -261,6 +270,7 @@ union OpType { MaxPool2dOp, DeallocateOp, AllGatherOp, + ArangeOp, } table Operation { diff --git a/lib/Conversion/StableHLOToTTIR/StableHLOToTTIRPatterns.cpp b/lib/Conversion/StableHLOToTTIR/StableHLOToTTIRPatterns.cpp index 28bf4f71de..8db1b44e69 100644 --- a/lib/Conversion/StableHLOToTTIR/StableHLOToTTIRPatterns.cpp +++ b/lib/Conversion/StableHLOToTTIR/StableHLOToTTIRPatterns.cpp @@ -1201,6 +1201,36 @@ class StableHLOToTTIRGatherOpConversionPattern } }; +template +class StableHLOToTTIROpIotaOpConversionPattern + : public OpConversionPattern { + + using OpConversionPattern::OpConversionPattern; + +public: + LogicalResult + matchAndRewrite(SrcIotaOp srcOp, Adaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + + RankedTensorType outputType = mlir::cast( + this->getTypeConverter()->convertType(srcOp.getResult().getType())); + rewriter.replaceOpWithNewOp( + srcOp, outputType, 0, outputType.getDimSize(adaptor.getIotaDimension()), + 1, adaptor.getIotaDimension()); + + // Dynamic Iota has an output_shape attribute but the output shape is + // already known by the result type This is to remove the operand that will + // become dead code + for (auto operand : adaptor.getOperands()) { + if (operand.getDefiningOp()) { + rewriter.eraseOp(operand.getDefiningOp()); + } + } + + return success(); + } +}; + void addElementwiseUnaryOpsConversionPatterns(MLIRContext *ctx, RewritePatternSet &patterns, TypeConverter &typeConverter) { @@ -1365,6 +1395,15 @@ void addGatherOpConversionPattern(MLIRContext *ctx, RewritePatternSet &patterns, patterns.add(typeConverter, ctx); } +void addIotaOpConversionPattern(MLIRContext *ctx, RewritePatternSet &patterns, + TypeConverter &typeConverter) { + patterns.add>( + typeConverter, ctx); + patterns + .add>( + typeConverter, ctx); +} + } // namespace namespace mlir::tt { @@ -1389,6 +1428,7 @@ void populateStableHLOToTTIRPatterns(MLIRContext *ctx, addSliceOpConversionPattern(ctx, patterns, typeConverter); addClampOpConversionPattern(ctx, patterns, typeConverter); addGatherOpConversionPattern(ctx, patterns, typeConverter); + addIotaOpConversionPattern(ctx, patterns, typeConverter); } } // namespace mlir::tt diff --git a/lib/Conversion/TTIRToTTIRDecomposition/TTIRToTTIRDecomposition.cpp b/lib/Conversion/TTIRToTTIRDecomposition/TTIRToTTIRDecomposition.cpp index 9c5afd41e6..ed7eb0be82 100644 --- a/lib/Conversion/TTIRToTTIRDecomposition/TTIRToTTIRDecomposition.cpp +++ 
b/lib/Conversion/TTIRToTTIRDecomposition/TTIRToTTIRDecomposition.cpp @@ -897,6 +897,143 @@ struct SelectToSliceConversionPattern } }; +/* + * This pattern rewrites ArangeOp by forcing the arange_dimension to be + * rightmost dimension of the output tensor. This is done by replacing the + * ArangeOp with a new one that has this property, and then transposing out last + * dimension to the dimension specified by the original ArangeOp, and also + * inserting a reshape to match the rank of the intended output and broadcasts + * to repeat the data along the other dimensions. + * + * The ArangeOp that is generated here will be equivalent to how ttnn::ArangeOp + * behaves. The reason this pass is done in TTIR rather than generated when we + * want to lower to TTNN is because in the future we will want to consteval the + * ArangeOp, but have the option to not include repeated data in the constant + * tensor and broadcast at runtime instead. Consteval will be implemented for + * the TTIR dialect only and so this explication of the TMs implicit in ArangeOp + * must be done in TTIR. + */ +struct ArangeForceLastDimensionPattern + : public OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(ttir::ArangeOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + + const RankedTensorType outputType = + mlir::cast(op.getResult().getType()); + + int64_t arangeDimension = adaptor.getArangeDimension(); + int64_t arangeDimensionNegative = arangeDimension - outputType.getRank(); + int64_t start = adaptor.getStart(); + int64_t end = adaptor.getEnd(); + int64_t step = adaptor.getStep(); + + int64_t arangeLength = (end - start) / step; + + ArrayRef ttnnShape = {1, 1, 1, arangeLength}; + if (ttnnShape == outputType.getShape()) { + return success(); + } + + RankedTensorType arangeOutputType = RankedTensorType::get( + SmallVector({1, 1, 1, arangeLength}), + outputType.getElementType(), outputType.getEncoding()); + + Value output = + rewriter + .create( // perform arange on the last dimension to + // match how ttnn behaves + op.getLoc(), arangeOutputType, start, end, step, 3) + .getResult(); + + std::vector outputShape = arangeOutputType.getShape().vec(); + // Must transpose the output so that the data changes along the axis defined + // by arangeDimension + if (arangeDimensionNegative != -1) { + std::vector transposeShape = outputShape; + transposeShape[arangeDimensionNegative + transposeShape.size()] = + arangeLength; + transposeShape[arangeOutputType.getRank() - 1] = 1; + RankedTensorType transposeType = RankedTensorType::get( + transposeShape, arangeOutputType.getElementType(), + arangeOutputType.getEncoding()); + + tensor::EmptyOp dpsOutput = rewriter.create( + op.getLoc(), transposeShape, transposeType.getElementType()); + + output = rewriter.create( + op.getLoc(), transposeType, output, dpsOutput, + arangeDimensionNegative + transposeShape.size(), + arangeOutputType.getRank() - 1, + rewriter.getArrayAttr(SmallVector( + 2, rewriter.getAttr( + OperandConstraint::AnyDeviceTile)))); + + outputShape = transposeShape; + } + + // Must match up the rank of the output with the rank of the intended output + // from the original arange, with the arangeDimension in the correct + // position + if (outputType.getRank() != static_cast(outputShape.size())) { + std::vector reshapeShape; + for (uint32_t i = 0; i < outputType.getRank(); i++) { + i == arangeDimension ? 
reshapeShape.push_back(end) + : reshapeShape.push_back(1); + } + + RankedTensorType reshapeType = RankedTensorType::get( + SmallVector(reshapeShape.begin(), reshapeShape.end()), + outputType.getElementType(), outputType.getEncoding()); + tensor::EmptyOp dpsOutput = rewriter.create( + op.getLoc(), + SmallVector(reshapeShape.begin(), reshapeShape.end()), + reshapeType.getElementType()); + output = rewriter.create( + op.getLoc(), reshapeType, output, dpsOutput, + rewriter.getI32ArrayAttr(reshapeShape), + rewriter.getArrayAttr(SmallVector( + 2, rewriter.getAttr( + OperandConstraint::AnyDeviceTile)))); + + outputShape = + std::vector(reshapeShape.begin(), reshapeShape.end()); + } + + // Must broadcast the rest of the dimensions + SmallVector broadcastDims; + for (uint32_t i = 0; i < outputShape.size(); i++) { + if (i != arangeDimension && outputShape[i] != outputType.getShape()[i]) { + outputShape[i] = outputType.getShape()[i]; + broadcastDims.push_back(rewriter.getI64IntegerAttr(i)); + } + } + if (!broadcastDims.empty()) { + RankedTensorType broadcastType = RankedTensorType::get( + outputShape, outputType.getElementType(), outputType.getEncoding()); + + tensor::EmptyOp dpsOutput = rewriter.create( + op.getLoc(), outputShape, outputType.getElementType()); + + output = rewriter.create( + op.getLoc(), broadcastType, output, dpsOutput, + rewriter.getArrayAttr(broadcastDims), + rewriter.getArrayAttr(SmallVector( + 2, rewriter.getAttr( + OperandConstraint::AnyDeviceTile)))); + + assert(mlir::cast(output.getType()).getShape() == + outputType.getShape() && + "Output shape must match the shape of the input tensor"); + } + rewriter.replaceOp(op, output); + return success(); + } +}; + void populateTTIRToTTIRDecompositionPatterns(MLIRContext *ctx, RewritePatternSet &patterns, TypeConverter &typeConverter) { @@ -906,6 +1043,7 @@ void populateTTIRToTTIRDecompositionPatterns(MLIRContext *ctx, patterns.add(typeConverter, ctx); patterns.add(typeConverter, ctx); patterns.add(typeConverter, ctx); + patterns.add(typeConverter, ctx); } } // namespace mlir::tt diff --git a/lib/Conversion/TTIRToTTIRDecomposition/TTIRToTTIRDecompositionPass.cpp b/lib/Conversion/TTIRToTTIRDecomposition/TTIRToTTIRDecompositionPass.cpp index d91084f59d..e244eea8fb 100644 --- a/lib/Conversion/TTIRToTTIRDecomposition/TTIRToTTIRDecompositionPass.cpp +++ b/lib/Conversion/TTIRToTTIRDecomposition/TTIRToTTIRDecompositionPass.cpp @@ -53,6 +53,14 @@ struct TTIRToTTIRDecompositionPass target.addIllegalOp(); target.addIllegalOp(); + // These are the ops that must satisfy some conditions after this pass + target.addDynamicallyLegalOp([&](ttir::ArangeOp op) { + auto shape = op.getResult().getType().getShape(); + return (static_cast(op.getArangeDimension()) == 3 && + shape.size() == 4 && shape[0] == 1 && shape[1] == 1 && + shape[2] == 1); + }); + TypeConverter typeConverter; // All types map 1:1. 
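
As a reviewer aid, here is a rough sketch of the IR this decomposition is expected to produce for the arange_dimension = 1, tensor<1x32x128x128xf32> case exercised by the new test/ttmlir/Dialect/TTIR/Decomposition/arange_decomposition.mlir below. Attribute details and operand_constraints are elided, and %t_dps / %b_dps stand for the tensor.empty DPS operands the pattern creates; the op sequence and shapes follow the transpose/broadcast logic of ArangeForceLastDimensionPattern above:

  // Original op: values vary along dim 1.
  %0 = "ttir.arange"() <{start = 0 : si64, end = 32 : si64, step = 1 : si64, arange_dimension = 1 : i64}> : () -> tensor<1x32x128x128xf32>

  // After decomposition (sketch): arange on the last dim, transpose dims 1 and 3,
  // then broadcast dims 2 and 3 back up to the requested shape.
  %a = "ttir.arange"() <{start = 0 : si64, end = 32 : si64, step = 1 : si64, arange_dimension = 3 : i64}> : () -> tensor<1x1x1x32xf32>
  %t = "ttir.transpose"(%a, %t_dps) ... : (tensor<1x1x1x32xf32>, tensor<1x32x1x1xf32>) -> tensor<1x32x1x1xf32>
  %b = "ttir.broadcast"(%t, %b_dps) ... : (tensor<1x32x1x1xf32>, tensor<1x32x128x128xf32>) -> tensor<1x32x128x128xf32>
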
typeConverter.addConversion([](Type type) { return type; }); diff --git a/lib/Conversion/TTIRToTTNN/TTIRToTTNN.cpp b/lib/Conversion/TTIRToTTNN/TTIRToTTNN.cpp index 12e29a9609..10aaa7a731 100644 --- a/lib/Conversion/TTIRToTTNN/TTIRToTTNN.cpp +++ b/lib/Conversion/TTIRToTTNN/TTIRToTTNN.cpp @@ -908,6 +908,34 @@ class AllGatherOpConversionPattern } }; +class ArangeOpConversionPattern : public OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(ttir::ArangeOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + + RankedTensorType outputType = + mlir::cast(op.getResult().getType()); + assert(static_cast(adaptor.getArangeDimension()) == + outputType.getRank() - 1 && + "Arange dimension must be the final dimension of the output tensor " + "to convert to ttnn.arange"); + + auto newOutputType = RankedTensorType::get(outputType.getShape(), + outputType.getElementType(), + outputType.getEncoding()); + + Value device = getOrInsertDevice(rewriter, op); + rewriter.replaceOpWithNewOp( + op, newOutputType, adaptor.getStart(), adaptor.getEnd(), + adaptor.getStep(), device); + + return success(); + } +}; + namespace mlir::tt { void populateTTIRToTTNNPatterns(MLIRContext *ctx, RewritePatternSet &patterns, @@ -973,7 +1001,8 @@ void populateTTIRToTTNNPatterns(MLIRContext *ctx, RewritePatternSet &patterns, Conv2dOpConversionPattern, MaxPool2dOpConversionPattern, SubtractOpConversionPattern, - AllGatherOpConversionPattern + AllGatherOpConversionPattern, + ArangeOpConversionPattern >(typeConverter, ctx); // ANCHOR_END: op_rewriter_pattern_set // clang-format on diff --git a/lib/Conversion/TTNNToEmitC/TTNNToEmitC.cpp b/lib/Conversion/TTNNToEmitC/TTNNToEmitC.cpp index 92862cd9da..d67b0b5c1d 100644 --- a/lib/Conversion/TTNNToEmitC/TTNNToEmitC.cpp +++ b/lib/Conversion/TTNNToEmitC/TTNNToEmitC.cpp @@ -668,8 +668,8 @@ void populateTTNNToEmitCPatterns(mlir::MLIRContext *ctx, // Tensor ops // patterns - .add>( - typeConverter, ctx); + .add, + DefaultOpConversionPattern>(typeConverter, ctx); // Eltwise unary ops // diff --git a/lib/Dialect/TTIR/IR/TTIROps.cpp b/lib/Dialect/TTIR/IR/TTIROps.cpp index 5946cb2fe3..a5626d19dc 100644 --- a/lib/Dialect/TTIR/IR/TTIROps.cpp +++ b/lib/Dialect/TTIR/IR/TTIROps.cpp @@ -45,6 +45,37 @@ ::mlir::LogicalResult mlir::tt::ttir::ClampOp::verify() { return success(); } +//===----------------------------------------------------------------------===// +// ArangeOp +//===----------------------------------------------------------------------===// + +::mlir::LogicalResult mlir::tt::ttir::ArangeOp::verify() { + int64_t start = getStart(); + int64_t end = getEnd(); + int64_t step = getStep(); + + if (step == 0) { + return emitOpError("Step value cannot be zero"); + } + + int64_t numValues = (end - start) / step; + + if (numValues <= 0) { + return emitOpError() << "Invalid range: start=" << start << ", end=" << end + << ", step=" << step; + } + + if (numValues != getType().getDimSize(getArangeDimension())) { + return emitOpError() << "Output tensor shape must be " << numValues + << " at dim " << getArangeDimension() + << " (since start=" << start << ", end=" << end + << ", step=" << step << "), but got " + << getType().getDimSize(getArangeDimension()); + } + + return success(); +} + //===----------------------------------------------------------------------===// // ConstantOp //===----------------------------------------------------------------------===// diff --git a/lib/Dialect/TTNN/IR/TTNNOps.cpp 
b/lib/Dialect/TTNN/IR/TTNNOps.cpp index 4abd74d62e..45d7a112ca 100644 --- a/lib/Dialect/TTNN/IR/TTNNOps.cpp +++ b/lib/Dialect/TTNN/IR/TTNNOps.cpp @@ -140,6 +140,32 @@ ::mlir::LogicalResult mlir::tt::ttnn::MaxPool2dOp::verify() { return success(); } +//===----------------------------------------------------------------------===// +// ArangeOp +//===----------------------------------------------------------------------===// + +::mlir::LogicalResult mlir::tt::ttnn::ArangeOp::verify() { + + if (getStep() == 0) { + return emitOpError("Step cannot be zero."); + } + + int64_t numValues = (getEnd() - getStart()) / getStep(); + + if (numValues <= 0) { + return emitOpError("Invalid range: start=") + << getStart() << ", end=" << getEnd() << ", step=" << getStep(); + } + + std::vector expectedShape = {1, 1, 1, numValues}; + if (getType().getShape().vec() != expectedShape) { + return emitOpError() << "Output tensor shape must be " << expectedShape + << ", but got " << getType().getShape(); + } + + return success(); +} + //===----------------------------------------------------------------------===// // EmptyOp //===----------------------------------------------------------------------===// diff --git a/lib/Dialect/TTNN/Transforms/TTNNLayout.cpp b/lib/Dialect/TTNN/Transforms/TTNNLayout.cpp index eebfdc13f3..2d4a2ff8f5 100644 --- a/lib/Dialect/TTNN/Transforms/TTNNLayout.cpp +++ b/lib/Dialect/TTNN/Transforms/TTNNLayout.cpp @@ -214,6 +214,28 @@ createToLayoutOp(PatternRewriter &rewriter, Location loc, Value input, .getResult(); } + // If the input tensor is an arange, we want to set the desired layout just + // like the other creation ops. However, a caveat is that in ttnn, arange is + // hardcoded to be ROW_MAJOR. So we must ensure that the layout we assign to + // it is ROW_MAJOR - and to make it tile layout we still must insert + // ToLayoutOp on its output. We can do this by setting the element type to + // ty.getElementType() in case desiredElementType is a TileType. 
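+  // For example, an arange producing tensor<1x1x1x128xbf16> keeps a row-major
+  // layout attribute here even when the desired element type is a TileType;
+  // the ToLayoutOp created further below on its result is what performs the
+  // tilization requested by the consumer.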
+ ttir::ArangeOp existingArange = input.getDefiningOp(); + if (existingArange) { + TTNNLayoutAttr arangeLayout = rewriter.getAttr( + ty.getShape(), ty.getElementType(), desiredBufferType, + tensorConfig.getGrid(), desiredMemLayout, g_defaultCollapseDims); + input = + rewriter + .replaceOpWithNewOp( + existingArange, + mlir::RankedTensorType::get(ty.getShape(), ty.getElementType(), + arangeLayout), + existingArange.getStart(), existingArange.getEnd(), + existingArange.getStep(), existingArange.getArangeDimension()) + .getResult(); + } + // If the input tensor is not a constant or empty tensor, we need to create a // new tensor with the desired layout which will be used as the output of the // ToLayoutOp @@ -281,6 +303,13 @@ class TTNNLayoutDPSOperandsRewriter continue; } + // If the operand is a BroadcastOp or a ToLayout op do not put a + // ToLayoutOp on its output + if (operand.get().getDefiningOp() || + operand.get().getDefiningOp()) { + continue; + } + // Read operand constrait for current operand OperandConstraint operandConstraint = mlir::cast( diff --git a/lib/Target/TTNN/TTNNToFlatbuffer.cpp b/lib/Target/TTNN/TTNNToFlatbuffer.cpp index 30b83014d4..5727294601 100644 --- a/lib/Target/TTNN/TTNNToFlatbuffer.cpp +++ b/lib/Target/TTNN/TTNNToFlatbuffer.cpp @@ -333,6 +333,20 @@ createOp(FlatbufferObjectCache &cache, FullOp op) { kHostAllocatedSize)); } +::flatbuffers::Offset<::tt::target::ttnn::ArangeOp> +createOp(FlatbufferObjectCache &cache, ArangeOp op) { + + auto dtype = elementTypeToDataType(op.getResult().getType().getElementType()); + auto device = getOperandThroughDPSOps(op.getDevice()); + return ::tt::target::ttnn::CreateArangeOp( + *cache.fbb, static_cast(op.getStart()), + static_cast(op.getEnd()), static_cast(op.getStep()), + toFlatbuffer(cache, dtype), + cache.getOrCreate(op.getResult(), tensorValueToFlatbuffer, + kHostAllocatedAddress, kHostAllocatedSize), + cache.at<::tt::target::DeviceRef>(device)); +} + // ANCHOR: adding_an_op_matmul_serialize_to_binary ::flatbuffers::Offset<::tt::target::ttnn::MatmulOp> createOp(FlatbufferObjectCache &cache, MatmulOp op) { @@ -869,6 +883,9 @@ emitTTNNOperation(FlatbufferObjectCache &cache, Operation *op, if (auto geluOp = dyn_cast(op); geluOp) { return createOperation(cache, createEltwiseOp(cache, geluOp), debugString); } + if (auto arangeOp = dyn_cast(op); arangeOp) { + return createOperation(cache, createOp(cache, arangeOp), debugString); + } llvm_unreachable("unhandled op in emitTTNNOperation"); } diff --git a/runtime/lib/ttnn/operations/CMakeLists.txt b/runtime/lib/ttnn/operations/CMakeLists.txt index 4edc4780b9..38115803f0 100644 --- a/runtime/lib/ttnn/operations/CMakeLists.txt +++ b/runtime/lib/ttnn/operations/CMakeLists.txt @@ -5,6 +5,7 @@ set(TTNN_OPS_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/include/tt/runtime/ttnn/operations/eltwise/ternary/utils.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ccl/all_gather.cpp ${CMAKE_CURRENT_SOURCE_DIR}/conv/conv2d.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/creation/arange.cpp ${CMAKE_CURRENT_SOURCE_DIR}/creation/empty.cpp ${CMAKE_CURRENT_SOURCE_DIR}/creation/full.cpp ${CMAKE_CURRENT_SOURCE_DIR}/data_movement/concat.cpp diff --git a/runtime/lib/ttnn/operations/creation/arange.cpp b/runtime/lib/ttnn/operations/creation/arange.cpp new file mode 100644 index 0000000000..953040166e --- /dev/null +++ b/runtime/lib/ttnn/operations/creation/arange.cpp @@ -0,0 +1,34 @@ +// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +// +// SPDX-License-Identifier: Apache-2.0 + +#include "arange.h" +#include "tt/runtime/detail/logger.h" 
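+
+// Runtime handler for the ArangeOp flatbuffer record (run() below): start,
+// end and step come from the flatbuffer, the dtype is taken from the output
+// TensorRef, and ttnn::arange is invoked either on the single target device
+// (MeshDevice is asserted against) or on host when no device is attached;
+// the resulting tensor is registered in the pool under the output global id.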
+#include "tt/runtime/ttnn/operations/utils.h" +#include +#include + +namespace tt::runtime::ttnn::operations::creation { +void run(const ::tt::target::ttnn::ArangeOp *op, ProgramContext &context) { + ProgramTensorPool &tensorPool = context.getTensorPool(); + ::ttnn::Tensor out; + + if (op->device()) { + // ttnn::arange supports no device (host) and single device + DeviceVariant targetDevice = + context.getTargetDevice(op->device()->global_id()); + + LOG_ASSERT(std::holds_alternative>( + targetDevice), + "ttnn::arange does not support MeshDevice."); + + out = ::ttnn::arange( + op->start(), op->end(), op->step(), utils::getDataType(op->out()), + std::get>(targetDevice)); + } else { + out = ::ttnn::arange(op->start(), op->end(), op->step(), + utils::getDataType(op->out())); + } + utils::updateTensorPool(tensorPool, out, op->out()->global_id()); +} +} // namespace tt::runtime::ttnn::operations::creation diff --git a/runtime/lib/ttnn/operations/creation/arange.h b/runtime/lib/ttnn/operations/creation/arange.h new file mode 100644 index 0000000000..157ee2dc61 --- /dev/null +++ b/runtime/lib/ttnn/operations/creation/arange.h @@ -0,0 +1,17 @@ +// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef RUNTIME_LIB_TTNN_OPERATIONS_CREATION_ARANGE_H +#define RUNTIME_LIB_TTNN_OPERATIONS_CREATION_ARANGE_H + +#include "tt/runtime/ttnn/types.h" +#include "ttmlir/Target/TTNN/program_generated.h" + +namespace tt::runtime::ttnn::operations::creation { + +void run(const ::tt::target::ttnn::ArangeOp *op, ProgramContext &context); + +} // namespace tt::runtime::ttnn::operations::creation + +#endif diff --git a/runtime/lib/ttnn/program.cpp b/runtime/lib/ttnn/program.cpp index 8cfa013891..f372f82dc3 100644 --- a/runtime/lib/ttnn/program.cpp +++ b/runtime/lib/ttnn/program.cpp @@ -4,6 +4,7 @@ #include "operations/ccl/all_gather.h" #include "operations/context/get_device.h" #include "operations/conv/conv2d.h" +#include "operations/creation/arange.h" #include "operations/creation/empty.h" #include "operations/creation/full.h" #include "operations/data_movement/concat.h" @@ -186,6 +187,9 @@ void ProgramExecutor::runOperation(const ::tt::target::ttnn::Operation *op) { case ::tt::target::ttnn::OpType::AllGatherOp: { return operations::ccl::run(op->type_as_AllGatherOp(), context); } + case ::tt::target::ttnn::OpType::ArangeOp: { + return operations::creation::run(op->type_as_ArangeOp(), context); + } default: { LOG_FATAL("Unsupported operation type"); } diff --git a/test/ttmlir/Conversion/StableHLOToTTIR/dynamic_iota_op.mlir b/test/ttmlir/Conversion/StableHLOToTTIR/dynamic_iota_op.mlir new file mode 100644 index 0000000000..43241ac6f0 --- /dev/null +++ b/test/ttmlir/Conversion/StableHLOToTTIR/dynamic_iota_op.mlir @@ -0,0 +1,11 @@ +// REQUIRES: stablehlo +// RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | FileCheck %s +#any_device = #tt.operand_constraint +module @jit_dnamic_iota attributes {} { + func.func public @test_dynamic_iota() -> tensor<1x32x128x128xf32> { + // CHECK: %[[C:.*]] = "ttir.arange"[[C:.*]] + %output_shape = stablehlo.constant dense<[1, 32, 128, 128]> : tensor<4xi64> + %0 = "stablehlo.dynamic_iota"(%output_shape) {iota_dimension = 1: i64} : (tensor<4xi64>) -> tensor<1x32x128x128xf32> + return %0 : tensor<1x32x128x128xf32> + } +} diff --git a/test/ttmlir/Conversion/StableHLOToTTIR/iota_op.mlir b/test/ttmlir/Conversion/StableHLOToTTIR/iota_op.mlir new file mode 100644 index 0000000000..857a621bb0 --- /dev/null +++ 
b/test/ttmlir/Conversion/StableHLOToTTIR/iota_op.mlir @@ -0,0 +1,10 @@ +// REQUIRES: stablehlo +// RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | FileCheck %s +#any_device = #tt.operand_constraint +module @jit_iota attributes {} { + func.func public @test_iota() -> tensor<1x32x128x128xf32> { + // CHECK: %[[C:.*]] = "ttir.arange"[[C:.*]] + %0 = "stablehlo.iota"() {iota_dimension = 1: i64} : () -> tensor<1x32x128x128xf32> + return %0 : tensor<1x32x128x128xf32> + } +} diff --git a/test/ttmlir/Dialect/TTIR/Decomposition/arange_decomposition.mlir b/test/ttmlir/Dialect/TTIR/Decomposition/arange_decomposition.mlir new file mode 100644 index 0000000000..6f72e56f17 --- /dev/null +++ b/test/ttmlir/Dialect/TTIR/Decomposition/arange_decomposition.mlir @@ -0,0 +1,11 @@ +// RUN: ttmlir-opt --ttir-to-ttir-decomposition %s | FileCheck %s +#any_device = #tt.operand_constraint +module attributes {} { + func.func @forward(%arg0: tensor<1x32x128x128xf32>) -> tensor<1x32x128x128xf32> { + // CHECK: %[[C:.*]] = "ttir.arange"[[C:.*]] + // CHECK: %[[C:.*]] = "ttir.transpose"[[C:.*]] + // CHECK: %[[C:.*]] = "ttir.broadcast"[[C:.*]] + %1 = "ttir.arange"() <{start = 0: si64, end = 32: si64, step = 1: si64, arange_dimension = 1: i64}> : () -> tensor<1x32x128x128xf32> + return %1 : tensor<1x32x128x128xf32> + } +} diff --git a/test/ttmlir/Dialect/TTIR/decompositions/select_decomposition_tests.mlir b/test/ttmlir/Dialect/TTIR/Decomposition/select_decomposition_tests.mlir similarity index 100% rename from test/ttmlir/Dialect/TTIR/decompositions/select_decomposition_tests.mlir rename to test/ttmlir/Dialect/TTIR/Decomposition/select_decomposition_tests.mlir diff --git a/test/ttmlir/Dialect/TTNN/arange/arange_tests_negative.mlir b/test/ttmlir/Dialect/TTNN/arange/arange_tests_negative.mlir new file mode 100644 index 0000000000..dc3f09fbaf --- /dev/null +++ b/test/ttmlir/Dialect/TTNN/arange/arange_tests_negative.mlir @@ -0,0 +1,12 @@ +// RUN: not ttmlir-opt --split-input-file %s 2>&1 | FileCheck %s +// Negative tests for matmul operation +#any_device = #tt.operand_constraint +module attributes {} { + func.func @forward(%arg0: tensor<1x32x128x128xf32>) -> tensor<1x32x128x128xf32> { + // CHECK: error: 'ttir.arange' op Output tensor shape must be 16 at dim 1 (since start=0, end=32, step=2), but got 32 + %1 = "ttir.arange"() <{start = 0: si64, end = 32: si64, step = 2: si64, arange_dimension = 1: i64}> : () -> tensor<1x32x128x128xf32> + %dps = tensor.empty() : tensor<1x32x128x128xf32> + %2 = "ttir.multiply"(%arg0, %1, %dps) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x128x128xf32>, tensor<1x32x128x128xf32>, tensor<1x32x128x128xf32>) -> tensor<1x32x128x128xf32> + return %2 : tensor<1x32x128x128xf32> + } +} diff --git a/test/ttmlir/Dialect/TTNN/arange/arange_tests_positive.mlir b/test/ttmlir/Dialect/TTNN/arange/arange_tests_positive.mlir new file mode 100644 index 0000000000..4c04e138bb --- /dev/null +++ b/test/ttmlir/Dialect/TTNN/arange/arange_tests_positive.mlir @@ -0,0 +1,11 @@ +// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline %s | FileCheck %s +#any_device = #tt.operand_constraint +module attributes {} { + func.func @forward(%arg0: tensor<1x32x128x128xf32>) -> tensor<1x32x128x128xf32> { + // CHECK: %[[C:.*]] = "ttnn.arange"[[C:.*]] + %1 = "ttir.arange"() <{start = 0: si64, end = 32: si64, step = 1: si64, arange_dimension = 1: i64}> : () -> tensor<1x32x128x128xf32> + %dps = tensor.empty() : tensor<1x32x128x128xf32> + %2 = "ttir.multiply"(%arg0, %1, %dps) 
<{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x128x128xf32>, tensor<1x32x128x128xf32>, tensor<1x32x128x128xf32>) -> tensor<1x32x128x128xf32> + return %2 : tensor<1x32x128x128xf32> + } +} diff --git a/test/ttmlir/Silicon/StableHLO/Iota/simple_device_dynamic_iota_dim2.mlir b/test/ttmlir/Silicon/StableHLO/Iota/simple_device_dynamic_iota_dim2.mlir new file mode 100644 index 0000000000..d911ec6fe2 --- /dev/null +++ b/test/ttmlir/Silicon/StableHLO/Iota/simple_device_dynamic_iota_dim2.mlir @@ -0,0 +1,15 @@ +// REQUIRES: stablehlo +// RUN: rm -rf %t.ttnn +// RUN: rm -rf %t.mlir +// RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | \ +// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path%" > %t.mlir +// RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn +// RUN: FileCheck --input-file=%t.mlir %s +module attributes {} { + func.func @forward(%arg0: tensor<1x1x32x128xbf16>) -> tensor<1x1x32x128xbf16> { + // CHECK: ttnn.arange + %0 = "stablehlo.iota"() {iota_dimension = 2: i64} : () -> tensor<1x1x32x128xbf16> + %2 = "stablehlo.multiply"(%arg0, %0) : (tensor<1x1x32x128xbf16>, tensor<1x1x32x128xbf16>) -> tensor<1x1x32x128xbf16> + return %2 : tensor<1x1x32x128xbf16> + } +} diff --git a/test/ttmlir/Silicon/StableHLO/Iota/simple_device_dynamic_iota_dim3.mlir b/test/ttmlir/Silicon/StableHLO/Iota/simple_device_dynamic_iota_dim3.mlir new file mode 100644 index 0000000000..01aa0e91b3 --- /dev/null +++ b/test/ttmlir/Silicon/StableHLO/Iota/simple_device_dynamic_iota_dim3.mlir @@ -0,0 +1,16 @@ +// REQUIRES: stablehlo +// RUN: rm -rf %t.ttnn +// RUN: rm -rf %t.mlir +// RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | \ +// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path%" > %t.mlir +// RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn +// RUN: FileCheck --input-file=%t.mlir %s +module attributes {} { + func.func @forward(%arg0: tensor<1x1x32x128xbf16>) -> tensor<1x1x32x128xbf16> { + %output_shape = stablehlo.constant dense<[1, 1, 32, 128]> : tensor<4xi64> + // CHECK: ttnn.arange + %0 = "stablehlo.dynamic_iota"(%output_shape) {iota_dimension = 3: i64} : (tensor<4xi64>) -> tensor<1x1x32x128xbf16> + %2 = "stablehlo.multiply"(%arg0, %0) : (tensor<1x1x32x128xbf16>, tensor<1x1x32x128xbf16>) -> tensor<1x1x32x128xbf16> + return %2 : tensor<1x1x32x128xbf16> + } +} diff --git a/test/ttmlir/Silicon/StableHLO/Iota/simple_device_iota_dim2.mlir b/test/ttmlir/Silicon/StableHLO/Iota/simple_device_iota_dim2.mlir new file mode 100644 index 0000000000..d911ec6fe2 --- /dev/null +++ b/test/ttmlir/Silicon/StableHLO/Iota/simple_device_iota_dim2.mlir @@ -0,0 +1,15 @@ +// REQUIRES: stablehlo +// RUN: rm -rf %t.ttnn +// RUN: rm -rf %t.mlir +// RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | \ +// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path%" > %t.mlir +// RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn +// RUN: FileCheck --input-file=%t.mlir %s +module attributes {} { + func.func @forward(%arg0: tensor<1x1x32x128xbf16>) -> tensor<1x1x32x128xbf16> { + // CHECK: ttnn.arange + %0 = "stablehlo.iota"() {iota_dimension = 2: i64} : () -> tensor<1x1x32x128xbf16> + %2 = "stablehlo.multiply"(%arg0, %0) : (tensor<1x1x32x128xbf16>, tensor<1x1x32x128xbf16>) -> tensor<1x1x32x128xbf16> + return %2 : tensor<1x1x32x128xbf16> + } +} diff --git a/test/ttmlir/Silicon/StableHLO/Iota/simple_device_iota_dim3.mlir 
b/test/ttmlir/Silicon/StableHLO/Iota/simple_device_iota_dim3.mlir new file mode 100644 index 0000000000..a231432abc --- /dev/null +++ b/test/ttmlir/Silicon/StableHLO/Iota/simple_device_iota_dim3.mlir @@ -0,0 +1,15 @@ +// REQUIRES: stablehlo +// RUN: rm -rf %t.ttnn +// RUN: rm -rf %t.mlir +// RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | \ +// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path%" > %t.mlir +// RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn +// RUN: FileCheck --input-file=%t.mlir %s +module attributes {} { + func.func @forward(%arg0: tensor<1x1x32x128xbf16>) -> tensor<1x1x32x128xbf16> { + // CHECK: ttnn.arange + %0 = "stablehlo.iota"() {iota_dimension = 3: i64} : () -> tensor<1x1x32x128xbf16> + %2 = "stablehlo.multiply"(%arg0, %0) : (tensor<1x1x32x128xbf16>, tensor<1x1x32x128xbf16>) -> tensor<1x1x32x128xbf16> + return %2 : tensor<1x1x32x128xbf16> + } +} diff --git a/test/ttmlir/Silicon/TTNN/arange/simple_device_arange_dim2.mlir b/test/ttmlir/Silicon/TTNN/arange/simple_device_arange_dim2.mlir new file mode 100644 index 0000000000..ec509a1b6f --- /dev/null +++ b/test/ttmlir/Silicon/TTNN/arange/simple_device_arange_dim2.mlir @@ -0,0 +1,13 @@ +// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path%" %s > %t.mlir +// RUN: FileCheck %s --input-file=%t.mlir +// RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn +#any_device = #tt.operand_constraint +module attributes {} { + func.func @forward(%arg0: tensor<1x1x32x128xbf16>) -> tensor<1x1x32x128xbf16> { + // CHECK: %[[C:.*]] = "ttnn.arange"[[C:.*]] + %0 = "ttir.arange"() <{start = 0: si64, end = 64: si64, step = 2: si64, arange_dimension = 2: i64}> : () -> tensor<1x1x32x128xbf16> + %1 = tensor.empty() : tensor<1x1x32x128xbf16> + %2 = "ttir.multiply"(%arg0, %0, %1) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x1x32x128xbf16>, tensor<1x1x32x128xbf16>, tensor<1x1x32x128xbf16>) -> tensor<1x1x32x128xbf16> + return %2 : tensor<1x1x32x128xbf16> + } +} diff --git a/test/ttmlir/Silicon/TTNN/arange/simple_device_arange_dim3.mlir b/test/ttmlir/Silicon/TTNN/arange/simple_device_arange_dim3.mlir new file mode 100644 index 0000000000..196e757096 --- /dev/null +++ b/test/ttmlir/Silicon/TTNN/arange/simple_device_arange_dim3.mlir @@ -0,0 +1,13 @@ +// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path%" %s > %t.mlir +// RUN: FileCheck %s --input-file=%t.mlir +// RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn +#any_device = #tt.operand_constraint +module attributes {} { + func.func @forward(%arg0: tensor<1x1x32x128xbf16>) -> tensor<1x1x32x128xbf16> { + // CHECK: %[[C:.*]] = "ttnn.arange"[[C:.*]] + %0 = "ttir.arange"() <{start = 0: si64, end = 128: si64, step = 1: si64, arange_dimension = 3: i64}> : () -> tensor<1x1x32x128xbf16> + %1 = tensor.empty() : tensor<1x1x32x128xbf16> + %2 = "ttir.multiply"(%arg0, %0, %1) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x1x32x128xbf16>, tensor<1x1x32x128xbf16>, tensor<1x1x32x128xbf16>) -> tensor<1x1x32x128xbf16> + return %2 : tensor<1x1x32x128xbf16> + } +}
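
A quick sanity check on the two silicon arange tests above: the dim-2 case generates (64 - 0) / 2 = 32 values, matching dimension 2 of the 1x1x32x128 output, and the dim-3 case generates (128 - 0) / 1 = 128 values, matching dimension 3, so both satisfy the ttir.arange verifier. After the decomposition and TTIR-to-TTNN conversion, each reduces to a last-dimension ttnn.arange roughly of the following form, plus the transpose/broadcast TMs discussed earlier for the non-arange dimensions (sketch only; the device operand and layout encodings the real pipeline attaches are omitted):

  %dim2 = "ttnn.arange"() <{start = 0 : i64, end = 64 : i64, step = 2 : i64}> : () -> tensor<1x1x1x32xbf16>
  %dim3 = "ttnn.arange"() <{start = 0 : i64, end = 128 : i64, step = 1 : i64}> : () -> tensor<1x1x1x128xbf16>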