diff --git a/include/ttmlir/Dialect/TTIR/IR/TTIROps.td b/include/ttmlir/Dialect/TTIR/IR/TTIROps.td index 8782f63ae1..54fc4c0970 100644 --- a/include/ttmlir/Dialect/TTIR/IR/TTIROps.td +++ b/include/ttmlir/Dialect/TTIR/IR/TTIROps.td @@ -1048,6 +1048,48 @@ def TTIR_ClampOp : TTIR_DPSOp<"clamp"> { let hasVerifier = 1; } +def TTIR_ArangeOp : TTIR_Op<"arange"> { + let summary = "Arange operation."; + let description = [{ + Tensor arange operation. + + Produces a tensor with values from `start` to `end` (exclusive) with a step size of `step`, along the dimension specified by `arange_dimension`. + + Examples: + %0 = "ttir.arange"() {start = 0 : i64, end = 5 : i64 step = 1 : i64, arange_dimension = 0 : i64} : () -> tensor<5xi64> + // %0: [0, 1, 2, 3, 4] + + %1 = "ttir.arange"() {start = 0 : i64, end = 10 : i64, step = 2 : i64, arange_dimension = 0 : i64} : () -> tensor<5xf32> + // %1: [0.0, 2.0, 4.0, 6.0, 8.0] + + %2 = "ttir.arange"() {start = 0 : i64, end = 5 : i64, step = 1 : i64, arange_dimension = 0 : i64} : () -> tensor<5x3xi64> + // %2: [ + [0, 0, 0], + [1, 1, 1], + [2, 2, 2], + [3, 3, 3], + [4, 4, 4] + ] + + %3 = "ttir.arange"() {start = 0 : i64, end = 3 : i64, step = 1 : i64, arange_dimension = 1 : i64} : () -> tensor<5x3xi64> + // %3: [ + [0, 1, 2], + [0, 1, 2], + [0, 1, 2], + [0, 1, 2], + [0, 1, 2] + ] + }]; + + let arguments = (ins SI64Attr:$start, + SI64Attr:$end, + SI64Attr:$step, + I64Attr:$arange_dimension); + + let results = (outs AnyRankedTensor:$result); + let hasVerifier = 1; +} + def TTIR_ConstantOp : TTIR_Op<"constant", [ConstantLike, AllShapesMatch<["value", "result"]>]> { let summary = "Constant op."; diff --git a/include/ttmlir/Dialect/TTNN/IR/TTNNOps.td b/include/ttmlir/Dialect/TTNN/IR/TTNNOps.td index 910ed7dfd9..5cfaa6bac2 100644 --- a/include/ttmlir/Dialect/TTNN/IR/TTNNOps.td +++ b/include/ttmlir/Dialect/TTNN/IR/TTNNOps.td @@ -759,6 +759,30 @@ def TTNN_EmptyOp : TTNN_Op<"empty", [NoMemoryEffect]> { let hasVerifier = 1; } +def TTNN_ArangeOp : TTNN_Op<"arange"> { + let summary = "Arange operation."; + let description = [{ + Tensor arange operation. + + Produces a (1, 1, 1, N)-shaped tensor with values from `start` to `end` (exclusive) with a step size of `step`. 
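+
+    Here N = (end - start) / step; the verifier rejects ranges where N is not
+    positive or where the result shape differs from (1, 1, 1, N). Unlike
+    `ttir.arange`, this op also takes a `device` operand, and the element type
+    of the result determines the data type of the generated values.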
+ + Examples: + %0 = "ttnn.arange"() {start = 0 : i64, end = 5 : i64 step = 1 : i64} : () -> tensor<1x1x1x5xi64> + // %0: [[[[0, 1, 2, 3, 4]]]] + + %1 = "ttnn.arange"() {start = 0 : i64, end = 10 : i64, step = 2 : i64} : () -> tensor<1x1x1x5xf32> + // %1: [[[[0.0, 2.0, 4.0, 6.0, 8.0]]]] + }]; + + let arguments = (ins I64Attr:$start, + I64Attr:$end, + I64Attr:$step, + TT_Device:$device); + + let results = (outs AnyRankedTensor:$result); + let hasVerifier = 1; +} + def TTNN_FullOp : TTNN_Op<"full"> { let summary = "Full op."; let description = [{ diff --git a/include/ttmlir/Target/TTNN/program.fbs b/include/ttmlir/Target/TTNN/program.fbs index ec493e6496..2d01a9a68b 100644 --- a/include/ttmlir/Target/TTNN/program.fbs +++ b/include/ttmlir/Target/TTNN/program.fbs @@ -61,6 +61,15 @@ table FullOp { out: tt.target.TensorRef; } +table ArangeOp { + start: float; + end: float; + step: float; + dtype: DataType; + out: tt.target.TensorRef; + device: tt.target.DeviceRef; +} + enum EltwiseOpType: uint32 { Add = 0, Multiply = 1, @@ -261,6 +270,7 @@ union OpType { MaxPool2dOp, DeallocateOp, AllGatherOp, + ArangeOp, } table Operation { diff --git a/lib/Conversion/StableHLOToTTIR/StableHLOToTTIRPatterns.cpp b/lib/Conversion/StableHLOToTTIR/StableHLOToTTIRPatterns.cpp index 28bf4f71de..8db1b44e69 100644 --- a/lib/Conversion/StableHLOToTTIR/StableHLOToTTIRPatterns.cpp +++ b/lib/Conversion/StableHLOToTTIR/StableHLOToTTIRPatterns.cpp @@ -1201,6 +1201,36 @@ class StableHLOToTTIRGatherOpConversionPattern } }; +template +class StableHLOToTTIROpIotaOpConversionPattern + : public OpConversionPattern { + + using OpConversionPattern::OpConversionPattern; + +public: + LogicalResult + matchAndRewrite(SrcIotaOp srcOp, Adaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + + RankedTensorType outputType = mlir::cast( + this->getTypeConverter()->convertType(srcOp.getResult().getType())); + rewriter.replaceOpWithNewOp( + srcOp, outputType, 0, outputType.getDimSize(adaptor.getIotaDimension()), + 1, adaptor.getIotaDimension()); + + // Dynamic Iota has an output_shape attribute but the output shape is + // already known by the result type This is to remove the operand that will + // become dead code + for (auto operand : adaptor.getOperands()) { + if (operand.getDefiningOp()) { + rewriter.eraseOp(operand.getDefiningOp()); + } + } + + return success(); + } +}; + void addElementwiseUnaryOpsConversionPatterns(MLIRContext *ctx, RewritePatternSet &patterns, TypeConverter &typeConverter) { @@ -1365,6 +1395,15 @@ void addGatherOpConversionPattern(MLIRContext *ctx, RewritePatternSet &patterns, patterns.add(typeConverter, ctx); } +void addIotaOpConversionPattern(MLIRContext *ctx, RewritePatternSet &patterns, + TypeConverter &typeConverter) { + patterns.add>( + typeConverter, ctx); + patterns + .add>( + typeConverter, ctx); +} + } // namespace namespace mlir::tt { @@ -1389,6 +1428,7 @@ void populateStableHLOToTTIRPatterns(MLIRContext *ctx, addSliceOpConversionPattern(ctx, patterns, typeConverter); addClampOpConversionPattern(ctx, patterns, typeConverter); addGatherOpConversionPattern(ctx, patterns, typeConverter); + addIotaOpConversionPattern(ctx, patterns, typeConverter); } } // namespace mlir::tt diff --git a/lib/Conversion/TTIRToTTIRDecomposition/TTIRToTTIRDecomposition.cpp b/lib/Conversion/TTIRToTTIRDecomposition/TTIRToTTIRDecomposition.cpp index 9c5afd41e6..ed7eb0be82 100644 --- a/lib/Conversion/TTIRToTTIRDecomposition/TTIRToTTIRDecomposition.cpp +++ 
b/lib/Conversion/TTIRToTTIRDecomposition/TTIRToTTIRDecomposition.cpp @@ -897,6 +897,143 @@ struct SelectToSliceConversionPattern } }; +/* + * This pattern rewrites ArangeOp by forcing the arange_dimension to be + * rightmost dimension of the output tensor. This is done by replacing the + * ArangeOp with a new one that has this property, and then transposing out last + * dimension to the dimension specified by the original ArangeOp, and also + * inserting a reshape to match the rank of the intended output and broadcasts + * to repeat the data along the other dimensions. + * + * The ArangeOp that is generated here will be equivalent to how ttnn::ArangeOp + * behaves. The reason this pass is done in TTIR rather than generated when we + * want to lower to TTNN is because in the future we will want to consteval the + * ArangeOp, but have the option to not include repeated data in the constant + * tensor and broadcast at runtime instead. Consteval will be implemented for + * the TTIR dialect only and so this explication of the TMs implicit in ArangeOp + * must be done in TTIR. + */ +struct ArangeForceLastDimensionPattern + : public OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(ttir::ArangeOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + + const RankedTensorType outputType = + mlir::cast(op.getResult().getType()); + + int64_t arangeDimension = adaptor.getArangeDimension(); + int64_t arangeDimensionNegative = arangeDimension - outputType.getRank(); + int64_t start = adaptor.getStart(); + int64_t end = adaptor.getEnd(); + int64_t step = adaptor.getStep(); + + int64_t arangeLength = (end - start) / step; + + ArrayRef ttnnShape = {1, 1, 1, arangeLength}; + if (ttnnShape == outputType.getShape()) { + return success(); + } + + RankedTensorType arangeOutputType = RankedTensorType::get( + SmallVector({1, 1, 1, arangeLength}), + outputType.getElementType(), outputType.getEncoding()); + + Value output = + rewriter + .create( // perform arange on the last dimension to + // match how ttnn behaves + op.getLoc(), arangeOutputType, start, end, step, 3) + .getResult(); + + std::vector outputShape = arangeOutputType.getShape().vec(); + // Must transpose the output so that the data changes along the axis defined + // by arangeDimension + if (arangeDimensionNegative != -1) { + std::vector transposeShape = outputShape; + transposeShape[arangeDimensionNegative + transposeShape.size()] = + arangeLength; + transposeShape[arangeOutputType.getRank() - 1] = 1; + RankedTensorType transposeType = RankedTensorType::get( + transposeShape, arangeOutputType.getElementType(), + arangeOutputType.getEncoding()); + + tensor::EmptyOp dpsOutput = rewriter.create( + op.getLoc(), transposeShape, transposeType.getElementType()); + + output = rewriter.create( + op.getLoc(), transposeType, output, dpsOutput, + arangeDimensionNegative + transposeShape.size(), + arangeOutputType.getRank() - 1, + rewriter.getArrayAttr(SmallVector( + 2, rewriter.getAttr( + OperandConstraint::AnyDeviceTile)))); + + outputShape = transposeShape; + } + + // Must match up the rank of the output with the rank of the intended output + // from the original arange, with the arangeDimension in the correct + // position + if (outputType.getRank() != static_cast(outputShape.size())) { + std::vector reshapeShape; + for (uint32_t i = 0; i < outputType.getRank(); i++) { + i == arangeDimension ? 
reshapeShape.push_back(end) + : reshapeShape.push_back(1); + } + + RankedTensorType reshapeType = RankedTensorType::get( + SmallVector(reshapeShape.begin(), reshapeShape.end()), + outputType.getElementType(), outputType.getEncoding()); + tensor::EmptyOp dpsOutput = rewriter.create( + op.getLoc(), + SmallVector(reshapeShape.begin(), reshapeShape.end()), + reshapeType.getElementType()); + output = rewriter.create( + op.getLoc(), reshapeType, output, dpsOutput, + rewriter.getI32ArrayAttr(reshapeShape), + rewriter.getArrayAttr(SmallVector( + 2, rewriter.getAttr( + OperandConstraint::AnyDeviceTile)))); + + outputShape = + std::vector(reshapeShape.begin(), reshapeShape.end()); + } + + // Must broadcast the rest of the dimensions + SmallVector broadcastDims; + for (uint32_t i = 0; i < outputShape.size(); i++) { + if (i != arangeDimension && outputShape[i] != outputType.getShape()[i]) { + outputShape[i] = outputType.getShape()[i]; + broadcastDims.push_back(rewriter.getI64IntegerAttr(i)); + } + } + if (!broadcastDims.empty()) { + RankedTensorType broadcastType = RankedTensorType::get( + outputShape, outputType.getElementType(), outputType.getEncoding()); + + tensor::EmptyOp dpsOutput = rewriter.create( + op.getLoc(), outputShape, outputType.getElementType()); + + output = rewriter.create( + op.getLoc(), broadcastType, output, dpsOutput, + rewriter.getArrayAttr(broadcastDims), + rewriter.getArrayAttr(SmallVector( + 2, rewriter.getAttr( + OperandConstraint::AnyDeviceTile)))); + + assert(mlir::cast(output.getType()).getShape() == + outputType.getShape() && + "Output shape must match the shape of the input tensor"); + } + rewriter.replaceOp(op, output); + return success(); + } +}; + void populateTTIRToTTIRDecompositionPatterns(MLIRContext *ctx, RewritePatternSet &patterns, TypeConverter &typeConverter) { @@ -906,6 +1043,7 @@ void populateTTIRToTTIRDecompositionPatterns(MLIRContext *ctx, patterns.add(typeConverter, ctx); patterns.add(typeConverter, ctx); patterns.add(typeConverter, ctx); + patterns.add(typeConverter, ctx); } } // namespace mlir::tt diff --git a/lib/Conversion/TTIRToTTIRDecomposition/TTIRToTTIRDecompositionPass.cpp b/lib/Conversion/TTIRToTTIRDecomposition/TTIRToTTIRDecompositionPass.cpp index d91084f59d..e244eea8fb 100644 --- a/lib/Conversion/TTIRToTTIRDecomposition/TTIRToTTIRDecompositionPass.cpp +++ b/lib/Conversion/TTIRToTTIRDecomposition/TTIRToTTIRDecompositionPass.cpp @@ -53,6 +53,14 @@ struct TTIRToTTIRDecompositionPass target.addIllegalOp(); target.addIllegalOp(); + // These are the ops that must satisfy some conditions after this pass + target.addDynamicallyLegalOp([&](ttir::ArangeOp op) { + auto shape = op.getResult().getType().getShape(); + return (static_cast(op.getArangeDimension()) == 3 && + shape.size() == 4 && shape[0] == 1 && shape[1] == 1 && + shape[2] == 1); + }); + TypeConverter typeConverter; // All types map 1:1. 
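
As a reviewer aid, here is a rough sketch of the IR this decomposition is expected to produce for the arange_dimension = 1, tensor<1x32x128x128xf32> case exercised by the new test/ttmlir/Dialect/TTIR/Decomposition/arange_decomposition.mlir below. Attribute details and operand_constraints are elided, and %t_dps / %b_dps stand for the tensor.empty DPS operands the pattern creates; the op sequence and shapes follow the transpose/broadcast logic of ArangeForceLastDimensionPattern above:

  // Original op: values vary along dim 1.
  %0 = "ttir.arange"() <{start = 0 : si64, end = 32 : si64, step = 1 : si64, arange_dimension = 1 : i64}> : () -> tensor<1x32x128x128xf32>

  // After decomposition (sketch): arange on the last dim, transpose dims 1 and 3,
  // then broadcast dims 2 and 3 back up to the requested shape.
  %a = "ttir.arange"() <{start = 0 : si64, end = 32 : si64, step = 1 : si64, arange_dimension = 3 : i64}> : () -> tensor<1x1x1x32xf32>
  %t = "ttir.transpose"(%a, %t_dps) ... : (tensor<1x1x1x32xf32>, tensor<1x32x1x1xf32>) -> tensor<1x32x1x1xf32>
  %b = "ttir.broadcast"(%t, %b_dps) ... : (tensor<1x32x1x1xf32>, tensor<1x32x128x128xf32>) -> tensor<1x32x128x128xf32>
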
typeConverter.addConversion([](Type type) { return type; }); diff --git a/lib/Conversion/TTIRToTTNN/TTIRToTTNN.cpp b/lib/Conversion/TTIRToTTNN/TTIRToTTNN.cpp index 12e29a9609..10aaa7a731 100644 --- a/lib/Conversion/TTIRToTTNN/TTIRToTTNN.cpp +++ b/lib/Conversion/TTIRToTTNN/TTIRToTTNN.cpp @@ -908,6 +908,34 @@ class AllGatherOpConversionPattern } }; +class ArangeOpConversionPattern : public OpConversionPattern { +public: + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(ttir::ArangeOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + + RankedTensorType outputType = + mlir::cast(op.getResult().getType()); + assert(static_cast(adaptor.getArangeDimension()) == + outputType.getRank() - 1 && + "Arange dimension must be the final dimension of the output tensor " + "to convert to ttnn.arange"); + + auto newOutputType = RankedTensorType::get(outputType.getShape(), + outputType.getElementType(), + outputType.getEncoding()); + + Value device = getOrInsertDevice(rewriter, op); + rewriter.replaceOpWithNewOp( + op, newOutputType, adaptor.getStart(), adaptor.getEnd(), + adaptor.getStep(), device); + + return success(); + } +}; + namespace mlir::tt { void populateTTIRToTTNNPatterns(MLIRContext *ctx, RewritePatternSet &patterns, @@ -973,7 +1001,8 @@ void populateTTIRToTTNNPatterns(MLIRContext *ctx, RewritePatternSet &patterns, Conv2dOpConversionPattern, MaxPool2dOpConversionPattern, SubtractOpConversionPattern, - AllGatherOpConversionPattern + AllGatherOpConversionPattern, + ArangeOpConversionPattern >(typeConverter, ctx); // ANCHOR_END: op_rewriter_pattern_set // clang-format on diff --git a/lib/Conversion/TTNNToEmitC/TTNNToEmitC.cpp b/lib/Conversion/TTNNToEmitC/TTNNToEmitC.cpp index 92862cd9da..d67b0b5c1d 100644 --- a/lib/Conversion/TTNNToEmitC/TTNNToEmitC.cpp +++ b/lib/Conversion/TTNNToEmitC/TTNNToEmitC.cpp @@ -668,8 +668,8 @@ void populateTTNNToEmitCPatterns(mlir::MLIRContext *ctx, // Tensor ops // patterns - .add>( - typeConverter, ctx); + .add, + DefaultOpConversionPattern>(typeConverter, ctx); // Eltwise unary ops // diff --git a/lib/Dialect/TTIR/IR/TTIROps.cpp b/lib/Dialect/TTIR/IR/TTIROps.cpp index 5946cb2fe3..a5626d19dc 100644 --- a/lib/Dialect/TTIR/IR/TTIROps.cpp +++ b/lib/Dialect/TTIR/IR/TTIROps.cpp @@ -45,6 +45,37 @@ ::mlir::LogicalResult mlir::tt::ttir::ClampOp::verify() { return success(); } +//===----------------------------------------------------------------------===// +// ArangeOp +//===----------------------------------------------------------------------===// + +::mlir::LogicalResult mlir::tt::ttir::ArangeOp::verify() { + int64_t start = getStart(); + int64_t end = getEnd(); + int64_t step = getStep(); + + if (step == 0) { + return emitOpError("Step value cannot be zero"); + } + + int64_t numValues = (end - start) / step; + + if (numValues <= 0) { + return emitOpError() << "Invalid range: start=" << start << ", end=" << end + << ", step=" << step; + } + + if (numValues != getType().getDimSize(getArangeDimension())) { + return emitOpError() << "Output tensor shape must be " << numValues + << " at dim " << getArangeDimension() + << " (since start=" << start << ", end=" << end + << ", step=" << step << "), but got " + << getType().getDimSize(getArangeDimension()); + } + + return success(); +} + //===----------------------------------------------------------------------===// // ConstantOp //===----------------------------------------------------------------------===// diff --git a/lib/Dialect/TTNN/IR/TTNNOps.cpp 
b/lib/Dialect/TTNN/IR/TTNNOps.cpp index 4abd74d62e..45d7a112ca 100644 --- a/lib/Dialect/TTNN/IR/TTNNOps.cpp +++ b/lib/Dialect/TTNN/IR/TTNNOps.cpp @@ -140,6 +140,32 @@ ::mlir::LogicalResult mlir::tt::ttnn::MaxPool2dOp::verify() { return success(); } +//===----------------------------------------------------------------------===// +// ArangeOp +//===----------------------------------------------------------------------===// + +::mlir::LogicalResult mlir::tt::ttnn::ArangeOp::verify() { + + if (getStep() == 0) { + return emitOpError("Step cannot be zero."); + } + + int64_t numValues = (getEnd() - getStart()) / getStep(); + + if (numValues <= 0) { + return emitOpError("Invalid range: start=") + << getStart() << ", end=" << getEnd() << ", step=" << getStep(); + } + + std::vector expectedShape = {1, 1, 1, numValues}; + if (getType().getShape().vec() != expectedShape) { + return emitOpError() << "Output tensor shape must be " << expectedShape + << ", but got " << getType().getShape(); + } + + return success(); +} + //===----------------------------------------------------------------------===// // EmptyOp //===----------------------------------------------------------------------===// diff --git a/lib/Dialect/TTNN/Transforms/TTNNLayout.cpp b/lib/Dialect/TTNN/Transforms/TTNNLayout.cpp index eebfdc13f3..2d4a2ff8f5 100644 --- a/lib/Dialect/TTNN/Transforms/TTNNLayout.cpp +++ b/lib/Dialect/TTNN/Transforms/TTNNLayout.cpp @@ -214,6 +214,28 @@ createToLayoutOp(PatternRewriter &rewriter, Location loc, Value input, .getResult(); } + // If the input tensor is an arange, we want to set the desired layout just + // like the other creation ops. However, a caveat is that in ttnn, arange is + // hardcoded to be ROW_MAJOR. So we must ensure that the layout we assign to + // it is ROW_MAJOR - and to make it tile layout we still must insert + // ToLayoutOp on its output. We can do this by setting the element type to + // ty.getElementType() in case desiredElementType is a TileType. 
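+  // For example, an arange producing tensor<1x1x1x128xbf16> keeps a row-major
+  // layout attribute here even when the desired element type is a TileType;
+  // the ToLayoutOp created further below on its result is what performs the
+  // tilization requested by the consumer.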
+ ttir::ArangeOp existingArange = input.getDefiningOp(); + if (existingArange) { + TTNNLayoutAttr arangeLayout = rewriter.getAttr( + ty.getShape(), ty.getElementType(), desiredBufferType, + tensorConfig.getGrid(), desiredMemLayout, g_defaultCollapseDims); + input = + rewriter + .replaceOpWithNewOp( + existingArange, + mlir::RankedTensorType::get(ty.getShape(), ty.getElementType(), + arangeLayout), + existingArange.getStart(), existingArange.getEnd(), + existingArange.getStep(), existingArange.getArangeDimension()) + .getResult(); + } + // If the input tensor is not a constant or empty tensor, we need to create a // new tensor with the desired layout which will be used as the output of the // ToLayoutOp @@ -281,6 +303,13 @@ class TTNNLayoutDPSOperandsRewriter continue; } + // If the operand is a BroadcastOp or a ToLayout op do not put a + // ToLayoutOp on its output + if (operand.get().getDefiningOp() || + operand.get().getDefiningOp()) { + continue; + } + // Read operand constrait for current operand OperandConstraint operandConstraint = mlir::cast( diff --git a/lib/Target/TTNN/TTNNToFlatbuffer.cpp b/lib/Target/TTNN/TTNNToFlatbuffer.cpp index 30b83014d4..5727294601 100644 --- a/lib/Target/TTNN/TTNNToFlatbuffer.cpp +++ b/lib/Target/TTNN/TTNNToFlatbuffer.cpp @@ -333,6 +333,20 @@ createOp(FlatbufferObjectCache &cache, FullOp op) { kHostAllocatedSize)); } +::flatbuffers::Offset<::tt::target::ttnn::ArangeOp> +createOp(FlatbufferObjectCache &cache, ArangeOp op) { + + auto dtype = elementTypeToDataType(op.getResult().getType().getElementType()); + auto device = getOperandThroughDPSOps(op.getDevice()); + return ::tt::target::ttnn::CreateArangeOp( + *cache.fbb, static_cast(op.getStart()), + static_cast(op.getEnd()), static_cast(op.getStep()), + toFlatbuffer(cache, dtype), + cache.getOrCreate(op.getResult(), tensorValueToFlatbuffer, + kHostAllocatedAddress, kHostAllocatedSize), + cache.at<::tt::target::DeviceRef>(device)); +} + // ANCHOR: adding_an_op_matmul_serialize_to_binary ::flatbuffers::Offset<::tt::target::ttnn::MatmulOp> createOp(FlatbufferObjectCache &cache, MatmulOp op) { @@ -869,6 +883,9 @@ emitTTNNOperation(FlatbufferObjectCache &cache, Operation *op, if (auto geluOp = dyn_cast(op); geluOp) { return createOperation(cache, createEltwiseOp(cache, geluOp), debugString); } + if (auto arangeOp = dyn_cast(op); arangeOp) { + return createOperation(cache, createOp(cache, arangeOp), debugString); + } llvm_unreachable("unhandled op in emitTTNNOperation"); } diff --git a/runtime/lib/ttnn/operations/CMakeLists.txt b/runtime/lib/ttnn/operations/CMakeLists.txt index 4edc4780b9..38115803f0 100644 --- a/runtime/lib/ttnn/operations/CMakeLists.txt +++ b/runtime/lib/ttnn/operations/CMakeLists.txt @@ -5,6 +5,7 @@ set(TTNN_OPS_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/include/tt/runtime/ttnn/operations/eltwise/ternary/utils.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ccl/all_gather.cpp ${CMAKE_CURRENT_SOURCE_DIR}/conv/conv2d.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/creation/arange.cpp ${CMAKE_CURRENT_SOURCE_DIR}/creation/empty.cpp ${CMAKE_CURRENT_SOURCE_DIR}/creation/full.cpp ${CMAKE_CURRENT_SOURCE_DIR}/data_movement/concat.cpp diff --git a/runtime/lib/ttnn/operations/creation/arange.cpp b/runtime/lib/ttnn/operations/creation/arange.cpp new file mode 100644 index 0000000000..953040166e --- /dev/null +++ b/runtime/lib/ttnn/operations/creation/arange.cpp @@ -0,0 +1,34 @@ +// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +// +// SPDX-License-Identifier: Apache-2.0 + +#include "arange.h" +#include "tt/runtime/detail/logger.h" 
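+
+// Runtime handler for the ArangeOp flatbuffer record (run() below): start,
+// end and step come from the flatbuffer, the dtype is taken from the output
+// TensorRef, and ttnn::arange is invoked either on the single target device
+// (MeshDevice is asserted against) or on host when no device is attached;
+// the resulting tensor is registered in the pool under the output global id.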
+#include "tt/runtime/ttnn/operations/utils.h" +#include +#include + +namespace tt::runtime::ttnn::operations::creation { +void run(const ::tt::target::ttnn::ArangeOp *op, ProgramContext &context) { + ProgramTensorPool &tensorPool = context.getTensorPool(); + ::ttnn::Tensor out; + + if (op->device()) { + // ttnn::arange supports no device (host) and single device + DeviceVariant targetDevice = + context.getTargetDevice(op->device()->global_id()); + + LOG_ASSERT(std::holds_alternative>( + targetDevice), + "ttnn::arange does not support MeshDevice."); + + out = ::ttnn::arange( + op->start(), op->end(), op->step(), utils::getDataType(op->out()), + std::get>(targetDevice)); + } else { + out = ::ttnn::arange(op->start(), op->end(), op->step(), + utils::getDataType(op->out())); + } + utils::updateTensorPool(tensorPool, out, op->out()->global_id()); +} +} // namespace tt::runtime::ttnn::operations::creation diff --git a/runtime/lib/ttnn/operations/creation/arange.h b/runtime/lib/ttnn/operations/creation/arange.h new file mode 100644 index 0000000000..157ee2dc61 --- /dev/null +++ b/runtime/lib/ttnn/operations/creation/arange.h @@ -0,0 +1,17 @@ +// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC +// +// SPDX-License-Identifier: Apache-2.0 + +#ifndef RUNTIME_LIB_TTNN_OPERATIONS_CREATION_ARANGE_H +#define RUNTIME_LIB_TTNN_OPERATIONS_CREATION_ARANGE_H + +#include "tt/runtime/ttnn/types.h" +#include "ttmlir/Target/TTNN/program_generated.h" + +namespace tt::runtime::ttnn::operations::creation { + +void run(const ::tt::target::ttnn::ArangeOp *op, ProgramContext &context); + +} // namespace tt::runtime::ttnn::operations::creation + +#endif diff --git a/runtime/lib/ttnn/program.cpp b/runtime/lib/ttnn/program.cpp index 8cfa013891..f372f82dc3 100644 --- a/runtime/lib/ttnn/program.cpp +++ b/runtime/lib/ttnn/program.cpp @@ -4,6 +4,7 @@ #include "operations/ccl/all_gather.h" #include "operations/context/get_device.h" #include "operations/conv/conv2d.h" +#include "operations/creation/arange.h" #include "operations/creation/empty.h" #include "operations/creation/full.h" #include "operations/data_movement/concat.h" @@ -186,6 +187,9 @@ void ProgramExecutor::runOperation(const ::tt::target::ttnn::Operation *op) { case ::tt::target::ttnn::OpType::AllGatherOp: { return operations::ccl::run(op->type_as_AllGatherOp(), context); } + case ::tt::target::ttnn::OpType::ArangeOp: { + return operations::creation::run(op->type_as_ArangeOp(), context); + } default: { LOG_FATAL("Unsupported operation type"); } diff --git a/test/ttmlir/Conversion/StableHLOToTTIR/dynamic_iota_op.mlir b/test/ttmlir/Conversion/StableHLOToTTIR/dynamic_iota_op.mlir new file mode 100644 index 0000000000..43241ac6f0 --- /dev/null +++ b/test/ttmlir/Conversion/StableHLOToTTIR/dynamic_iota_op.mlir @@ -0,0 +1,11 @@ +// REQUIRES: stablehlo +// RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | FileCheck %s +#any_device = #tt.operand_constraint +module @jit_dnamic_iota attributes {} { + func.func public @test_dynamic_iota() -> tensor<1x32x128x128xf32> { + // CHECK: %[[C:.*]] = "ttir.arange"[[C:.*]] + %output_shape = stablehlo.constant dense<[1, 32, 128, 128]> : tensor<4xi64> + %0 = "stablehlo.dynamic_iota"(%output_shape) {iota_dimension = 1: i64} : (tensor<4xi64>) -> tensor<1x32x128x128xf32> + return %0 : tensor<1x32x128x128xf32> + } +} diff --git a/test/ttmlir/Conversion/StableHLOToTTIR/iota_op.mlir b/test/ttmlir/Conversion/StableHLOToTTIR/iota_op.mlir new file mode 100644 index 0000000000..857a621bb0 --- /dev/null +++ 
b/test/ttmlir/Conversion/StableHLOToTTIR/iota_op.mlir @@ -0,0 +1,10 @@ +// REQUIRES: stablehlo +// RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | FileCheck %s +#any_device = #tt.operand_constraint +module @jit_iota attributes {} { + func.func public @test_iota() -> tensor<1x32x128x128xf32> { + // CHECK: %[[C:.*]] = "ttir.arange"[[C:.*]] + %0 = "stablehlo.iota"() {iota_dimension = 1: i64} : () -> tensor<1x32x128x128xf32> + return %0 : tensor<1x32x128x128xf32> + } +} diff --git a/test/ttmlir/Dialect/TTIR/Decomposition/arange_decomposition.mlir b/test/ttmlir/Dialect/TTIR/Decomposition/arange_decomposition.mlir new file mode 100644 index 0000000000..6f72e56f17 --- /dev/null +++ b/test/ttmlir/Dialect/TTIR/Decomposition/arange_decomposition.mlir @@ -0,0 +1,11 @@ +// RUN: ttmlir-opt --ttir-to-ttir-decomposition %s | FileCheck %s +#any_device = #tt.operand_constraint +module attributes {} { + func.func @forward(%arg0: tensor<1x32x128x128xf32>) -> tensor<1x32x128x128xf32> { + // CHECK: %[[C:.*]] = "ttir.arange"[[C:.*]] + // CHECK: %[[C:.*]] = "ttir.transpose"[[C:.*]] + // CHECK: %[[C:.*]] = "ttir.broadcast"[[C:.*]] + %1 = "ttir.arange"() <{start = 0: si64, end = 32: si64, step = 1: si64, arange_dimension = 1: i64}> : () -> tensor<1x32x128x128xf32> + return %1 : tensor<1x32x128x128xf32> + } +} diff --git a/test/ttmlir/Dialect/TTIR/decompositions/select_decomposition_tests.mlir b/test/ttmlir/Dialect/TTIR/Decomposition/select_decomposition_tests.mlir similarity index 100% rename from test/ttmlir/Dialect/TTIR/decompositions/select_decomposition_tests.mlir rename to test/ttmlir/Dialect/TTIR/Decomposition/select_decomposition_tests.mlir diff --git a/test/ttmlir/Dialect/TTNN/arange/arange_tests_negative.mlir b/test/ttmlir/Dialect/TTNN/arange/arange_tests_negative.mlir new file mode 100644 index 0000000000..dc3f09fbaf --- /dev/null +++ b/test/ttmlir/Dialect/TTNN/arange/arange_tests_negative.mlir @@ -0,0 +1,12 @@ +// RUN: not ttmlir-opt --split-input-file %s 2>&1 | FileCheck %s +// Negative tests for matmul operation +#any_device = #tt.operand_constraint +module attributes {} { + func.func @forward(%arg0: tensor<1x32x128x128xf32>) -> tensor<1x32x128x128xf32> { + // CHECK: error: 'ttir.arange' op Output tensor shape must be 16 at dim 1 (since start=0, end=32, step=2), but got 32 + %1 = "ttir.arange"() <{start = 0: si64, end = 32: si64, step = 2: si64, arange_dimension = 1: i64}> : () -> tensor<1x32x128x128xf32> + %dps = tensor.empty() : tensor<1x32x128x128xf32> + %2 = "ttir.multiply"(%arg0, %1, %dps) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x128x128xf32>, tensor<1x32x128x128xf32>, tensor<1x32x128x128xf32>) -> tensor<1x32x128x128xf32> + return %2 : tensor<1x32x128x128xf32> + } +} diff --git a/test/ttmlir/Dialect/TTNN/arange/arange_tests_positive.mlir b/test/ttmlir/Dialect/TTNN/arange/arange_tests_positive.mlir new file mode 100644 index 0000000000..4c04e138bb --- /dev/null +++ b/test/ttmlir/Dialect/TTNN/arange/arange_tests_positive.mlir @@ -0,0 +1,11 @@ +// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline %s | FileCheck %s +#any_device = #tt.operand_constraint +module attributes {} { + func.func @forward(%arg0: tensor<1x32x128x128xf32>) -> tensor<1x32x128x128xf32> { + // CHECK: %[[C:.*]] = "ttnn.arange"[[C:.*]] + %1 = "ttir.arange"() <{start = 0: si64, end = 32: si64, step = 1: si64, arange_dimension = 1: i64}> : () -> tensor<1x32x128x128xf32> + %dps = tensor.empty() : tensor<1x32x128x128xf32> + %2 = "ttir.multiply"(%arg0, %1, %dps) 
<{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x32x128x128xf32>, tensor<1x32x128x128xf32>, tensor<1x32x128x128xf32>) -> tensor<1x32x128x128xf32> + return %2 : tensor<1x32x128x128xf32> + } +} diff --git a/test/ttmlir/Silicon/StableHLO/Iota/simple_device_dynamic_iota_dim2.mlir b/test/ttmlir/Silicon/StableHLO/Iota/simple_device_dynamic_iota_dim2.mlir new file mode 100644 index 0000000000..d911ec6fe2 --- /dev/null +++ b/test/ttmlir/Silicon/StableHLO/Iota/simple_device_dynamic_iota_dim2.mlir @@ -0,0 +1,15 @@ +// REQUIRES: stablehlo +// RUN: rm -rf %t.ttnn +// RUN: rm -rf %t.mlir +// RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | \ +// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path%" > %t.mlir +// RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn +// RUN: FileCheck --input-file=%t.mlir %s +module attributes {} { + func.func @forward(%arg0: tensor<1x1x32x128xbf16>) -> tensor<1x1x32x128xbf16> { + // CHECK: ttnn.arange + %0 = "stablehlo.iota"() {iota_dimension = 2: i64} : () -> tensor<1x1x32x128xbf16> + %2 = "stablehlo.multiply"(%arg0, %0) : (tensor<1x1x32x128xbf16>, tensor<1x1x32x128xbf16>) -> tensor<1x1x32x128xbf16> + return %2 : tensor<1x1x32x128xbf16> + } +} diff --git a/test/ttmlir/Silicon/StableHLO/Iota/simple_device_dynamic_iota_dim3.mlir b/test/ttmlir/Silicon/StableHLO/Iota/simple_device_dynamic_iota_dim3.mlir new file mode 100644 index 0000000000..01aa0e91b3 --- /dev/null +++ b/test/ttmlir/Silicon/StableHLO/Iota/simple_device_dynamic_iota_dim3.mlir @@ -0,0 +1,16 @@ +// REQUIRES: stablehlo +// RUN: rm -rf %t.ttnn +// RUN: rm -rf %t.mlir +// RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | \ +// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path%" > %t.mlir +// RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn +// RUN: FileCheck --input-file=%t.mlir %s +module attributes {} { + func.func @forward(%arg0: tensor<1x1x32x128xbf16>) -> tensor<1x1x32x128xbf16> { + %output_shape = stablehlo.constant dense<[1, 1, 32, 128]> : tensor<4xi64> + // CHECK: ttnn.arange + %0 = "stablehlo.dynamic_iota"(%output_shape) {iota_dimension = 3: i64} : (tensor<4xi64>) -> tensor<1x1x32x128xbf16> + %2 = "stablehlo.multiply"(%arg0, %0) : (tensor<1x1x32x128xbf16>, tensor<1x1x32x128xbf16>) -> tensor<1x1x32x128xbf16> + return %2 : tensor<1x1x32x128xbf16> + } +} diff --git a/test/ttmlir/Silicon/StableHLO/Iota/simple_device_iota_dim2.mlir b/test/ttmlir/Silicon/StableHLO/Iota/simple_device_iota_dim2.mlir new file mode 100644 index 0000000000..d911ec6fe2 --- /dev/null +++ b/test/ttmlir/Silicon/StableHLO/Iota/simple_device_iota_dim2.mlir @@ -0,0 +1,15 @@ +// REQUIRES: stablehlo +// RUN: rm -rf %t.ttnn +// RUN: rm -rf %t.mlir +// RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | \ +// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path%" > %t.mlir +// RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn +// RUN: FileCheck --input-file=%t.mlir %s +module attributes {} { + func.func @forward(%arg0: tensor<1x1x32x128xbf16>) -> tensor<1x1x32x128xbf16> { + // CHECK: ttnn.arange + %0 = "stablehlo.iota"() {iota_dimension = 2: i64} : () -> tensor<1x1x32x128xbf16> + %2 = "stablehlo.multiply"(%arg0, %0) : (tensor<1x1x32x128xbf16>, tensor<1x1x32x128xbf16>) -> tensor<1x1x32x128xbf16> + return %2 : tensor<1x1x32x128xbf16> + } +} diff --git a/test/ttmlir/Silicon/StableHLO/Iota/simple_device_iota_dim3.mlir 
b/test/ttmlir/Silicon/StableHLO/Iota/simple_device_iota_dim3.mlir new file mode 100644 index 0000000000..a231432abc --- /dev/null +++ b/test/ttmlir/Silicon/StableHLO/Iota/simple_device_iota_dim3.mlir @@ -0,0 +1,15 @@ +// REQUIRES: stablehlo +// RUN: rm -rf %t.ttnn +// RUN: rm -rf %t.mlir +// RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | \ +// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path%" > %t.mlir +// RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn +// RUN: FileCheck --input-file=%t.mlir %s +module attributes {} { + func.func @forward(%arg0: tensor<1x1x32x128xbf16>) -> tensor<1x1x32x128xbf16> { + // CHECK: ttnn.arange + %0 = "stablehlo.iota"() {iota_dimension = 3: i64} : () -> tensor<1x1x32x128xbf16> + %2 = "stablehlo.multiply"(%arg0, %0) : (tensor<1x1x32x128xbf16>, tensor<1x1x32x128xbf16>) -> tensor<1x1x32x128xbf16> + return %2 : tensor<1x1x32x128xbf16> + } +} diff --git a/test/ttmlir/Silicon/TTNN/arange/simple_device_arange_dim2.mlir b/test/ttmlir/Silicon/TTNN/arange/simple_device_arange_dim2.mlir new file mode 100644 index 0000000000..ec509a1b6f --- /dev/null +++ b/test/ttmlir/Silicon/TTNN/arange/simple_device_arange_dim2.mlir @@ -0,0 +1,13 @@ +// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path%" %s > %t.mlir +// RUN: FileCheck %s --input-file=%t.mlir +// RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn +#any_device = #tt.operand_constraint +module attributes {} { + func.func @forward(%arg0: tensor<1x1x32x128xbf16>) -> tensor<1x1x32x128xbf16> { + // CHECK: %[[C:.*]] = "ttnn.arange"[[C:.*]] + %0 = "ttir.arange"() <{start = 0: si64, end = 64: si64, step = 2: si64, arange_dimension = 2: i64}> : () -> tensor<1x1x32x128xbf16> + %1 = tensor.empty() : tensor<1x1x32x128xbf16> + %2 = "ttir.multiply"(%arg0, %0, %1) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x1x32x128xbf16>, tensor<1x1x32x128xbf16>, tensor<1x1x32x128xbf16>) -> tensor<1x1x32x128xbf16> + return %2 : tensor<1x1x32x128xbf16> + } +} diff --git a/test/ttmlir/Silicon/TTNN/arange/simple_device_arange_dim3.mlir b/test/ttmlir/Silicon/TTNN/arange/simple_device_arange_dim3.mlir new file mode 100644 index 0000000000..196e757096 --- /dev/null +++ b/test/ttmlir/Silicon/TTNN/arange/simple_device_arange_dim3.mlir @@ -0,0 +1,13 @@ +// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path%" %s > %t.mlir +// RUN: FileCheck %s --input-file=%t.mlir +// RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn +#any_device = #tt.operand_constraint +module attributes {} { + func.func @forward(%arg0: tensor<1x1x32x128xbf16>) -> tensor<1x1x32x128xbf16> { + // CHECK: %[[C:.*]] = "ttnn.arange"[[C:.*]] + %0 = "ttir.arange"() <{start = 0: si64, end = 128: si64, step = 1: si64, arange_dimension = 3: i64}> : () -> tensor<1x1x32x128xbf16> + %1 = tensor.empty() : tensor<1x1x32x128xbf16> + %2 = "ttir.multiply"(%arg0, %0, %1) <{operandSegmentSizes = array, operand_constraints = [#any_device, #any_device, #any_device]}> : (tensor<1x1x32x128xbf16>, tensor<1x1x32x128xbf16>, tensor<1x1x32x128xbf16>) -> tensor<1x1x32x128xbf16> + return %2 : tensor<1x1x32x128xbf16> + } +}
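
A quick sanity check on the two silicon arange tests above: the dim-2 case generates (64 - 0) / 2 = 32 values, matching dimension 2 of the 1x1x32x128 output, and the dim-3 case generates (128 - 0) / 1 = 128 values, matching dimension 3, so both satisfy the ttir.arange verifier. After the decomposition and TTIR-to-TTNN conversion, each reduces to a last-dimension ttnn.arange roughly of the following form, plus the transpose/broadcast TMs discussed earlier for the non-arange dimensions (sketch only; the device operand and layout encodings the real pipeline attaches are omitted):

  %dim2 = "ttnn.arange"() <{start = 0 : i64, end = 64 : i64, step = 2 : i64}> : () -> tensor<1x1x1x32xbf16>
  %dim3 = "ttnn.arange"() <{start = 0 : i64, end = 128 : i64, step = 1 : i64}> : () -> tensor<1x1x1x128xbf16>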