From 9d08234451fffd48855cd43d2088d8a820782a7b Mon Sep 17 00:00:00 2001
From: Nick Smith <127986401+nsmithtt@users.noreply.github.com>
Date: Wed, 13 Nov 2024 11:11:51 -0800
Subject: [PATCH] Uplift third_party/tt-metal to 2024-11-01 (3a24131822 w/ fixes) (#1119)

* Uplift third_party/tt-metal to 3a24131822 (Nov1) + fixes

- Switch to tt-metal branch w/ 3 PR's cherry picked to solve refactoring
  issues with TensorLayout
- Cherry pick conv2D fix (and revert of another change to resolve conflicts)

* Runtime - Update createBufferFromTensorRef() for Buffer API changes in
  tt-metal

- Matches changes from tt-metal PR 14394

* Build Fixes and ignore warning as error for deprecated declaration in
  tt-metal

- Apply small_vector_shape workaround to 2 places in runtime.cpp
  (Also tested against tt-forge-fe, passes)

---------

Co-authored-by: Kyle Mabee
Co-authored-by: Bezulj Marko
---
 runtime/include/tt/runtime/detail/ttmetal.h  | 10 ++++-----
 runtime/include/tt/runtime/detail/ttnn.h     |  1 +
 runtime/lib/common/system_desc.cpp           |  2 +-
 .../ttnn/operations/data_movement/slice.cpp  |  7 ++++---
 .../lib/ttnn/operations/pool/maxpool2d.cpp   |  4 ++--
 .../ttnn/operations/reduction/reduction.cpp  | 10 ++++-----
 runtime/lib/ttnn/runtime.cpp                 | 21 +++++++++++++++----
 third_party/CMakeLists.txt                   |  2 +-
 8 files changed, 36 insertions(+), 21 deletions(-)
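
Note (illustrative sketch, not part of the applied diff): the runtime.cpp hunks
below work around the uplifted TensorLayout code, whose ::ttnn::Tensor
constructor no longer accepts the std::vector shape directly; the shape is
first copied into a tt-metal SmallVector and then wrapped in a ::ttnn::Shape.
A minimal sketch of that pattern, assuming a std::uint32_t element type and a
hypothetical helper name toTtnnShape (neither is taken from the diff):

    #include <cstdint>
    #include <vector>

    #include "ttnn/tensor/shape/small_vector.hpp"
    #include "ttnn/tensor/types.hpp"

    // Copy a runtime shape vector into the SmallVector-backed ::ttnn::Shape
    // that the uplifted tensor constructors expect.
    static ::ttnn::Shape toTtnnShape(const std::vector<std::uint32_t> &shape) {
      ::tt::tt_metal::SmallVector<std::uint32_t> smallVectorShape(shape.begin(),
                                                                  shape.end());
      return ::ttnn::Shape(smallVectorShape);
    }
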
diff --git a/runtime/include/tt/runtime/detail/ttmetal.h b/runtime/include/tt/runtime/detail/ttmetal.h
index 26a007b93..4917daf35 100644
--- a/runtime/include/tt/runtime/detail/ttmetal.h
+++ b/runtime/include/tt/runtime/detail/ttmetal.h
@@ -160,12 +160,12 @@ createBufferFromTensorRef(::tt::tt_metal::Device *device,
       .page_size = pageSize,
       .buffer_type = bufferType,
       .buffer_layout = TensorMemoryLayout::BLOCK_SHARDED,
-      .shard_parameters = shardSpecBuffer,
-      .allocate = false};
-  std::shared_ptr<::tt::tt_metal::Buffer> buffer =
-      ::tt::tt_metal::CreateBuffer(shardedBufferConfig);
+      .shard_parameters = shardSpecBuffer};
+
   assert(tensorRef->address());
-  buffer->set_address(tensorRef->address());
+  std::shared_ptr<::tt::tt_metal::Buffer> buffer =
+      ::tt::tt_metal::CreateBuffer(shardedBufferConfig, tensorRef->address());
+
   return buffer;
 }
 #pragma clang diagnostic pop
diff --git a/runtime/include/tt/runtime/detail/ttnn.h b/runtime/include/tt/runtime/detail/ttnn.h
index 9654142e7..823de41a4 100644
--- a/runtime/include/tt/runtime/detail/ttnn.h
+++ b/runtime/include/tt/runtime/detail/ttnn.h
@@ -41,6 +41,7 @@
 #pragma clang diagnostic ignored "-Wundefined-inline"
 #pragma clang diagnostic ignored "-Wc99-extensions"
 #pragma clang diagnostic ignored "-Wc++11-narrowing"
+#pragma clang diagnostic ignored "-Wdeprecated-declarations"
 
 #define FMT_HEADER_ONLY
 #include "distributed/mesh_device.hpp"
diff --git a/runtime/lib/common/system_desc.cpp b/runtime/lib/common/system_desc.cpp
index afb312627..58212a4aa 100644
--- a/runtime/lib/common/system_desc.cpp
+++ b/runtime/lib/common/system_desc.cpp
@@ -55,7 +55,7 @@ static ::tt::target::Arch toFlatbuffer(::tt::ARCH arch) {
 }
 
 static std::vector<::tt::target::ChipChannel>
-getAllDeviceConnections(const vector<::tt::tt_metal::Device *> &devices) {
+getAllDeviceConnections(const std::vector<::tt::tt_metal::Device *> &devices) {
   std::set> connectionSet;
diff --git a/runtime/lib/ttnn/operations/data_movement/slice.cpp b/runtime/lib/ttnn/operations/data_movement/slice.cpp
index 5ed520ec6..87ba89d80 100644
--- a/runtime/lib/ttnn/operations/data_movement/slice.cpp
+++ b/runtime/lib/ttnn/operations/data_movement/slice.cpp
@@ -14,9 +14,10 @@ void run(const ::tt::target::ttnn::SliceOp *op, ProgramContext &context) {
   ProgramTensorPool &tensorPool = context.getTensorPool();
   const ::ttnn::Tensor &in = tensorPool.at(op->in()->global_id());
   DEBUG_ASSERT(in.is_allocated());
-  std::vector begins(op->begins()->begin(), op->begins()->end());
-  std::vector ends(op->ends()->begin(), op->ends()->end());
-  std::vector step(op->step()->begin(), op->step()->end());
+  ::ttnn::SmallVector begins(op->begins()->begin(),
+                             op->begins()->end());
+  ::ttnn::SmallVector ends(op->ends()->begin(), op->ends()->end());
+  ::ttnn::SmallVector step(op->step()->begin(), op->step()->end());
 
   ::ttnn::Tensor out = ::ttnn::slice(in, begins, ends, step);
   tensorPool.insert_or_assign(op->out()->global_id(), out);
diff --git a/runtime/lib/ttnn/operations/pool/maxpool2d.cpp b/runtime/lib/ttnn/operations/pool/maxpool2d.cpp
index a7bc95eee..dfd8b9375 100644
--- a/runtime/lib/ttnn/operations/pool/maxpool2d.cpp
+++ b/runtime/lib/ttnn/operations/pool/maxpool2d.cpp
@@ -34,8 +34,8 @@ preshardForMaxPool2d(const ::tt::target::ttnn::MaxPool2dOp *op,
   auto parallel_config =
       ::ttnn::operations::conv::conv2d::determine_parallel_config(
           ::ttnn::TensorMemoryLayout::HEIGHT_SHARDED, op->batch_size(),
-          op->channels(), output_height, output_width, op->channels(), &device,
-          ShardOrientation::ROW_MAJOR);
+          op->channels(), output_height, output_width, op->channels(),
+          device.compute_with_storage_grid_size(), ShardOrientation::ROW_MAJOR);
   auto sharded_memory_config = ::ttnn::operations::conv::conv2d::
       create_sharded_memory_config_from_parallel_config(inputShape,
                                                         parallel_config, 1);
diff --git a/runtime/lib/ttnn/operations/reduction/reduction.cpp b/runtime/lib/ttnn/operations/reduction/reduction.cpp
index b9f494dc8..2330a5287 100644
--- a/runtime/lib/ttnn/operations/reduction/reduction.cpp
+++ b/runtime/lib/ttnn/operations/reduction/reduction.cpp
@@ -12,8 +12,8 @@ static void runReductionOp(
     ::tt::target::ttnn::ReductionOp const *op, ProgramTensorPool &tensorPool,
     std::function<::ttnn::Tensor(
         const ::ttnn::Tensor &,
-        const std::optional>> &, const bool,
-        const std::optional<::tt::tt_metal::MemoryConfig> &,
+        const std::optional>> &,
+        const bool, const std::optional<::tt::tt_metal::MemoryConfig> &,
         const std::optional<::ttnn::DeviceComputeKernelConfig> &, float)>
         ttnnOp) {
   ::tt::tt_metal::MemoryConfig outputMemoryConfig =
@@ -22,9 +22,9 @@ static void runReductionOp(
   DEBUG_ASSERT(in.is_allocated());
 
   const auto *fbDimArg = op->dim_arg();
-  std::optional> dimArg =
-      fbDimArg ? std::make_optional(
-                     std::vector(fbDimArg->begin(), fbDimArg->end()))
+  std::optional<::ttnn::SmallVector> dimArg =
+      fbDimArg ? std::make_optional(::ttnn::SmallVector(fbDimArg->begin(),
+                                                        fbDimArg->end()))
                : std::nullopt;
 
   ::ttnn::Tensor out = ttnnOp(
diff --git a/runtime/lib/ttnn/runtime.cpp b/runtime/lib/ttnn/runtime.cpp
index b06ae893a..9eba9986e 100644
--- a/runtime/lib/ttnn/runtime.cpp
+++ b/runtime/lib/ttnn/runtime.cpp
@@ -9,6 +9,9 @@
 #include "tt/runtime/utils.h"
 #include "ttmlir/Target/TTNN/Target.h"
 #include "ttmlir/Version.h"
+#include "ttnn/tensor/shape/small_vector.hpp"
+#include "ttnn/tensor/types.hpp"
+
 namespace tt::runtime::ttnn {
 using ::tt::runtime::DeviceRuntime;
 
@@ -61,9 +64,14 @@ createOwnedTensor(std::shared_ptr data,
                   std::vector const &stride,
                   std::uint32_t itemsize, ::tt::target::DataType dataType) {
   std::uint32_t numElements = shape[0] * stride[0];
+
+  ::tt::tt_metal::SmallVector small_vector_shape(shape.begin(),
+                                                 shape.end());
+
   return ::ttnn::Tensor(
-      createStorage(data.get(), numElements, dataType), shape,
-      utils::toTTNNDataType(dataType), ::ttnn::Layout::ROW_MAJOR);
+      createStorage(data.get(), numElements, dataType),
+      ::ttnn::Shape(small_vector_shape), utils::toTTNNDataType(dataType),
+      ::ttnn::Layout::ROW_MAJOR);
 }
 
 Tensor createTensor(std::shared_ptr data,
@@ -71,9 +79,14 @@ Tensor createTensor(std::shared_ptr data,
                     std::vector const &stride,
                     std::uint32_t itemsize, ::tt::target::DataType dataType) {
   std::uint32_t numElements = shape[0] * stride[0];
+
+  ::tt::tt_metal::SmallVector small_vector_shape(shape.begin(),
+                                                 shape.end());
+
   auto tensor = std::make_shared<::ttnn::Tensor>(
-      createStorage(data.get(), numElements, dataType), shape,
-      utils::toTTNNDataType(dataType), ::ttnn::Layout::ROW_MAJOR);
+      createStorage(data.get(), numElements, dataType),
+      ::ttnn::Shape(small_vector_shape), utils::toTTNNDataType(dataType),
+      ::ttnn::Layout::ROW_MAJOR);
   return Tensor(std::static_pointer_cast(tensor), data,
                 DeviceRuntime::TTNN);
 }
diff --git a/third_party/CMakeLists.txt b/third_party/CMakeLists.txt
index 888548aea..e2cbbe8db 100644
--- a/third_party/CMakeLists.txt
+++ b/third_party/CMakeLists.txt
@@ -1,6 +1,6 @@
 include(ExternalProject)
 
-set(TT_METAL_VERSION "c59c50cc915d4273ecfd1da15916cb4f6b138bfc")
+set(TT_METAL_VERSION "d30c5e874d05f43c9e075f7062017df200fae44c")
 
 if ("$ENV{ARCH_NAME}" STREQUAL "grayskull")
   set(ARCH_NAME "grayskull")