Uplift third_party/tt-metal to 2024-11-01 (3a24131822 w/ fixes) (#1119)
* Uplift third_party/tt-metal to 3a24131822 (Nov 1) + fixes

 - Switch to a tt-metal branch with three PRs cherry-picked to resolve
   refactoring issues with TensorLayout
 - Cherry-pick the conv2d fix (and revert another change to
   resolve conflicts)

* Runtime: update createBufferFromTensorRef() for the Buffer API changes in tt-metal
 - Matches the changes from tt-metal PR 14394

* Build fixes; stop treating tt-metal's deprecated-declaration warning as an error
 - Apply the small_vector_shape workaround in two places in runtime.cpp

(Also tested against tt-forge-fe, passes)
---------

Co-authored-by: Kyle Mabee <[email protected]>
Co-authored-by: Bezulj Marko <[email protected]>
3 people authored Nov 13, 2024
1 parent 0ccb05c commit 9d08234
Showing 8 changed files with 36 additions and 21 deletions.
10 changes: 5 additions & 5 deletions runtime/include/tt/runtime/detail/ttmetal.h
@@ -160,12 +160,12 @@ createBufferFromTensorRef(::tt::tt_metal::Device *device,
       .page_size = pageSize,
       .buffer_type = bufferType,
       .buffer_layout = TensorMemoryLayout::BLOCK_SHARDED,
-      .shard_parameters = shardSpecBuffer,
-      .allocate = false};
-  std::shared_ptr<::tt::tt_metal::Buffer> buffer =
-      ::tt::tt_metal::CreateBuffer(shardedBufferConfig);
+      .shard_parameters = shardSpecBuffer};
+
+  assert(tensorRef->address());
-  buffer->set_address(tensorRef->address());
+  std::shared_ptr<::tt::tt_metal::Buffer> buffer =
+      ::tt::tt_metal::CreateBuffer(shardedBufferConfig, tensorRef->address());

   return buffer;
 }
 #pragma clang diagnostic pop
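
For reference, a minimal before/after sketch of the Buffer API change this hunk adapts to (identifiers such as shardedBufferConfig and tensorRef come from the surrounding createBufferFromTensorRef() shown above; the exact tt-metal signature should be confirmed against PR 14394):

  // Old pattern (removed above): create the buffer unallocated, then patch its address:
  //   .allocate = false in the ShardedBufferConfig initializer,
  //   auto buffer = ::tt::tt_metal::CreateBuffer(shardedBufferConfig);
  //   buffer->set_address(tensorRef->address());
  //
  // New pattern (added above): hand the pre-assigned address to CreateBuffer directly.
  assert(tensorRef->address());
  std::shared_ptr<::tt::tt_metal::Buffer> buffer =
      ::tt::tt_metal::CreateBuffer(shardedBufferConfig, tensorRef->address());
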
1 change: 1 addition & 0 deletions runtime/include/tt/runtime/detail/ttnn.h
@@ -41,6 +41,7 @@
 #pragma clang diagnostic ignored "-Wundefined-inline"
 #pragma clang diagnostic ignored "-Wc99-extensions"
 #pragma clang diagnostic ignored "-Wc++11-narrowing"
+#pragma clang diagnostic ignored "-Wdeprecated-declarations"

 #define FMT_HEADER_ONLY
 #include "distributed/mesh_device.hpp"
2 changes: 1 addition & 1 deletion runtime/lib/common/system_desc.cpp
@@ -55,7 +55,7 @@ static ::tt::target::Arch toFlatbuffer(::tt::ARCH arch) {
 }

 static std::vector<::tt::target::ChipChannel>
-getAllDeviceConnections(const vector<::tt::tt_metal::Device *> &devices) {
+getAllDeviceConnections(const std::vector<::tt::tt_metal::Device *> &devices) {
   std::set<std::tuple<chip_id_t, CoreCoord, chip_id_t, CoreCoord>>
       connectionSet;

7 changes: 4 additions & 3 deletions runtime/lib/ttnn/operations/data_movement/slice.cpp
@@ -14,9 +14,10 @@ void run(const ::tt::target::ttnn::SliceOp *op, ProgramContext &context) {
   ProgramTensorPool &tensorPool = context.getTensorPool();
   const ::ttnn::Tensor &in = tensorPool.at(op->in()->global_id());
   DEBUG_ASSERT(in.is_allocated());
-  std::vector<int32_t> begins(op->begins()->begin(), op->begins()->end());
-  std::vector<int32_t> ends(op->ends()->begin(), op->ends()->end());
-  std::vector<int32_t> step(op->step()->begin(), op->step()->end());
+  ::ttnn::SmallVector<int32_t> begins(op->begins()->begin(),
+                                      op->begins()->end());
+  ::ttnn::SmallVector<int32_t> ends(op->ends()->begin(), op->ends()->end());
+  ::ttnn::SmallVector<int32_t> step(op->step()->begin(), op->step()->end());

   ::ttnn::Tensor out = ::ttnn::slice(in, begins, ends, step);
   tensorPool.insert_or_assign(op->out()->global_id(), out);
4 changes: 2 additions & 2 deletions runtime/lib/ttnn/operations/pool/maxpool2d.cpp
@@ -34,8 +34,8 @@ preshardForMaxPool2d(const ::tt::target::ttnn::MaxPool2dOp *op,
   auto parallel_config =
       ::ttnn::operations::conv::conv2d::determine_parallel_config(
           ::ttnn::TensorMemoryLayout::HEIGHT_SHARDED, op->batch_size(),
-          op->channels(), output_height, output_width, op->channels(), &device,
-          ShardOrientation::ROW_MAJOR);
+          op->channels(), output_height, output_width, op->channels(),
+          device.compute_with_storage_grid_size(), ShardOrientation::ROW_MAJOR);
   auto sharded_memory_config = ::ttnn::operations::conv::conv2d::
       create_sharded_memory_config_from_parallel_config(inputShape,
                                                         parallel_config, 1);
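
A minimal sketch of the call-site change (assuming, as the hunk suggests, that determine_parallel_config now takes the device's compute-with-storage grid size rather than a Device pointer; identifiers come from the hunk above):

  // The grid is hoisted into a local only for readability.
  auto compute_grid = device.compute_with_storage_grid_size();
  auto parallel_config =
      ::ttnn::operations::conv::conv2d::determine_parallel_config(
          ::ttnn::TensorMemoryLayout::HEIGHT_SHARDED, op->batch_size(),
          op->channels(), output_height, output_width, op->channels(),
          compute_grid, ShardOrientation::ROW_MAJOR);
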
10 changes: 5 additions & 5 deletions runtime/lib/ttnn/operations/reduction/reduction.cpp
@@ -12,8 +12,8 @@ static void runReductionOp(
     ::tt::target::ttnn::ReductionOp const *op, ProgramTensorPool &tensorPool,
     std::function<::ttnn::Tensor(
         const ::ttnn::Tensor &,
-        const std::optional<std::variant<int, std::vector<int>>> &, const bool,
-        const std::optional<::tt::tt_metal::MemoryConfig> &,
+        const std::optional<std::variant<int, ::ttnn::SmallVector<int>>> &,
+        const bool, const std::optional<::tt::tt_metal::MemoryConfig> &,
         const std::optional<::ttnn::DeviceComputeKernelConfig> &, float)>
         ttnnOp) {
   ::tt::tt_metal::MemoryConfig outputMemoryConfig =
@@ -22,9 +22,9 @@ static void runReductionOp(
   DEBUG_ASSERT(in.is_allocated());

   const auto *fbDimArg = op->dim_arg();
-  std::optional<vector<int>> dimArg =
-      fbDimArg ? std::make_optional(
-                     std::vector<int>(fbDimArg->begin(), fbDimArg->end()))
+  std::optional<::ttnn::SmallVector<int>> dimArg =
+      fbDimArg ? std::make_optional(::ttnn::SmallVector<int>(fbDimArg->begin(),
+                                                             fbDimArg->end()))
                : std::nullopt;

   ::ttnn::Tensor out = ttnnOp(
21 changes: 17 additions & 4 deletions runtime/lib/ttnn/runtime.cpp
@@ -9,6 +9,9 @@
 #include "tt/runtime/utils.h"
 #include "ttmlir/Target/TTNN/Target.h"
 #include "ttmlir/Version.h"
+#include "ttnn/tensor/shape/small_vector.hpp"
+#include "ttnn/tensor/types.hpp"
+
 namespace tt::runtime::ttnn {

 using ::tt::runtime::DeviceRuntime;
@@ -61,19 +64,29 @@ createOwnedTensor(std::shared_ptr<void> data,
                   std::vector<std::uint32_t> const &stride,
                   std::uint32_t itemsize, ::tt::target::DataType dataType) {
   std::uint32_t numElements = shape[0] * stride[0];
+
+  ::tt::tt_metal::SmallVector<uint32_t> small_vector_shape(shape.begin(),
+                                                           shape.end());
+
   return ::ttnn::Tensor(
-      createStorage<OwnedStorage>(data.get(), numElements, dataType), shape,
-      utils::toTTNNDataType(dataType), ::ttnn::Layout::ROW_MAJOR);
+      createStorage<OwnedStorage>(data.get(), numElements, dataType),
+      ::ttnn::Shape(small_vector_shape), utils::toTTNNDataType(dataType),
+      ::ttnn::Layout::ROW_MAJOR);
 }

 Tensor createTensor(std::shared_ptr<void> data,
                     std::vector<std::uint32_t> const &shape,
                     std::vector<std::uint32_t> const &stride,
                     std::uint32_t itemsize, ::tt::target::DataType dataType) {
   std::uint32_t numElements = shape[0] * stride[0];
+
+  ::tt::tt_metal::SmallVector<uint32_t> small_vector_shape(shape.begin(),
+                                                           shape.end());
+
   auto tensor = std::make_shared<::ttnn::Tensor>(
-      createStorage<BorrowedStorage>(data.get(), numElements, dataType), shape,
-      utils::toTTNNDataType(dataType), ::ttnn::Layout::ROW_MAJOR);
+      createStorage<BorrowedStorage>(data.get(), numElements, dataType),
+      ::ttnn::Shape(small_vector_shape), utils::toTTNNDataType(dataType),
+      ::ttnn::Layout::ROW_MAJOR);
   return Tensor(std::static_pointer_cast<void>(tensor), data,
                 DeviceRuntime::TTNN);
 }
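
Both helpers apply the same small_vector_shape conversion; a hedged sketch of the workaround in isolation (shape values are hypothetical, and it assumes ::ttnn::Shape is constructible from tt-metal's SmallVector, as the hunk above shows):

  // Runtime callers still hand in a std::vector shape (hypothetical values here) ...
  std::vector<std::uint32_t> shape = {1, 1, 32, 32};
  // ... which is copied into tt-metal's SmallVector and wrapped in a ::ttnn::Shape
  // before the ::ttnn::Tensor is constructed.
  ::tt::tt_metal::SmallVector<uint32_t> small_vector_shape(shape.begin(),
                                                           shape.end());
  ::ttnn::Shape tensorShape(small_vector_shape);
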
2 changes: 1 addition & 1 deletion third_party/CMakeLists.txt
@@ -1,6 +1,6 @@
 include(ExternalProject)

-set(TT_METAL_VERSION "c59c50cc915d4273ecfd1da15916cb4f6b138bfc")
+set(TT_METAL_VERSION "d30c5e874d05f43c9e075f7062017df200fae44c")

 if ("$ENV{ARCH_NAME}" STREQUAL "grayskull")
   set(ARCH_NAME "grayskull")
