Remove some usages of ttnn::Shape from the codebase (#17062)
### Ticket

### Problem description
`ttnn::Shape` carries both logical and tile-padded dimensions, but many call sites only need the logical dimensions. Those call sites should use the purely logical shape type instead.

### What's changed
Replaced usages of `ttnn::Shape` with `ttnn::SimpleShape` across tests, graph-trace utilities, CCL ops, and conv2d utilities, dropping the now-redundant `.logical_shape()` and `.with_tile_padding()` calls at the affected call sites. The before/after pattern is sketched below.
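A minimal sketch of the migration pattern, distilled from the hunks below (illustrative only; variable names are not taken verbatim from the diff):

```cpp
// Before: ttnn::Shape carries logical + padded dims, so callers that only
// need the logical dims had to unwrap them explicitly.
const ttnn::Shape old_shape = ttnn::Shape{1, 1, 32, 128};
const auto num_elems_old = old_shape.logical_shape().volume();

// After: ttnn::SimpleShape holds just the logical dims, so the extra
// .logical_shape() hop disappears.
const ttnn::SimpleShape new_shape({1, 1, 32, 128});
const auto num_elems_new = new_shape.volume();
```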

### Checklist
- [x] [Post commit CI passes](https://github.com/tenstorrent/tt-metal/actions/runs/12960342700)
- [x] [Model regression CI testing passes](https://github.com/tenstorrent/tt-metal/actions/runs/12960346094)
- [x] [Device performance regression CI testing passes](https://github.com/tenstorrent/tt-metal/actions/runs/12960344180)
- [x] [T3K frequent CI passes](https://github.com/tenstorrent/tt-metal/actions/runs/12958453052)
- [x] [T3K unit tests CI passes](https://github.com/tenstorrent/tt-metal/actions/runs/12958455364)
- [x] [Nightly model and ttnn CI passes](https://github.com/tenstorrent/tt-metal/actions/runs/12960348968)
- [x] New/Existing tests provide coverage for changes
sminakov-tt authored Jan 25, 2025
1 parent ad0b806 commit f952174
Showing 26 changed files with 185 additions and 204 deletions.
8 changes: 4 additions & 4 deletions tests/tt_eager/ops/test_sliding_window_ops.cpp
@@ -377,7 +377,7 @@ int main() {
{config.batch_size,
config.input_hw.first + 2 * config.pad_hw.first,
config.input_hw.second + 2 * config.pad_hw.second});
-auto output_tensor_shape = config.get_output_shape().value;
+auto output_tensor_shape = config.get_output_shape();
ttnn::SimpleShape filter_tensor_shape({config.window_hw.first, config.window_hw.second});

Tensor input_padded_tensor =
@@ -390,12 +390,12 @@ int main() {
vector<float> filter_vector = create_filter_vec(filter_tensor_buf, tc.filter_h, tc.filter_w);
owned_buffer::Buffer<bfloat16> out_golden_tensor_buf = ref_conv_op(
input_padded_tensor,
-ttnn::Shape(input_tensor_shape),
+input_tensor_shape,
tc.stride_h,
tc.stride_w,
filter_vector,
-ttnn::Shape(filter_tensor_shape),
-ttnn::Shape(output_tensor_shape));
+filter_tensor_shape,
+output_tensor_shape);

auto failed_tests = validate_generate_functions(
device,
Changed file (name not captured in this view):
@@ -1646,8 +1646,8 @@ TEST(WorkerFabricEdmDatapath, LineFabricMcast_ManyMessages_SingleSource_Persiste
////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////

-ttnn::ccl::Shape4D<uint32_t> shape_to_shape_in_tiles(ttnn::Shape const& shape) {
-auto logical_shape = shape.logical_shape();
+ttnn::ccl::Shape4D<uint32_t> shape_to_shape_in_tiles(const ttnn::SimpleShape& shape) {
+auto logical_shape = shape;
logical_shape[-2] /= tt::constants::TILE_HEIGHT;
logical_shape[-1] /= tt::constants::TILE_WIDTH;
EXPECT_TRUE(logical_shape.size() == 4);
@@ -2727,20 +2727,19 @@ TEST(CclAsyncOp, ReduceScatterSmall_PersistentFabric) {
"Expected {} devices but got {}",
test_expected_num_devices,
num_devices);
-const ttnn::Shape input_shape = ttnn::Shape{1, 1, 32, 32 * num_devices};
+const ttnn::SimpleShape input_shape({1, 1, 32, 32 * num_devices});
const MemoryConfig in_memory_config = MemoryConfig(TensorMemoryLayout::INTERLEAVED, BufferType::DRAM);
-auto const logical_shape = input_shape.logical_shape();
-const auto num_elems = logical_shape.volume();
+const auto num_elems = input_shape.volume();

// INPUT TENSOR setup
size_t page_size = tile_size(DataFormat::Float16);
std::vector<Tensor> device_input_tensors;
for (size_t i = 0; i < num_devices; i++) {
// host_input_tensors.push_back(ttnn::numpy::random::uniform(bfloat16(-1.0f), bfloat16(1.0f) ,
-// {logical_shape[0],logical_shape[1],logical_shape[2],logical_shape[3]}, layout).to(devices[i]));
+// {input_shape[0],input_shape[1],input_shape[2],input_shape[3]}, layout).to(devices[i]));
auto t = ttnn::experimental::view(ttnn::arange(0, num_elems, 1, DataType::BFLOAT16), input_shape).to(layout);
t.set_tensor_spec(TensorSpec(
-logical_shape, TensorLayout(DataType::BFLOAT16, PageConfig(layout, tt_metal::Tile()), in_memory_config)));
+input_shape, TensorLayout(DataType::BFLOAT16, PageConfig(layout, tt_metal::Tile()), in_memory_config)));

device_input_tensors.push_back(t.to(devices[i]));
}
@@ -2814,7 +2813,8 @@ TEST(CclAsyncOp, ReduceScatterSmall_PersistentFabric) {
}

#include "ttnn/cpp/ttnn/operations/experimental/ccl/all_gather_async/device/all_gather_async_op.hpp"
-void run_all_gather_with_persistent_fabric(const size_t dim, const size_t num_links, ttnn::Shape const& input_shape) {
+void run_all_gather_with_persistent_fabric(
+    const size_t dim, const size_t num_links, ttnn::SimpleShape const& input_shape) {
log_info(tt::LogTest, "entering test");
constexpr auto layout = Layout::TILE;
// DEVICES setuip
@@ -2841,8 +2841,7 @@ void run_all_gather_with_persistent_fabric(const size_t dim, const size_t num_li
test_expected_num_devices,
num_devices);
const MemoryConfig in_memory_config = MemoryConfig(TensorMemoryLayout::INTERLEAVED, BufferType::DRAM);
-auto const logical_shape = input_shape.logical_shape();
-const auto num_elems = logical_shape.volume();
+const auto num_elems = input_shape.volume();

// INPUT TENSOR setup
log_info(tt::LogTest, "setting up input tensors");
@@ -2851,7 +2850,7 @@ void run_all_gather_with_persistent_fabric(const size_t dim, const size_t num_li
for (size_t i = 0; i < num_devices; i++) {
auto t = ttnn::experimental::view(ttnn::arange(0, num_elems, 1), input_shape).to(layout);
t.set_tensor_spec(TensorSpec(
-logical_shape, TensorLayout(DataType::BFLOAT16, PageConfig(layout, tt_metal::Tile()), in_memory_config)));
+input_shape, TensorLayout(DataType::BFLOAT16, PageConfig(layout, tt_metal::Tile()), in_memory_config)));

device_input_tensors.push_back(t.to(devices[i]));
}
@@ -2915,16 +2914,16 @@ void run_all_gather_with_persistent_fabric(const size_t dim, const size_t num_li
}

TEST(CclAsyncOp, AllGather_PersistentFabric_Dim3_Links1_Shape1_1_32_128) {
-run_all_gather_with_persistent_fabric(3, 1, ttnn::Shape{1, 1, 32, 128});
+run_all_gather_with_persistent_fabric(3, 1, ttnn::SimpleShape({1, 1, 32, 128}));
}
TEST(CclAsyncOp, AllGather_PersistentFabric_Dim3_Links1_Shape1_1_32_8192) {
-run_all_gather_with_persistent_fabric(3, 1, ttnn::Shape{1, 1, 32, 8192});
+run_all_gather_with_persistent_fabric(3, 1, ttnn::SimpleShape({1, 1, 32, 8192}));
}
// Mesh device setup seems to not provide the correct configuration for multi-link? To be investigated
TEST(CclAsyncOp, DISABLED_AllGather_PersistentFabric_Dim3_Links2_Shape1_1_32_128) {
-run_all_gather_with_persistent_fabric(3, 2, ttnn::Shape{1, 1, 32, 128});
+run_all_gather_with_persistent_fabric(3, 2, ttnn::SimpleShape({1, 1, 32, 128}));
}
// Mesh device setup seems to not provide the correct configuration for multi-link? To be investigated
TEST(CclAsyncOp, DISABLED_AllGather_PersistentFabric_Dim3_Links2_Shape1_1_32_8192) {
-run_all_gather_with_persistent_fabric(3, 2, ttnn::Shape{1, 1, 32, 8192});
+run_all_gather_with_persistent_fabric(3, 2, ttnn::SimpleShape({1, 1, 32, 8192}));
}
36 changes: 18 additions & 18 deletions tests/ttnn/unit_tests/gtests/tensor/test_create_tensor.cpp
@@ -86,7 +86,7 @@ std::ostream& operator<<(std::ostream& os, const tt::tt_metal::DataType& value)
}

using CombinationInputParams =
-std::tuple<ttnn::Shape, tt::tt_metal::DataType, tt::tt_metal::Layout, tt::tt_metal::MemoryConfig>;
+std::tuple<ttnn::SimpleShape, tt::tt_metal::DataType, tt::tt_metal::Layout, tt::tt_metal::MemoryConfig>;
class EmptyTensorTest : public ttnn::TTNNFixtureWithDevice,
public ::testing::WithParamInterface<CombinationInputParams> {};

@@ -108,36 +108,36 @@ TEST_P(EmptyTensorTest, Combinations) {

// Ignoring too large single bank allocations
if (memory_config.memory_layout == TensorMemoryLayout::SINGLE_BANK) {
-if (tensor_layout.compute_page_size_bytes(shape.logical_shape()) >= 500 * 1024) {
+if (tensor_layout.compute_page_size_bytes(shape) >= 500 * 1024) {
GTEST_SKIP() << "Skipping test with page size exceeding single bank size of 500 kB!";
}
}

auto tensor = tt::tt_metal::create_device_tensor(shape, dtype, layout, device_, memory_config);
-EXPECT_EQ(tensor.get_logical_shape(), shape.logical_shape());
+EXPECT_EQ(tensor.get_logical_shape(), shape);

-test_utils::test_tensor_on_device(shape.logical_shape(), tensor_layout, device_);
+test_utils::test_tensor_on_device(shape, tensor_layout, device_);
}

INSTANTIATE_TEST_SUITE_P(
EmptyTensorTestWithShape,
EmptyTensorTest,
::testing::Combine(
::testing::Values(
-ttnn::Shape({}),
-ttnn::Shape({0}),
-ttnn::Shape({1}),
-ttnn::Shape({1, 2}),
-ttnn::Shape({1, 2, 3}),
-ttnn::Shape({1, 2, 3, 4}),
-// ttnn::Shape({0, 0, 0, 0}), fails with width sharded case
-ttnn::Shape({1, 1, 1, 1}),
-// ttnn::Shape({0, 1, 32, 32}), fails with width sharded case
-ttnn::Shape({1, 1, 32, 32}),
-ttnn::Shape({2, 1, 32, 32}),
-ttnn::Shape({64, 1, 256, 1}),
-ttnn::Shape({1, 1, 21120, 16}),
-ttnn::Shape({1, 2, 3, 4, 5})),
+ttnn::SimpleShape({}),
+ttnn::SimpleShape({0}),
+ttnn::SimpleShape({1}),
+ttnn::SimpleShape({1, 2}),
+ttnn::SimpleShape({1, 2, 3}),
+ttnn::SimpleShape({1, 2, 3, 4}),
+// ttnn::SimpleShape({0, 0, 0, 0}), fails with width sharded case
+ttnn::SimpleShape({1, 1, 1, 1}),
+// ttnn::SimpleShape({0, 1, 32, 32}), fails with width sharded case
+ttnn::SimpleShape({1, 1, 32, 32}),
+ttnn::SimpleShape({2, 1, 32, 32}),
+ttnn::SimpleShape({64, 1, 256, 1}),
+ttnn::SimpleShape({1, 1, 21120, 16}),
+ttnn::SimpleShape({1, 2, 3, 4, 5})),

::testing::Values(
tt::tt_metal::DataType::BFLOAT16,
6 changes: 3 additions & 3 deletions tests/ttnn/unit_tests/gtests/test_graph_add.cpp
@@ -145,7 +145,7 @@ INSTANTIATE_TEST_SUITE_P(
.expected_l1_output_per_core = 2048,
.expected_l1_peak_per_core = 2048,
.expected_output_info = {graph::TensorInfo{
-.shape = ttnn::Shape(tt::tt_metal::Array4D{1, 3, 32, 32}),
+.shape = ttnn::SimpleShape(tt::tt_metal::Array4D{1, 3, 32, 32}),
.size = 6144,
.type = tt::tt_metal::BufferType::L1}}},
AddOpGraphTestParam{
@@ -167,7 +167,7 @@ INSTANTIATE_TEST_SUITE_P(
.expected_l1_output_per_core = 2048,
.expected_l1_peak_per_core = 2 * 2048,
.expected_output_info = {graph::TensorInfo{
-.shape = ttnn::Shape(tt::tt_metal::Array4D{4, 3, 32, 32}),
+.shape = ttnn::SimpleShape(tt::tt_metal::Array4D{4, 3, 32, 32}),
.size = 24576,
.type = tt::tt_metal::BufferType::L1}},
},
@@ -190,7 +190,7 @@ INSTANTIATE_TEST_SUITE_P(
.expected_l1_output_per_core = 2 * (3 * 32 * 32 * 32 * 32) / 16,
.expected_l1_peak_per_core = 2 * (3 * 32 * 32 * 32 * 32) / 16,
.expected_output_info = {graph::TensorInfo{
-.shape = ttnn::Shape(tt::tt_metal::Array4D{3, 1, 32 * 32, 32 * 32}),
+.shape = ttnn::SimpleShape(tt::tt_metal::Array4D{3, 1, 32 * 32, 32 * 32}),
.size = 2 * (3 * 32 * 32 * 32 * 32),
.type = tt::tt_metal::BufferType::L1}}}),
::testing::Values(
4 changes: 2 additions & 2 deletions ttnn/cpp/ttnn/graph/graph_trace_utils.cpp
@@ -15,7 +15,7 @@
namespace ttnn::graph {

namespace {
-ttnn::Shape parse_shape(std::string_view shape_string) {
+ttnn::SimpleShape parse_shape(std::string_view shape_string) {
// Extract shape values from string like "ttnn.Shape([1, 3, 32, 32])"
auto start = shape_string.find('[') + 1;
auto end = shape_string.find(']');
@@ -42,7 +42,7 @@ ttnn::Shape parse_shape(std::string_view shape_string) {
}
}

-return ttnn::Shape(shape);
+return ttnn::SimpleShape(std::move(shape));
}
} // namespace

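A side note on the `std::move` in the new `parse_shape` return: it hands the freshly parsed dimension container to `ttnn::SimpleShape` rather than copying it. A standalone sketch of the same idiom, using `std::vector<uint32_t>` as a stand-in since the actual container type isn't shown in this hunk:

```cpp
#include <cstdint>
#include <utility>
#include <vector>

// Stand-in for ttnn::SimpleShape: a type that owns its dimension storage.
struct ShapeLike {
    explicit ShapeLike(std::vector<uint32_t> dims) : dims_(std::move(dims)) {}
    std::vector<uint32_t> dims_;
};

ShapeLike parse_dims() {
    std::vector<uint32_t> shape = {1, 3, 32, 32};  // ...values parsed from a string...
    // Moving transfers the buffer instead of copying it; `shape` is a local
    // that is about to go out of scope anyway.
    return ShapeLike(std::move(shape));
}
```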
2 changes: 1 addition & 1 deletion ttnn/cpp/ttnn/graph/graph_trace_utils.hpp
@@ -27,7 +27,7 @@ std::vector<std::string> extract_calltrace(const nlohmann::json& trace);
std::unordered_set<uint32_t> extract_output_tensors(const nlohmann::json& trace);

struct TensorInfo {
-ttnn::Shape shape;
+ttnn::SimpleShape shape;
uint32_t size = 0;
tt::tt_metal::BufferType type = tt::tt_metal::BufferType::DRAM;

Changed file (name not captured in this view):
@@ -88,23 +88,6 @@ std::vector<std::vector<ttnn::ccl::v2::TensorSlice>> split_tensor_slices_across_
return worker_slices_streams;
};

-Shape4D<uint32_t> from_tensor_shape(ttnn::Shape const& shape) {
-constexpr size_t max_rank = 4;
-TT_FATAL(
-shape.size() <= max_rank,
-"Reduce scatter device code only supports tensors up to rank 4. Current tensor rank is {}. The host code "
-"calling the program factory must reduce the dimensionality",
-shape.size());
-
-Shape4D<uint32_t> shape4d = {1, 1, 1, 1};
-size_t output_index = max_rank - 1;
-for (int i = shape.size() - 1; i >= 0; --i) {
-shape4d[output_index] = shape[i];
-output_index--;
-}
-return shape4d;
-}

static ttnn::ccl::Shape4D<uint32_t> shape_to_shape_in_tiles(const SimpleShape& shape) {
TT_FATAL(shape.rank() == 4, "Expected 4D shape but got {}", shape.rank());
ttnn::ccl::Shape4D<uint32_t> shape_in_tiles = {
40 changes: 22 additions & 18 deletions ttnn/cpp/ttnn/operations/conv/conv2d/conv2d_utils.cpp
@@ -242,7 +242,7 @@ uint32_t get_num_cores_channels_from_parallel_config(const ParallelConfig& pconf
}

MemoryConfig create_sharded_memory_config_from_parallel_config(
-const ttnn::Shape& tensor_shape, const ParallelConfig& parallel_config, uint32_t tile_size) {
+const ttnn::SimpleShape& tensor_shape, const ParallelConfig& parallel_config, uint32_t tile_size) {
log_debug(
tt::LogOp,
"create_sharded_memory_config_from_parallel_config: tensor_shape: {}, parallel_config: {}, tile_size: {}",
@@ -252,7 +252,7 @@ MemoryConfig create_sharded_memory_config_from_parallel_config(
// tensor_shape is [N, H, W, C]
TT_ASSERT(tensor_shape[0] == 1 && tensor_shape[1] == 1); // todo: add support for generic non-2d shapes
// uint32_t channels = tensor_shape[3];
-uint32_t channels = tensor_shape.with_tile_padding()[3];
+uint32_t channels = tensor_shape[3];
uint32_t num_cores_nhw = get_num_cores_nhw_from_parallel_config(parallel_config);
uint32_t num_cores_channels = get_num_cores_channels_from_parallel_config(parallel_config);
auto shard_scheme = parallel_config.shard_scheme;
@@ -480,7 +480,7 @@ static TensorMemoryLayout select_shard_spec(
}

template <typename T>
-static std::tuple<ttnn::Shape, ttnn::MemoryConfig, bool, bool> get_conv_padded_input_shape_and_mem_config(
+static std::tuple<ttnn::SimpleShape, ttnn::MemoryConfig, bool, bool> get_conv_padded_input_shape_and_mem_config(
T* device,
const ttnn::Tensor& input_tensor_,
const Conv2dConfig& conv_config,
@@ -616,22 +616,26 @@ static std::tuple<ttnn::Shape, ttnn::MemoryConfig, bool, bool> get_conv_padded_i
tt::round_up(input_shape[3], conv_config.input_channels_alignment);
}

-auto input_padded_shape = ttnn::Shape(std::array<uint32_t, 4>{
-1,
-1,
-input_tensor_height_snapped_to_tile,
-input_tensor_width_snapped_to_channels_alignment}); // TODO: resolve ttnn::types::Shape and
-// tt::tt_metal::LegacyShape issue to clean up next
-// line
+auto input_padded_shape = ttnn::SimpleShape(
+{1,
+1,
+input_tensor_height_snapped_to_tile,
+input_tensor_width_snapped_to_channels_alignment}); // TODO: resolve ttnn::types::Shape and
+// tt::tt_metal::LegacyShape issue to clean up next
+// line
MemoryConfig input_tensor_sharded_memory_config = create_sharded_memory_config_from_parallel_config(
-ttnn::Shape(std::array<uint32_t, 4>{
-input_padded_shape[0], input_padded_shape[1], input_padded_shape[2], input_padded_shape[3]}),
+ttnn::SimpleShape(
+{input_padded_shape[0], input_padded_shape[1], input_padded_shape[2], input_padded_shape[3]}),
parallel_config,
round_up_size);

return {input_padded_shape, input_tensor_sharded_memory_config, needs_shard_or_reshard, use_non_tile_height};
} else {
-return {input_tensor.shape(), input_tensor.memory_config(), needs_shard_or_reshard, use_non_tile_height};
+return {
+input_tensor.get_logical_shape(),
+input_tensor.memory_config(),
+needs_shard_or_reshard,
+use_non_tile_height};
}
}

@@ -888,9 +892,7 @@ std::tuple<OptimizedConvParallelizationConfig, OptimizedConvBlockConfig, MemoryC
out_channels_padded = tt::round_up(out_channels, 32);
}
MemoryConfig conv_out_memory_config = create_sharded_memory_config_from_parallel_config(
-ttnn::Shape(std::array<uint32_t, 4>{1, 1, nhw_out, out_channels_padded}),
-output_parallel_config,
-round_up_size);
+ttnn::SimpleShape({1, 1, nhw_out, out_channels_padded}), output_parallel_config, round_up_size);
ParallelConfig largest_parallel_config =
output_parallel_config.grid.num_cores() > input_parallel_config.grid.num_cores() ? output_parallel_config
: input_parallel_config;
@@ -1256,7 +1258,8 @@ template bool check_non_tile_mul_width<IDevice>(
template bool check_non_tile_mul_width<MeshDevice>(
MeshDevice* device, const Conv2dConfig& conv_config, const uint32_t in_channels);

-template std::tuple<ttnn::Shape, ttnn::MemoryConfig, bool, bool> get_conv_padded_input_shape_and_mem_config<IDevice>(
+template std::tuple<ttnn::SimpleShape, ttnn::MemoryConfig, bool, bool>
+get_conv_padded_input_shape_and_mem_config<IDevice>(
IDevice* device,
const ttnn::Tensor& input_tensor_,
const Conv2dConfig& conv_config,
Expand All @@ -1268,7 +1271,8 @@ template std::tuple<ttnn::Shape, ttnn::MemoryConfig, bool, bool> get_conv_padded
bool is_mm_conv,
bool is_non_tile_mul_width);

-template std::tuple<ttnn::Shape, ttnn::MemoryConfig, bool, bool> get_conv_padded_input_shape_and_mem_config<MeshDevice>(
+template std::tuple<ttnn::SimpleShape, ttnn::MemoryConfig, bool, bool>
+get_conv_padded_input_shape_and_mem_config<MeshDevice>(
MeshDevice* device,
const ttnn::Tensor& input_tensor_,
const Conv2dConfig& conv_config,
4 changes: 2 additions & 2 deletions ttnn/cpp/ttnn/operations/conv/conv2d/conv2d_utils.hpp
@@ -64,7 +64,7 @@ uint32_t get_num_cores_nhw_from_parallel_config(const sliding_window::ParallelCo
uint32_t get_num_cores_channels_from_parallel_config(const sliding_window::ParallelConfig& pconfig);

MemoryConfig create_sharded_memory_config_from_parallel_config(
-const ttnn::Shape& tensor_shape, const sliding_window::ParallelConfig& parallel_config, uint32_t tile_size);
+const ttnn::SimpleShape& tensor_shape, const sliding_window::ParallelConfig& parallel_config, uint32_t tile_size);

OptimizedConvParallelizationConfig determine_conv_op_parallel_config_from_conv_output_mem_config(
const MemoryConfig& conv_output_mem_config, uint32_t num_cores_nhw, uint32_t num_cores_c);
@@ -104,7 +104,7 @@ std::tuple<OptimizedConvParallelizationConfig, OptimizedConvBlockConfig, MemoryC
T* device);

template <typename T>
-static std::tuple<ttnn::Shape, ttnn::MemoryConfig, bool, bool> get_conv_padded_input_shape_and_mem_config(
+static std::tuple<ttnn::SimpleShape, ttnn::MemoryConfig, bool, bool> get_conv_padded_input_shape_and_mem_config(
T* device,
const ttnn::Tensor& input_tensor_,
const Conv2dConfig& conv_config,
