Skip to content

Commit

Permalink
#16539: Watcher noc sanitize virtual coord bugfix
Browse files Browse the repository at this point in the history
  • Loading branch information
tt-dma committed Jan 24, 2025
1 parent 4a191c4 commit 8a59830
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,12 @@ void RunTestOnCore(WatcherFixture* fixture, IDevice* device, CoreCoord &core, bo
uint32_t single_tile_size = 2 * 1024;
uint32_t num_tiles = 50;
uint32_t l1_buffer_size = single_tile_size * num_tiles;
uint32_t l1_buffer_addr = 400 * 1024;
uint32_t l1_buffer_addr = hal.get_dev_addr(HalProgrammableCoreType::TENSIX, HalL1MemAddrType::UNRESERVED);

// For ethernet core, need to have smaller buffer at a different address
if (is_eth_core) {
l1_buffer_size = 1024;
l1_buffer_addr = 200 * 1024;
l1_buffer_addr = hal.get_dev_addr(HalProgrammableCoreType::ACTIVE_ETH, HalL1MemAddrType::UNRESERVED);
}

tt_metal::InterleavedBufferConfig l1_config{
Expand Down Expand Up @@ -103,8 +103,8 @@ void RunTestOnCore(WatcherFixture* fixture, IDevice* device, CoreCoord &core, bo
// depending on the flags passed in.
switch(feature) {
case SanitizeAddress:
output_buf_noc_xy.x = 16;
output_buf_noc_xy.y = 16;
output_buf_noc_xy.x = 26;
output_buf_noc_xy.y = 18;
break;
case SanitizeAlignmentL1Write:
output_l1_buffer_addr++; // This is illegal because reading DRAM->L1 needs DRAM alignment
Expand Down
4 changes: 3 additions & 1 deletion tt_metal/api/tt-metalium/dev_msgs.h
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,9 @@ struct core_info_msg_t {
volatile uint8_t virtual_harvested_y[MAX_HARVESTED_ROWS];
volatile uint8_t noc_size_x;
volatile uint8_t noc_size_y;
volatile uint8_t pad[27];
volatile uint8_t worker_grid_size_x;
volatile uint8_t worker_grid_size_y;
volatile uint8_t pad[25];
};

constexpr uint32_t launch_msg_buffer_num_entries = 4;
Expand Down
13 changes: 10 additions & 3 deletions tt_metal/hw/inc/debug/sanitize_noc.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,11 +111,18 @@ AddressableCoreType get_core_type(uint8_t noc_id, uint8_t x, uint8_t y, bool& is
}
}
if constexpr (COORDINATE_VIRTUALIZATION_ENABLED) {
// Check if NOC endpoint is valid in the Tensix Virtual Coordinate Space.
// Check if NOC endpoint is valid in the Tensix Virtual Coordinate Space. Use worker grid size instead of noc
// size because virtual coords are continuous.
if (x >= NOC_0_X(noc_id, core_info->noc_size_x, (uint32_t)VIRTUAL_TENSIX_START_X) &&
x <= NOC_0_X(noc_id, core_info->noc_size_x, (uint32_t)VIRTUAL_TENSIX_START_X + core_info->noc_size_x - 1) &&
x <= NOC_0_X(
noc_id,
core_info->noc_size_x,
(uint32_t)VIRTUAL_TENSIX_START_X + core_info->worker_grid_size_x - 1) &&
y >= NOC_0_Y(noc_id, core_info->noc_size_y, (uint32_t)VIRTUAL_TENSIX_START_Y) &&
y <= NOC_0_Y(noc_id, core_info->noc_size_y, (uint32_t)VIRTUAL_TENSIX_START_Y + core_info->noc_size_y - 1)) {
y <= NOC_0_Y(
noc_id,
core_info->noc_size_y,
(uint32_t)VIRTUAL_TENSIX_START_Y + core_info->worker_grid_size_y - 1)) {
is_virtual_coord = true;
return AddressableCoreType::TENSIX;
}
Expand Down
2 changes: 2 additions & 0 deletions tt_metal/impl/device/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -749,6 +749,8 @@ void Device::initialize_and_launch_firmware() {

core_info->noc_size_x = soc_d.grid_size.x;
core_info->noc_size_y = soc_d.grid_size.y;
core_info->worker_grid_size_x = this->logical_grid_size().x; // Grid size as virtual coords see it (workers only)
core_info->worker_grid_size_y = this->logical_grid_size().y;

// Download to worker cores
log_debug("Initializing firmware");
Expand Down

0 comments on commit 8a59830

Please sign in to comment.