Skip to content

Commit

Permalink
#0: Remove checks for if the coords passed in to NOC_XY_PCIE_ENCODING…
Browse files Browse the repository at this point in the history
… are an actual pcie coord

NOC_XY_PCIE_ENCODING is specifically for getting pcie coordinate. User Error to do otherwise
  • Loading branch information
tt-aho committed Dec 7, 2024
1 parent 10eeea8 commit 19b59ea
Show file tree
Hide file tree
Showing 9 changed files with 10 additions and 25 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ void kernel_main() {

volatile tt_l1_ptr uint32_t* done_address_ptr = reinterpret_cast<volatile tt_l1_ptr uint32_t*>(done_address);

uint64_t pcie_noc_xy_encoding = (uint64_t)NOC_XY_PCIE_ENCODING(PCIE_NOC_X, PCIE_NOC_Y, NOC_INDEX);
uint64_t pcie_noc_xy_encoding = (uint64_t)NOC_XY_PCIE_ENCODING(PCIE_NOC_X, PCIE_NOC_Y);
while (done_address_ptr[0] == 0) {
uint64_t host_src_addr = pcie_noc_xy_encoding | pcie_read_ptr;
noc_async_read(host_src_addr, done_address, read_sizeB);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ void kernel_main() {
constexpr uint32_t base_pcie_dst_address = get_compile_time_arg_val(1);
constexpr uint32_t num_16b_writes = get_compile_time_arg_val(2);

uint64_t pcie_core_noc_encoding = uint64_t(NOC_XY_PCIE_ENCODING(PCIE_NOC_X, PCIE_NOC_Y, NOC_INDEX));
uint64_t pcie_core_noc_encoding = uint64_t(NOC_XY_PCIE_ENCODING(PCIE_NOC_X, PCIE_NOC_Y));

uint32_t l1_src_address = base_l1_src_address;
uint32_t pcie_dst_address = base_pcie_dst_address;
Expand Down
6 changes: 2 additions & 4 deletions tt_metal/hw/inc/blackhole/noc/noc_parameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -355,10 +355,8 @@
#define NOC_XY_ENCODING(x, y) ((((uint32_t)(y)) << (NOC_ADDR_NODE_ID_BITS)) | (((uint32_t)(x))))

// Base address pulled from tt::umd::Cluster::get_pcie_base_addr_from_device
#define NOC_XY_PCIE_ENCODING(x, y, noc_index) \
((uint64_t(NOC_XY_ENCODING(x, y)) << (NOC_ADDR_LOCAL_BITS - NOC_COORD_REG_OFFSET))) | \
((noc_index ? (x == PCIE_NOC1_X and y == PCIE_NOC1_Y) : (x == PCIE_NOC_X and y == PCIE_NOC_Y)) * \
0x1000000000000000)
#define NOC_XY_PCIE_ENCODING(x, y) \
((uint64_t(NOC_XY_ENCODING(x, y)) << (NOC_ADDR_LOCAL_BITS - NOC_COORD_REG_OFFSET)) | 0x1000000000000000)

#define NOC_MULTICAST_ENCODING(x_start, y_start, x_end, y_end) \
((((uint32_t)(x_start)) << (2 * NOC_ADDR_NODE_ID_BITS)) | (((uint32_t)(y_start)) << (3 * NOC_ADDR_NODE_ID_BITS)) | \
Expand Down
2 changes: 1 addition & 1 deletion tt_metal/hw/inc/dataflow_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -694,7 +694,7 @@ uint64_t get_system_memory_noc_addr(
const uint32_t offset = 0,
uint8_t noc = noc_index) {
uint64_t pcie_core_noc_encoding =
uint64_t(NOC_XY_PCIE_ENCODING(DYNAMIC_NOC_X(noc, PCIE_NOC_X), DYNAMIC_NOC_Y(noc, PCIE_NOC_Y), noc));
uint64_t(NOC_XY_PCIE_ENCODING(DYNAMIC_NOC_X(noc, PCIE_NOC_X), DYNAMIC_NOC_Y(noc, PCIE_NOC_Y)));
uint32_t addr = base_addr + page_size * id + offset;
uint64_t noc_addr = pcie_core_noc_encoding | addr;
return noc_addr;
Expand Down
3 changes: 1 addition & 2 deletions tt_metal/hw/inc/grayskull/noc/noc_parameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -253,8 +253,7 @@
// Address formats
#define NOC_XY_ENCODING(x, y) ((((uint32_t)(y)) << (NOC_ADDR_NODE_ID_BITS)) | (((uint32_t)(x))))

#define NOC_XY_PCIE_ENCODING(x, y, noc_index) \
((uint64_t(NOC_XY_ENCODING(x, y)) << (NOC_ADDR_LOCAL_BITS - NOC_COORD_REG_OFFSET)))
#define NOC_XY_PCIE_ENCODING(x, y) ((uint64_t(NOC_XY_ENCODING(x, y)) << (NOC_ADDR_LOCAL_BITS - NOC_COORD_REG_OFFSET)))

#define NOC_MULTICAST_ENCODING(x_start, y_start, x_end, y_end) \
((x_start) << (2 * NOC_ADDR_NODE_ID_BITS)) | ((y_start) << (3 * NOC_ADDR_NODE_ID_BITS)) | (x_end) | \
Expand Down
5 changes: 2 additions & 3 deletions tt_metal/hw/inc/wormhole/noc/noc_parameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -267,9 +267,8 @@
(((uint32_t)(x)) << (NOC_ADDR_LOCAL_BITS % 32))

// Address formats
#define NOC_XY_PCIE_ENCODING(x, y, noc_index) \
((uint64_t(NOC_XY_ENCODING(x, y)) << (NOC_ADDR_LOCAL_BITS - NOC_COORD_REG_OFFSET))) | \
((noc_index ? (x == PCIE_NOC1_X and y == PCIE_NOC1_Y) : (x == PCIE_NOC_X and y == PCIE_NOC_Y)) * 0x800000000)
#define NOC_XY_PCIE_ENCODING(x, y) \
((uint64_t(NOC_XY_ENCODING(x, y)) << (NOC_ADDR_LOCAL_BITS - NOC_COORD_REG_OFFSET)) | 0x800000000)

#define NOC_MULTICAST_ENCODING(x_start, y_start, x_end, y_end) \
(((uint32_t)(x_start)) << ((NOC_ADDR_LOCAL_BITS % 32) + 2 * NOC_ADDR_NODE_ID_BITS)) | \
Expand Down
5 changes: 0 additions & 5 deletions tt_metal/impl/device/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -324,15 +324,10 @@ void Device::initialize_device_kernel_defines()
auto pcie_cores = soc_d.get_pcie_cores();
auto grid_size = this->grid_size();

// Workaround for Simulator integration as they use a 2x2 grid which would underflow PCIE_NOC1*
CoreCoord pcie_core = pcie_cores.empty() ? grid_size : pcie_cores[0];
auto pcie_noc1_x = pcie_cores.empty() ? 14 : tt::tt_metal::hal.noc_coordinate(NOC::NOC_1, grid_size.x, pcie_cores[0].x);
auto pcie_noc1_y = pcie_cores.empty() ? 11 : tt::tt_metal::hal.noc_coordinate(NOC::NOC_1, grid_size.x, pcie_cores[0].y);

this->device_kernel_defines_.emplace("PCIE_NOC_X", std::to_string(pcie_core.x));
this->device_kernel_defines_.emplace("PCIE_NOC_Y", std::to_string(pcie_core.y));
this->device_kernel_defines_.emplace("PCIE_NOC1_X", std::to_string(pcie_noc1_x));
this->device_kernel_defines_.emplace("PCIE_NOC1_Y", std::to_string(pcie_noc1_x));
}

void Device::initialize_build() {
Expand Down
5 changes: 1 addition & 4 deletions tt_metal/impl/dispatch/kernels/cq_dispatch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,7 @@ constexpr uint32_t downstream_noc_xy = uint32_t(NOC_XY_ENCODING(DOWNSTREAM_NOC_X
constexpr uint32_t dispatch_s_noc_xy = uint32_t(NOC_XY_ENCODING(DOWNSTREAM_SLAVE_NOC_X, DOWNSTREAM_SLAVE_NOC_Y));
constexpr uint8_t my_noc_index = NOC_INDEX;
constexpr uint32_t my_noc_xy = uint32_t(NOC_XY_ENCODING(MY_NOC_X, MY_NOC_Y));
constexpr uint64_t pcie_noc_xy = uint64_t(NOC_XY_PCIE_ENCODING(
NOC_0_X(static_cast<uint8_t>(NOC_INDEX), noc_size_x, PCIE_NOC_X),
NOC_0_Y(static_cast<uint8_t>(NOC_INDEX), noc_size_y, PCIE_NOC_Y),
NOC_INDEX));
constexpr uint64_t pcie_noc_xy = uint64_t(NOC_XY_PCIE_ENCODING(NOC_X(PCIE_NOC_X), NOC_Y(PCIE_NOC_Y)));
constexpr uint32_t dispatch_cb_page_size = 1 << dispatch_cb_log_page_size;

constexpr uint32_t completion_queue_end_addr = completion_queue_base_addr + completion_queue_size;
Expand Down
5 changes: 1 addition & 4 deletions tt_metal/impl/dispatch/kernels/cq_prefetch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,7 @@ constexpr uint32_t my_noc_xy = uint32_t(NOC_XY_ENCODING(MY_NOC_X, MY_NOC_Y));
constexpr uint32_t upstream_noc_xy = uint32_t(NOC_XY_ENCODING(UPSTREAM_NOC_X, UPSTREAM_NOC_Y));
constexpr uint32_t downstream_noc_xy = uint32_t(NOC_XY_ENCODING(DOWNSTREAM_NOC_X, DOWNSTREAM_NOC_Y));
constexpr uint32_t dispatch_s_noc_xy = uint32_t(NOC_XY_ENCODING(DOWNSTREAM_SLAVE_NOC_X, DOWNSTREAM_SLAVE_NOC_Y));
constexpr uint64_t pcie_noc_xy = uint64_t(NOC_XY_PCIE_ENCODING(
NOC_0_X(static_cast<uint8_t>(NOC_INDEX), noc_size_x, PCIE_NOC_X),
NOC_0_Y(static_cast<uint8_t>(NOC_INDEX), noc_size_y, PCIE_NOC_Y),
NOC_INDEX));
constexpr uint64_t pcie_noc_xy = uint64_t(NOC_XY_PCIE_ENCODING(NOC_X(PCIE_NOC_X), NOC_Y(PCIE_NOC_Y)));
constexpr uint32_t downstream_cb_page_size = 1 << downstream_cb_log_page_size;
constexpr uint32_t dispatch_s_cb_page_size = 1 << dispatch_s_cb_log_page_size;
constexpr uint32_t downstream_cb_end = downstream_cb_base + (1 << downstream_cb_log_page_size) * downstream_cb_pages;
Expand Down

0 comments on commit 19b59ea

Please sign in to comment.