Skip to content

Commit

Permalink
Add LLK and API for binary shift left/right (#15926)
Browse files Browse the repository at this point in the history
### Ticket
[Link to Github Issue](#10034)

### What's changed
Implemented binary left/right shift as binary SFPU OPs.

### Checklist
- [x] Post commit CI passes:
https://github.com/tenstorrent/tt-metal/actions/runs/12287866881
- [x] Blackhole Post commit:
https://github.com/tenstorrent/tt-metal/actions/runs/12287867702
- [x] New/Existing tests provide coverage for changes - will be added in
future PR
  • Loading branch information
rdjogoTT authored Dec 12, 2024
1 parent 74311e9 commit 6e983a7
Show file tree
Hide file tree
Showing 10 changed files with 198 additions and 2 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
//
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "ckernel.h"
#include "ckernel_defs.h"
#include "sfpi.h"

using namespace sfpi;

namespace ckernel {
namespace sfpu {

// Entry point for the binary left-shift SFPU kernel.
//
// Thin wrapper: forwards APPROXIMATION_MODE, ITERATIONS (default 8) and
// dst_offset unchanged to _calculate_binary_left_shift_, which is declared
// outside this file.
//
// NOTE(review): dst_offset presumably locates the second operand (the shift
// amount) relative to the first operand in DST -- confirm against the
// _calculate_binary_left_shift_ implementation.
template <bool APPROXIMATION_MODE, int ITERATIONS = 8>
inline void calculate_binary_left_shift(const uint dst_offset) {
_calculate_binary_left_shift_<APPROXIMATION_MODE, ITERATIONS>(dst_offset);
}

// Entry point for the binary right-shift SFPU kernel.
//
// Thin wrapper: forwards APPROXIMATION_MODE, ITERATIONS (default 8) and
// dst_offset unchanged to _calculate_binary_right_shift_, which is declared
// outside this file.
//
// NOTE(review): dst_offset presumably locates the second operand (the shift
// amount) relative to the first operand in DST, and whether the shift is
// arithmetic or logical is decided by _calculate_binary_right_shift_ --
// confirm against that implementation.
template <bool APPROXIMATION_MODE, int ITERATIONS = 8>
inline void calculate_binary_right_shift(const uint dst_offset) {
_calculate_binary_right_shift_<APPROXIMATION_MODE, ITERATIONS>(dst_offset);
}

} // namespace sfpu
} // namespace ckernel
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
//
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "llk_math_eltwise_binary_sfpu_init.h"
#include "llk_math_eltwise_binary_sfpu_params.h"
#include "ckernel_sfpu_shift.h"

namespace ckernel {

// New LLK SFPU APIs

// Init hook for the binary shift SFPU ops.
//
// Delegates to the generic llk_math_eltwise_binary_sfpu_init, passing
// SfpuType::unused together with the caller's APPROXIMATE flag; no
// shift-specific SfpuType is selected here.
template <bool APPROXIMATE>
inline void llk_math_eltwise_binary_sfpu_shift_init() {
llk_math_eltwise_binary_sfpu_init<SfpuType::unused, APPROXIMATE>();
}

template <bool APPROXIMATE>
inline void llk_math_eltwise_binary_sfpu_left_shift(
uint dst_index0, uint32_t dst_index1, int vector_mode = VectorMode::RC) {
llk_math_eltwise_binary_sfpu_params<APPROXIMATE>(
ckernel::sfpu::calculate_binary_left_shift<APPROXIMATE>, dst_index0, dst_index1, vector_mode);
}

template <bool APPROXIMATE>
inline void llk_math_eltwise_binary_sfpu_right_shift(
uint dst_index0, uint32_t dst_index1, int vector_mode = VectorMode::RC) {
llk_math_eltwise_binary_sfpu_params<APPROXIMATE>(
ckernel::sfpu::calculate_binary_right_shift<APPROXIMATE>, dst_index0, dst_index1, vector_mode);
}

} // namespace ckernel
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
//
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "ckernel.h"
#include "ckernel_defs.h"
#include "sfpi.h"

using namespace sfpi;

namespace ckernel {
namespace sfpu {

// Entry point for the binary left-shift SFPU kernel.
//
// Thin wrapper: forwards APPROXIMATION_MODE, ITERATIONS (default 8) and
// dst_offset unchanged to _calculate_binary_left_shift_, which is declared
// outside this file.
//
// NOTE(review): dst_offset presumably locates the second operand (the shift
// amount) relative to the first operand in DST -- confirm against the
// _calculate_binary_left_shift_ implementation.
template <bool APPROXIMATION_MODE, int ITERATIONS = 8>
inline void calculate_binary_left_shift(const uint dst_offset) {
_calculate_binary_left_shift_<APPROXIMATION_MODE, ITERATIONS>(dst_offset);
}

// Entry point for the binary right-shift SFPU kernel.
//
// Thin wrapper: forwards APPROXIMATION_MODE, ITERATIONS (default 8) and
// dst_offset unchanged to _calculate_binary_right_shift_, which is declared
// outside this file.
//
// NOTE(review): dst_offset presumably locates the second operand (the shift
// amount) relative to the first operand in DST, and whether the shift is
// arithmetic or logical is decided by _calculate_binary_right_shift_ --
// confirm against that implementation.
template <bool APPROXIMATION_MODE, int ITERATIONS = 8>
inline void calculate_binary_right_shift(const uint dst_offset) {
_calculate_binary_right_shift_<APPROXIMATION_MODE, ITERATIONS>(dst_offset);
}

} // namespace sfpu
} // namespace ckernel
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
//
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "llk_math_eltwise_binary_sfpu_init.h"
#include "llk_math_eltwise_binary_sfpu_params.h"
#include "ckernel_sfpu_shift.h"

namespace ckernel {

// New LLK SFPU APIs

// Init hook for the binary shift SFPU ops.
//
// Delegates to the generic llk_math_eltwise_binary_sfpu_init, passing
// SfpuType::unused together with the caller's APPROXIMATE flag; no
// shift-specific SfpuType is selected here.
template <bool APPROXIMATE>
inline void llk_math_eltwise_binary_sfpu_shift_init() {
llk_math_eltwise_binary_sfpu_init<SfpuType::unused, APPROXIMATE>();
}

template <bool APPROXIMATE>
inline void llk_math_eltwise_binary_sfpu_left_shift(
uint dst_index0, uint32_t dst_index1, int vector_mode = VectorMode::RC) {
llk_math_eltwise_binary_sfpu_params<APPROXIMATE>(
ckernel::sfpu::calculate_binary_left_shift<APPROXIMATE>, dst_index0, dst_index1, vector_mode);
}

template <bool APPROXIMATE>
inline void llk_math_eltwise_binary_sfpu_right_shift(
uint dst_index0, uint32_t dst_index1, int vector_mode = VectorMode::RC) {
llk_math_eltwise_binary_sfpu_params<APPROXIMATE>(
ckernel::sfpu::calculate_binary_right_shift<APPROXIMATE>, dst_index0, dst_index1, vector_mode);
}

} // namespace ckernel
2 changes: 2 additions & 0 deletions tt_metal/include/compute_kernel_api/add_int32_sfpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ namespace ckernel {
* Performs an elementwise add operation with the two integer inputs: y = add(x0,x1)
* Output overwrites first operand in DST.
*
* The DST register buffer must be in acquired state via *acquire_dst* call. This call is blocking and is only available
* on the compute engine.
* A maximum of 4 tiles from each operand can be loaded into DST at once, for a total of 8 tiles,
* when using 16 bit formats. This gets reduced to 2 tiles from each operand for 32 bit formats.
*
Expand Down
2 changes: 2 additions & 0 deletions tt_metal/include/compute_kernel_api/binary_bitwise_sfpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ namespace ckernel {
* Performs an elementwise binary bitwise operation with the two inputs: y = bitwise(x0,x1)
* Output overwrites first operand in DST.
*
* The DST register buffer must be in acquired state via *acquire_dst* call. This call is blocking and is only available
* on the compute engine.
* A maximum of 4 tiles from each operand can be loaded into DST at once, for a total of 8 tiles,
* when using 16 bit formats. This gets reduced to 2 tiles from each operand for 32 bit formats.
*
Expand Down
68 changes: 68 additions & 0 deletions tt_metal/include/compute_kernel_api/binary_shift.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc.
//
// SPDX-License-Identifier: Apache-2.0

#pragma once

#include "compute_kernel_api/common_globals.h"
// MATH(x) gates code on the TRISC_MATH build flag:
//  - with TRISC_MATH defined, the LLK shift header is included, MAIN becomes
//    math_main(), and MATH(x) expands to x;
//  - otherwise MATH(x) expands to nothing, so wrapped calls are compiled out.
#ifdef TRISC_MATH
#include "llk_math_eltwise_binary_sfpu_shift.h"
#define MAIN math_main()
#define MATH(x) x
#else
#define MATH(x)
#endif

namespace ckernel {

// clang-format off
// (formatting disabled so the argument table rows below stay on single lines)
/**
 * Performs an elementwise shift operation to the left on the input at idst0, by input at idst1: y = x0 << x1
 * Both inputs must be of Int32 data type only. Output overwrites first operand in DST.
 *
 * The DST register buffer must be in acquired state via *acquire_dst* call. This call is blocking and is only available
 * on the compute engine.
 * A maximum of 4 tiles from each operand can be loaded into DST at once, for a total of 8 tiles,
 * when using 16 bit formats. This gets reduced to 2 tiles from each operand for 32 bit formats.
 *
 * The underlying LLK call is wrapped in MATH(...), so it is only compiled when TRISC_MATH is defined.
 *
 * Return value: None
 *
 * | Argument | Description                                                           | Type     | Valid Range                                           | Required |
 * |----------|-----------------------------------------------------------------------|----------|-------------------------------------------------------|----------|
 * | idst0    | The index of the tile in DST register buffer to use as first operand  | uint32_t | Must be less than the size of the DST register buffer | True     |
 * | idst1    | The index of the tile in DST register buffer to use as second operand | uint32_t | Must be less than the size of the DST register buffer | True     |
 */
// clang-format on
ALWI void binary_left_shift_tile(uint32_t idst0, uint32_t idst1) {
MATH((llk_math_eltwise_binary_sfpu_left_shift<APPROX>(idst0, idst1)));
}

// clang-format off
// (formatting disabled so the argument table rows below stay on single lines)
/**
 * Performs an elementwise shift operation to the right on the input at idst0, by input at idst1: y = x0 >> x1
 * Both inputs must be of Int32 data type only. Output overwrites first operand in DST.
 *
 * NOTE(review): this header does not state whether the shift is arithmetic or logical for negative inputs -- confirm
 * against the LLK implementation.
 *
 * The DST register buffer must be in acquired state via *acquire_dst* call. This call is blocking and is only available
 * on the compute engine.
 * A maximum of 4 tiles from each operand can be loaded into DST at once, for a total of 8 tiles,
 * when using 16 bit formats. This gets reduced to 2 tiles from each operand for 32 bit formats.
 *
 * The underlying LLK call is wrapped in MATH(...), so it is only compiled when TRISC_MATH is defined.
 *
 * Return value: None
 *
 * | Argument | Description                                                           | Type     | Valid Range                                           | Required |
 * |----------|-----------------------------------------------------------------------|----------|-------------------------------------------------------|----------|
 * | idst0    | The index of the tile in DST register buffer to use as first operand  | uint32_t | Must be less than the size of the DST register buffer | True     |
 * | idst1    | The index of the tile in DST register buffer to use as second operand | uint32_t | Must be less than the size of the DST register buffer | True     |
 */
// clang-format on

ALWI void binary_right_shift_tile(uint32_t idst0, uint32_t idst1) {
MATH((llk_math_eltwise_binary_sfpu_right_shift<APPROX>(idst0, idst1)));
}

/**
 * Init call for the binary shift tile operations. Please refer to documentation for any_init.
 *
 * Invokes llk_math_eltwise_binary_sfpu_shift_init<APPROX>() inside MATH(...), so the call is only compiled when
 * TRISC_MATH is defined. Takes no arguments.
 *
 * NOTE(review): presumably this must be called before binary_left_shift_tile / binary_right_shift_tile -- confirm
 * against the any_init documentation.
 *
 * Return value: None
 */
ALWI void binary_shift_tile_init() { MATH((llk_math_eltwise_binary_sfpu_shift_init<APPROX>())); }

} // namespace ckernel
2 changes: 2 additions & 0 deletions tt_metal/include/compute_kernel_api/eltwise_binary_sfpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ namespace ckernel {
* Performs an elementwise binop operation with the two floating point inputs: y = binop(x0,x1)
* Output overwrites first operand in DST.
*
* The DST register buffer must be in acquired state via *acquire_dst* call. This call is blocking and is only available
* on the compute engine.
* A maximum of 4 tiles from each operand can be loaded into DST at once, for a total of 8 tiles,
* when using 16 bit formats. This gets reduced to 2 tiles from each operand for 32 bit formats.
*
Expand Down
2 changes: 1 addition & 1 deletion tt_metal/third_party/tt_llk_blackhole

0 comments on commit 6e983a7

Please sign in to comment.