Skip to content

Commit

Permalink
13544: Update floor op (#15583)
Browse files Browse the repository at this point in the history
### Ticket
Link to Github Issue #13544 

### Problem description

- The floor op ignores values outside the range of i16

### What's changed

- Updated the logic to support fp32

### Profiling Results : Shape used [1, 1, 102400, 32]

Kernel Duration [ns]
- Bfloat16 : 68870
- Float32 : 156560

### Checklist
- [x] All Post commit CI
  • Loading branch information
mouliraj-mcw authored Dec 7, 2024
1 parent 19b59ea commit 3afc3b8
Show file tree
Hide file tree
Showing 10 changed files with 182 additions and 8 deletions.
19 changes: 19 additions & 0 deletions tests/ttnn/unit_tests/operations/eltwise/test_unary.py
Original file line number Diff line number Diff line change
Expand Up @@ -428,3 +428,22 @@ def run_unary_test_bitwise_not(device, h, w, fill_value, ttnn_function, pcc=0.99
@pytest.mark.parametrize("fill_value", [-2147483647, 2147483648, 7534, 225, 97, 3])
def test_bitwise_not(device, h, w, fill_value):
run_unary_test_bitwise_not(device, h, w, fill_value, ttnn.bitwise_not)


@skip_for_grayskull()
@pytest.mark.parametrize(
    "input_shapes",
    (
        (torch.Size([1, 1, 32, 32])),
        (torch.Size([1, 1, 320, 384])),
        (torch.Size([1, 3, 320, 384])),
    ),
)
def test_unary_floor(input_shapes, device):
    """Compare ttnn.floor on a float32 tiled tensor against its golden function.

    Inputs are drawn uniformly from [-43566, 43565), a range that extends past
    the signed 16-bit limits (+/-32767), and the device result must match the
    golden result with a PCC of at least 0.999.
    """
    torch_input = torch.empty(input_shapes, dtype=torch.float32).uniform_(-43566, 43565)

    # Reference result computed on the host from the same torch data.
    expected = ttnn.get_golden_function(ttnn.floor)(torch_input)

    # Device result: move to device as float32 tiles, apply floor, bring back to torch.
    tt_input = ttnn.from_torch(torch_input, dtype=ttnn.float32, layout=ttnn.TILE_LAYOUT, device=device)
    actual = ttnn.to_torch(ttnn.floor(tt_input))

    assert_with_pcc(expected, actual, 0.999)
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,40 @@ using namespace sfpi;
namespace ckernel {
namespace sfpu {

// Converts each lane of `in` from float to a 32-bit integer value.
//
// Behaviour by extracted exponent (exexp):
//   * exp < 0   -> result is 0.
//   * exp > 30  -> result saturates to the int32 maximum, with the sign bit set for negative inputs.
//   * otherwise -> the mantissa from exman8 is shifted by (exp - 23) and the sign bit is set
//                  for negative inputs.
//
// NOTE(review): the sign is applied with setsgn on the raw bits (top bit set) rather than by
// two's-complement negation, so the result is presumably a sign-magnitude value as expected by
// int32_to_float - confirm against the SFPI documentation.
inline vInt float_to_int32(vFloat in)
{
vInt result;
vInt exp = exexp(in); // extract exponent
v_if (exp < 0) {
// negative exponent: the integer result is 0
result = 0;
} v_elseif (exp > 30) {
// set to int32 max value in case of overflow
result = std::numeric_limits<int32_t>::max();
// negative input: set the sign bit on the saturated value
v_if (in < 0) {
result = reinterpret<vInt>(setsgn(reinterpret<vFloat>(result), 1));
} v_endif
} v_else {
// extract mantissa
vInt man = exman8(in);
// shift amount is (exp - 23): negative while exp < 23, positive above it.
// Presumably shft shifts right for negative amounts and left for positive ones - TODO confirm.
vInt shift = exp - 23;
man = shft(reinterpret<vUInt>(man), shift);
// negative input: set the sign bit on the shifted mantissa
v_if (in < 0) {
man = reinterpret<vInt>(setsgn(reinterpret<vFloat>(man), 1));
} v_endif
result = man;
} v_endif
return result;
}

template <bool APPROXIMATION_MODE, int ITERATIONS = 8>
inline void calculate_floor() {
for (int d = 0; d < ITERATIONS; d++) {
vFloat result = dst_reg[0];
vFloat v = result;
vInt tmp = float_to_int16(result, 0); // TODO: Replace float_to_int16 to float_to_int32 once it is available
vInt tmp = float_to_int16(result, 0);
result = int32_to_float(tmp, 0);
v_if(result > v) { result = result - 1; }
v_endif;
Expand All @@ -31,5 +59,19 @@ inline void calculate_floor() {
}
}

template <bool APPROXIMATION_MODE, int ITERATIONS = 8>
inline void calculate_floor_float32() {
for (int d = 0; d < ITERATIONS; d++) {
vFloat result = dst_reg[0];
vFloat v = result;
vInt tmp = float_to_int32(result);
result = int32_to_float(tmp, 0);
v_if(result > v) { result = result - 1; }
v_endif;
dst_reg[0] = result;
dst_reg++;
}
}

} // namespace sfpu
} // namespace ckernel
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,9 @@ inline void llk_math_eltwise_unary_sfpu_floor(uint dst_index, int vector_mode =
ckernel::sfpu::calculate_floor<APPROXIMATE>, dst_index, vector_mode);
}

// LLK wrapper for the float32 floor kernel: forwards
// ckernel::sfpu::calculate_floor_float32<APPROXIMATE> together with dst_index and vector_mode
// to llk_math_eltwise_unary_sfpu_params. vector_mode defaults to VectorMode::RC.
template <bool APPROXIMATE>
inline void llk_math_eltwise_unary_sfpu_floor_float32(uint dst_index, int vector_mode = (int)VectorMode::RC) {
llk_math_eltwise_unary_sfpu_params<APPROXIMATE>(
ckernel::sfpu::calculate_floor_float32<APPROXIMATE>, dst_index, vector_mode);
}
} // namespace ckernel
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,40 @@ using namespace sfpi;
namespace ckernel {
namespace sfpu {

// Converts each lane of `in` from float to a 32-bit integer value.
//
// Behaviour by extracted exponent (exexp):
//   * exp < 0   -> result is 0.
//   * exp > 30  -> result saturates to the int32 maximum, with the sign bit set for negative inputs.
//   * otherwise -> the mantissa from exman8 is shifted by (exp - 23) and the sign bit is set
//                  for negative inputs.
//
// NOTE(review): the sign is applied with setsgn on the raw bits (top bit set) rather than by
// two's-complement negation, so the result is presumably a sign-magnitude value as expected by
// int32_to_float - confirm against the SFPI documentation.
inline vInt float_to_int32(vFloat in)
{
vInt result;
vInt exp = exexp(in); // extract exponent
v_if (exp < 0) {
// negative exponent: the integer result is 0
result = 0;
} v_elseif (exp > 30) {
// set to int32 max value in case of overflow
result = std::numeric_limits<int32_t>::max();
// negative input: set the sign bit on the saturated value
v_if (in < 0) {
result = reinterpret<vInt>(setsgn(reinterpret<vFloat>(result), 1));
} v_endif
} v_else {
// extract mantissa
vInt man = exman8(in);
// shift amount is (exp - 23): negative while exp < 23, positive above it.
// Presumably shft shifts right for negative amounts and left for positive ones - TODO confirm.
vInt shift = exp - 23;
man = shft(reinterpret<vUInt>(man), shift);
// negative input: set the sign bit on the shifted mantissa
v_if (in < 0) {
man = reinterpret<vInt>(setsgn(reinterpret<vFloat>(man), 1));
} v_endif
result = man;
} v_endif
return result;
}

template <bool APPROXIMATION_MODE, int ITERATIONS = 8>
inline void calculate_floor() {
for (int d = 0; d < ITERATIONS; d++) {
vFloat result = dst_reg[0];
vFloat v = result;
vInt tmp = float_to_int16(result, 0); // TODO: Replace float_to_int16 to float_to_int32 once it is available
vInt tmp = float_to_int16(result, 0);
result = int32_to_float(tmp, 0);
v_if(result > v) { result = result - 1; }
v_endif;
Expand All @@ -31,5 +59,19 @@ inline void calculate_floor() {
}
}

template <bool APPROXIMATION_MODE, int ITERATIONS = 8>
inline void calculate_floor_float32() {
for (int d = 0; d < ITERATIONS; d++) {
vFloat result = dst_reg[0];
vFloat v = result;
vInt tmp = float_to_int32(result);
result = int32_to_float(tmp, 0);
v_if(result > v) { result = result - 1; }
v_endif;
dst_reg[0] = result;
dst_reg++;
}
}

} // namespace sfpu
} // namespace ckernel
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,9 @@ inline void llk_math_eltwise_unary_sfpu_floor(uint dst_index, int vector_mode =
ckernel::sfpu::calculate_floor<APPROXIMATE>, dst_index, vector_mode);
}

// LLK wrapper for the float32 floor kernel: forwards
// ckernel::sfpu::calculate_floor_float32<APPROXIMATE> together with dst_index and vector_mode
// to llk_math_eltwise_unary_sfpu_params. vector_mode defaults to VectorMode::RC.
template <bool APPROXIMATE>
inline void llk_math_eltwise_unary_sfpu_floor_float32(uint dst_index, int vector_mode = (int)VectorMode::RC) {
llk_math_eltwise_unary_sfpu_params<APPROXIMATE>(
ckernel::sfpu::calculate_floor_float32<APPROXIMATE>, dst_index, vector_mode);
}
} // namespace ckernel
19 changes: 17 additions & 2 deletions tt_metal/include/compute_kernel_api/eltwise_unary/floor.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
#endif

namespace ckernel {

/**
* Please refer to documentation for any_init.
*/
Expand All @@ -31,9 +30,25 @@ ALWI void floor_tile_init() { MATH((llk_math_eltwise_unary_sfpu_floor_init<APPRO
* | Argument | Description | Type | Valid
* Range | Required |
* |-----------------|----------------------------------------------------------------------------|----------|-------------------------------------------------------|----------|
* | idst | The index of the tile in DST register buffer to modify the sign bit of | uint32_t | Must be
* | idst | The index of the tile in DST register buffer to perform floor operation | uint32_t | Must be
* less than the size of the DST register buffer | True |
*/
ALWI void floor_tile(uint32_t idst) { MATH((llk_math_eltwise_unary_sfpu_floor<APPROX>(idst))); }

/**
 * Performs the floor operation on the tile in the DST register at index idst, using the
 * float32 implementation (llk_math_eltwise_unary_sfpu_floor_float32). The DST register buffer
 * must be in acquired state via *acquire_dst* call. This call is blocking and is only
 * available on the compute engine.
 *
 * Return value: None
 *
 * | Argument | Description                                                             | Type     | Valid Range                                           | Required |
 * |----------|-------------------------------------------------------------------------|----------|-------------------------------------------------------|----------|
 * | idst     | The index of the tile in DST register buffer to perform floor operation | uint32_t | Must be less than the size of the DST register buffer | True     |
 */
ALWI void floor_tile_float32(uint32_t idst) { MATH((llk_math_eltwise_unary_sfpu_floor_float32<APPROX>(idst))); }

} // namespace ckernel
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ enum class UnaryOpType {
BITWISE_OR,
RIGHT_SHIFT,
FLOOR,
FLOOR_FLOAT32,
CEIL,
LEFT_SHIFT,
REMAINDER,
Expand Down
10 changes: 8 additions & 2 deletions ttnn/cpp/ttnn/operations/eltwise/unary/common/unary_op_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ void update_macro_defines(UnaryOpType op_type, std::map<std::string, std::string
case UnaryOpType::DIV_UNARY_SFPU: defines["SFPU_OP_BINOP_WITH_SCALAR_INCLUDE"] = "1"; break;
case UnaryOpType::IDENTITY:
case UnaryOpType::IDENTITY_UINT32: defines["SFPU_OP_IDENTITY_INCLUDE"] = "1"; break;
case UnaryOpType::FLOOR:
case UnaryOpType::FLOOR_FLOAT32: defines["SFPU_OP_FLOOR_INCLUDE"] = "1"; break;
case UnaryOpType::RDIV: break;
case UnaryOpType::RSUB: defines["SFPU_OP_REVERSE_FAMILY_INCLUDE"] = "1";
case UnaryOpType::ISINF:
Expand All @@ -71,7 +73,6 @@ void update_macro_defines(UnaryOpType op_type, std::map<std::string, std::string
case UnaryOpType::BITWISE_AND: defines["SFPU_OP_BITWISE_AND_INCLUDE"] = "1"; break;
case UnaryOpType::BITWISE_OR: defines["SFPU_OP_BITWISE_OR_INCLUDE"] = "1"; break;
case UnaryOpType::RIGHT_SHIFT: defines["SFPU_OP_RIGHT_SHIFT_INCLUDE"] = "1"; break;
case UnaryOpType::FLOOR: defines["SFPU_OP_FLOOR_INCLUDE"] = "1"; break;
case UnaryOpType::CEIL: defines["SFPU_OP_CEIL_INCLUDE"] = "1"; break;
case UnaryOpType::LEFT_SHIFT: defines["SFPU_OP_LEFT_SHIFT_INCLUDE"] = "1"; break;
case UnaryOpType::REMAINDER: defines["SFPU_OP_REMAINDER_INCLUDE"] = "1"; break;
Expand Down Expand Up @@ -281,7 +282,6 @@ std::pair<string, string> get_op_init_and_func_default(UnaryOpType op_type, std:
case UnaryOpType::SIGNBIT:
op_init_and_name = {"signbit_tile_init();", fmt::format("signbit_tile({});", idst)};
break;
case UnaryOpType::FLOOR: op_init_and_name = {"floor_tile_init();", fmt::format("floor_tile({});", idst)}; break;
case UnaryOpType::CEIL: op_init_and_name = {"ceil_tile_init();", fmt::format("ceil_tile({});", idst)}; break;
case UnaryOpType::SIN: op_init_and_name = {"sin_tile_init();", fmt::format("sin_tile({});", idst)}; break;
case UnaryOpType::COS: op_init_and_name = {"cos_tile_init();", fmt::format("cos_tile({});", idst)}; break;
Expand Down Expand Up @@ -340,6 +340,12 @@ std::pair<string, string> get_op_init_and_func_default(UnaryOpType op_type, std:
case UnaryOpType::IDENTITY_UINT32:
op_init_and_name = {"identity_tile_init();", fmt::format("identity_tile_uint32({});", idst)};
break;
case UnaryOpType::FLOOR:
op_init_and_name = {"floor_tile_init();", fmt::format("floor_tile({});", idst)};
break;
case UnaryOpType::FLOOR_FLOAT32:
op_init_and_name = {"floor_tile_init();", fmt::format("floor_tile_float32({});", idst)}; break;
break;
case UnaryOpType::RELU6:
op_init_and_name = {"relu_max_tile_init();", fmt::format("relu_max_tile({}, 0x40c00000u);", idst)};
break;
Expand Down
27 changes: 26 additions & 1 deletion ttnn/cpp/ttnn/operations/eltwise/unary/unary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,6 @@ template struct ExecuteUnary<UnaryOpType::ERFINV>;
template struct ExecuteUnary<UnaryOpType::EXP2>;
template struct ExecuteUnary<UnaryOpType::EXPM1>;
template struct ExecuteUnary<UnaryOpType::EQZ>;
template struct ExecuteUnary<UnaryOpType::FLOOR>;
template struct ExecuteUnary<UnaryOpType::CEIL>;
template struct ExecuteUnary<UnaryOpType::GEZ>;
template struct ExecuteUnary<UnaryOpType::GTZ>;
Expand Down Expand Up @@ -337,6 +336,32 @@ Tensor Identity::invoke(
DefaultQueueId, input_tensor, {UnaryWithParam{op_type}}, memory_config, optional_output_tensor);
}

// Floor on an explicit command queue. FLOAT32 inputs are dispatched to the dedicated
// FLOOR_FLOAT32 op type; every other dtype uses FLOOR.
Tensor Floor::invoke(
    uint8_t queue_id,
    const Tensor& input_tensor,
    const std::optional<MemoryConfig>& memory_config,
    const std::optional<Tensor>& optional_output_tensor) {
    const bool is_float32 = input_tensor.get_dtype() == DataType::FLOAT32;
    const UnaryOpType selected_op = is_float32 ? UnaryOpType::FLOOR_FLOAT32 : UnaryOpType::FLOOR;

    return detail::unary_impl(
        queue_id, input_tensor, {UnaryWithParam{selected_op}}, memory_config, optional_output_tensor);
}

// Floor on the default command queue (DefaultQueueId). FLOAT32 inputs are dispatched to the
// dedicated FLOOR_FLOAT32 op type; every other dtype uses FLOOR.
Tensor Floor::invoke(
    const Tensor& input_tensor,
    const std::optional<MemoryConfig>& memory_config,
    const std::optional<Tensor>& optional_output_tensor) {
    const bool is_float32 = input_tensor.get_dtype() == DataType::FLOAT32;
    const UnaryOpType selected_op = is_float32 ? UnaryOpType::FLOOR_FLOAT32 : UnaryOpType::FLOOR;

    return detail::unary_impl(
        DefaultQueueId, input_tensor, {UnaryWithParam{selected_op}}, memory_config, optional_output_tensor);
}

Tensor Dropout::invoke(
const Tensor& input,
const uint32_t seed,
Expand Down
16 changes: 15 additions & 1 deletion ttnn/cpp/ttnn/operations/eltwise/unary/unary.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,19 @@ struct Identity {
const std::optional<Tensor>& optional_output_tensor = std::nullopt);
};

// Floor operation entry points. Both overloads pick UnaryOpType::FLOOR_FLOAT32 when the input
// tensor's dtype is DataType::FLOAT32 and UnaryOpType::FLOOR otherwise.
struct Floor {
// Runs floor on the given command queue.
static Tensor invoke(
uint8_t queue_id,
const Tensor& input_tensor,
const std::optional<MemoryConfig>& memory_config = std::nullopt,
const std::optional<Tensor>& optional_output_tensor = std::nullopt);

// Runs floor on the default command queue (DefaultQueueId).
static Tensor invoke(
const Tensor& input_tensor,
const std::optional<MemoryConfig>& memory_config = std::nullopt,
const std::optional<Tensor>& optional_output_tensor = std::nullopt);
};

struct Dropout {
static Tensor invoke(
const Tensor& input,
Expand Down Expand Up @@ -281,7 +294,6 @@ REGISTER_UNARY_OPERATION(erfinv, ERFINV);
REGISTER_UNARY_OPERATION(exp2, EXP2);
REGISTER_UNARY_OPERATION(expm1, EXPM1);
REGISTER_UNARY_OPERATION(eqz, EQZ);
REGISTER_UNARY_OPERATION(floor, FLOOR);
REGISTER_UNARY_OPERATION(ceil, CEIL);
REGISTER_UNARY_OPERATION(gez, GEZ);
REGISTER_UNARY_OPERATION(gtz, GTZ);
Expand Down Expand Up @@ -354,6 +366,8 @@ constexpr auto dropout =
ttnn::register_operation_with_auto_launch_op<"ttnn::dropout", ttnn::operations::unary::Dropout>();
constexpr auto identity =
ttnn::register_operation_with_auto_launch_op<"ttnn::identity", ttnn::operations::unary::Identity>();
// Registered through the dedicated Floor struct, whose invoke overloads choose the op type from the input dtype.
constexpr auto floor =
ttnn::register_operation_with_auto_launch_op<"ttnn::floor", ttnn::operations::unary::Floor>();
constexpr auto softplus =
ttnn::register_operation_with_auto_launch_op<"ttnn::softplus", ttnn::operations::unary::Softplus>();
constexpr auto prelu_sfpu =
Expand Down

0 comments on commit 3afc3b8

Please sign in to comment.