Add missing MVE support for int16 convolution #124

Merged
merged 3 commits on Apr 2, 2024
7 changes: 4 additions & 3 deletions ARM.CMSIS-NN.pdsc
@@ -38,6 +38,7 @@
<file category="source" name="Source/ConvolutionFunctions/arm_convolve_1_x_n_s8.c"/>
<file category="source" name="Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_s4_s16.c"/>
<file category="source" name="Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_s8_s16.c"/>
<file category="source" name="Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_s16.c"/>
<file category="source" name="Source/ConvolutionFunctions/arm_depthwise_conv_wrapper_s8.c"/>
<file category="source" name="Source/ConvolutionFunctions/arm_convolve_1x1_s8_fast.c"/>
<file category="source" name="Source/ConvolutionFunctions/arm_convolve_1x1_s4_fast.c"/>
@@ -53,7 +54,6 @@
<file category="source" name="Source/ConvolutionFunctions/arm_depthwise_conv_wrapper_s4.c"/>
<file category="source" name="Source/ConvolutionFunctions/arm_depthwise_conv_s4_opt.c"/>
<file category="source" name="Source/ConvolutionFunctions/arm_depthwise_conv_s4.c"/>
<file category="source" name="Source/ConvolutionFunctions/arm_convolve_fast_s16.c"/>
<file category="source" name="Source/ConvolutionFunctions/arm_convolve_s4.c"/>
<file category="source" name="Source/ConvolutionFunctions/arm_convolve_s8.c"/>
<file category="source" name="Source/ConvolutionFunctions/arm_convolve_s16.c"/>
@@ -69,13 +69,13 @@
<file category="source" name="Source/ConvolutionFunctions/arm_nn_depthwise_conv_s8_core.c"/>
<file category="source" name="Source/ConvolutionFunctions/arm_transpose_conv_get_buffer_sizes_s8.c"/>
<file category="source" name="Source/ConvolutionFunctions/arm_transpose_conv_s8.c"/>
<file category="source" name="Source/ConvolutionFunctions/arm_convolve_get_buffer_sizes_s8.c"/>
<file category="source" name="Source/ConcatenationFunctions/arm_concatenation_s8_x.c"/>
<file category="source" name="Source/ConcatenationFunctions/arm_concatenation_s8_w.c"/>
<file category="source" name="Source/ConcatenationFunctions/arm_concatenation_s8_y.c"/>
<file category="source" name="Source/ConcatenationFunctions/arm_concatenation_s8_z.c"/>
<file category="source" name="Source/SVDFunctions/arm_svdf_s8.c"/>
<file category="source" name="Source/SVDFunctions/arm_svdf_state_s16_s8.c"/>
<file category="source" name="Source/SVDFunctions/arm_svdf_get_buffer_sizes_s8.c"/>
<file category="source" name="Source/PoolingFunctions/arm_max_pool_s8.c"/>
<file category="source" name="Source/PoolingFunctions/arm_max_pool_s16.c"/>
<file category="source" name="Source/PoolingFunctions/arm_avgpool_s8.c"/>
@@ -101,9 +101,9 @@
<file category="source" name="Source/NNSupportFunctions/arm_s8_to_s16_unordered_with_offset.c"/>
<file category="source" name="Source/NNSupportFunctions/arm_nn_mat_mult_nt_t_s4.c"/>
<file category="source" name="Source/NNSupportFunctions/arm_nn_mat_mult_nt_t_s8.c"/>
<file category="source" name="Source/NNSupportFunctions/arm_nn_mat_mult_nt_t_s16.c"/>
<file category="source" name="Source/NNSupportFunctions/arm_nn_mat_mult_nt_t_s8_s32.c"/>
<file category="source" name="Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_s16.c"/>
<file category="source" name="Source/NNSupportFunctions/arm_nn_mat_mul_kernel_s16.c"/>
<file category="source" name="Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_padded_s8.c"/>
<file category="source" name="Source/NNSupportFunctions/arm_nn_mat_mul_core_4x_s8.c"/>
<file category="source" name="Source/NNSupportFunctions/arm_nntables.c"/>
@@ -112,6 +112,7 @@
<file category="source" name="Source/NNSupportFunctions/arm_nn_lstm_step_s8.c"/>
<file category="source" name="Source/NNSupportFunctions/arm_nn_lstm_calculate_gate_s8_s16.c"/>
<file category="source" name="Source/NNSupportFunctions/arm_nn_vec_mat_mul_result_acc_s8_s16.c"/>
<file category="source" name="Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_row_offset_s8_s16.c"/>
<file category="source" name="Source/FullyConnectedFunctions/arm_fully_connected_s4.c"/>
<file category="source" name="Source/FullyConnectedFunctions/arm_fully_connected_s8.c"/>
<file category="source" name="Source/FullyConnectedFunctions/arm_fully_connected_s16.c"/>
68 changes: 10 additions & 58 deletions Include/arm_nnfunctions.h
@@ -21,8 +21,8 @@
* Title: arm_nnfunctions.h
* Description: Public header file for CMSIS NN Library
*
* $Date: 20 February 2024
* $Revision: V.14.0.0
* $Date: 11 March 2024
* $Revision: V.15.0.0

*
* Target : Arm(R) M-Profile Architecture
@@ -375,8 +375,10 @@ arm_cmsis_nn_status arm_convolve_s4(const cmsis_nn_context *ctx,
* @param[in] bias_data Optional bias data pointer. Data type: int32
* @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT]
* @param[out] output_data Output data pointer. Data type: int8

* @return The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
*
* @return The function returns <code>ARM_CMSIS_NN_SUCCESS</code> if successful or
* <code>ARM_CMSIS_NN_ARG_ERROR</code> if incorrect arguments or
* <code>ARM_CMSIS_NN_NO_IMPL_ERROR</code>
*
* @details
* 1. Supported framework: TensorFlow Lite micro
@@ -522,8 +524,10 @@ int32_t arm_transpose_conv_s8_get_buffer_size_mve(const cmsis_nn_dims *input_dim
* @param[in] bias_data Optional bias data pointer. Data type: int64
* @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT]
* @param[out] output_data Output data pointer. Data type: int16

* @return The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
*
* @return The function returns <code>ARM_CMSIS_NN_SUCCESS</code> if successful or
* <code>ARM_CMSIS_NN_ARG_ERROR</code> if incorrect arguments or
* <code>ARM_CMSIS_NN_NO_IMPL_ERROR</code>
*
* @details
* 1. Supported framework: TensorFlow Lite micro
@@ -541,47 +545,6 @@ arm_cmsis_nn_status arm_convolve_s16(const cmsis_nn_context *ctx,
const int64_t *bias_data,
const cmsis_nn_dims *output_dims,
int16_t *output_data);
/**
* @brief Optimized s16 convolution function
* @param[in, out] ctx Function context that contains the additional buffer if required by the function.
* arm_convolve_fast_s16_get_buffer_size will return the buffer_size if required.
* The caller is expected to clear the buffer, if applicable, for security reasons.
* @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...).
* conv_params->input_offset : Not used
* conv_params->output_offset : Not used
* @param[in] quant_params Per-channel quantization info.
* It contains the multiplier and shift values to be applied to each output channel
* @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
* @param[in] input_data Input (activation) data pointer. Data type: int16
* @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the
* spatial filter dimensions. (filter_dims->w * filter_dims->h * input_dims->c) must not
exceed 512
* @param[in] filter_data Filter data pointer. Data type: int8
* @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT]
* @param[in] bias_data Optional bias data pointer. Data type: int64
* @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT]
* @param[out] output_data Output data pointer. Data type: int16

* @return The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
*
* @details
* 1. Supported framework: TensorFlow Lite micro
* 2. Additional memory is required for optimization. Refer to argument 'ctx' for details.
* 3. Implementation supports kernel volumes (filter width * filter height * input channels) < 512.
*
*/

arm_cmsis_nn_status arm_convolve_fast_s16(const cmsis_nn_context *ctx,
const cmsis_nn_conv_params *conv_params,
const cmsis_nn_per_channel_quant_params *quant_params,
const cmsis_nn_dims *input_dims,
const int16_t *input_data,
const cmsis_nn_dims *filter_dims,
const int8_t *filter_data,
const cmsis_nn_dims *bias_dims,
const int64_t *bias_data,
const cmsis_nn_dims *output_dims,
int16_t *output_data);

/**
* @brief Get the required buffer size for s16 convolution function
@@ -594,17 +557,6 @@ arm_cmsis_nn_status arm_convolve_fast_s16(const cmsis_nn_context *ctx,
*/
int32_t arm_convolve_s16_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims);
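
Note: a minimal sketch of how the two s16 declarations above fit together. The shapes, quantization parameters, and the helper name run_conv_s16_example are illustrative placeholders, not part of this change:

#include "arm_nnfunctions.h"
#include <stdint.h>
#include <stdlib.h>

/* Illustrative shapes only: 16x16 input with 8 channels, 3x3 kernel,
 * 4 output channels, no padding, stride 1. */
static arm_cmsis_nn_status run_conv_s16_example(const int16_t *input_data,
                                                const int8_t *filter_data,
                                                const int64_t *bias_data,
                                                const int32_t *out_mult,
                                                const int32_t *out_shift,
                                                int16_t *output_data)
{
    const cmsis_nn_dims input_dims = {.n = 1, .h = 16, .w = 16, .c = 8};
    const cmsis_nn_dims filter_dims = {.n = 4, .h = 3, .w = 3, .c = 8}; /* [C_OUT, HK, WK, C_IN] */
    const cmsis_nn_dims bias_dims = {.n = 1, .h = 1, .w = 1, .c = 4};
    const cmsis_nn_dims output_dims = {.n = 1, .h = 14, .w = 14, .c = 4};

    cmsis_nn_conv_params conv_params = {
        .input_offset = 0,  /* not used by the s16 variant */
        .output_offset = 0, /* not used by the s16 variant */
        .stride = {.w = 1, .h = 1},
        .padding = {.w = 0, .h = 0},
        .dilation = {.w = 1, .h = 1},
        .activation = {.min = INT16_MIN, .max = INT16_MAX},
    };
    const cmsis_nn_per_channel_quant_params quant_params = {
        .multiplier = (int32_t *)out_mult,
        .shift = (int32_t *)out_shift,
    };

    /* Ask the library how much scratch memory this configuration needs. */
    cmsis_nn_context ctx = {.buf = NULL, .size = 0};
    ctx.size = arm_convolve_s16_get_buffer_size(&input_dims, &filter_dims);
    if (ctx.size > 0)
    {
        ctx.buf = malloc(ctx.size);
        if (ctx.buf == NULL)
        {
            return ARM_CMSIS_NN_ARG_ERROR; /* placeholder error handling */
        }
    }

    const arm_cmsis_nn_status status = arm_convolve_s16(&ctx, &conv_params, &quant_params,
                                                        &input_dims, input_data,
                                                        &filter_dims, filter_data,
                                                        &bias_dims, bias_data,
                                                        &output_dims, output_data);
    free(ctx.buf);
    return status;
}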

/**
* @brief Get the required buffer size for fast s16 convolution function
*
* @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN]
* @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK
* are the spatial filter dimensions
* @return The function returns required buffer size(bytes)
*
*/
int32_t arm_convolve_fast_s16_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims);

/**
* @brief Fast s4 version for 1x1 convolution (non-square shape)
*
54 changes: 50 additions & 4 deletions Include/arm_nnsupportfunctions.h
@@ -21,8 +21,8 @@
* Title: arm_nnsupportfunctions.h
* Description: Public header file of support functions for CMSIS NN Library
*
* $Date: 14 February 2024
* $Revision: V.19.0.0
* $Date: 22 March 2024
* $Revision: V.20.0.0
*
* Target : Arm(R) M-Profile Architecture
* -------------------------------------------------------------------- */
@@ -63,6 +63,10 @@ extern "C" {
// will result in lower scratch buffer usage.
#define CH_IN_BLOCK_MVE (124)

// For int16 input, when the number of columns is above this limit, int64 accumulation is needed
// to avoid losing precision.
#define MAX_COL_COUNT (512)
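
Note: the 512 limit follows from the operand widths. An int16 activation times an int8 weight needs at most 22 magnitude bits per product, so roughly 2^9 = 512 such products can be summed in an int32 before overflow becomes possible. A hedged sketch of the resulting split; dot_s16_s8 is an illustrative helper, not a library function:

#include <stdint.h>

/* Illustrative only: pick the accumulator width from the column count. */
static int64_t dot_s16_s8(const int16_t *lhs, const int8_t *rhs, int32_t cols)
{
    if (cols <= MAX_COL_COUNT)
    {
        /* |product| <= 32768 * 128 = 2^22, so a sum of up to 512 products
         * stays within int32 range for practical inputs. */
        int32_t acc = 0;
        for (int32_t i = 0; i < cols; i++)
        {
            acc += (int32_t)lhs[i] * rhs[i];
        }
        return acc;
    }
    /* Above the limit the running sum can exceed 31 bits; widen to int64. */
    int64_t acc = 0;
    for (int32_t i = 0; i < cols; i++)
    {
        acc += (int64_t)lhs[i] * rhs[i];
    }
    return acc;
}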

/**
* @brief definition to pack four 8 bit values.
*/
@@ -281,8 +285,8 @@ int16_t *arm_nn_mat_mult_kernel_s16(const int8_t *input_a,
const int32_t output_ch,
const int32_t *out_shift,
const int32_t *out_mult,
const int16_t activation_min,
const int16_t activation_max,
const int32_t activation_min,
const int32_t activation_max,
const int32_t num_col_a,
const int64_t *const output_bias,
int16_t *out_0);
@@ -445,6 +449,48 @@ arm_cmsis_nn_status arm_nn_mat_mult_nt_t_s8(const int8_t *lhs,
const int32_t row_address_offset,
const int32_t lhs_cols_offset);

/**
* @brief General Matrix-multiplication function with per-channel requantization and int16 input (LHS) and output.
* This function assumes:
* - LHS input matrix NOT transposed (nt)
* - RHS input matrix transposed (t)
*
* @note This operation also performs the broadcast bias addition before the requantization
*
* @param[in] lhs Pointer to the LHS input matrix
* @param[in] rhs Pointer to the RHS input matrix
* @param[in] bias Pointer to the bias vector. The length of this vector is equal to the number of
* output columns (or RHS input rows)
* @param[out] dst Pointer to the output matrix with "m" rows and "n" columns
* @param[in] dst_multipliers Pointer to the multipliers vector needed for the per-channel requantization.
* The length of this vector is equal to the number of output columns (or RHS input
* rows)
* @param[in] dst_shifts Pointer to the shifts vector needed for the per-channel requantization. The length
* of this vector is equal to the number of output columns (or RHS input rows)
* @param[in] lhs_rows Number of LHS input rows
* @param[in] rhs_rows Number of RHS input rows
* @param[in] rhs_cols Number of LHS/RHS input columns
* @param[in] activation_min Minimum value to clamp down the output. Range : int16
* @param[in] activation_max Maximum value to clamp up the output. Range : int16
*
* @details MVE implementation only.
*
 * @return The function returns <code>ARM_CMSIS_NN_SUCCESS</code> or
 *         <code>ARM_CMSIS_NN_NO_IMPL_ERROR</code> on targets without MVE support
*
*/
arm_cmsis_nn_status arm_nn_mat_mult_nt_t_s16(const int16_t *lhs,
const int8_t *rhs,
const int64_t *bias,
int16_t *dst,
const int32_t *dst_multipliers,
const int32_t *dst_shifts,
const int32_t lhs_rows,
const int32_t rhs_rows,
const int32_t rhs_cols,
const int32_t activation_min,
const int32_t activation_max);
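
Note: a scalar reference of what the function declared above computes, useful for checking the MVE path against. This is an illustrative sketch, not the shipped implementation; requantize_ref is a simplified stand-in for the library's per-channel fixed-point requantization:

#include <stddef.h>
#include <stdint.h>

/* Simplified per-channel requantization: scale by multiplier / 2^31,
 * then apply the channel's shift. Illustrative only. */
static int32_t requantize_ref(int64_t acc, int32_t multiplier, int32_t shift)
{
    const int64_t v = (acc * (int64_t)multiplier) >> 31;
    return (int32_t)(shift >= 0 ? v << shift : v >> -shift);
}

static void mat_mult_nt_t_s16_ref(const int16_t *lhs, const int8_t *rhs,
                                  const int64_t *bias, int16_t *dst,
                                  const int32_t *dst_multipliers, const int32_t *dst_shifts,
                                  int32_t lhs_rows, int32_t rhs_rows, int32_t rhs_cols,
                                  int32_t activation_min, int32_t activation_max)
{
    for (int32_t m = 0; m < lhs_rows; m++)
    {
        for (int32_t n = 0; n < rhs_rows; n++) /* rhs_rows == number of output columns */
        {
            /* Broadcast bias addition happens before requantization. */
            int64_t acc = (bias != NULL) ? bias[n] : 0;
            for (int32_t k = 0; k < rhs_cols; k++)
            {
                /* LHS not transposed, RHS transposed: both walk rows of length rhs_cols. */
                acc += (int64_t)lhs[m * rhs_cols + k] * rhs[n * rhs_cols + k];
            }
            int32_t out = requantize_ref(acc, dst_multipliers[n], dst_shifts[n]);
            out = (out < activation_min) ? activation_min : out;
            out = (out > activation_max) ? activation_max : out;
            dst[m * rhs_rows + n] = (int16_t)out;
        }
    }
}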

/**
* @brief General Matrix-multiplication function with int8 input and int32 output.
* This function assumes: