Skip to content

Commit

Permalink
Int8 and Int16 Batch Matmul support (ARM-software#139)
Browse files Browse the repository at this point in the history
* Adds new BatchMatmulFunctions folder
 * Adds new Batch Matmul public functions
 * Adds new s16 x s16 vector matmult function
 * Adds new unit tests
 * Adds new struct for batch matmul params
 * Remove cmake files from check version script

Change-Id: I4709f56b23c2be91c08e26e381a1e956b6a9978b
<Co-authored-by: Ryan O'Shea, [email protected]>
<Co-authored-by: Adrian Lundell, [email protected]>
  • Loading branch information
ArmRyan authored Jul 11, 2024
1 parent 5a11800 commit 9d924bd
Show file tree
Hide file tree
Showing 98 changed files with 11,493 additions and 50 deletions.
3 changes: 3 additions & 0 deletions ARM.CMSIS-NN.pdsc
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@
<file category="source" name="Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s4.c"/>
<file category="source" name="Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s8.c"/>
<file category="source" name="Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s16.c"/>
<file category="source" name="Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s16_s16.c"/>
<file category="source" name="Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_svdf_s8.c"/>
<file category="source" name="Source/NNSupportFunctions/arm_q7_to_q15_with_offset.c"/>
<file category="source" name="Source/NNSupportFunctions/arm_s8_to_s16_unordered_with_offset.c"/>
Expand All @@ -127,6 +128,8 @@
<file category="source" name="Source/NNSupportFunctions/arm_nn_vec_mat_mul_result_acc_s8_s16.c"/>
<file category="source" name="Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_row_offset_s8_s16.c"/>
<file category="source" name="Source/NNSupportFunctions/arm_nn_vec_mat_mul_result_acc_s16.c"/>
<file category="source" name="Source/FullyConnectedFunctions/arm_batch_matmul_s8.c"/>
<file category="source" name="Source/FullyConnectedFunctions/arm_batch_matmul_s16.c"/>
<file category="source" name="Source/FullyConnectedFunctions/arm_fully_connected_s4.c"/>
<file category="source" name="Source/FullyConnectedFunctions/arm_fully_connected_s8.c"/>
<file category="source" name="Source/FullyConnectedFunctions/arm_fully_connected_s16.c"/>
Expand Down
5 changes: 3 additions & 2 deletions Include/Internal/arm_nn_compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
* Title: arm_nn_compiler.h
* Description: Generic compiler header
*
* $Date: 16 January 2024
* $Revision: V.1.2.2
* $Date: 20 June 2024
* $Revision: V.1.3.0
*
* Target : Arm(R) M-Profile Architecture
* -------------------------------------------------------------------- */
Expand Down Expand Up @@ -189,6 +189,7 @@ __STATIC_FORCEINLINE uint8_t CLZ(uint32_t value)
// Common intrinsics
#define SMLABB __smlabb
#define SMLATT __smlatt
#define SMLALD __smlald
#define QADD __qadd
#define QSUB8 __qsub8
#define QSUB16 __qsub16
Expand Down
14 changes: 11 additions & 3 deletions Include/arm_nn_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@
* Description: Public header file to contain the CMSIS-NN structs for the
* TensorFlowLite micro compliant functions
*
* $Date: 11 April 2024
* $Revision: V.3.2.0
* $Date: 19 June 2024
* $Revision: V.3.3.0
*
* Target : Arm(R) M-Profile Architecture
* -------------------------------------------------------------------- */
Expand Down Expand Up @@ -165,11 +165,19 @@ typedef struct
typedef struct
{
int32_t input_offset; /**< The negative of the zero value for the input tensor */
int32_t filter_offset; /**< The negative of the zero value for the filter tensor. Not used */
int32_t filter_offset; /**< The negative of the zero value for the filter tensor */
int32_t output_offset; /**< The negative of the zero value for the output tensor */
cmsis_nn_activation activation;
} cmsis_nn_fc_params;

/** CMSIS-NN object for Batch Matmul layer parameters */
typedef struct
{
const bool adj_x; /**< Adjoint flag: transpose LHS before multiplication. NOTE(review): documented as currently unused by the batch matmul functions */
const bool adj_y; /**< Adjoint flag: transpose RHS before multiplication. NOTE(review): documented as currently unused by the batch matmul functions */
cmsis_nn_fc_params fc_params; /**< Reused fully-connected parameters; batch matmul reads the activation min/max clamp range from here */
} cmsis_nn_bmm_params;

/** CMSIS-NN object for SVDF layer parameters */
typedef struct
{
Expand Down
78 changes: 76 additions & 2 deletions Include/arm_nnfunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
* Title: arm_nnfunctions.h
* Description: Public header file for CMSIS NN Library
*
* $Date: 04 Jun 2024
* $Revision: V.16.1.0
* $Date: 19 June 2024
* $Revision: V.16.2.0
*
* Target : Arm(R) M-Profile Architecture
* -------------------------------------------------------------------- */
Expand Down Expand Up @@ -2604,6 +2604,80 @@ arm_cmsis_nn_status arm_lstm_unidirectional_s16(const int16_t *input,
const cmsis_nn_lstm_params *params,
cmsis_nn_lstm_context *buffers);

/**
* @brief Batch matmul function with 8 bit input and output.
*
* @param[in] ctx Temporary scratch buffer
* The caller is expected to clear the buffer, if applicable, for security reasons.
* Optional function arm_fully_connected_s8_get_buffer_size() provides the buffer
* size if an additional buffer is required.
* @param[in] bmm_params Batch matmul Parameters
* Adjoint flags are currently unused.
* @param[in] quant_params Quantization parameters
* @param[in] input_lhs_dims Input lhs tensor dimensions.
* This should be NHWC where lhs C = rhs C
* @param[in] input_lhs Pointer to input tensor
* @param[in] input_rhs_dims Input rhs tensor dimensions.
* This is expected to be transposed so
* should be NHWC where lhs C = rhs C
* @param[in] input_rhs Pointer to transposed input tensor
* @param[in] output_dims Output tensor dimensions
* @param[out] output Pointer to the output tensor
*
* @return The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
*
* @details
* 1. Supported framework: TensorFlow Lite Micro
* 2. Performs row * row matrix multiplication with the RHS transposed.
*
*/
arm_cmsis_nn_status arm_batch_matmul_s8(const cmsis_nn_context *ctx,
const cmsis_nn_bmm_params *bmm_params,
const cmsis_nn_per_tensor_quant_params *quant_params,
const cmsis_nn_dims *input_lhs_dims,
const int8_t *input_lhs,
const cmsis_nn_dims *input_rhs_dims,
const int8_t *input_rhs,
const cmsis_nn_dims *output_dims,
int8_t *output);

/**
* @brief Batch matmul function with 16 bit input and output.
*
* @param[in] ctx Temporary scratch buffer
* The caller is expected to clear the buffer, if applicable, for security reasons.
* Optional function arm_fully_connected_s16_get_buffer_size() provides the buffer
* size if an additional buffer is required.
* @param[in] bmm_params Batch matmul Parameters
* Adjoint flags are currently unused.
* @param[in] quant_params Quantization parameters
* @param[in] input_lhs_dims Input lhs tensor dimensions.
* This should be NHWC where LHS.C = RHS.C
* @param[in] input_lhs Pointer to input tensor
* @param[in] input_rhs_dims Input rhs tensor dimensions.
* This is expected to be transposed so
* should be NHWC where LHS.C = RHS.C
* @param[in] input_rhs Pointer to transposed input tensor
* @param[in] output_dims Output tensor dimensions
* @param[out] output Pointer to the output tensor
*
* @return The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
*
* @details
* 1. Supported framework: TensorFlow Lite Micro
* 2. Performs row * row matrix multiplication with the RHS transposed.
*
*/
arm_cmsis_nn_status arm_batch_matmul_s16(const cmsis_nn_context *ctx,
const cmsis_nn_bmm_params *bmm_params,
const cmsis_nn_per_tensor_quant_params *quant_params,
const cmsis_nn_dims *input_lhs_dims,
const int16_t *input_lhs,
const cmsis_nn_dims *input_rhs_dims,
const int16_t *input_rhs,
const cmsis_nn_dims *output_dims,
int16_t *output);

#ifdef __cplusplus
}
#endif
Expand Down
34 changes: 31 additions & 3 deletions Include/arm_nnsupportfunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
* Title: arm_nnsupportfunctions.h
* Description: Public header file of support functions for CMSIS NN Library
*
* $Date: 27 May 2024
* $Revision: V.22.1.0
* $Date: 19 June 2024
* $Revision: V.22.2.0
*
* Target : Arm(R) M-Profile Architecture
* -------------------------------------------------------------------- */
Expand Down Expand Up @@ -704,7 +704,7 @@ arm_cmsis_nn_status arm_nn_vec_mat_mult_t_s8(const int8_t *lhs,
const int32_t rhs_offset);

/**
* @brief s16 Vector by Matrix (transposed) multiplication
* @brief s16 Vector by s8 Matrix (transposed) multiplication
*
* @param[in] lhs Input left-hand side vector
* @param[in] rhs Input right-hand side matrix (transposed)
Expand All @@ -731,6 +731,34 @@ arm_cmsis_nn_status arm_nn_vec_mat_mult_t_s16(const int16_t *lhs,
const int32_t activation_min,
const int32_t activation_max);

/**
* @brief s16 Vector by s16 Matrix (transposed) multiplication
*
* @param[in] lhs Input left-hand side vector
* @param[in] rhs Input right-hand side matrix (transposed)
* @param[in] bias Input bias
* @param[out] dst Output vector
* @param[in] dst_multiplier Output multiplier
* @param[in] dst_shift Output shift
* @param[in] rhs_cols Number of columns in the right-hand side input matrix
* @param[in] rhs_rows Number of rows in the right-hand side input matrix
* @param[in] activation_min Minimum value to clamp the output to. Range: int16
* @param[in] activation_max Maximum value to clamp the output to. Range: int16
*
* @return The function returns <code>ARM_CMSIS_NN_SUCCESS</code>
*
*/
arm_cmsis_nn_status arm_nn_vec_mat_mult_t_s16_s16(const int16_t *lhs,
const int16_t *rhs,
const int64_t *bias,
int16_t *dst,
const int32_t dst_multiplier,
const int32_t dst_shift,
const int32_t rhs_cols,
const int32_t rhs_rows,
const int32_t activation_min,
const int32_t activation_max);

/**
* @brief s8 Vector by Matrix (transposed) multiplication with s16 output
*
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ Examples are Cortex-M55 or Cortex-M85 configured with MVE.
| DepthwiseConv2D | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes |
| TransposeConv2D | Yes | No | No | Yes | No | No | Yes | No | No |
| Fully Connected | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes |
| Batch Matmul | Yes | Yes | No | Yes | Yes | No | Yes | Yes | No |
| Add | Yes | Yes | N/A | Yes | Yes | N/A | Yes | Yes | N/A |
| Mul | Yes | Yes | N/A | Yes | Yes | N/A | Yes | Yes | N/A |
| MaxPooling | Yes | Yes | N/A | Yes | Yes | N/A | Yes | Yes | N/A |
Expand Down
4 changes: 2 additions & 2 deletions Source/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# SPDX-FileCopyrightText: Copyright 2019-2022 Arm Limited and/or its affiliates <[email protected]>
# SPDX-FileCopyrightText: Copyright 2019-2022, 2024 Arm Limited and/or its affiliates <[email protected]>
#
# SPDX-License-Identifier: Apache-2.0
#
Expand All @@ -18,7 +18,7 @@

SET(ROOT ${CMSIS_PATH})

# Select which parts of the CMSIS-DSP must be compiled.
# Select which parts of the CMSIS-NN must be compiled.
# There are some dependencies between the parts but they are not tracked
# by this cmake. So, enabling some functions may require to enable some
# other ones.
Expand Down
105 changes: 105 additions & 0 deletions Source/FullyConnectedFunctions/arm_batch_matmul_s16.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
/*
* SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/* ----------------------------------------------------------------------
* Project: CMSIS NN Library
* Title: arm_batch_matmul_s16.c
* Description: Batch matrix multiplication. Does not perform transposes, see header file for details.
*
* $Date: 19 June 2024
* $Revision: V.1.0.0
*
* Target : Arm(R) M-Profile Architecture
*
* -------------------------------------------------------------------- */
#include "arm_nnfunctions.h"
#include "arm_nnsupportfunctions.h"

/**
* @ingroup Public
*/

/**
* @addtogroup FC
* @{
*/

/*
 * s16 batch matrix multiplication.
 * Computes each output row as (LHS row vector) x (transposed RHS matrix).
 * Broadcasting over the batch (N) and height (H) dimensions is implemented
 * with pointer-stride corrections computed up front, so the loop body stays
 * branch-free. Refer to header file for details.
 */
arm_cmsis_nn_status arm_batch_matmul_s16(const cmsis_nn_context *ctx,
const cmsis_nn_bmm_params *bmm_params,
const cmsis_nn_per_tensor_quant_params *quant_params,
const cmsis_nn_dims *input_lhs_dims,
const int16_t *input_lhs,
const cmsis_nn_dims *input_rhs_dims,
const int16_t *input_rhs,
const cmsis_nn_dims *output_dims,
int16_t *output)
{
(void)ctx; /* No scratch buffer is used by this implementation */
const int32_t output_batch = output_dims->n;
const int32_t output_height = output_dims->h;
const int32_t lhs_rows = input_lhs_dims->w;
const int32_t rhs_rows = input_rhs_dims->w;
const int32_t rhs_cols = input_rhs_dims->c;

/* The row loop below advances input_lhs by one full matrix (lhs_rows * rhs_cols).
 * If LHS is broadcast along H (lhs->h < rhs->h), rewind it to reuse the same matrix
 * for the next H iteration; otherwise leave it advanced (diff of 0). */
const int32_t inner_lhs_diff = input_lhs_dims->h >= input_rhs_dims->h ? 0 : lhs_rows * rhs_cols;
/* input_rhs is NOT advanced by the row loop, so step it forward one full matrix
 * per H iteration — unless RHS is broadcast along H, in which case keep reusing it. */
const int32_t inner_rhs_diff = input_rhs_dims->h >= input_lhs_dims->h ? rhs_rows * rhs_cols : 0;
/* Per-batch (N) corrections: continue the normal advance, or rewind the pointer back
 * over all H iterations when that operand is broadcast along N. */
const int32_t outer_lhs_diff = input_lhs_dims->n >= input_rhs_dims->n
? inner_lhs_diff
: -((lhs_rows * rhs_cols) - inner_lhs_diff) * input_lhs_dims->h;
const int32_t outer_rhs_diff = input_rhs_dims->n >= input_lhs_dims->n ? (rhs_rows * rhs_cols) - inner_rhs_diff
: -inner_rhs_diff * input_rhs_dims->h;

/* Pre-reduced requantization multiplier, passed to the s16 kernel as dst_multiplier */
const int32_t reduced_multiplier = REDUCE_MULTIPLIER(quant_params->multiplier);

for (int i_out_batch = 0; i_out_batch < output_batch; i_out_batch++)
{
for (int i_out_height = 0; i_out_height < output_height; i_out_height++)
{

/* One output row per LHS row */
for (int j = 0; j < lhs_rows; j++)
{
arm_nn_vec_mat_mult_t_s16_s16(input_lhs,
input_rhs,
NULL, /* Batch matmul carries no bias */
output,
reduced_multiplier,
quant_params->shift,
rhs_cols,
rhs_rows,
bmm_params->fc_params.activation.min,
bmm_params->fc_params.activation.max);
input_lhs += rhs_cols;
output += rhs_rows;
}
input_lhs -= inner_lhs_diff;
input_rhs += inner_rhs_diff;
}
input_lhs += outer_lhs_diff;
input_rhs += outer_rhs_diff;
}

return ARM_CMSIS_NN_SUCCESS;
}

/**
* @} end of Doxygen group
*/
Loading

0 comments on commit 9d924bd

Please sign in to comment.