Skip to content

Commit

Permalink
Int4 Depthwise performance improvement (ARM-software#117)
Browse files Browse the repository at this point in the history
* Fix unit test generation for depthwise
 * Add new unit tests for arm_depthwise_conv_s4_generic
 * Improved performance for arm_depthwise_conv_s4_generic
 * Fix buffer allocation for arm_depthwise_conv_s4_generic unit tests

Change-Id: I87543d055e936481f406f1d0872debcf87efdbdd

Signed-off-by: Ryan O'Shea <[email protected]>
  • Loading branch information
ArmRyan authored Mar 5, 2024
1 parent 4b46c85 commit 6cc31fb
Show file tree
Hide file tree
Showing 22 changed files with 1,572 additions and 109 deletions.
453 changes: 359 additions & 94 deletions Source/ConvolutionFunctions/arm_depthwise_conv_s4.c

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
// Generated by test_settings.py using tensorflow version 2.15.0 (Keras version 2.15.0).
// Interpreter from tensorflow version 2.15.0 and revision v2.15.0-rc1-8-g6887368d6d4.
#pragma once
#include <stdint.h>

/*
 * Bias table for the depthwise_int4_generic_5 test case.
 * Holds 20 signed 32-bit entries, laid out four per row below.
 * NOTE(review): presumably one bias per output channel (the matching
 * config declares 20 output channels) -- confirm against the test driver.
 */
const int32_t depthwise_int4_generic_5_biases[20] = {
    -8594325,    -1703598848, 1687007744,  -571777664,
    521577216,   2024419072,  -263814048,  -794905728,
    1743926656,  -1629277440, -1894920320, -219097952,
    -2074920448, 942072256,   -156722528,  -1605767168,
    -181432688,  1205177728,  552752768,   -2036018944,
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// Generated by test_settings.py using tensorflow version 2.15.0 (Keras version 2.15.0).
// Interpreter from tensorflow version 2.15.0 and revision v2.15.0-rc1-8-g6887368d6d4.
#pragma once
/* Configuration constants for the depthwise_int4_generic_5 test case. */

/* Channel counts: OUT_CH equals IN_CH * CH_MULT (20 * 1). */
#define DEPTHWISE_INT4_GENERIC_5_IN_CH 20
#define DEPTHWISE_INT4_GENERIC_5_CH_MULT 1
#define DEPTHWISE_INT4_GENERIC_5_OUT_CH 20

/* Input tensor: INPUT_SIZE equals 21 * 21 * 20 elements for the single batch. */
#define DEPTHWISE_INT4_GENERIC_5_INPUT_BATCHES 1
#define DEPTHWISE_INT4_GENERIC_5_INPUT_W 21
#define DEPTHWISE_INT4_GENERIC_5_INPUT_H 21
#define DEPTHWISE_INT4_GENERIC_5_INPUT_SIZE 8820

/* Filter geometry: a 5x5 kernel with dilation 2 spans 9 input positions per axis. */
#define DEPTHWISE_INT4_GENERIC_5_FILTER_X 5
#define DEPTHWISE_INT4_GENERIC_5_FILTER_Y 5
#define DEPTHWISE_INT4_GENERIC_5_DILATION_X 2
#define DEPTHWISE_INT4_GENERIC_5_DILATION_Y 2
#define DEPTHWISE_INT4_GENERIC_5_STRIDE_X 1
#define DEPTHWISE_INT4_GENERIC_5_STRIDE_Y 2
#define DEPTHWISE_INT4_GENERIC_5_PAD_X 0
#define DEPTHWISE_INT4_GENERIC_5_PAD_Y 0

/* Output tensor: DST_SIZE equals 13 * 7 * 20 elements. */
#define DEPTHWISE_INT4_GENERIC_5_OUTPUT_W 13
#define DEPTHWISE_INT4_GENERIC_5_OUTPUT_H 7
#define DEPTHWISE_INT4_GENERIC_5_DST_SIZE 1820

/* Quantization offsets and output clamp limits. */
#define DEPTHWISE_INT4_GENERIC_5_INPUT_OFFSET 128
#define DEPTHWISE_INT4_GENERIC_5_OUTPUT_OFFSET -2
#define DEPTHWISE_INT4_GENERIC_5_OUT_ACTIVATION_MIN -127
#define DEPTHWISE_INT4_GENERIC_5_OUT_ACTIVATION_MAX 125

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
// Generated by test_settings.py using tensorflow version 2.15.0 (Keras version 2.15.0).
// Interpreter from tensorflow version 2.15.0 and revision v2.15.0-rc1-8-g6887368d6d4.
#pragma once
#include <stdint.h>

/*
 * Output multiplier table for the depthwise_int4_generic_5 test case.
 * Holds 20 positive 32-bit entries, laid out four per row below.
 * NOTE(review): presumably the per-channel requantization multipliers that
 * pair with the output_shift table -- confirm against the kernel under test.
 */
const int32_t depthwise_int4_generic_5_output_mult[20] = {
    1075004527, 1647295198, 1888315972, 1321105339,
    1585122696, 1252045640, 1869573033, 1425108753,
    1398916209, 1183393042, 1799365468, 1536291489,
    1403481944, 1894003979, 1455287472, 1437413539,
    1237118826, 1475310136, 1982989726, 1894603641,
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
// Generated by test_settings.py using tensorflow version 2.15.0 (Keras version 2.15.0).
// Interpreter from tensorflow version 2.15.0 and revision v2.15.0-rc1-8-g6887368d6d4.
#pragma once
#include <stdint.h>

/*
 * Reference output for the depthwise_int4_generic_5 test case.
 * Holds 1820 int8 entries, which matches DEPTHWISE_INT4_GENERIC_5_DST_SIZE
 * (13 * 7 * 20). The same 20-value sequence repeats across the whole table.
 * NOTE(review): presumably one 20-channel vector per output position, with
 * every position producing identical values -- confirm this is the intended
 * reference and not a sign of saturated or bias-dominated results.
 */
const int8_t depthwise_int4_generic_5_output_ref[1820] = {
-2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1,
7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4,
-14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3,
-18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15,
-2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1,
7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4,
-14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3,
-18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15,
-2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1,
7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4,
-14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3,
-18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15,
-2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1,
7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4,
-14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3,
-18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15,
-2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1,
7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4,
-14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3,
-18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15,
-2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1,
7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4,
-14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3,
-18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15,
-2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1,
7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4,
-14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3,
-18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15,
-2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1,
7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4,
-14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3,
-18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15,
-2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1,
7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4,
-14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3,
-18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15,
-2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1,
7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4,
-14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3,
-18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15,
-2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1,
7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4,
-14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3,
-18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15,
-2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1,
7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4,
-14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3,
-18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15,
-2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1,
7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4,
-14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3,
-18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15,
-2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1,
7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4,
-14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3,
-18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15,
-2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1,
7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4,
-14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3,
-18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15,
-2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1,
7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4,
-14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3,
-18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15,
-2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1,
7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4,
-14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3,
-18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15,
-2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1,
7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4,
-14, -3, -22, 4, -3, -18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3,
-18, -4, 0, 2, -15, -2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15,
-2, -7, 1, -7, 1, 7, -4, -10, 15, -4, -14, -3, -22, 4, -3, -18, -4, 0, 2, -15};
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
// Generated by test_settings.py using tensorflow version 2.15.0 (Keras version 2.15.0).
// Interpreter from tensorflow version 2.15.0 and revision v2.15.0-rc1-8-g6887368d6d4.
#pragma once
#include <stdint.h>

/*
 * Output shift table for the depthwise_int4_generic_5 test case.
 * Holds 20 entries, all negative (between -29 and -26), five per row below.
 * NOTE(review): presumably per-channel requantization shifts where a negative
 * value means a right shift -- confirm against the kernel's requantize helper.
 */
const int32_t depthwise_int4_generic_5_output_shift[20] = {
    -27, -28, -29, -26, -27,
    -27, -27, -26, -26, -29,
    -27, -28, -26, -27, -27,
    -26, -26, -29, -27, -27,
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
// Generated by test_settings.py using tensorflow version 2.15.0 (Keras version 2.15.0).
// Interpreter from tensorflow version 2.15.0 and revision v2.15.0-rc1-8-g6887368d6d4.
#include "biases_data.h"
#include "config_data.h"
#include "input_data.h"
#include "output_mult_data.h"
#include "output_ref_data.h"
#include "output_shift_data.h"
#include "weights_data.h"
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// Generated by test_settings.py using tensorflow version 2.15.0 (Keras version 2.15.0).
// Interpreter from tensorflow version 2.15.0 and revision v2.15.0-rc1-8-g6887368d6d4.
#pragma once
#include <stdint.h>

/*
 * Weight table for the depthwise_int4_generic_5 test case, stored as 250
 * int8 bytes.
 * NOTE(review): 250 is half of 5 * 5 * 20 (FILTER_X * FILTER_Y * OUT_CH), so
 * each byte presumably packs two signed 4-bit weights -- confirm the nibble
 * order against the kernel under test.
 */
const int8_t depthwise_int4_generic_5_weights[250] = {
-59, -48, 52, 10, -97, -35, -87, 0, -108, 63, -58, 94, 101, -51, -23, 32, -20, -59, 2, -21, -64,
66, 36, -94, 42, -92, -9, 79, 95, 42, -49, -22, 111, -87, 38, -62, -20, -66, 11, -19, -26, 75,
67, -37, 12, 107, 109, -4, -106, 4, 35, 64, -17, 92, 54, -61, 9, 94, -92, 84, 43, -46, 27,
95, -47, -10, -108, 9, 11, -34, 80, -31, -53, 61, 97, 15, -6, -106, -87, -79, -28, 49, 34, 109,
33, 0, -42, 47, 75, -83, 109, -83, -30, -106, -112, -96, 32, 64, 16, 63, 80, -84, -49, -86, -14,
-67, 106, -11, 100, 21, 32, 52, 15, 9, -3, 27, -97, 89, 0, -62, -99, 28, 69, -70, 12, -2,
84, 75, 14, 32, 12, -22, -13, -3, -94, -58, -2, -98, 12, 75, -48, -6, -89, 2, 6, 10, -26,
96, -98, -43, 77, -5, -34, -54, 95, 57, 10, 9, 0, 31, -28, -14, -51, 15, -111, -109, -55, -90,
-28, -76, -37, -111, 18, 42, 4, -96, 6, 62, -51, 2, 32, 113, -60, 91, -68, -26, -91, -60, 28,
93, 20, -35, 64, -6, 16, -38, -17, 59, -90, -19, 35, -76, 80, 53, 109, 94, -75, -106, 15, 77,
28, 79, 3, -100, 47, 109, 43, -106, -96, -37, 60, 3, -83, 67, 42, 58, 80, -31, -90, -69, -64,
-29, -36, -59, 85, 12, -75, -23, 89, -67, 96, 78, -111, 31, 63, 51, 10, -20, 79, -34};
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// Generated by test_settings.py using tensorflow version 2.15.0 (Keras version 2.15.0).
// Interpreter from tensorflow version 2.15.0 and revision v2.15.0-rc1-8-g6887368d6d4.
#pragma once
#include <stdint.h>

/*
 * Bias table for the depthwise_int4_generic_6 test case.
 * Holds 12 signed 32-bit entries, laid out four per row below.
 * NOTE(review): presumably one bias per output channel (the matching
 * config declares 12 output channels) -- confirm against the test driver.
 */
const int32_t depthwise_int4_generic_6_biases[12] = {
    1917617024, 1587747584, -128584216, 108614224,
    -136052432, 2047927040, -13180282,  1485387776,
    62589904,   1990100992, 1641220096, 721666816,
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// Generated by test_settings.py using tensorflow version 2.15.0 (Keras version 2.15.0).
// Interpreter from tensorflow version 2.15.0 and revision v2.15.0-rc1-8-g6887368d6d4.
#pragma once
/* Configuration constants for the depthwise_int4_generic_6 test case. */

/* Channel counts: OUT_CH equals IN_CH * CH_MULT (4 * 3). */
#define DEPTHWISE_INT4_GENERIC_6_IN_CH 4
#define DEPTHWISE_INT4_GENERIC_6_CH_MULT 3
#define DEPTHWISE_INT4_GENERIC_6_OUT_CH 12

/* Input tensor: INPUT_SIZE equals 21 * 21 * 4 elements for the single batch. */
#define DEPTHWISE_INT4_GENERIC_6_INPUT_BATCHES 1
#define DEPTHWISE_INT4_GENERIC_6_INPUT_W 21
#define DEPTHWISE_INT4_GENERIC_6_INPUT_H 21
#define DEPTHWISE_INT4_GENERIC_6_INPUT_SIZE 1764

/* Filter geometry: 3x3 kernel, dilated only along Y (spans 3 columns by 5 rows). */
#define DEPTHWISE_INT4_GENERIC_6_FILTER_X 3
#define DEPTHWISE_INT4_GENERIC_6_FILTER_Y 3
#define DEPTHWISE_INT4_GENERIC_6_DILATION_X 1
#define DEPTHWISE_INT4_GENERIC_6_DILATION_Y 2
#define DEPTHWISE_INT4_GENERIC_6_STRIDE_X 1
#define DEPTHWISE_INT4_GENERIC_6_STRIDE_Y 1
#define DEPTHWISE_INT4_GENERIC_6_PAD_X 0
#define DEPTHWISE_INT4_GENERIC_6_PAD_Y 0

/* Output tensor: DST_SIZE equals 19 * 17 * 12 elements. */
#define DEPTHWISE_INT4_GENERIC_6_OUTPUT_W 19
#define DEPTHWISE_INT4_GENERIC_6_OUTPUT_H 17
#define DEPTHWISE_INT4_GENERIC_6_DST_SIZE 3876

/* Quantization offsets and output clamp limits. */
#define DEPTHWISE_INT4_GENERIC_6_INPUT_OFFSET 128
#define DEPTHWISE_INT4_GENERIC_6_OUTPUT_OFFSET -2
#define DEPTHWISE_INT4_GENERIC_6_OUT_ACTIVATION_MIN -127
#define DEPTHWISE_INT4_GENERIC_6_OUT_ACTIVATION_MAX 125
Loading

0 comments on commit 6cc31fb

Please sign in to comment.