diff --git a/src/relay/qnn/op/convolution.cc b/src/relay/qnn/op/convolution.cc index 1103620829d1b..75cac5d0f1208 100644 --- a/src/relay/qnn/op/convolution.cc +++ b/src/relay/qnn/op/convolution.cc @@ -106,8 +106,6 @@ WorkloadType GetWorkload(const Array& arg_types, const QnnConv * \brief Fallback to simpler lowering for dilation or depthwise conv. * \param data The input expr. * \param weight The weight expr. - * \param zp_data The data zero point expr. - * \param zp_kernel The kernel zero point expr. * \param param The qnn conv2d attributes. * \return The fallback lowered sequence of Relay expr. * \note In case of dilation, normal lowering would require a dilated pool. @@ -115,16 +113,19 @@ WorkloadType GetWorkload(const Array& arg_types, const QnnConv * Relay operations. This will potentially lead to performance degradation * as the convolution is called on int32 tensors instead of int8 tensors. */ -Expr Conv2DFallBack(const Expr& data, const Expr& weight, const Expr& zp_data, - const Expr& zp_kernel, const QnnConv2DAttrs* param) { - auto shifted_data = data; +Expr Conv2DFallBack(const Expr& data, const Expr& weight, const QnnConv2DAttrs* param) { + // Upcast the zero point to Int16. + auto zp_data = MakeConstantScalar(Int(16), param->input_zero_point); + auto zp_kernel = MakeConstantScalar(Int(16), param->kernel_zero_point); + + auto shifted_data = Cast(data, Int(16)); if (param->input_zero_point != 0) { - shifted_data = Subtract(Cast(data, Int(32)), zp_data); + shifted_data = Subtract(Cast(data, Int(16)), zp_data); } - auto shifted_kernel = weight; + auto shifted_kernel = Cast(weight, Int(16)); if (param->kernel_zero_point != 0) { - shifted_kernel = Subtract(Cast(weight, Int(32)), zp_kernel); + shifted_kernel = Subtract(Cast(weight, Int(16)), zp_kernel); } return Conv2D(shifted_data, shifted_kernel, param->strides, param->padding, param->dilation, @@ -186,7 +187,6 @@ Expr Conv2DFirstTerm(const Expr& padded_data, const Expr& weight, const QnnConv2 /* * \brief Calculates the second term in the qnn.conv2d lowering sequence. * \param padded_data The padded data expr. - * \param zp_kernel The kernel zero point expr. * \param param The qnn conv2d attributes. * \param kernel_h The height of kernel. * \param kernel_w The width of kernel. @@ -200,8 +200,11 @@ Expr Conv2DFirstTerm(const Expr& padded_data, const Expr& weight, const QnnConv2 * followed by a reduce on the C axis. Using avg_pool2d also gives an * opportunity to reuse alter_op_layout infrastructure. */ -Expr Conv2DSecondTerm(const Expr& padded_data, const Expr& zp_kernel, const QnnConv2DAttrs* param, - int kernel_h, int kernel_w, int out_channels) { +Expr Conv2DSecondTerm(const Expr& padded_data, const QnnConv2DAttrs* param, int kernel_h, + int kernel_w, int out_channels) { + // Constant Expr for the kernel zero point. + auto zp_kernel = MakeConstantScalar(Int(32), param->kernel_zero_point); + auto casted_t2 = Cast(padded_data, Int(32)); // We can reduce the H and W axis by using avg_pool2d. However, avg_pool2d averages the sum. @@ -241,7 +244,6 @@ Expr Conv2DSecondTerm(const Expr& padded_data, const Expr& zp_kernel, const QnnC /* * \brief Calculates the third term in the qnn.conv2d lowering sequence. * \param weight The weight expr. - * \param zp_data The data zero point expr. * \param param The qnn conv2d attributes. * \param batch_size The batch size. * \param out_channels The number of output channels. @@ -254,8 +256,11 @@ Expr Conv2DSecondTerm(const Expr& padded_data, const Expr& zp_kernel, const QnnC * a 1D tensor. The tensor is then reshaped to conform to NHWC/NCHW * format. */ -Expr Conv2DThirdTerm(const Expr& weight, const Expr& zp_data, const QnnConv2DAttrs* param, - int batch_size, int out_channels) { +Expr Conv2DThirdTerm(const Expr& weight, const QnnConv2DAttrs* param, int batch_size, + int out_channels) { + // Constant expr for input zero point. + auto zp_data = MakeConstantScalar(Int(32), param->input_zero_point); + // Find which dimensions are C, R, S. Array axes_t3; if (param->kernel_layout == "OIHW") { @@ -415,21 +420,19 @@ Expr QnnConv2DCanonicalize(const Attrs& attrs, const Array& new_args, int batch_size, in_channels, out_channels, kernel_h, kernel_w; std::tie(batch_size, in_channels, out_channels, kernel_h, kernel_w) = GetWorkload(arg_types, param); - auto zp_data = MakeConstantScalar(Int(32), param->input_zero_point); - auto zp_kernel = MakeConstantScalar(Int(32), param->kernel_zero_point); // Fallback to int32 conv if there is dilation or depthwise conv2d CHECK_EQ(param->dilation.size(), 2) << "qnn.conv2d only supports 2D dilation"; auto dilation_h = get_const_int(param->dilation[0]); auto dilation_w = get_const_int(param->dilation[1]); if (dilation_h != 1 || dilation_w != 1 || param->groups != 1) { - return Conv2DFallBack(data, weight, zp_data, zp_kernel, param); + return Conv2DFallBack(data, weight, param); } auto padded_data = Conv2DPadInput(data, param); auto term1 = Conv2DFirstTerm(padded_data, weight, param); - auto term2 = Conv2DSecondTerm(padded_data, zp_kernel, param, kernel_h, kernel_w, out_channels); - auto term3 = Conv2DThirdTerm(weight, zp_data, param, batch_size, out_channels); + auto term2 = Conv2DSecondTerm(padded_data, param, kernel_h, kernel_w, out_channels); + auto term3 = Conv2DThirdTerm(weight, param, batch_size, out_channels); auto term4 = Conv2DFourthTerm(param, batch_size, in_channels, kernel_h, kernel_w); return Conv2DCombineTerms(term1, term2, term3, term4, param); } diff --git a/tests/python/relay/test_op_qnn_conv2d.py b/tests/python/relay/test_op_qnn_conv2d.py index b4e8bfd71b623..71368f84d0239 100644 --- a/tests/python/relay/test_op_qnn_conv2d.py +++ b/tests/python/relay/test_op_qnn_conv2d.py @@ -160,7 +160,7 @@ def get_output(func, golden_inputs): qnn_output = get_output(qnn_func, golden_inputs) np.testing.assert_equal(qnn_output, golden_output) -def no_zero_point_test(): +def test_no_zero_point(): # uint8 input data_shape = (2, 1, 2, 4) data_dtype = 'uint8' @@ -203,7 +203,7 @@ def no_zero_point_test(): verify(ref_func, qnn_func, data_shape, data_dtype, kernel_shape, kernel_dtype) -def kernel_zero_point_test(): +def test_kernel_zero_point(): # uint8 input data_shape = (2, 4, 2, 4) data_dtype = 'uint8' @@ -247,7 +247,7 @@ def kernel_zero_point_test(): kernel_shape, kernel_dtype) -def input_zero_point_test(): +def test_input_zero_point(): # uint8 input data_shape = (2, 4, 2, 4) data_dtype = 'uint8' @@ -290,7 +290,7 @@ def input_zero_point_test(): verify(ref_func, qnn_func, data_shape, data_dtype, kernel_shape, kernel_dtype) -def both_zero_point_test(): +def test_both_zero_point(): # uint8 input data_shape = (2, 4, 2, 4) data_dtype = 'uint8' @@ -333,7 +333,7 @@ def both_zero_point_test(): verify(ref_func, qnn_func, data_shape, data_dtype, kernel_shape, kernel_dtype) -def layout_test(): +def test_layout(): # uint8 input data_shape = (2, 2, 4, 4) # NHWC data_dtype = 'uint8' @@ -378,7 +378,7 @@ def layout_test(): -def padding_test(): +def test_padding(): # uint8 input data_shape = (1, 4, 2, 2) data_dtype = 'uint8' @@ -421,7 +421,7 @@ def padding_test(): verify(ref_func, qnn_func, data_shape, data_dtype, kernel_shape, kernel_dtype) -def dilation_test(): +def test_dilation(): # uint8 input data_shape = (2, 4, 4, 4) data_dtype = 'uint8' @@ -444,7 +444,7 @@ def dilation_test(): kernel_shape, kernel_dtype) -def const_folding_test(): +def test_const_folding(): data_shape = (2, 4, 2, 4) data_dtype = 'uint8' kernel_shape = (3, 4, 2, 2) @@ -470,7 +470,7 @@ def const_folding_test(): folded_func = folded_mod["main"] assert "reshape" not in folded_func.astext() -def kernel_size_1x1_test(): +def test_kernel_size_1x1(): # uint8 input data_shape = (2, 4, 2, 4) data_dtype = 'uint8' @@ -493,7 +493,7 @@ def kernel_size_1x1_test(): verify(ref_func, qnn_func, data_shape, data_dtype, kernel_shape, kernel_dtype) -def tflite_large_irregular_test(): +def test_tflite_large_irregular(): # uint8 input data_shape = (1, 1024, 1, 1) data_dtype = 'uint8' @@ -526,7 +526,7 @@ def tflite_large_irregular_test(): golden_output = np.full((1, 1001, 1, 1), 0).astype('uint8') np.testing.assert_equal(qnn_output, golden_output) -def tflite_output_multiplier_greater_than_one(): +def test_tflite_output_multiplier_greater_than_one(): # uint8 input data_shape = (2, 1, 2, 4) data_dtype = 'uint8' @@ -570,7 +570,7 @@ def tflite_output_multiplier_greater_than_one(): 0, 0)).reshape(2, 3, 1, 2) np.testing.assert_equal(qnn_output, golden_output) -def tflite_anistropic_strides(): +def test_tflite_anistropic_strides(): # uint8 input data_shape = (1, 1, 3, 6) data_dtype = 'uint8' @@ -607,7 +607,7 @@ def tflite_anistropic_strides(): golden_output = np.array((124, -92, 164, -132)).reshape(1, 1, 2, 2) np.testing.assert_equal(qnn_output, golden_output) -def broadcast_layout_test(): +def test_broadcast_layout(): # Test broadcast support for NHWC layout. data_shape = (1, 229, 229, 3) # NHWC data_dtype = 'uint8' @@ -641,16 +641,16 @@ def broadcast_layout_test(): graph, lib, params = relay.build(mod, "llvm -mcpu=skylake-avx512") if __name__ == "__main__": - no_zero_point_test() - input_zero_point_test() - kernel_zero_point_test() - both_zero_point_test() - layout_test() - padding_test() - dilation_test() - const_folding_test() - kernel_size_1x1_test() - tflite_large_irregular_test() - tflite_output_multiplier_greater_than_one() - tflite_anistropic_strides() - broadcast_layout_test() + test_no_zero_point() + test_input_zero_point() + test_kernel_zero_point() + test_both_zero_point() + test_layout() + test_padding() + test_dilation() + test_const_folding() + test_kernel_size_1x1() + test_tflite_large_irregular() + test_broadcast_layout() + test_tflite_output_multiplier_greater_than_one() + test_tflite_anistropic_strides() diff --git a/tests/python/relay/test_qnn_mul.py b/tests/python/relay/test_op_qnn_mul.py similarity index 100% rename from tests/python/relay/test_qnn_mul.py rename to tests/python/relay/test_op_qnn_mul.py diff --git a/tests/python/relay/test_op_qnn_requantize.py b/tests/python/relay/test_op_qnn_requantize.py index 18e2f308969b9..3818135ecda70 100644 --- a/tests/python/relay/test_op_qnn_requantize.py +++ b/tests/python/relay/test_op_qnn_requantize.py @@ -22,230 +22,227 @@ roundings = ["UPWARD", "TONEAREST"] -def test_requantize(): - def verify(mod, goldens): - with relay.build_config(opt_level=3): - graph, lib, params = relay.build(mod, "llvm", params=None) - golden_data, golden_output = goldens - rt_mod = graph_runtime.create(graph, lib, ctx=tvm.cpu(0)) - rt_mod.set_input("quantized_data",golden_data) - rt_mod.set_input(**params) - rt_mod.run() - res = rt_mod.get_output(0).asnumpy() - np.testing.assert_equal(res, golden_output) - - def get_mod(data_shape, data_dtype, out_dtype, input_scale, output_scale, - input_zero_point=0, output_zero_point=0, rounding="TONEAREST"): - quantized_data = relay.var("quantized_data", shape=data_shape, - dtype=data_dtype) - mod = relay.qnn.op.requantize( - quantized_data, - input_scale=input_scale, - input_zero_point=input_zero_point, - output_scale=output_scale, - output_zero_point=output_zero_point, - rounding=rounding, - out_dtype=out_dtype) - - mod = relay.Function(relay.analysis.free_vars(mod), mod) - mod = relay.Module.from_expr(mod) - return mod - - def same_scale_test(): - # Have same scales, everything within range - golden_data = np.arange(-100, 100, 1).astype('int32') - golden_output = golden_data - - for rounding in roundings: - mod = get_mod(data_shape=(200, ), - data_dtype='int32', - out_dtype="int8", - input_scale=0.5, - output_scale=0.5, - rounding=rounding) - assert 'right_shift' not in mod.astext() - verify(mod, (golden_data, golden_output)) - - def downscale_test(): - for rounding in roundings: - mod = get_mod(data_shape=(32, ), - data_dtype='int32', - out_dtype='int8', - input_scale=1, - output_scale=16, - rounding=rounding) - - # Try positive values - # 8 corresponds to 0.5, resulting in 1 - golden_data = np.arange(0, 32, 1).astype('int32') - golden_output = np.repeat([0, 1, 2], [8, 16, 8]) - verify(mod, (golden_data, golden_output)) - - # Try negative values - # -8 corresponds to -0.5. For UPWARD, this is 0 - golden_data = np.arange(0, -32, -1).astype('int32') - if rounding == "UPWARD": - golden_output = np.repeat([0, -1, -2], [9, 16, 7]) - else: - golden_output = np.repeat([0, -1, -2], [8, 16, 8]) - verify(mod, (golden_data, golden_output)) - - # Try a different scale - mod = get_mod(data_shape=(32, ), - data_dtype='int32', - out_dtype="int8", - input_scale=1, - output_scale=4, - rounding=rounding) - - # Try positive values - # 2I corresponds to 0.5, resulting in 1 - golden_data = np.arange(0, 32, 1).astype('int32') - golden_output = np.repeat([0, 1, 2, 3, 4, 5, 6, 7, 8], +def verify(mod, goldens): + with relay.build_config(opt_level=3): + graph, lib, params = relay.build(mod, "llvm", params=None) + golden_data, golden_output = goldens + rt_mod = graph_runtime.create(graph, lib, ctx=tvm.cpu(0)) + rt_mod.set_input("quantized_data",golden_data) + rt_mod.set_input(**params) + rt_mod.run() + res = rt_mod.get_output(0).asnumpy() + np.testing.assert_equal(res, golden_output) + +def get_mod(data_shape, data_dtype, out_dtype, input_scale, output_scale, + input_zero_point=0, output_zero_point=0, rounding="TONEAREST"): + quantized_data = relay.var("quantized_data", shape=data_shape, + dtype=data_dtype) + mod = relay.qnn.op.requantize( + quantized_data, + input_scale=input_scale, + input_zero_point=input_zero_point, + output_scale=output_scale, + output_zero_point=output_zero_point, + rounding=rounding, + out_dtype=out_dtype) + + mod = relay.Function(relay.analysis.free_vars(mod), mod) + mod = relay.Module.from_expr(mod) + return mod + +def test_same_scale(): + # Have same scales, everything within range + golden_data = np.arange(-100, 100, 1).astype('int32') + golden_output = golden_data + + for rounding in roundings: + mod = get_mod(data_shape=(200, ), + data_dtype='int32', + out_dtype="int8", + input_scale=0.5, + output_scale=0.5, + rounding=rounding) + assert 'right_shift' not in mod.astext() + verify(mod, (golden_data, golden_output)) + +def test_downscale(): + for rounding in roundings: + mod = get_mod(data_shape=(32, ), + data_dtype='int32', + out_dtype='int8', + input_scale=1, + output_scale=16, + rounding=rounding) + + # Try positive values + # 8 corresponds to 0.5, resulting in 1 + golden_data = np.arange(0, 32, 1).astype('int32') + golden_output = np.repeat([0, 1, 2], [8, 16, 8]) + verify(mod, (golden_data, golden_output)) + + # Try negative values + # -8 corresponds to -0.5. For UPWARD, this is 0 + golden_data = np.arange(0, -32, -1).astype('int32') + if rounding == "UPWARD": + golden_output = np.repeat([0, -1, -2], [9, 16, 7]) + else: + golden_output = np.repeat([0, -1, -2], [8, 16, 8]) + verify(mod, (golden_data, golden_output)) + + # Try a different scale + mod = get_mod(data_shape=(32, ), + data_dtype='int32', + out_dtype="int8", + input_scale=1, + output_scale=4, + rounding=rounding) + + # Try positive values + # 2I corresponds to 0.5, resulting in 1 + golden_data = np.arange(0, 32, 1).astype('int32') + golden_output = np.repeat([0, 1, 2, 3, 4, 5, 6, 7, 8], + [2, 4, 4, 4, 4, 4, 4, 4, 2]) + verify(mod, (golden_data, golden_output)) + + # Try negative values + # -8 corresponds to -0.5. For UPWARD, this is 0 + golden_data = np.arange(0, -32, -1).astype('int32') + if rounding == "UPWARD": + golden_output = np.repeat([0, -1, -2, -3, -4, -5, -6, -7, -8], + [3, 4, 4, 4, 4, 4, 4, 4, 1]) + else: + golden_output = np.repeat([0, -1, -2, -3, -4, -5, -6, -7, -8], [2, 4, 4, 4, 4, 4, 4, 4, 2]) - verify(mod, (golden_data, golden_output)) - - # Try negative values - # -8 corresponds to -0.5. For UPWARD, this is 0 - golden_data = np.arange(0, -32, -1).astype('int32') - if rounding == "UPWARD": - golden_output = np.repeat([0, -1, -2, -3, -4, -5, -6, -7, -8], - [3, 4, 4, 4, 4, 4, 4, 4, 1]) - else: - golden_output = np.repeat([0, -1, -2, -3, -4, -5, -6, -7, -8], - [2, 4, 4, 4, 4, 4, 4, 4, 2]) - verify(mod, (golden_data, golden_output)) - - # Try uint8 out_dtype - mod = get_mod(data_shape=(32, ), - data_dtype='int32', - out_dtype='uint8', - input_scale=1, - output_scale=16, - rounding=rounding) - - # Try positive values - # 8 corresponds to 0.5, resulting in 1 - golden_data = np.arange(0, 32, 1).astype('int32') - golden_output = np.repeat([0, 1, 2], [8, 16, 8]) - verify(mod, (golden_data, golden_output)) - - # Try uint8 in_dtyope and uint8 out_dtype - mod = get_mod(data_shape=(32, ), - data_dtype='uint8', - out_dtype='uint8', - input_scale=1, - output_scale=16, - rounding=rounding) - - # Try positive values - # 8 corresponds to 0.5, resulting in 1 - golden_data = np.arange(0, 32, 1).astype('int32') - golden_output = np.repeat([0, 1, 2], [8, 16, 8]) - verify(mod, (golden_data, golden_output)) - - def upscale_test(): - for rounding in roundings: - mod = get_mod(data_shape=(32, ), - data_dtype='int32', - out_dtype="int8", - input_scale=2, - output_scale=1, - rounding=rounding) - - # Try positive values - # 8 corresponds to 0.5, resulting in 1 - golden_data = np.arange(0, 32, 1).astype('int32') - golden_output = np.multiply(2, golden_data) - verify(mod, (golden_data, golden_output)) - - # Try negative values - # -8 corresponds to -0.5. For UPWARD, this is 0 - golden_data = np.arange(0, -32, -1).astype('int32') - golden_output = np.multiply(2, golden_data) - verify(mod, (golden_data, golden_output)) - - def saturation_test(): - for rounding in roundings: - mod = get_mod(data_shape=(16, ), - data_dtype='int32', - out_dtype="int8", - input_scale=0.5, - output_scale=0.5, - rounding=rounding) - golden_data = np.arange(0, 16, 1).astype('int32') - golden_data = np.add(120, golden_data) - output = np.array([120, 121, 122, 123, 124, 125, 126, 127, - 127, 127, 127, 127, 127, 127, 127, 127]) - golden_output = output - verify(mod, (golden_data, golden_output)) - - # Try negative numbers - golden_data = np.arange(0, -16, -1).astype('int32') - golden_data = np.add(-120, golden_data) - output = np.array([-120, -121, -122, -123, -124, -125, -126, -127, - -128, -128, -128, -128, -128, -128, -128, -128]) - golden_output = output - verify(mod, (golden_data, golden_output)) - - def zero_point_test(): - # Output zero point - for rounding in roundings: - mod = get_mod(data_shape=(32, ), - data_dtype='int32', - out_dtype='int8', - input_scale=1, - output_scale=16, - output_zero_point=1, - rounding=rounding) - - # Try positive values - # 8 corresponds to 0.5, resulting in 1 - golden_data = np.arange(0, 32, 1).astype('int32') - golden_output = np.repeat([0, 1, 2], [8, 16, 8]) - golden_output = np.add(1, golden_output) - verify(mod, (golden_data, golden_output)) - - # Try negative values - # -8 corresponds to -0.5. For UPWARD, this is 0 - golden_data = np.arange(-32, -64, -1).astype('int32') - if rounding == "UPWARD": - golden_output = np.repeat([-2, -3, -4], [9, 16, 7]) - else: - golden_output = np.repeat([-2, -3, -4], [8, 16, 8]) - golden_output = np.add(1, golden_output) - verify(mod, (golden_data, golden_output)) - - # Input zero point - for rounding in roundings: - mod = get_mod(data_shape=(32, ), - data_dtype='int32', - out_dtype='int8', - input_scale=1, - output_scale=16, - input_zero_point=16, - rounding=rounding) - - # Try positive values - golden_data = np.arange(32, 64, 1).astype('int32') - golden_output = np.repeat([2, 3, 4], [8, 16, 8]) - golden_output = np.subtract(golden_output, 1) - verify(mod, (golden_data, golden_output)) - - # Try negative values - golden_data = np.arange(-32, -64, -1).astype('int32') - if rounding == "UPWARD": - golden_output = np.repeat([-2, -3, -4], [9, 16, 7]) - else: - golden_output = np.repeat([-2, -3, -4], [8, 16, 8]) - golden_output = np.subtract(golden_output, 1) - verify(mod, (golden_data, golden_output)) - - same_scale_test() - downscale_test() - upscale_test() - saturation_test() - zero_point_test() + verify(mod, (golden_data, golden_output)) + + # Try uint8 out_dtype + mod = get_mod(data_shape=(32, ), + data_dtype='int32', + out_dtype='uint8', + input_scale=1, + output_scale=16, + rounding=rounding) + + # Try positive values + # 8 corresponds to 0.5, resulting in 1 + golden_data = np.arange(0, 32, 1).astype('int32') + golden_output = np.repeat([0, 1, 2], [8, 16, 8]) + verify(mod, (golden_data, golden_output)) + + # Try uint8 in_dtyope and uint8 out_dtype + mod = get_mod(data_shape=(32, ), + data_dtype='uint8', + out_dtype='uint8', + input_scale=1, + output_scale=16, + rounding=rounding) + + # Try positive values + # 8 corresponds to 0.5, resulting in 1 + golden_data = np.arange(0, 32, 1).astype('int32') + golden_output = np.repeat([0, 1, 2], [8, 16, 8]) + verify(mod, (golden_data, golden_output)) + +def test_upscale(): + for rounding in roundings: + mod = get_mod(data_shape=(32, ), + data_dtype='int32', + out_dtype="int8", + input_scale=2, + output_scale=1, + rounding=rounding) + + # Try positive values + # 8 corresponds to 0.5, resulting in 1 + golden_data = np.arange(0, 32, 1).astype('int32') + golden_output = np.multiply(2, golden_data) + verify(mod, (golden_data, golden_output)) + + # Try negative values + # -8 corresponds to -0.5. For UPWARD, this is 0 + golden_data = np.arange(0, -32, -1).astype('int32') + golden_output = np.multiply(2, golden_data) + verify(mod, (golden_data, golden_output)) + +def test_saturation(): + for rounding in roundings: + mod = get_mod(data_shape=(16, ), + data_dtype='int32', + out_dtype="int8", + input_scale=0.5, + output_scale=0.5, + rounding=rounding) + golden_data = np.arange(0, 16, 1).astype('int32') + golden_data = np.add(120, golden_data) + output = np.array([120, 121, 122, 123, 124, 125, 126, 127, + 127, 127, 127, 127, 127, 127, 127, 127]) + golden_output = output + verify(mod, (golden_data, golden_output)) + + # Try negative numbers + golden_data = np.arange(0, -16, -1).astype('int32') + golden_data = np.add(-120, golden_data) + output = np.array([-120, -121, -122, -123, -124, -125, -126, -127, + -128, -128, -128, -128, -128, -128, -128, -128]) + golden_output = output + verify(mod, (golden_data, golden_output)) + +def test_zero_point(): + # Output zero point + for rounding in roundings: + mod = get_mod(data_shape=(32, ), + data_dtype='int32', + out_dtype='int8', + input_scale=1, + output_scale=16, + output_zero_point=1, + rounding=rounding) + + # Try positive values + # 8 corresponds to 0.5, resulting in 1 + golden_data = np.arange(0, 32, 1).astype('int32') + golden_output = np.repeat([0, 1, 2], [8, 16, 8]) + golden_output = np.add(1, golden_output) + verify(mod, (golden_data, golden_output)) + + # Try negative values + # -8 corresponds to -0.5. For UPWARD, this is 0 + golden_data = np.arange(-32, -64, -1).astype('int32') + if rounding == "UPWARD": + golden_output = np.repeat([-2, -3, -4], [9, 16, 7]) + else: + golden_output = np.repeat([-2, -3, -4], [8, 16, 8]) + golden_output = np.add(1, golden_output) + verify(mod, (golden_data, golden_output)) + + # Input zero point + for rounding in roundings: + mod = get_mod(data_shape=(32, ), + data_dtype='int32', + out_dtype='int8', + input_scale=1, + output_scale=16, + input_zero_point=16, + rounding=rounding) + + # Try positive values + golden_data = np.arange(32, 64, 1).astype('int32') + golden_output = np.repeat([2, 3, 4], [8, 16, 8]) + golden_output = np.subtract(golden_output, 1) + verify(mod, (golden_data, golden_output)) + + # Try negative values + golden_data = np.arange(-32, -64, -1).astype('int32') + if rounding == "UPWARD": + golden_output = np.repeat([-2, -3, -4], [9, 16, 7]) + else: + golden_output = np.repeat([-2, -3, -4], [8, 16, 8]) + golden_output = np.subtract(golden_output, 1) + verify(mod, (golden_data, golden_output)) if __name__ == "__main__": - test_requantize() + test_same_scale() + test_downscale() + test_upscale() + test_saturation() + test_zero_point()