From a4286950c0c6829d203e191d2eb1c828317e4bd6 Mon Sep 17 00:00:00 2001
From: Christian Convey
Date: Mon, 13 Jun 2022 10:54:51 -0700
Subject: [PATCH] baseline config for variation testing

---
 .../test_hexagon/benchmark_maxpool2d.py    | 126 ++++++++----------
 .../test_hexagon/crashy_mccrashface.py     |   1 -
 .../contrib/test_hexagon/infrastructure.py |   8 +-
 3 files changed, 61 insertions(+), 74 deletions(-)

diff --git a/tests/python/contrib/test_hexagon/benchmark_maxpool2d.py b/tests/python/contrib/test_hexagon/benchmark_maxpool2d.py
index d07aaf3f6b6e..606a746650c4 100644
--- a/tests/python/contrib/test_hexagon/benchmark_maxpool2d.py
+++ b/tests/python/contrib/test_hexagon/benchmark_maxpool2d.py
@@ -29,22 +29,20 @@
 from .infrastructure import allocate_hexagon_array, get_packed_shape
 
-#def get_blocked_shape_int8_nhwc_8h8w32c(n, h, w, c, include_axis_sep:bool):
-#    shape = [
-#        n,
-#        h // 8,
-#        w // 8,
-#        c // 32,
-#        h % 8,
-#        w % 8,
-#        c % 32,
-#    ]
-#
-#    if include_axis_sep:
-#        shape = shape[:-3] + [IndexMap.AXIS_SEPARATOR] + shape[-3:]
-#
-#    return shape
+import time
+
+def int8_nhwc_8h8w32c(n, h, w, c):
+    shape = [
+        n,
+        h // 8,
+        w // 8,
+        c // 32,
+        h % 8,
+        w % 8,
+        c % 32,
+    ]
+    return shape
 
 
 class TestMaxPool2D:
     dtype = tvm.testing.parameter("int8")
@@ -82,24 +80,24 @@ def test_maxpool2d_nhwc(
         hexagon_session: Session,
     ):
 
         data = te.placeholder((N, H, W, C), dtype=dtype) # data.shape = [1, 128, 128, 64]
-        #output = topi.nn.pool2d(data, kernel, stride, dilation, padding, "max", layout="NHWC") # output: tvm.te.tensor.Tensor ; output.shape = [1,126,126,64]
-
-        @T.prim_func
-        def func(var_placeholder: T.handle, tensor: T.Buffer[2048, "int8"]) -> None:
-            # function attr dict
-            T.func_attr({"global_symbol": "main", "tir.noalias": True})
-            placeholder = T.match_buffer(var_placeholder, [1, 2048], dtype="int8", axis_separators=[1])
-            T.preflattened_buffer(placeholder, [1, 1, 1, 1, 8, 8, 32], dtype="int8", data=placeholder.data, axis_separators=[4])
-            T.preflattened_buffer(tensor, [1, 8, 8, 32], dtype="int8", data=tensor.data)
-            # body
-            for i1, i2, i3 in T.grid(8, 8, 32):
-                cse_var_1: T.int32 = i1 * 256 + i2 * 32 + i3
-                tensor[cse_var_1] = T.int8(-128)
-                tensor[cse_var_1] = T.max(tensor[cse_var_1], placeholder[0, cse_var_1])
-        primfunc = func
+        output = topi.nn.pool2d(data, kernel, stride, dilation, padding, "max", layout="NHWC") # output: tvm.te.tensor.Tensor ; output.shape = [1,126,126,64]
+
+        #@T.prim_func
+        #def func(var_placeholder: T.handle, tensor: T.Buffer[2048, "int8"]) -> None:
+        #    # function attr dict
+        #    T.func_attr({"global_symbol": "main", "tir.noalias": True})
+        #    placeholder = T.match_buffer(var_placeholder, [1, 2048], dtype="int8", axis_separators=[1])
+        #    T.preflattened_buffer(placeholder, [1, 1, 1, 1, 8, 8, 32], dtype="int8", data=placeholder.data, axis_separators=[4])
+        #    T.preflattened_buffer(tensor, [1, 8, 8, 32], dtype="int8", data=tensor.data)
+        #    # body
+        #    for i1, i2, i3 in T.grid(8, 8, 32):
+        #        cse_var_1: T.int32 = i1 * 256 + i2 * 32 + i3
+        #        tensor[cse_var_1] = T.int8(-128)
+        #        tensor[cse_var_1] = T.max(tensor[cse_var_1], placeholder[0, cse_var_1])
+        #primfunc = func # Disabled because we're copy-pasting TVMScript
-        #primfunc = te.create_prim_func([data, output]) # type(primfunc) = tvm.tir.function.PrimFunc
+        primfunc = te.create_prim_func([data, output]) # type(primfunc) = tvm.tir.function.PrimFunc
 
         with open('out-2a.txt', 'w') as f:
             f.write(str(primfunc))
 
@@ -115,7 +113,7 @@ def func(var_placeholder: T.handle, tensor: T.Buffer[2048, "int8"]) -> None:
 
         # Line 74 in Chris's script
         # Disabled while we're using TVMScript
-        # sch.transform_layout(block="tensor", buffer="placeholder", index_map=int8_nhwc_8h8w32c)
+        sch.transform_layout(block="tensor", buffer="placeholder", index_map=int8_nhwc_8h8w32c)
 
         foo = 'with-axis-separator'
 
@@ -123,10 +121,10 @@ def func(var_placeholder: T.handle, tensor: T.Buffer[2048, "int8"]) -> None:
             f.write(str(sch.mod['main']))
             f.write(str(sch.mod['main'].script()))
 
-        #with open(f'out-5-{foo}.txt', 'w') as f:
-        #    foo = tvm.lower(sch.mod, [data, output,])['main']
-        #    f.write(str(foo))
-        #    f.write(str(foo.script()))
+        with open(f'out-5-{foo}.txt', 'w') as f:
+            foo = tvm.lower(sch.mod, [data, output,])['main']
+            f.write(str(foo))
+            f.write(str(foo.script()))
 
 
         # compute : tvm.tir.schedule.schedule.BlockRV
@@ -143,8 +141,18 @@ def func(var_placeholder: T.handle, tensor: T.Buffer[2048, "int8"]) -> None:
 
         mod = hexagon_session.load_module(func)
 
         a_np = np.random.randint(low=-128, high=127, size=(N, H, W, C), dtype=np.int8)
+
+        # Random is overrated while debugging...
+        for n in range(N):
+            for h in range(H):
+                for w in range(W):
+                    for c in range(C):
+                        a_np[n,h,w,c] = 42
+
+
         ref_output = testing.poolnd_python(
-            a_np.astype("int32"),
+            #a_np.astype("int32"),
+            a_np.astype("int8"), # int8 rather than the original int32; still unclear whether the reference needs the wider dtype
             kernel,
             stride,
             dilation,
@@ -155,8 +163,6 @@ def func(var_placeholder: T.handle, tensor: T.Buffer[2048, "int8"]) -> None:
             layout="NHWC",
         ).astype("int8")
 
-        # Do we actually need c_np?
-        c_np = np.zeros(ref_output.shape).astype("int8")
 
         #breakpoint()
 
@@ -165,38 +171,9 @@ def func(var_placeholder: T.handle, tensor: T.Buffer[2048, "int8"]) -> None:
             0, 1, 3, 5, 2, 4, 6
         )
 
-        #a_transformed_step1 = a_np.reshape(N, H // 8, 8, W // 8, 8, C // 32, 32)
-        #a_transformed_step2 = a_transformed_step1.transpose(
-        #    0, 1, 3, 5, 2, 4, 6
-        #)
-
-        # Q: What does transpose ^^^ actually do above? Why not just reshape immediately?
-        # Does it have to do with numpy's 'reshape' when it increases the rank?
-
-        #input_shape = [1,1,1,1,8,8,32]
         #output_shape = [1,8,8,32]
 
-        ## Create the I/O tensors...
-        #A_hexagon = tvm.nd.empty(input_shape, dtype, hexagon_session.device, 'global')
-        #C_hexagon = tvm.nd.empty(output_shape, dtype, hexagon_session.device, 'global')
-
-        #foo = tvm.nd.empty(input_shape, dtype, hexagon_session.device, 'global')
-
-
-        ## Use a host-side tensor to provide the initial values for the
-        ## primfunc call's input tensor...
-        #A_host = np.ndarray(input_shape, dtype=dtype)
-
-        #import random
-        #for i0 in range(input_shape[0]):
-        #    for i1 in range(input_shape[1]):
-        #        for i2 in range(input_shape[2]):
-        #            for i3 in range(input_shape[3]):
-        #                for i4 in range(input_shape[4]):
-        #                    for i5 in range(input_shape[5]):
-        #                        for i6 in range(input_shape[6]):
-        #                            A_host[i0,i1,i2,i3,i4,i5,i6] = random.randint(-128,127)
 
         packed_input_shape = get_packed_shape([N,H,W,C])
 
         a_hexagon = allocate_hexagon_array(
@@ -215,9 +192,20 @@ def func(var_placeholder: T.handle, tensor: T.Buffer[2048, "int8"]) -> None:
             mem_scope="global.vtcm",
         )
 
+        #breakpoint()
+
+        print('AAAAA: a_hexagon.numpy()[0,0,0,0,0,0,0]={}'.format(a_hexagon.numpy()[0,0,0,0,0,0,0]))
+
         a_hexagon.copyfrom(a_transformed)
 
-        #breakpoint()
+        for i in range(1,11):
+            time.sleep(5)
+            print('BBBBB {}: a_hexagon.numpy()[0,0,0,0,0,0,0]={}'.format(i, a_hexagon.numpy()[0,0,0,0,0,0,0]))
+
+        # This is just to help with debugging...
+        c_np = np.zeros(ref_output.shape).astype("int8")
+        c_hexagon.copyfrom(c_np)
+
         mod(a_hexagon, c_hexagon)
 
         tvm.testing.assert_allclose(ref_output, c_hexagon.numpy(), rtol=1e-4)
diff --git a/tests/python/contrib/test_hexagon/crashy_mccrashface.py b/tests/python/contrib/test_hexagon/crashy_mccrashface.py
index 8881d845b861..0b5887191b4e 100644
--- a/tests/python/contrib/test_hexagon/crashy_mccrashface.py
+++ b/tests/python/contrib/test_hexagon/crashy_mccrashface.py
@@ -27,7 +27,6 @@
 from tvm.relay.backend import Executor, Runtime
 from tvm.contrib.hexagon.session import Session
-
 from .infrastructure import allocate_hexagon_array
 
 
diff --git a/tests/python/contrib/test_hexagon/infrastructure.py b/tests/python/contrib/test_hexagon/infrastructure.py
index 01eef86e6b5b..80831659d59f 100644
--- a/tests/python/contrib/test_hexagon/infrastructure.py
+++ b/tests/python/contrib/test_hexagon/infrastructure.py
@@ -20,7 +20,7 @@
 import tvm
 from tvm import te
 import numpy
-
+from typing import List, Union
 
 def allocate_hexagon_array(
     dev, tensor_shape=None, dtype=None, data=None, axis_separators=None, mem_scope=None
@@ -55,10 +55,10 @@ def allocate_hexagon_array(
 
     return arr._create_view(tensor_shape)
 
-
 def ceildiv(o, d):
     assert o >= 0
     assert d >= 0
+
     return tvm.tir.floordiv(o + d - 1, d)
 
 
@@ -73,8 +73,8 @@ def get_filter_block_shape():
 
 
 # input: logical shape in nhwc layout
-# output: physical packed shape in nhw8h8w32c layout
-def get_packed_shape(logical_shape_nhwc):
+# output: the physical packed shape in nhw8h8w32c layout
+def get_packed_shape(logical_shape_nhwc) -> List[Union[int, tvm.tir.expr.IntImm]]:
     assert len(logical_shape_nhwc) == 4
 
     physical_shape_nhwc8h8w32c = [logical_shape_nhwc[0]]
     block_shape = get_block_shape()
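
Note on the layout transform exercised above: the following is a minimal, numpy-only sketch (not part of the patch; the shape values come from the test's own comments, and names like a_packed are illustrative) of what the int8_nhwc_8h8w32c index map and the reshape/transpose in the test are expected to do, i.e. pack a logical NHWC tensor into (n, h//8, w//8, c//32, h%8, w%8, c%32) blocks, assuming H, W, and C are multiples of 8, 8, and 32.

    import numpy as np

    # Logical input shape used by the test (data.shape = [1, 128, 128, 64]).
    N, H, W, C = 1, 128, 128, 64

    a_np = np.random.randint(low=-128, high=127, size=(N, H, W, C), dtype=np.int8)

    # Same repacking as a_transformed in the test: split H, W, C into
    # 8/8/32-sized blocks, then move the block indices ahead of the
    # intra-block indices.
    a_packed = a_np.reshape(N, H // 8, 8, W // 8, 8, C // 32, 32).transpose(0, 1, 3, 5, 2, 4, 6)
    assert a_packed.shape == (N, H // 8, W // 8, C // 32, 8, 8, 32)

    # Spot-check that the repacking agrees with the int8_nhwc_8h8w32c index map:
    # (n, h, w, c) -> (n, h // 8, w // 8, c // 32, h % 8, w % 8, c % 32).
    for _ in range(100):
        n, h, w, c = (np.random.randint(d) for d in (N, H, W, C))
        assert a_packed[n, h // 8, w // 8, c // 32, h % 8, w % 8, c % 32] == a_np[n, h, w, c]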
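If the host-side repacking and the transform_layout index map were ever to disagree, the assert_allclose at the end of the test would likely fail even with a correct schedule, so a quick host-only check like this can help separate layout bugs from kernel bugs.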