baseline config for variation testing
Christian Convey committed Jun 13, 2022
1 parent a3d4e66 commit a428695
Showing 3 changed files with 61 additions and 74 deletions.
126 changes: 57 additions & 69 deletions tests/python/contrib/test_hexagon/benchmark_maxpool2d.py
@@ -29,22 +29,20 @@

from .infrastructure import allocate_hexagon_array, get_packed_shape

#def get_blocked_shape_int8_nhwc_8h8w32c(n, h, w, c, include_axis_sep:bool):
# shape = [
# n,
# h // 8,
# w // 8,
# c // 32,
# h % 8,
# w % 8,
# c % 32,
# ]
#
# if include_axis_sep:
# shape = shape[:-3] + [IndexMap.AXIS_SEPARATOR] + shape[-3:]
#
# return shape
import time

def int8_nhwc_8h8w32c(n, h, w, c):
shape = [
n,
h // 8,
w // 8,
c // 32,
h % 8,
w % 8,
c % 32,
]

return shape
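
As a quick aside (not part of this diff), the same arithmetic with plain Python ints shows where a logical NHWC element lands in the blocked layout:

# Hedged check: logical element (0, 13, 21, 70) falls in spatial block (1, 2),
# channel block 2, at intra-block offset (5, 5, 6).
assert int8_nhwc_8h8w32c(0, 13, 21, 70) == [0, 1, 2, 2, 5, 5, 6]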

class TestMaxPool2D:
dtype = tvm.testing.parameter("int8")
@@ -82,24 +80,24 @@ def test_maxpool2d_nhwc(
hexagon_session: Session,
):
data = te.placeholder((N, H, W, C), dtype=dtype) # data.shape = [1, 128, 128, 64]
#output = topi.nn.pool2d(data, kernel, stride, dilation, padding, "max", layout="NHWC") # output: tvm.te.tensor.Tensor ; output.shape = [1,126,126,64]

@T.prim_func
def func(var_placeholder: T.handle, tensor: T.Buffer[2048, "int8"]) -> None:
# function attr dict
T.func_attr({"global_symbol": "main", "tir.noalias": True})
placeholder = T.match_buffer(var_placeholder, [1, 2048], dtype="int8", axis_separators=[1])
T.preflattened_buffer(placeholder, [1, 1, 1, 1, 8, 8, 32], dtype="int8", data=placeholder.data, axis_separators=[4])
T.preflattened_buffer(tensor, [1, 8, 8, 32], dtype="int8", data=tensor.data)
# body
for i1, i2, i3 in T.grid(8, 8, 32):
cse_var_1: T.int32 = i1 * 256 + i2 * 32 + i3
tensor[cse_var_1] = T.int8(-128)
tensor[cse_var_1] = T.max(tensor[cse_var_1], placeholder[0, cse_var_1])
primfunc = func
output = topi.nn.pool2d(data, kernel, stride, dilation, padding, "max", layout="NHWC") # output: tvm.te.tensor.Tensor ; output.shape = [1,126,126,64]

#@T.prim_func
#def func(var_placeholder: T.handle, tensor: T.Buffer[2048, "int8"]) -> None:
# # function attr dict
# T.func_attr({"global_symbol": "main", "tir.noalias": True})
# placeholder = T.match_buffer(var_placeholder, [1, 2048], dtype="int8", axis_separators=[1])
# T.preflattened_buffer(placeholder, [1, 1, 1, 1, 8, 8, 32], dtype="int8", data=placeholder.data, axis_separators=[4])
# T.preflattened_buffer(tensor, [1, 8, 8, 32], dtype="int8", data=tensor.data)
# # body
# for i1, i2, i3 in T.grid(8, 8, 32):
# cse_var_1: T.int32 = i1 * 256 + i2 * 32 + i3
# tensor[cse_var_1] = T.int8(-128)
# tensor[cse_var_1] = T.max(tensor[cse_var_1], placeholder[0, cse_var_1])
#primfunc = func

# Disabled because we're copy-pasting TVMScript
#primfunc = te.create_prim_func([data, output]) # type(primfunc) = tvm.tir.function.PrimFunc
primfunc = te.create_prim_func([data, output]) # type(primfunc) = tvm.tir.function.PrimFunc

with open('out-2a.txt', 'w') as f:
f.write(str(primfunc))
@@ -115,18 +113,18 @@ def func(var_placeholder: T.handle, tensor: T.Buffer[2048, "int8"]) -> None:

# Line 74 in Chris's script
# Disabled while we're using TVMScript
# sch.transform_layout(block="tensor", buffer="placeholder", index_map=int8_nhwc_8h8w32c)
sch.transform_layout(block="tensor", buffer="placeholder", index_map=int8_nhwc_8h8w32c)

foo = 'with-axis-separator'

with open(f'out-4-{foo}.txt', 'w') as f:
f.write(str(sch.mod['main']))
f.write(str(sch.mod['main'].script()))

#with open(f'out-5-{foo}.txt', 'w') as f:
# foo = tvm.lower(sch.mod, [data, output,])['main']
# f.write(str(foo))
# f.write(str(foo.script()))
with open(f'out-5-{foo}.txt', 'w') as f:
foo = tvm.lower(sch.mod, [data, output,])['main']
f.write(str(foo))
f.write(str(foo.script()))


# compute : tvm.tir.schedule.schedule.BlockRV
@@ -143,8 +141,18 @@ def func(var_placeholder: T.handle, tensor: T.Buffer[2048, "int8"]) -> None:
mod = hexagon_session.load_module(func)

a_np = np.random.randint(low=-128, high=127, size=(N, H, W, C), dtype=np.int8)

# Random is overrated while debugging...
for n in range(N):
for h in range(H):
for w in range(W):
for c in range(C):
a_np[n,h,w,c] = 42


ref_output = testing.poolnd_python(
a_np.astype("int32"),
#a_np.astype("int32"),
a_np.astype("int8"), # ????
kernel,
stride,
dilation,
@@ -155,8 +163,6 @@ def func(var_placeholder: T.handle, tensor: T.Buffer[2048, "int8"]) -> None:
layout="NHWC",
).astype("int8")
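
An aside on the dtype question above (not part of this diff): unlike average pooling, max pooling does no accumulation, so the max of int8 values is itself representable in int8, and computing the reference in int32 before casting back should not change the result. A minimal NumPy check of that property, assuming an arbitrary int8 input:

# Hedged check: taking the max in int32 and casting back to int8 matches taking
# the max directly in int8, since no intermediate value can leave the int8 range.
import numpy as np
x = np.random.randint(-128, 128, size=(2, 6, 6, 4), dtype=np.int8)
assert np.array_equal(
    x.astype(np.int32).max(axis=(1, 2)).astype(np.int8),
    x.max(axis=(1, 2)),
)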

# Do we actually need c_np?
c_np = np.zeros(ref_output.shape).astype("int8")

#breakpoint()

Expand All @@ -165,38 +171,9 @@ def func(var_placeholder: T.handle, tensor: T.Buffer[2048, "int8"]) -> None:
0, 1, 3, 5, 2, 4, 6
)

#a_transformed_step1 = a_np.reshape(N, H // 8, 8, W // 8, 8, C // 32, 32)
#a_transformed_step2 = a_transformed_step1.transpose(
# 0, 1, 3, 5, 2, 4, 6
#)

# Q: What does transpose ^^^ actually do above? Why not just reshape immediately?
# Does it have to do with numpy's 'reshape' when it increases the rank?
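
On the question above (an aside, not part of this diff): reshape only splits axes without moving any data, so a single reshape straight to the 7-D blocked shape would keep the original row-major order and scatter elements incorrectly; the transpose is what regroups the split axes into (n, h_outer, w_outer, c_outer, h_inner, w_inner, c_inner). A minimal NumPy check, assuming small dims H = W = 16, C = 64:

import numpy as np

N, H, W, C = 1, 16, 16, 64
a = np.arange(N * H * W * C, dtype=np.int32).reshape(N, H, W, C)

# Split each of H, W, C into (outer, inner) axes, then regroup them.
blocked = a.reshape(N, H // 8, 8, W // 8, 8, C // 32, 32).transpose(0, 1, 3, 5, 2, 4, 6)
# Reshaping directly to the blocked shape keeps the flat element order and is wrong.
direct = a.reshape(N, H // 8, W // 8, C // 32, 8, 8, 32)

n, h, w, c = 0, 13, 9, 40
assert blocked[n, h // 8, w // 8, c // 32, h % 8, w % 8, c % 32] == a[n, h, w, c]
assert direct[n, h // 8, w // 8, c // 32, h % 8, w % 8, c % 32] != a[n, h, w, c]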

#input_shape = [1,1,1,1,8,8,32]
#output_shape = [1,8,8,32]

## Create the I/O tensors...
#A_hexagon = tvm.nd.empty(input_shape, dtype, hexagon_session.device, 'global')
#C_hexagon = tvm.nd.empty(output_shape, dtype, hexagon_session.device, 'global')

#foo = tvm.nd.empty(input_shape, dtype, hexagon_session.device, 'global')


## Use a host-side tensor to provide the initial values for the
## primfunc call's input tensor...
#A_host = np.ndarray(input_shape, dtype=dtype)

#import random
#for i0 in range(input_shape[0]):
# for i1 in range(input_shape[1]):
# for i2 in range(input_shape[2]):
# for i3 in range(input_shape[3]):
# for i4 in range(input_shape[4]):
# for i5 in range(input_shape[5]):
# for i6 in range(input_shape[6]):
# A_host[i0,i1,i2,i3,i4,i5,i6] = random.randint(-128,127)

packed_input_shape = get_packed_shape([N,H,W,C])

a_hexagon = allocate_hexagon_array(
@@ -215,9 +192,20 @@ def func(var_placeholder: T.handle, tensor: T.Buffer[2048, "int8"]) -> None:
mem_scope="global.vtcm",
)

#breakpoint()

print('AAAAA: a_hexagon.numpy()[0,0,0,0,0,0,0]={}'.format(a_hexagon.numpy()[0,0,0,0,0,0,0]))

a_hexagon.copyfrom(a_transformed)

#breakpoint()
for i in range(1,11):
time.sleep(5)
print('BBBBB {}: a_hexagon.numpy()[0,0,0,0,0,0,0]={}'.format(i, a_hexagon.numpy()[0,0,0,0,0,0,0]))

# This is just to help with debugging...
c_np = np.zeros(ref_output.shape).astype("int8")
c_hexagon.copyfrom(c_np)

mod(a_hexagon, c_hexagon)

tvm.testing.assert_allclose(ref_output, c_hexagon.numpy(), rtol=1e-4)
1 change: 0 additions & 1 deletion tests/python/contrib/test_hexagon/crashy_mccrashface.py
@@ -27,7 +27,6 @@
from tvm.relay.backend import Executor, Runtime
from tvm.contrib.hexagon.session import Session


from .infrastructure import allocate_hexagon_array


8 changes: 4 additions & 4 deletions tests/python/contrib/test_hexagon/infrastructure.py
@@ -20,7 +20,7 @@
import tvm
from tvm import te
import numpy

from typing import List, Union

def allocate_hexagon_array(
dev, tensor_shape=None, dtype=None, data=None, axis_separators=None, mem_scope=None
@@ -55,10 +55,10 @@ def allocate_hexagon_array(

return arr._create_view(tensor_shape)


def ceildiv(o, d):
assert o >= 0
assert d >= 0

return tvm.tir.floordiv(o + d - 1, d)


@@ -73,8 +73,8 @@ def get_filter_block_shape():


# input: logical shape in nhwc layout
# output: physical packed shape in nhw8h8w32c layout
def get_packed_shape(logical_shape_nhwc):
# output: the physical packed shape in nhw8h8w32c layout
def get_packed_shape(logical_shape_nhwc) -> List[Union[int, tvm.tir.expr.IntImm]]:
assert len(logical_shape_nhwc) == 4
physical_shape_nhwc8h8w32c = [logical_shape_nhwc[0]]
block_shape = get_block_shape()
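
For reference (an aside, not part of this diff): with the 8h/8w/32c block shape used above, the [1, 128, 128, 64] logical input from benchmark_maxpool2d.py packs to [1, 16, 16, 2, 8, 8, 32]. A plain-Python sketch of that arithmetic, assuming a ceildiv over each blocked axis (the real helper works with TIR expressions and may return IntImms):

# Hedged sketch of the packing computation; not the helper's actual body.
def packed_shape_sketch(n, h, w, c, block=(8, 8, 32)):
    bh, bw, bc = block
    return [n, (h + bh - 1) // bh, (w + bw - 1) // bw, (c + bc - 1) // bc, bh, bw, bc]

assert packed_shape_sketch(1, 128, 128, 64) == [1, 16, 16, 2, 8, 8, 32]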
