From a447b57ade7c99da4efd38e1bdfc213a64a80fd2 Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Sun, 12 Dec 2021 14:54:52 +0900 Subject: [PATCH] speed up profiling by removing initialization --- python/tvm/contrib/cutlass/conv2d_profiler.py | 42 ++++--------------- 1 file changed, 7 insertions(+), 35 deletions(-) diff --git a/python/tvm/contrib/cutlass/conv2d_profiler.py b/python/tvm/contrib/cutlass/conv2d_profiler.py index 0ece2c2bf6cc..e4ae03a4e3c7 100644 --- a/python/tvm/contrib/cutlass/conv2d_profiler.py +++ b/python/tvm/contrib/cutlass/conv2d_profiler.py @@ -81,10 +81,7 @@ def __init__(self): auto dilated_w = (filter_size.w() - 1) * dilation.column() + 1; auto h = (input_size.h() + padding.n() + padding.h() - dilated_h) / conv_stride.row() + 1; auto w = (input_size.w() + padding.w() + padding.c() - dilated_w) / conv_stride.column() + 1; - return cutlass::Tensor4DCoord( - input_size.n(), - h, w, - filter_size.n()); + return cutlass::Tensor4DCoord(input_size.n(), h, w, filter_size.n()); } }; @@ -98,31 +95,6 @@ def __init__(self): cutlass::HostTensor tensor_c(oshape); cutlass::HostTensor tensor_ref_c(oshape); - cutlass::reference::host::TensorFillRandomUniform( - tensor_a.host_view(), - 1, - ElementInputA(7), - ElementInputA(-8), - 0); - - cutlass::reference::host::TensorFillRandomUniform( - tensor_b.host_view(), - 1, - ElementInputB(7), - ElementInputB(-8), - 0); - - cutlass::reference::host::TensorFill( - tensor_c.host_view()); - - cutlass::reference::host::TensorFill( - tensor_ref_c.host_view()); - - tensor_a.sync_device(); - tensor_b.sync_device(); - tensor_c.sync_device(); - tensor_ref_c.sync_device(); - cutlass::conv::Conv2dProblemSize problem_size( options.input_size, options.filter_size, @@ -137,12 +109,12 @@ def __init__(self): using ElementComputeEpilogue = typename ImplicitGemm::ElementCompute; typename ImplicitGemm::Arguments arguments{ problem_size, - tensor_a.device_ref(), - tensor_b.device_ref(), - tensor_c.device_ref(), - tensor_c.device_ref(), - {ElementComputeEpilogue(1), ElementComputeEpilogue(0)}, - }; + tensor_a.device_ref(), + tensor_b.device_ref(), + tensor_c.device_ref(), + tensor_c.device_ref(), + {ElementComputeEpilogue(1), ElementComputeEpilogue(0)}, + }; ImplicitGemm implicit_gemm_op; size_t workspace_size = implicit_gemm_op.get_workspace_size(arguments);