Optimize x86 conv3d_ndhwc using data packing approach.

Add tuneable conv3d_ndhwc schedule
apache · Feb 11, 2020 · 13d4ecf · 13d4ecf
1 parent 4fce513
commit 13d4ecf
Show file tree

Hide file tree

Showing 5 changed files with 374 additions and 38 deletions.
diff --git a/python/tvm/autotvm/task/relay_integration.py b/python/tvm/autotvm/task/relay_integration.py
@@ -133,6 +133,7 @@ def extract_from_multiple_program(mods, params, ops, target, target_host=None,
         tvm.relay.op.nn.batch_matmul: [topi.nn.batch_matmul],
         tvm.relay.op.nn.deformable_conv2d: [topi.nn.deformable_conv2d_nchw],
         tvm.relay.op.nn.conv1d_transpose: [topi.nn.conv1d_transpose_ncw],
+        tvm.relay.op.nn.conv3d: [topi.nn.conv3d],
     }
 
     topi_funcs = []

diff --git a/python/tvm/autotvm/task/topi_integration.py b/python/tvm/autotvm/task/topi_integration.py
@@ -94,6 +94,7 @@ def __init__(self, allow_duplicate=False):
             topi.nn.bitserial_dense: "topi_nn_bitserial_dense",
             topi.nn.deformable_conv2d_nchw: "topi_nn_deformable_conv2d_nchw",
             topi.nn.conv1d_transpose_ncw: "topi_nn_conv1d_transpose_ncw",
+            topi.nn.conv3d: "topi_nn_conv3d",
         }
 
         self.topi_to_schedule = {
@@ -112,6 +113,7 @@ def __init__(self, allow_duplicate=False):
             topi.nn.bitserial_dense: [topi.generic.schedule_bitserial_dense],
             topi.nn.deformable_conv2d_nchw: [topi.generic.schedule_deformable_conv2d_nchw],
             topi.nn.conv1d_transpose_ncw: [topi.generic.schedule_conv1d_transpose_ncw],
+            topi.nn.conv3d: [topi.generic.schedule_conv3d_ndhwc],
         }
 
         # function reflection for tracing
@@ -129,6 +131,7 @@ def __init__(self, allow_duplicate=False):
             topi.nn.bitserial_dense:        lambda x: setattr(topi.nn, 'bitserial_dense', x),
             topi.nn.deformable_conv2d_nchw: lambda x: setattr(topi.nn, 'deformable_conv2d_nchw', x),
             topi.nn.conv1d_transpose_ncw:   lambda x: setattr(topi.nn, 'conv1d_transpose_ncw', x),
+            topi.nn.conv3d:                 lambda x: setattr(topi.nn, 'conv3d', x),
         }
 
         self.allow_duplicate = allow_duplicate
@@ -231,6 +234,15 @@ def _topi_nn_conv1d_transpose_ncw(*args, **kwargs):
             s = topi.generic.schedule_conv1d_transpose_ncw([C])
             return s, [A, W, C]
 
+        @register("topi_nn_conv3d")  # registers this template under the name "topi_nn_conv3d"
+        def _topi_nn_conv3d(*args, **kwargs):  # returns (schedule, [A, W, C])
+            assert not kwargs, "Do not support kwargs in template function call"
+            args = deserialize_args(args)
+            A, W = args[:2]  # first two positional args; presumably data and kernel -- confirm
+            C = topi.nn.conv3d(*args, **kwargs)  # kwargs is empty here (asserted above)
+            s = topi.generic.schedule_conv3d_ndhwc([C])  # always the NDHWC schedule
+            return s, [A, W, C]
+
         @register("topi_nn_dense")
         def _topi_nn_dense(*args, **kwargs):
             assert not kwargs, "Do not support kwargs in template function call"

diff --git a/topi/python/topi/nn/util.py b/topi/python/topi/nn/util.py
@@ -47,6 +47,36 @@ def infer_pad(data, data_pad):
     wpad = (TW - IW) // 2
     return get_const_int(hpad), get_const_int(wpad)
 
+def infer_pad3d(data, data_pad):
+    """Infer the total depth/height/width padding from the data and pad stages.
+
+    Parameters
+    ----------
+    data : Tensor
+        data stage; its shape is unpacked as five dimensions.
+
+    data_pad : Tensor
+        pad stage, or None, in which case all three results are 0.
+
+    Returns
+    -------
+    dpad : int
+        total depth padding (padded extent minus input extent, not halved)
+    hpad : int
+        total height padding (padded extent minus input extent, not halved)
+    wpad : int
+        total width padding (padded extent minus input extent, not halved)
+    """
+    if data_pad is None:
+        return 0, 0, 0
+
+    _, ID, IH, IW, _ = data.shape  # rank-5 shape; layout presumably NDHWC -- confirm
+    _, TD, TH, TW, _ = data_pad.shape  # same axis order assumed for the padded stage
+    dpad = (TD - ID)  # full difference; infer_pad above halves its wpad, this does not
+    hpad = (TH - IH)
+    wpad = (TW - IW)
+    return get_const_int(dpad), get_const_int(hpad), get_const_int(wpad)
+
 def infer_stride(data, kernel, out):
     """Infer the stride from stages in reverse.