diff --git a/topi/python/topi/cuda/nms.py b/topi/python/topi/cuda/nms.py index d37c0136b21da..ae050f0891c44 100644 --- a/topi/python/topi/cuda/nms.py +++ b/topi/python/topi/cuda/nms.py @@ -113,7 +113,7 @@ def get_valid_counts_ir(data, flag, idx, valid_count, out): valid_count = ib.buffer_ptr(valid_count) out = ib.buffer_ptr(out) - max_threads = int(tvm.target.current_target(allow_none=False).max_num_threads) + max_threads = int(math_sqrt(tvm.target.current_target(allow_none=False).max_num_threads)) nthread_tx = max_threads nthread_bx = batch_size * num_anchors * elem_length // max_threads + 1 tx = tvm.thread_axis("threadIdx.x") diff --git a/topi/python/topi/cuda/ssd/multibox.py b/topi/python/topi/cuda/ssd/multibox.py index c0307947cb184..04fd30d34a4d3 100644 --- a/topi/python/topi/cuda/ssd/multibox.py +++ b/topi/python/topi/cuda/ssd/multibox.py @@ -76,7 +76,6 @@ def multibox_prior_ir(data, out, sizes, ratios, steps, offsets): with ib.if_scope((j < in_width)): center_h = (i + offset_h) * steps_h center_w = (j + offset_w) * steps_w - for k in range(num_sizes + num_ratios - 1): w = if_then_else(k < num_sizes, size_ratio_concat[k] * in_height / in_width / 2.0,