Skip to content

Commit

Permalink
less threads per block for get valid count
Browse files Browse the repository at this point in the history
  • Loading branch information
Laurawly committed Mar 14, 2019
1 parent 000e10d commit 08896cf
Show file tree
Hide file tree
Showing 2 changed files with 1 addition and 2 deletions.
2 changes: 1 addition & 1 deletion topi/python/topi/cuda/nms.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def get_valid_counts_ir(data, flag, idx, valid_count, out):
valid_count = ib.buffer_ptr(valid_count)
out = ib.buffer_ptr(out)

- max_threads = int(tvm.target.current_target(allow_none=False).max_num_threads)
+ max_threads = int(math_sqrt(tvm.target.current_target(allow_none=False).max_num_threads))
nthread_tx = max_threads
nthread_bx = batch_size * num_anchors * elem_length // max_threads + 1
tx = tvm.thread_axis("threadIdx.x")
Expand Down
1 change: 0 additions & 1 deletion topi/python/topi/cuda/ssd/multibox.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,6 @@ def multibox_prior_ir(data, out, sizes, ratios, steps, offsets):
with ib.if_scope((j < in_width)):
center_h = (i + offset_h) * steps_h
center_w = (j + offset_w) * steps_w

for k in range(num_sizes + num_ratios - 1):
w = if_then_else(k < num_sizes,
size_ratio_concat[k] * in_height / in_width / 2.0,
Expand Down

0 comments on commit 08896cf

Please sign in to comment.