From 1da5012b09caac3d5a139a41041958f1e04088f3 Mon Sep 17 00:00:00 2001 From: Wang Date: Sat, 28 Jul 2018 22:10:47 -0700 Subject: [PATCH 01/43] Refactor NMS --- topi/python/topi/vision/nms.py | 79 +++++++++++++++++++++++++++++++--- 1 file changed, 73 insertions(+), 6 deletions(-) diff --git a/topi/python/topi/vision/nms.py b/topi/python/topi/vision/nms.py index a41ee5b50089..9e3614a73d46 100644 --- a/topi/python/topi/vision/nms.py +++ b/topi/python/topi/vision/nms.py @@ -2,7 +2,67 @@ """Non-maximum suppression operator""" import tvm -from tvm import api +from tvm import api, hybrid + +@hybrid.script +def rearrange_out(input, output): + """Rearrange nms output to move all valid entries to top. + + Parameters + ---------- + input : Tensor or Var or numpy NDArray + NMS output. 3-D tensor with shape + [batch_size, num_anchors, 6]. + + output : Tensor or Var or numpy NDArray + Transformed NMS output. 3-D tensor with shape + [batch_size, num_anchors, 6]. + It should filled with invalid entry -1. + """ + batch_size = input.shape[0] + num_anchors = input.shape[1] + elem_length = input.shape[2] + for i in range(batch_size): + valid_idx = 0 + for j in range(num_anchors): + if input[i, j, 0] >= 0: + for k in range(elem_length): + output[i, valid_idx, k] = input[i, j, k] + valid_idx += 1 + + +@hybrid.script +def get_valid_counts(data, inter_data, valid_count, score_threshold): + """Get valid count of bounding boxes given a score threshlod. + Also moves valid boxes to the top of input data. + + Parameters + ---------- + data : Tensor or Var or numpy NDArray + Input data. 3-D tensor with shape [batch_size, num_anchors, 6]. + + inter_data : Tensor or Var or numpy NDArray + Intermediate output. 3-D tensor with shape + [batch_size, num_anchors, 6]. + + valid_count : Tensor or Var or numpy NDArray + 1-D tensor for valid number of boxes. + + score_threshold : float + Lower limit of score for valid bounding boxes. + """ + batch_size = data.shape[0] + num_anchors = data.shape[1] + for i in range(batch_size): + valid_count[i] = 0 + inter_idx = 0 + for j in range(num_anchors): + score = data[i, j, 1] + if score >= score_threshold: + valid_count[i] += 1 + inter_data[i, inter_idx] = data[i, j] + inter_idx += 1 + def nms_ir(data, sort_result, valid_count, out, nms_threshold, force_suppress, nms_topk): """Low level IR routing for transform location in multibox_detection operator. @@ -107,12 +167,13 @@ def calculate_overlap(out_tensor, box_a_idx, box_b_idx): @tvm.target.generic_func -def nms(data, valid_count, nms_threshold=0.5, force_suppress=False, nms_topk=-1): +def nms(data, valid_count, nms_threshold=0.5, force_suppress=False, nms_topk=-1, + do_rearrange=False): """Non-maximum suppression operator for object detection. Parameters ---------- - data: tvm.Tensor + data : tvm.Tensor 3-D tensor with shape [batch_size, num_anchors, 6]. The last dimension should be in format of [class_id, score, box_left, box_top, box_right, box_bottom]. @@ -120,15 +181,18 @@ def nms(data, valid_count, nms_threshold=0.5, force_suppress=False, nms_topk=-1) valid_count : tvm.Tensor 1-D tensor for valid number of boxes. - nms_threshold : float + nms_threshold : optional, float Non-maximum suppression threshold. - force_suppress : boolean + force_suppress : optional, boolean Whether to suppress all detections regardless of class_id. - nms_topk : int + nms_topk : optional, int Keep maximum top k detections before nms, -1 for no limit. + do_rearrange : optional, boolean + Whether to move all valid bounding boxes to the top. 
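(For intuition, a minimal NumPy sketch of the rearrangement that do_rearrange requests — illustrative names only, not part of the patch: rows whose class_id in column 0 is non-negative are compacted to the top and the remainder is filled with -1.)

    import numpy as np

    def rearrange_ref(nms_out):
        # nms_out: (batch_size, num_anchors, 6); a row is valid when class_id >= 0
        out = np.full_like(nms_out, -1.0)
        for i in range(nms_out.shape[0]):
            valid = nms_out[i][nms_out[i, :, 0] >= 0]
            out[i, :valid.shape[0]] = valid
        return out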
+ Returns ------- out : tvm.Tensor @@ -189,4 +253,7 @@ def nms(data, valid_count, nms_threshold=0.5, force_suppress=False, nms_topk=-1) dtype="float32", in_buffers=[data_buf, sort_tensor_buf, valid_count_buf], tag="nms") + if do_rearrange: + normalized_out = tvm.compute(out.shape, lambda *index: -1) + hybrid.parse(rearrange_out, [out, normalized_out]) return out From 1eb27a8f59971c53997941f2968a69acecabb2d0 Mon Sep 17 00:00:00 2001 From: Wang Date: Tue, 4 Sep 2018 10:54:33 -0700 Subject: [PATCH 02/43] Avoid using function call for hybrid frontend --- topi/python/topi/vision/nms.py | 122 ++++++++++++++++++++++++++++++--- 1 file changed, 114 insertions(+), 8 deletions(-) diff --git a/topi/python/topi/vision/nms.py b/topi/python/topi/vision/nms.py index 9e3614a73d46..f7b090b88397 100644 --- a/topi/python/topi/vision/nms.py +++ b/topi/python/topi/vision/nms.py @@ -17,18 +17,22 @@ def rearrange_out(input, output): output : Tensor or Var or numpy NDArray Transformed NMS output. 3-D tensor with shape [batch_size, num_anchors, 6]. - It should filled with invalid entry -1. """ batch_size = input.shape[0] num_anchors = input.shape[1] elem_length = input.shape[2] + for i in range(batch_size): + for j in range(num_anchors): + for k in range(elem_length): + output[i, j, k] = -1.0 + for i in range(batch_size): valid_idx = 0 for j in range(num_anchors): if input[i, j, 0] >= 0: for k in range(elem_length): output[i, valid_idx, k] = input[i, j, k] - valid_idx += 1 + valid_idx = valid_idx + 1 @hybrid.script @@ -61,7 +65,7 @@ def get_valid_counts(data, inter_data, valid_count, score_threshold): if score >= score_threshold: valid_count[i] += 1 inter_data[i, inter_idx] = data[i, j] - inter_idx += 1 + inter_idx = inter_idx + 1 def nms_ir(data, sort_result, valid_count, out, nms_threshold, force_suppress, nms_topk): @@ -165,6 +169,101 @@ def calculate_overlap(out_tensor, box_a_idx, box_b_idx): p_out[n * num_anchors * 6 + (l + p_valid_count[n]) * 6 + m] = -1.0 return ib.get() +@hybrid.script +def calculate_iou(inter_data, batch_idx, box_a_idx, box_b_idx, box_start_idx): + a_t = inter_data[batch_idx, box_a_idx, box_start_idx + 1] + a_b = inter_data[batch_idx, box_a_idx, box_start_idx + 3] + a_l = inter_data[batch_idx, box_a_idx, box_start_idx] + a_r = inter_data[batch_idx, box_a_idx, box_start_idx + 2] + b_t = inter_data[batch_idx, box_b_idx, box_start_idx + 1] + b_b = inter_data[batch_idx, box_b_idx, box_start_idx + 3] + b_l = inter_data[batch_idx, box_b_idx, box_start_idx] + b_r = inter_data[batch_idx, box_b_idx, box_start_idx + 2] + w = max(0.0, min(a_r, b_r) - max(a_l, b_l)) + h = max(0.0, min(a_b, b_b) - max(a_t, b_t)) + i = h * w + u = (a_r - a_l) * (a_b - a_t) + (b_r - b_l) * (b_b - b_t) - i + return 0.0 if u <= 0 else i / u + +@hybrid.script +def hybrid_nms(data, sorted_index, valid_count, output, iou_threshold, force_suppress, nms_topk): + """Hybrid routing for non-maximum suppression. + + Parameters + ---------- + data: Tensor or Var or numpy NDArray + Bounding boxes with class and score. 3-D tensor with shape + [batch_size, num_anchors, 6]. + + sorted_index : Tensor or Var or numpy NDArray + Bounding box indexes sorted by score, with shape + [batch_size, num_anchors]. + + valid_count : Tensor or Var or numpy NDArray + 1-D tensor for valid number of boxes. + + output : Tensor or Var or numpy NDArray + NMS output tensor. + + iou_threshold : float + Overlapping(IoU) threshold to suppress object with smaller score. + + force_suppress : boolean + Whether to suppress all detections regardless of class_id. 
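(The overlap test above is the standard intersection-over-union; a standalone NumPy-style reference, assuming boxes laid out as [left, top, right, bottom], could read:)

    def iou_ref(a, b):
        # a, b: [left, top, right, bottom]
        w = max(0.0, min(a[2], b[2]) - max(a[0], b[0]))
        h = max(0.0, min(a[3], b[3]) - max(a[1], b[1]))
        inter = w * h
        union = (a[2] - a[0]) * (a[3] - a[1]) + (b[2] - b[0]) * (b[3] - b[1]) - inter
        return 0.0 if union <= 0.0 else inter / union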
+ + nms_topk : int + Keep maximum top k detections before nms, -1 for no limit. + """ + batch_size = data.shape[0] + num_anchors = data.shape[1] + box_data_length = data.shape[2] + for i in parallel(batch_size): + if iou_threshold > 0 and valid_count[i] > 0: + # Reorder output + nkeep = nms_topk if 0 < nms_topk < valid_count[i] else valid_count[i] + for j in range(nkeep): + for k in range(box_data_length): + output[i, j, k] = data[i, sorted_index[i, j], k] + if 0 < nms_topk < valid_count[i]: + for j in range(valid_count[i] - nkeep): + for k in range(box_data_length): + output[i, j + nkeep, k] = data[i, j + nkeep, k] + # Apply nms + for j in range(valid_count[i]): + if output[i, j, 0] >= 0: + for k in range(valid_count[i]): + if k > j and output[i, k, 0] >= 0 and (force_suppress + or output[i, j, 0] + == output[i, k, 0]): + #iou = calculate_iou(output, i, j, k, 2) + inter_data = output + batch_idx = i + box_a_idx, box_b_idx = j, k + box_start_idx = 2 + a_t = inter_data[batch_idx, box_a_idx, box_start_idx + 1] + a_b = inter_data[batch_idx, box_a_idx, box_start_idx + 3] + a_l = inter_data[batch_idx, box_a_idx, box_start_idx] + a_r = inter_data[batch_idx, box_a_idx, box_start_idx + 2] + b_t = inter_data[batch_idx, box_b_idx, box_start_idx + 1] + b_b = inter_data[batch_idx, box_b_idx, box_start_idx + 3] + b_l = inter_data[batch_idx, box_b_idx, box_start_idx] + b_r = inter_data[batch_idx, box_b_idx, box_start_idx + 2] + w = max(0.0, min(a_r, b_r) - max(a_l, b_l)) + h = max(0.0, min(a_b, b_b) - max(a_t, b_t)) + i = h * w + u = (a_r - a_l) * (a_b - a_t) + (b_r - b_l) * (b_b - b_t) - i + iou = 0.0 if u <= 0 else i / u + if iou >= iou_threshold: + output[i, k, 0] = -1.0 + else: + for j in range(valid_count[i]): + for k in range(box_data_length): + output[i, j, k] = data[i, j, k] + # Set invalid entry to be -1 + for j in range(num_anchors - valid_count[i]): + for k in range(box_data_length): + output[i, j + valid_count[i], k] = -1.0 + @tvm.target.generic_func def nms(data, valid_count, nms_threshold=0.5, force_suppress=False, nms_topk=-1, @@ -244,16 +343,23 @@ def nms(data, valid_count, nms_threshold=0.5, force_suppress=False, nms_topk=-1, in_buffers=[score_tensor_buf, valid_count_buf], out_buffers=sort_tensor_buf, name="nms_sort") + d_plc = tvm.placeholder(data.shape, name="nms_out", dtype="float32") + out = tvm.placeholder(data.shape, name="nms_out", dtype="float32") + inter_func = hybrid.parse(hybrid_nms, [data, sort_tensor, valid_count, out, nms_threshold, force_suppress, nms_topk]) + print(inter_func) out = \ tvm.extern(data.shape, [data, sort_tensor, valid_count], - lambda ins, outs: nms_ir( - ins[0], ins[1], ins[2], outs[0], nms_threshold, - force_suppress, nms_topk), + lambda ins, outs: hybrid.parse( + hybrid_nms, + [ins[0], ins[1], ins[2], outs[0], nms_threshold, + force_suppress, nms_topk]), dtype="float32", in_buffers=[data_buf, sort_tensor_buf, valid_count_buf], tag="nms") if do_rearrange: - normalized_out = tvm.compute(out.shape, lambda *index: -1) - hybrid.parse(rearrange_out, [out, normalized_out]) + out = tvm.extern(out.shape, [out], + lambda ins, outs: hybrid.parse( + rearrange_out, [ins[0], outs[0]]), + dtype="float32",) return out From 1e2cdb544f7120ae14b5e54825f76faa78294111 Mon Sep 17 00:00:00 2001 From: Wang Date: Tue, 27 Nov 2018 16:58:51 -0800 Subject: [PATCH 03/43] Modify nms --- topi/python/topi/vision/nms.py | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/topi/python/topi/vision/nms.py b/topi/python/topi/vision/nms.py index 
f7b090b88397..a2e8f50c1056 100644 --- a/topi/python/topi/vision/nms.py +++ b/topi/python/topi/vision/nms.py @@ -324,7 +324,6 @@ def nms(data, valid_count, nms_threshold=0.5, force_suppress=False, nms_topk=-1, valid_count_dtype = "int32" valid_count_buf = api.decl_buffer(valid_count.shape, valid_count_dtype, "valid_count_buf", data_alignment=4) - data_buf = api.decl_buffer(data.shape, data.dtype, "data_buf", data_alignment=8) score_axis = 1 score_shape = (batch_size, num_anchors) score_tensor = tvm.compute(score_shape, lambda i, j: data[i, j, score_axis]) @@ -343,23 +342,11 @@ def nms(data, valid_count, nms_threshold=0.5, force_suppress=False, nms_topk=-1, in_buffers=[score_tensor_buf, valid_count_buf], out_buffers=sort_tensor_buf, name="nms_sort") - d_plc = tvm.placeholder(data.shape, name="nms_out", dtype="float32") - out = tvm.placeholder(data.shape, name="nms_out", dtype="float32") - inter_func = hybrid.parse(hybrid_nms, [data, sort_tensor, valid_count, out, nms_threshold, force_suppress, nms_topk]) - print(inter_func) - out = \ - tvm.extern(data.shape, - [data, sort_tensor, valid_count], - lambda ins, outs: hybrid.parse( - hybrid_nms, - [ins[0], ins[1], ins[2], outs[0], nms_threshold, - force_suppress, nms_topk]), - dtype="float32", - in_buffers=[data_buf, sort_tensor_buf, valid_count_buf], - tag="nms") + out = tvm.placeholder(data.shape, dtype=data.dtype) + out = hybrid_nms(data, sort_tensor, valid_count, out, + tvm.convert(nms_threshold), tvm.convert(force_suppress), + tvm.convert(nms_topk)) if do_rearrange: - out = tvm.extern(out.shape, [out], - lambda ins, outs: hybrid.parse( - rearrange_out, [ins[0], outs[0]]), - dtype="float32",) + out = rearrange_out(out) + return out From d89286772336beebd16a559873e2335d9af1aa68 Mon Sep 17 00:00:00 2001 From: Wang Date: Mon, 10 Dec 2018 17:02:12 -0800 Subject: [PATCH 04/43] Add box_nms --- topi/python/topi/vision/nms.py | 344 ++++++++++++++++----------------- 1 file changed, 166 insertions(+), 178 deletions(-) diff --git a/topi/python/topi/vision/nms.py b/topi/python/topi/vision/nms.py index a2e8f50c1056..7569ae636dca 100644 --- a/topi/python/topi/vision/nms.py +++ b/topi/python/topi/vision/nms.py @@ -5,19 +5,25 @@ from tvm import api, hybrid @hybrid.script -def rearrange_out(input, output): +def rearrange_out(input): """Rearrange nms output to move all valid entries to top. Parameters ---------- - input : Tensor or Var or numpy NDArray + input : tvm.Tensor or numpy NDArray NMS output. 3-D tensor with shape [batch_size, num_anchors, 6]. - output : Tensor or Var or numpy NDArray + Returns + ------- + output : tvm.Tensor or numpy NDArray Transformed NMS output. 3-D tensor with shape [batch_size, num_anchors, 6]. """ + output = output_tensor((input.shape[0], + input.shape[1], + input.shape[2],), + input.dtype) batch_size = input.shape[0] num_anchors = input.shape[1] elem_length = input.shape[2] @@ -33,226 +39,136 @@ def rearrange_out(input, output): for k in range(elem_length): output[i, valid_idx, k] = input[i, j, k] valid_idx = valid_idx + 1 + return output @hybrid.script -def get_valid_counts(data, inter_data, valid_count, score_threshold): +def get_valid_counts(data, score_threshold): """Get valid count of bounding boxes given a score threshlod. Also moves valid boxes to the top of input data. Parameters ---------- - data : Tensor or Var or numpy NDArray + data : tvm.Tensor or numpy NDArray Input data. 3-D tensor with shape [batch_size, num_anchors, 6]. - inter_data : Tensor or Var or numpy NDArray - Intermediate output. 
3-D tensor with shape - [batch_size, num_anchors, 6]. + score_threshold : tvm.const + Lower limit of score for valid bounding boxes. - valid_count : Tensor or Var or numpy NDArray - 1-D tensor for valid number of boxes. + Returns + ------- + out_tensor : tvm.Tensor or numpy NDArray + Rearranged data tensor. - score_threshold : float - Lower limit of score for valid bounding boxes. + valid_count : tvm.Tensor or numpy NDArray + 1-D tensor for valid number of boxes. """ batch_size = data.shape[0] num_anchors = data.shape[1] + box_data_length = data.shape[2] + valid_count = output_tensor((batch_size,), "int32") + out_tensor = output_tensor((batch_size, + num_anchors, + box_data_length), + data.dtype) for i in range(batch_size): valid_count[i] = 0 inter_idx = 0 for j in range(num_anchors): score = data[i, j, 1] if score >= score_threshold: + for k in range(box_data_length): + out_tensor[i, inter_idx, k] = data[i, j, k] valid_count[i] += 1 - inter_data[i, inter_idx] = data[i, j] inter_idx = inter_idx + 1 + return valid_count, out_tensor -def nms_ir(data, sort_result, valid_count, out, nms_threshold, force_suppress, nms_topk): - """Low level IR routing for transform location in multibox_detection operator. - - Parameters - ---------- - data: Buffer - Buffer of output boxes with class and score. - - sort_result : Buffer - Buffer of output box indexes sorted by score. - - valid_count : Buffer - Buffer of number of valid output boxes. - - out : Buffer - Output buffer. - - nms_threshold : float - Non-maximum suppression threshold. - - force_suppress : boolean - Whether to suppress all detections regardless of class_id. - - nms_topk : int - Keep maximum top k detections before nms, -1 for no limit. - - Returns - ------- - stmt : Stmt - The result IR statement. - """ - def calculate_overlap(out_tensor, box_a_idx, box_b_idx): - """Calculate overlap of two boxes. 
- """ - w = tvm.make.Max(0.0, tvm.make.Min(out_tensor[box_a_idx + 2], out_tensor[box_b_idx + 2]) - - tvm.make.Max(out_tensor[box_a_idx], out_tensor[box_b_idx])) - h = tvm.make.Max(0.0, tvm.make.Min(out_tensor[box_a_idx + 3], out_tensor[box_b_idx + 3]) - - tvm.make.Max(out_tensor[box_a_idx + 1], out_tensor[box_b_idx + 1])) - i = w * h - u = (out_tensor[box_a_idx + 2] - out_tensor[box_a_idx]) * \ - (out_tensor[box_a_idx + 3] - out_tensor[box_a_idx + 1]) + \ - (out_tensor[box_b_idx + 2] - out_tensor[box_b_idx]) * \ - (out_tensor[box_b_idx + 3] - out_tensor[box_b_idx + 1]) - i - return tvm.expr.Select(u <= 0.0, 0.0, i / u) - - ib = tvm.ir_builder.create() - p_data = ib.buffer_ptr(data) - p_sort_result = ib.buffer_ptr(sort_result) - p_valid_count = ib.buffer_ptr(valid_count) - p_out = ib.buffer_ptr(out) - batch_size = out.shape[0] - num_anchors = out.shape[1] - - nms_threshold_node = tvm.make.node("FloatImm", dtype="float32", value=nms_threshold) - nms_topk_node = tvm.make.node("IntImm", dtype="int32", value=nms_topk) - force_suppress_node = tvm.make.node("IntImm", dtype="int32", value=1 if force_suppress else 0) - with ib.for_range(0, batch_size, for_type="parallel", name="n") as n: - with ib.if_scope(tvm.all(nms_threshold_node > 0, nms_threshold_node < 1, - p_valid_count[0] > 0)): - # Reorder output - nkeep = tvm.if_then_else( - tvm.all(nms_topk_node > 0, nms_topk < p_valid_count[n]), - nms_topk, p_valid_count[n]) - with ib.for_range(0, nkeep, name="l") as l: - with ib.for_range(0, 6, name="m") as m: - p_out[(n * num_anchors * 6 - + l * 6 + m)] = p_data[(n * num_anchors * 6 - + p_sort_result[n * num_anchors + l] * 6 + m)] - with ib.if_scope(tvm.all(nms_topk_node > 0, nms_topk < p_valid_count[n])): - with ib.for_range(0, p_valid_count[n] - nkeep, name="l") as l: - with ib.for_range(0, 6, name="m") as m: - p_out[(n * num_anchors * 6 - + (l + nkeep) * 6 + m)] = p_data[(n * num_anchors * 6 - + (l + nkeep) * 6 + m)] - # Apply nms - with ib.for_range(0, p_valid_count[n], name="l") as l: - offset_l = l * 6 - with ib.if_scope(p_out[n * num_anchors * 6 + offset_l] >= 0): - with ib.for_range(0, p_valid_count[n], name="m") as m: - offset_m = m * 6 - with ib.if_scope(tvm.all(m > l, p_out[n * num_anchors * 6 - + offset_m] >= 0)): - with ib.if_scope(tvm.any(force_suppress_node > 0, - p_out[n * num_anchors * 6 + offset_l] == - p_out[n * num_anchors * 6 + offset_m])): - # When force_suppress == True or class_id equals - iou = calculate_overlap(p_out, n * num_anchors * 6 + offset_l + 2, - n * num_anchors * 6 + offset_m + 2) - with ib.if_scope(iou >= nms_threshold): - p_out[n * num_anchors * 6 + offset_m] = -1.0 - with ib.else_scope(): - with ib.for_range(0, p_valid_count[n], name="l") as l: - with ib.for_range(0, 6, name="m") as m: - p_out[(n * num_anchors * 6 - + l * 6 + m)] = p_data[n * num_anchors * 6 + l * 6 + m] - # Set invalid entry to be -1 - with ib.for_range(0, num_anchors - p_valid_count[n], name="l") as l: - with ib.for_range(0, 6, name="m") as m: - p_out[n * num_anchors * 6 + (l + p_valid_count[n]) * 6 + m] = -1.0 - return ib.get() - -@hybrid.script -def calculate_iou(inter_data, batch_idx, box_a_idx, box_b_idx, box_start_idx): - a_t = inter_data[batch_idx, box_a_idx, box_start_idx + 1] - a_b = inter_data[batch_idx, box_a_idx, box_start_idx + 3] - a_l = inter_data[batch_idx, box_a_idx, box_start_idx] - a_r = inter_data[batch_idx, box_a_idx, box_start_idx + 2] - b_t = inter_data[batch_idx, box_b_idx, box_start_idx + 1] - b_b = inter_data[batch_idx, box_b_idx, box_start_idx + 3] - b_l = 
inter_data[batch_idx, box_b_idx, box_start_idx] - b_r = inter_data[batch_idx, box_b_idx, box_start_idx + 2] - w = max(0.0, min(a_r, b_r) - max(a_l, b_l)) - h = max(0.0, min(a_b, b_b) - max(a_t, b_t)) - i = h * w - u = (a_r - a_l) * (a_b - a_t) + (b_r - b_l) * (b_b - b_t) - i - return 0.0 if u <= 0 else i / u @hybrid.script -def hybrid_nms(data, sorted_index, valid_count, output, iou_threshold, force_suppress, nms_topk): +def hybrid_nms(data, sorted_index, valid_count, + iou_threshold, force_suppress, topk): """Hybrid routing for non-maximum suppression. Parameters ---------- - data: Tensor or Var or numpy NDArray + data: tvm.Tensor or numpy NDArray Bounding boxes with class and score. 3-D tensor with shape [batch_size, num_anchors, 6]. - sorted_index : Tensor or Var or numpy NDArray + sorted_index : tvm.Tensor or numpy NDArray Bounding box indexes sorted by score, with shape [batch_size, num_anchors]. - valid_count : Tensor or Var or numpy NDArray + valid_count : tvm.Tensor or numpy NDArray 1-D tensor for valid number of boxes. - output : Tensor or Var or numpy NDArray - NMS output tensor. - - iou_threshold : float + iou_threshold : tvm.const Overlapping(IoU) threshold to suppress object with smaller score. - force_suppress : boolean + force_suppress : tvm.const Whether to suppress all detections regardless of class_id. - nms_topk : int + topk : tvm.const Keep maximum top k detections before nms, -1 for no limit. + + Returns + ------- + valid_count : tvm.Tensor or numpy NDArray + 1-D tensor for valid number of boxes. """ batch_size = data.shape[0] num_anchors = data.shape[1] box_data_length = data.shape[2] + output = output_tensor((batch_size, + num_anchors, + box_data_length,), + data.dtype) for i in parallel(batch_size): - if iou_threshold > 0 and valid_count[i] > 0: - # Reorder output - nkeep = nms_topk if 0 < nms_topk < valid_count[i] else valid_count[i] - for j in range(nkeep): - for k in range(box_data_length): - output[i, j, k] = data[i, sorted_index[i, j], k] - if 0 < nms_topk < valid_count[i]: - for j in range(valid_count[i] - nkeep): + if iou_threshold > 0: + if valid_count[i] > 0: + # Reorder output + nkeep = valid_count[i] + if topk > 0: + if topk < valid_count[i]: + nkeep = topk + for j in range(nkeep): for k in range(box_data_length): - output[i, j + nkeep, k] = data[i, j + nkeep, k] + output[i, j, k] = data[i, sorted_index[i, j], k] + if topk > 0: + if topk < valid_count[i]: + for j in range(valid_count[i] - nkeep): + for k in range(box_data_length): + output[i, j + nkeep, k] = data[i, j + nkeep, k] # Apply nms for j in range(valid_count[i]): if output[i, j, 0] >= 0: for k in range(valid_count[i]): - if k > j and output[i, k, 0] >= 0 and (force_suppress - or output[i, j, 0] - == output[i, k, 0]): - #iou = calculate_iou(output, i, j, k, 2) - inter_data = output + check_iou = 0 + if k > j: + if output[i, k, 0] >= 0: + if force_suppress: + check_iou = 1 + elif output[i, j, 0] == output[i, k, 0]: + check_iou = 1 + if check_iou: batch_idx = i - box_a_idx, box_b_idx = j, k + box_a_idx = j + box_b_idx = k box_start_idx = 2 - a_t = inter_data[batch_idx, box_a_idx, box_start_idx + 1] - a_b = inter_data[batch_idx, box_a_idx, box_start_idx + 3] - a_l = inter_data[batch_idx, box_a_idx, box_start_idx] - a_r = inter_data[batch_idx, box_a_idx, box_start_idx + 2] - b_t = inter_data[batch_idx, box_b_idx, box_start_idx + 1] - b_b = inter_data[batch_idx, box_b_idx, box_start_idx + 3] - b_l = inter_data[batch_idx, box_b_idx, box_start_idx] - b_r = inter_data[batch_idx, box_b_idx, box_start_idx 
+ 2] + a_t = output[batch_idx, box_a_idx, box_start_idx + 1] + a_b = output[batch_idx, box_a_idx, box_start_idx + 3] + a_l = output[batch_idx, box_a_idx, box_start_idx] + a_r = output[batch_idx, box_a_idx, box_start_idx + 2] + b_t = output[batch_idx, box_b_idx, box_start_idx + 1] + b_b = output[batch_idx, box_b_idx, box_start_idx + 3] + b_l = output[batch_idx, box_b_idx, box_start_idx] + b_r = output[batch_idx, box_b_idx, box_start_idx + 2] w = max(0.0, min(a_r, b_r) - max(a_l, b_l)) h = max(0.0, min(a_b, b_b) - max(a_t, b_t)) - i = h * w - u = (a_r - a_l) * (a_b - a_t) + (b_r - b_l) * (b_b - b_t) - i - iou = 0.0 if u <= 0 else i / u + area = h * w + u = (a_r - a_l) * (a_b - a_t) + (b_r - b_l) * (b_b - b_t) - area + iou = 0.0 if u <= 0.0 else area / u if iou >= iou_threshold: output[i, k, 0] = -1.0 else: @@ -263,11 +179,12 @@ def hybrid_nms(data, sorted_index, valid_count, output, iou_threshold, force_sup for j in range(num_anchors - valid_count[i]): for k in range(box_data_length): output[i, j + valid_count[i], k] = -1.0 + return output @tvm.target.generic_func -def nms(data, valid_count, nms_threshold=0.5, force_suppress=False, nms_topk=-1, - do_rearrange=False): +def nms(data, valid_count, iou_threshold=0.5, force_suppress=False, + topk=-1, do_rearrange=False): """Non-maximum suppression operator for object detection. Parameters @@ -280,13 +197,13 @@ def nms(data, valid_count, nms_threshold=0.5, force_suppress=False, nms_topk=-1, valid_count : tvm.Tensor 1-D tensor for valid number of boxes. - nms_threshold : optional, float + iou_threshold : optional, float Non-maximum suppression threshold. force_suppress : optional, boolean Whether to suppress all detections regardless of class_id. - nms_topk : optional, int + topk : optional, int Keep maximum top k detections before nms, -1 for no limit. do_rearrange : optional, boolean @@ -305,12 +222,12 @@ def nms(data, valid_count, nms_threshold=0.5, force_suppress=False, nms_topk=-1, dshape = (1, 5, 6) data = tvm.placeholder(dshape, name="data") valid_count = tvm.placeholder((dshape[0],), dtype="int32", name="valid_count") - nms_threshold = 0.7 + iou_threshold = 0.7 force_suppress = True - nms_topk = -1 - out = nms(data, valid_count, nms_threshold, force_suppress, nms_topk) - np_data = np.random.uniform(size=dshape).astype("float32") - np_valid_count = np.array([4]).astype("int32") + topk = -1 + out = nms(data, valid_count, iou_threshold, force_suppress, topk) + np_data = np.random.uniform(dshape) + np_valid_count = np.array([4]) s = topi.generic.schedule_nms(out) f = tvm.build(s, [data, valid_count, out], "llvm") ctx = tvm.cpu() @@ -342,11 +259,82 @@ def nms(data, valid_count, nms_threshold=0.5, force_suppress=False, nms_topk=-1, in_buffers=[score_tensor_buf, valid_count_buf], out_buffers=sort_tensor_buf, name="nms_sort") - out = tvm.placeholder(data.shape, dtype=data.dtype) - out = hybrid_nms(data, sort_tensor, valid_count, out, - tvm.convert(nms_threshold), tvm.convert(force_suppress), - tvm.convert(nms_topk)) + out = hybrid_nms(data, sort_tensor, valid_count, + tvm.const(iou_threshold, dtype="float32"), + tvm.const(force_suppress, dtype="bool"), + tvm.const(topk, dtype="int32")) if do_rearrange: out = rearrange_out(out) return out + +@tvm.target.generic_func +def box_nms(data, iou_threshold=0.5, score_threshold=0, + force_suppress=True, topk=-1): + """Apply non-maximum suppression to input. + Comparing to nms, this function takes score_threshold + as argument and automatically filters valid anchor boxes. 
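(In effect box_nms chains the two steps defined earlier; an equivalent hand-written call sequence, illustrative only:)

    valid_count, filtered = get_valid_counts(data, score_threshold)
    out = nms(filtered, valid_count, iou_threshold, force_suppress, topk, do_rearrange=True)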
+ + Parameters + ---------- + data : tvm.Tensor + 3-D tensor with shape [batch_size, num_anchors, 6]. + The last dimension should be in format of + [class_id, score, box_left, box_top, box_right, box_bottom]. + + iou_threshold : optional, float + Non-maximum suppression threshold. + + score_threshold : optional, float + Lower limit of score for valid bounding boxes. + + force_suppress : optional, boolean + Whether to suppress all detections regardless of class_id. + + topk : optional, int + Keep maximum top k detections before nms, -1 for no limit. + + Returns + ------- + out : tvm.Tensor + 3-D tensor with shape [batch_size, num_anchors, 6]. + """ + score_threshold_const = tvm.const(score_threshold, + dtype="float32") + valid_count, out = get_valid_counts(data, score_threshold_const) + return nms(out, valid_count, iou_threshold, + force_suppress, topk, True) + + +if __name__ == '__main__': + import tvm + import topi + import numpy as np + + score_threshold = 0.13 + overlap_thresh = 0.5 + + # This works. + # Here we first call get_valid_counts with np data, + # then build nms function and feed data into it. + np_data = np.random.uniform(size=(1, 5000, 6)).astype("float32") + np_valid_count, np_inter_out = topi.vision.get_valid_counts(np_data, score_threshold) + data = tvm.placeholder((1, 5000, 6), name="data", dtype="float32") + valid_count = tvm.placeholder((1,), name="valid_count", dtype="int32") + result = topi.vision.nms(data, valid_count, iou_threshold=overlap_thresh, force_suppress=True, do_rearrange=True) + st = tvm.create_schedule(result.op) + f = tvm.build(st, [data, valid_count, result], "llvm") + ctx = tvm.cpu(0) + np_out = np.zeros(np_inter_out.shape) + aa = tvm.nd.array(np_inter_out.astype(data.dtype), ctx) + bb = tvm.nd.array(np_valid_count.astype(valid_count.dtype), ctx) + cc = tvm.nd.array(np_out.astype(result.dtype), ctx) + f(aa, bb, cc) + + + # This will fail + # We combine get_valid_counts and nms into box_nms + data = tvm.placeholder((1, 5000, 6), name="data", dtype="float32") + result = topi.vision.box_nms(data, iou_threshold=overlap_thresh, force_suppress=True, score_threshold=score_threshold) + st = tvm.create_schedule(result.op) + f = tvm.build(st, [data, result], "llvm") From c19526b79f06206fc4a9351c90f1c03e884acee4 Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 13 Dec 2018 14:24:36 -0800 Subject: [PATCH 05/43] Add test for get_valid_counts --- topi/python/topi/cuda/nms.py | 5 +- topi/python/topi/generic/vision.py | 17 +++ topi/python/topi/vision/nms.py | 174 +++++++++----------------- topi/tests/python/test_topi_vision.py | 51 +++++++- 4 files changed, 132 insertions(+), 115 deletions(-) diff --git a/topi/python/topi/cuda/nms.py b/topi/python/topi/cuda/nms.py index e0d71559f1a0..89c0da381aae 100644 --- a/topi/python/topi/cuda/nms.py +++ b/topi/python/topi/cuda/nms.py @@ -1,4 +1,4 @@ -# pylint: disable=invalid-name, no-member, too-many-locals, too-many-arguments, too-many-statements, singleton-comparison +# pylint: disable=invalid-name, no-member, too-many-locals, too-many-arguments, too-many-statements, singleton-comparison, unused-argument """Non-maximum suppression operator""" import math import tvm @@ -182,7 +182,8 @@ def calculate_overlap(out_tensor, box_a_idx, box_b_idx): @nms.register(["cuda", "gpu"]) -def nms_gpu(data, valid_count, nms_threshold=0.5, force_suppress=False, nms_topk=-1): +def nms_gpu(data, valid_count, nms_threshold=0.5, force_suppress=False, nms_topk=-1, + do_rearrange=False): """Non-maximum suppression operator for object detection. 
Parameters diff --git a/topi/python/topi/generic/vision.py b/topi/python/topi/generic/vision.py index 76e8545bfc52..bfd6c55d533a 100644 --- a/topi/python/topi/generic/vision.py +++ b/topi/python/topi/generic/vision.py @@ -36,6 +36,23 @@ def schedule_reorg(outs): cpp_target = cpp.TEST_create_target(target.target_name) return cpp.generic.default_schedule(cpp_target, outs, False) +@tvm.target.generic_func +def schedule_get_valid_counts(outs): + """Schedule for get_valid_counts + + Parameters + ---------- + outs: Array of Tensor + The computation graph description of nms + in the format of an array of tensors. + + Returns + ------- + s: Schedule + The computation schedule for the op. + """ + return _default_schedule(outs, False) + @tvm.target.generic_func def schedule_nms(outs): """Schedule for non-maximum suppression diff --git a/topi/python/topi/vision/nms.py b/topi/python/topi/vision/nms.py index 7569ae636dca..0b113733f8da 100644 --- a/topi/python/topi/vision/nms.py +++ b/topi/python/topi/vision/nms.py @@ -1,16 +1,17 @@ -# pylint: disable=invalid-name, no-member, too-many-locals, too-many-arguments +# pylint: disable=invalid-name, no-member, too-many-locals, too-many-arguments, undefined-variable, too-many-nested-blocks, too-many-branches """Non-maximum suppression operator""" import tvm from tvm import api, hybrid @hybrid.script -def rearrange_out(input): - """Rearrange nms output to move all valid entries to top. +def hybrid_rearrange_out(data): + """Hybrid routine to rearrange nms output to + move all valid entries to top. Parameters ---------- - input : tvm.Tensor or numpy NDArray + data : tvm.Tensor or numpy NDArray NMS output. 3-D tensor with shape [batch_size, num_anchors, 6]. @@ -20,32 +21,32 @@ def rearrange_out(input): Transformed NMS output. 3-D tensor with shape [batch_size, num_anchors, 6]. """ - output = output_tensor((input.shape[0], - input.shape[1], - input.shape[2],), - input.dtype) - batch_size = input.shape[0] - num_anchors = input.shape[1] - elem_length = input.shape[2] - for i in range(batch_size): - for j in range(num_anchors): - for k in range(elem_length): - output[i, j, k] = -1.0 + output = output_tensor((data.shape[0], + data.shape[1], + data.shape[2],), + data.dtype) + batch_size = data.shape[0] + num_anchors = data.shape[1] + elem_length = data.shape[2] - for i in range(batch_size): + for i in parallel(batch_size): valid_idx = 0 for j in range(num_anchors): - if input[i, j, 0] >= 0: + if data[i, j, 0] >= 0: + for k in range(elem_length): + output[i, valid_idx, k] = data[i, j, k] + valid_idx += 1 + if j >= valid_idx: for k in range(elem_length): - output[i, valid_idx, k] = input[i, j, k] - valid_idx = valid_idx + 1 + output[i, j, k] = -1.0 return output @hybrid.script -def get_valid_counts(data, score_threshold): - """Get valid count of bounding boxes given a score threshlod. - Also moves valid boxes to the top of input data. +def hybrid_get_valid_counts(data, score_threshold): + """Hybrid routine to get valid count of bounding boxes + given a score threshold. Also moves valid boxes to the + top of input data. 
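(A NumPy reference of the same behaviour — illustrative names, padding the tail with zeros as the unit test below does:)

    import numpy as np

    def get_valid_counts_ref(data, score_threshold):
        # data: (batch_size, num_anchors, 6); the score sits in column 1
        out = np.zeros_like(data)
        valid_count = np.zeros(data.shape[0], dtype="int32")
        for i in range(data.shape[0]):
            keep = data[i][data[i, :, 1] >= score_threshold]
            out[i, :keep.shape[0]] = keep
            valid_count[i] = keep.shape[0]
        return valid_count, out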
Parameters ---------- @@ -71,7 +72,7 @@ def get_valid_counts(data, score_threshold): num_anchors, box_data_length), data.dtype) - for i in range(batch_size): + for i in parallel(batch_size): valid_count[i] = 0 inter_idx = 0 for j in range(num_anchors): @@ -80,10 +81,33 @@ def get_valid_counts(data, score_threshold): for k in range(box_data_length): out_tensor[i, inter_idx, k] = data[i, j, k] valid_count[i] += 1 - inter_idx = inter_idx + 1 - + inter_idx += 1 return valid_count, out_tensor +@tvm.target.generic_func +def get_valid_counts(data, score_threshold=0): + """Get valid count of bounding boxes given a score threshold. + Also moves valid boxes to the top of input data. + + Parameters + ---------- + data : tvm.Tensor + Input data. 3-D tensor with shape [batch_size, num_anchors, 6]. + + score_threshold : optional, float + Lower limit of score for valid bounding boxes. + + Returns + ------- + out_tensor : tvm.Tensor + Rearranged data tensor. + + valid_count : tvm.Tensor + 1-D tensor for valid number of boxes. + """ + score_threshold_const = tvm.const(score_threshold, "float") + return hybrid_get_valid_counts(data, score_threshold_const) + @hybrid.script def hybrid_nms(data, sorted_index, valid_count, @@ -129,29 +153,26 @@ def hybrid_nms(data, sorted_index, valid_count, if valid_count[i] > 0: # Reorder output nkeep = valid_count[i] - if topk > 0: - if topk < valid_count[i]: - nkeep = topk + if 0 < topk < nkeep: + nkeep = topk for j in range(nkeep): for k in range(box_data_length): output[i, j, k] = data[i, sorted_index[i, j], k] - if topk > 0: - if topk < valid_count[i]: - for j in range(valid_count[i] - nkeep): - for k in range(box_data_length): - output[i, j + nkeep, k] = data[i, j + nkeep, k] + if 0 < topk < valid_count[i]: + for j in range(valid_count[i] - nkeep): + for k in range(box_data_length): + output[i, j + nkeep, k] = data[i, j + nkeep, k] # Apply nms for j in range(valid_count[i]): if output[i, j, 0] >= 0: for k in range(valid_count[i]): check_iou = 0 - if k > j: - if output[i, k, 0] >= 0: - if force_suppress: - check_iou = 1 - elif output[i, j, 0] == output[i, k, 0]: - check_iou = 1 - if check_iou: + if k > j and output[i, k, 0] >= 0: + if force_suppress: + check_iou = 1 + elif output[i, j, 0] == output[i, k, 0]: + check_iou = 1 + if check_iou > 0: batch_idx = i box_a_idx = j box_b_idx = k @@ -264,77 +285,6 @@ def nms(data, valid_count, iou_threshold=0.5, force_suppress=False, tvm.const(force_suppress, dtype="bool"), tvm.const(topk, dtype="int32")) if do_rearrange: - out = rearrange_out(out) + out = hybrid_rearrange_out(out) return out - -@tvm.target.generic_func -def box_nms(data, iou_threshold=0.5, score_threshold=0, - force_suppress=True, topk=-1): - """Apply non-maximum suppression to input. - Comparing to nms, this function takes score_threshold - as argument and automatically filters valid anchor boxes. - - Parameters - ---------- - data : tvm.Tensor - 3-D tensor with shape [batch_size, num_anchors, 6]. - The last dimension should be in format of - [class_id, score, box_left, box_top, box_right, box_bottom]. - - iou_threshold : optional, float - Non-maximum suppression threshold. - - score_threshold : optional, float - Lower limit of score for valid bounding boxes. - - force_suppress : optional, boolean - Whether to suppress all detections regardless of class_id. - - topk : optional, int - Keep maximum top k detections before nms, -1 for no limit. - - Returns - ------- - out : tvm.Tensor - 3-D tensor with shape [batch_size, num_anchors, 6]. 
- """ - score_threshold_const = tvm.const(score_threshold, - dtype="float32") - valid_count, out = get_valid_counts(data, score_threshold_const) - return nms(out, valid_count, iou_threshold, - force_suppress, topk, True) - - -if __name__ == '__main__': - import tvm - import topi - import numpy as np - - score_threshold = 0.13 - overlap_thresh = 0.5 - - # This works. - # Here we first call get_valid_counts with np data, - # then build nms function and feed data into it. - np_data = np.random.uniform(size=(1, 5000, 6)).astype("float32") - np_valid_count, np_inter_out = topi.vision.get_valid_counts(np_data, score_threshold) - data = tvm.placeholder((1, 5000, 6), name="data", dtype="float32") - valid_count = tvm.placeholder((1,), name="valid_count", dtype="int32") - result = topi.vision.nms(data, valid_count, iou_threshold=overlap_thresh, force_suppress=True, do_rearrange=True) - st = tvm.create_schedule(result.op) - f = tvm.build(st, [data, valid_count, result], "llvm") - ctx = tvm.cpu(0) - np_out = np.zeros(np_inter_out.shape) - aa = tvm.nd.array(np_inter_out.astype(data.dtype), ctx) - bb = tvm.nd.array(np_valid_count.astype(valid_count.dtype), ctx) - cc = tvm.nd.array(np_out.astype(result.dtype), ctx) - f(aa, bb, cc) - - - # This will fail - # We combine get_valid_counts and nms into box_nms - data = tvm.placeholder((1, 5000, 6), name="data", dtype="float32") - result = topi.vision.box_nms(data, iou_threshold=overlap_thresh, force_suppress=True, score_threshold=score_threshold) - st = tvm.create_schedule(result.op) - f = tvm.build(st, [data, result], "llvm") diff --git a/topi/tests/python/test_topi_vision.py b/topi/tests/python/test_topi_vision.py index 3c0c3aa854d7..d77520c60cf8 100644 --- a/topi/tests/python/test_topi_vision.py +++ b/topi/tests/python/test_topi_vision.py @@ -8,7 +8,55 @@ from tvm.contrib.pickle_memoize import memoize from topi.util import get_const_tuple -from topi.vision import ssd, nms +from topi.vision import ssd, nms, get_valid_counts + + +def verify_get_valid_counts(dshape, score_threshold): + dtype = "float32" + batch_size, num_anchor, elem_length = dshape + np_data = np.random.uniform(size=dshape).astype(dtype) + np_out1 = np.zeros(shape=(batch_size,)) + np_out2 = np.zeros(shape=dshape).astype("float32") + for i in range(batch_size): + np_out1[i] = 0 + inter_idx = 0 + for j in range(num_anchor): + score = np_data[i, j, 1] + if score >= score_threshold: + for k in range(elem_length): + np_out2[i, inter_idx, k] = np_data[i, j, k] + np_out1[i] += 1 + inter_idx += 1 + + def check_device(device): + ctx = tvm.context(device, 0) + if not ctx.exist: + print("Skip because %s is not enabled" % device) + return + print("Running on target: %s" % device) + with tvm.target.create(device): + data = tvm.placeholder(dshape, name="data", dtype=dtype) + outs = get_valid_counts(data, score_threshold) + s = topi.generic.schedule_multibox_prior(outs) + + tvm_input_data = tvm.nd.array(np_data, ctx) + tvm_out1 = tvm.nd.array(np.zeros(np_out1.shape, dtype="int32"), ctx) + tvm_out2 = tvm.nd.array(np.zeros(np_out2.shape, dtype=dtype), ctx) + f = tvm.build(s, [data, outs[0], outs[1]], device) + f(tvm_input_data, tvm_out1, tvm_out2) + tvm.testing.assert_allclose(tvm_out1.asnumpy(), np_out1, rtol=1e-3) + tvm.testing.assert_allclose(tvm_out2.asnumpy(), np_out2, rtol=1e-3) + + for device in ['llvm']: + check_device(device) + + +def test_get_valid_counts(): + verify_get_valid_counts((1, 2500, 6), 0) + verify_get_valid_counts((1, 2500, 6), -1) + verify_get_valid_counts((3, 1000, 6), 0.15) + 
verify_get_valid_counts((16, 500, 6), 0.95) +>>>>>>> Add test for get_valid_counts def test_nms(): @@ -274,6 +322,7 @@ def test_proposal(): if __name__ == "__main__": + test_get_valid_counts() test_nms() test_multibox_prior() test_multibox_detection() From 9e0eee78a319fc705574473e4d86ce966093188c Mon Sep 17 00:00:00 2001 From: Wang Date: Sat, 15 Dec 2018 16:10:28 -0800 Subject: [PATCH 06/43] Add missing operators --- nnvm/include/nnvm/top/nn.h | 19 ++- nnvm/include/nnvm/top/tensor.h | 15 +++ nnvm/python/nnvm/frontend/mxnet.py | 62 +++++++++- nnvm/python/nnvm/top/transform.py | 4 + nnvm/python/nnvm/top/vision.py | 24 +++- nnvm/src/top/tensor/elemwise.cc | 109 ++++++++++++++++++ nnvm/src/top/tensor/transform.cc | 85 ++++++++++++++ nnvm/src/top/vision/nms.cc | 53 +++++++++ nnvm/tests/python/compiler/test_top_level4.py | 81 ++++++++++++- topi/python/topi/vision/nms.py | 11 +- topi/tests/python/test_topi_vision.py | 7 +- 11 files changed, 444 insertions(+), 26 deletions(-) diff --git a/nnvm/include/nnvm/top/nn.h b/nnvm/include/nnvm/top/nn.h index 143a9548f18a..543820e724bd 100644 --- a/nnvm/include/nnvm/top/nn.h +++ b/nnvm/include/nnvm/top/nn.h @@ -443,17 +443,28 @@ struct MultiBoxTransformLocParam : public dmlc::Parameter { + float score_threshold; + DMLC_DECLARE_PARAMETER(GetValidCountsParam) { + DMLC_DECLARE_FIELD(score_threshold).set_default(0.0) + .describe("Lower limit of score for valid bounding boxes."); + } +}; + struct NMSParam : public dmlc::Parameter { - float nms_threshold; + float iou_threshold; bool force_suppress; - int nms_topk; + int topk; + bool do_rearrange; DMLC_DECLARE_PARAMETER(NMSParam) { - DMLC_DECLARE_FIELD(nms_threshold).set_default(0.5) + DMLC_DECLARE_FIELD(iou_threshold).set_default(0.5) .describe("Non-maximum suppression threshold."); DMLC_DECLARE_FIELD(force_suppress).set_default(false) .describe("Suppress all detections regardless of class_id."); - DMLC_DECLARE_FIELD(nms_topk).set_default(-1) + DMLC_DECLARE_FIELD(topk).set_default(-1) .describe("Keep maximum top k detections before nms, -1 for no limit."); + DMLC_DECLARE_FIELD(do_rearrange).set_default(false) + .describe("Whether to move all valid bounding boxes to the top."); } }; diff --git a/nnvm/include/nnvm/top/tensor.h b/nnvm/include/nnvm/top/tensor.h index bed1b05984da..dc3c23a6198b 100644 --- a/nnvm/include/nnvm/top/tensor.h +++ b/nnvm/include/nnvm/top/tensor.h @@ -74,6 +74,21 @@ struct StridedSliceParam : public dmlc::Parameter { } }; +struct SliceAxisParam : public dmlc::Parameter { + int axis; + int begin; + int end; + + DMLC_DECLARE_PARAMETER(SliceAxisParam) { + DMLC_DECLARE_FIELD(axis) + .describe("Axis along which to be sliced."); + DMLC_DECLARE_FIELD(begin) + .describe("Index for begin of slice"); + DMLC_DECLARE_FIELD(end).set_default(0) + .describe("Index for end of the slice"); + } +}; + enum TypeFlag { kFloat32 = 0, kFloat64 = 1, diff --git a/nnvm/python/nnvm/frontend/mxnet.py b/nnvm/python/nnvm/frontend/mxnet.py index 179e1126fd4d..29a6f65c6eb2 100644 --- a/nnvm/python/nnvm/frontend/mxnet.py +++ b/nnvm/python/nnvm/frontend/mxnet.py @@ -238,15 +238,15 @@ def _clip(inputs, attrs): def _contrib_multibox_detection(inputs, attrs): clip = _parse_bool_str(attrs, 'clip', default='True') - threshold = attrs.get('threshold') or 0.01 - nms_threshold = attrs.get('nms_threshold') or 0.5 + threshold = attrs.get('threshold', 0.01) + iou_threshold = attrs.get('nms_threshold', 0.5) force_suppress = _parse_bool_str(attrs, 'force_suppress', default='False') variances = tuple([float(x.strip()) for x in 
attrs.get('variances').strip('()').split(',')]) \ if attrs.get('variances') is not None else (0.1, 0.1, 0.2, 0.2) - nms_topk = attrs.get('nms_topk') or -1 + topk = attrs.get('nms_topk', -1) new_attrs0 = {'clip': clip, 'threshold': float(threshold), 'variances': variances} - new_attrs1 = {'nms_threshold': float(nms_threshold), 'force_suppress': force_suppress, - 'nms_topk': int(nms_topk)} + new_attrs1 = {'iou_threshold': float(iou_threshold), 'force_suppress': force_suppress, + 'topk': int(topk)} data, valid_count = _get_nnvm_op('multibox_transform_loc')(inputs[0], inputs[1], inputs[2], **new_attrs0) return _get_nnvm_op('nms')(data, valid_count, **new_attrs1) @@ -314,6 +314,47 @@ def _argmin(inputs, attrs): new_attrs['keepdims'] = _parse_bool_str(attrs, 'keepdims', default="False") return _get_nnvm_op(op_name)(*inputs, **new_attrs) +def _contrib_box_nms(inputs, attrs): + force_suppress = _parse_bool_str(attrs, 'force_suppress', default="False") + overlap_thresh = attrs.get('overlap_thresh', 0.5) + topk = attrs.get('topk', -1) + valid_thresh = attrs.get('valid_thresh', 0) + coord_start = attrs.get('coord_start', 2) + score_index = attrs.get('score_index', 1) + id_index = attrs.get('id_index', -1) + in_format = attrs.get('in_format', 'corner') + out_format = attrs.get('out_format', 'corner') + if int(coord_start) != 2: + _raise_not_supported('coord_start: %s' % coord_start, 'box_nms') + if int(score_index) != 1: + _raise_not_supported('score_index: %s' % score_index, 'box_nms') + if int(id_index) != -1 and int(id_index) != 0: + _raise_not_supported('id_index: %s' % id_index, 'box_nms') + if in_format != 'corner': + _raise_not_supported('in_format: %s' % in_format, 'box_nms') + if out_format != 'corner': + _raise_not_supported('out_format: %s' % out_format, 'box_nms') + + valid_counts, inter_out = \ + _get_nnvm_op('get_valid_counts')(inputs[0], score_threshold=valid_thresh) + nms_out = _get_nnvm_op('nms')(inter_out, valid_counts, + iou_threshold=overlap_thresh, + force_suppress=force_suppress, + topk=topk, do_rearrange=True) + return nms_out + +def _slice_like(inputs, attrs): + op_name = 'slice_like' + axis = attrs.get('axes', ()) + return _get_nnvm_op(op_name)(inputs[0], inputs[1], axis=axis) + +def _slice_axis(inputs, attrs): + op_name, new_attrs = 'slice_axis', {} + new_attrs['axis'] = attrs.get('axis') + new_attrs['begin'] = attrs.get('begin') + new_attrs['end'] = 0 if attrs.get('end') == "None" else attrs.get('end') + return _get_nnvm_op(op_name)(inputs[0], **new_attrs) + _identity_list = ['__add_scalar__', '__add_symbol__', '__div_scalar__', '__div_symbol__', '__mul_scalar__', '__mul_symbol__', '__pow_scalar__', '__rdiv_scalar__', '__rpow_scalar__', @@ -322,7 +363,7 @@ def _argmin(inputs, attrs): 'broadcast_sub', 'broadcast_to', 'cast', 'elemwise_add', 'elemwise_div', 'elemwise_mul', 'elemwise_sub', 'exp', 'flatten', 'log', 'log_softmax', 'max', 'min', 'negative', - 'ones_like', 'relu', 'sigmoid', 'slice_like', 'softmax', + 'ones_like', 'relu', 'sigmoid', 'softmax', 'sum', 'tanh', 'transpose', 'zeros_like', 'gather_nd', 'reshape_like', 'where'] @@ -334,6 +375,13 @@ def _argmin(inputs, attrs): '_plus_scalar' : _rename('__add_scalar__'), '_rdiv_scalar' : _rename('__rdiv_scalar__'), '_rminus_scalar': _rename('__rsub_scalar__'), + '_equal_scalar' : _rename('__equal_scalar__'), + '_not_equal_scalar': _rename('__not_equal_scalar__'), + '_greater_scalar': _rename('__greater_scalar__'), + '_greater_equal_scalar': _rename('__greater_equal_scalar__'), + '_less_scalar': _rename('__less_scalar__'), 
+ '_less_equal_scalar': _rename('__less_equal_scalar__'), + '_contrib_box_nms' : _contrib_box_nms, '_contrib_MultiBoxPrior' : _rename('multibox_prior'), '_contrib_MultiBoxDetection' : _contrib_multibox_detection, '_minimum' : _minimum, @@ -360,6 +408,8 @@ def _argmin(inputs, attrs): 'Reshape' : _reshape, 'slice' : _slice, 'SliceChannel' : _split, + 'slice_axis' : _slice_axis, + 'slice_like' : _slice_like, 'split' : _split, 'Softmax' : _rename('softmax'), 'SoftmaxActivation' : _softmax_activation, diff --git a/nnvm/python/nnvm/top/transform.py b/nnvm/python/nnvm/top/transform.py index 8fde9632a8af..d6c85ea283d3 100644 --- a/nnvm/python/nnvm/top/transform.py +++ b/nnvm/python/nnvm/top/transform.py @@ -83,6 +83,10 @@ def schedule_concatenate(_, outs, target): reg.register_pattern("slice_like", OpPattern.INJECTIVE) reg.register_schedule("slice_like", _fschedule_injective) +# slice_axis +reg.register_pattern("slice_axis", OpPattern.INJECTIVE) +reg.register_schedule("slice_axis", _fschedule_injective) + # where reg.register_pattern("where", OpPattern.INJECTIVE) reg.register_schedule("where", _fschedule_injective) diff --git a/nnvm/python/nnvm/top/vision.py b/nnvm/python/nnvm/top/vision.py index 1b20baab47c3..5df1bb34aa3a 100644 --- a/nnvm/python/nnvm/top/vision.py +++ b/nnvm/python/nnvm/top/vision.py @@ -60,6 +60,21 @@ def compute_multibox_transform_loc(attrs, inputs, _): reg.register_pattern("multibox_detection", OpPattern.OPAQUE) +# Get valid number of anchor boxes +@reg.register_schedule("get_valid_counts") +def schedule_get_valid_counts(_, outs, target): + """Schedule definition of get_valid_counts""" + with tvm.target.create(target): + return topi.generic.schedule_get_valid_counts(outs) + +@reg.register_compute("get_valid_counts") +def compute_get_valid_counts(attrs, inputs, _): + """Compute definition of get_valid_counts""" + score_threshold = attrs.get_float("score_threshold") + return topi.vision.get_valid_counts(inputs[0], score_threshold) + +reg.register_pattern("get_valid_counts", OpPattern.OPAQUE) + # non-maximum suppression @reg.register_schedule("nms") def schedule_nms(_, outs, target): @@ -70,11 +85,12 @@ def schedule_nms(_, outs, target): @reg.register_compute("nms") def compute_nms(attrs, inputs, _): """Compute definition of nms""" - nms_threshold = attrs.get_float('nms_threshold') + iou_threshold = attrs.get_float('iou_threshold') force_suppress = attrs.get_bool('force_suppress') - nms_topk = attrs.get_int('nms_topk') + topk = attrs.get_int('topk') + do_rearrange = attrs.get_bool('do_rearrange') - return topi.vision.nms(inputs[0], inputs[1], nms_threshold, - force_suppress, nms_topk) + return topi.vision.nms(inputs[0], inputs[1], iou_threshold, + force_suppress, topk, do_rearrange) reg.register_pattern("nms", OpPattern.OPAQUE) diff --git a/nnvm/src/top/tensor/elemwise.cc b/nnvm/src/top/tensor/elemwise.cc index 2d9813e22131..9c1687beab35 100644 --- a/nnvm/src/top/tensor/elemwise.cc +++ b/nnvm/src/top/tensor/elemwise.cc @@ -806,6 +806,115 @@ NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__rpow_scalar__) }; }); +NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__equal_scalar__) +.describe(R"code(Tensor equal scalar + +)code" NNVM_ADD_FILELINE) +.set_support_level(3) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + + Tensor out = topi::cast( + binary_scalar_op(attrs, inputs[0], + [](Expr x, Expr y) { return x == y; }), + out_info[0]->dtype + ); + return Array{ out }; +}) +.set_attr("FGradient", MakeZeroGradNodes); + 
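(Each of these scalar-comparison ops lowers to an element-wise compare whose boolean result is cast back to the output dtype; in NumPy terms, roughly:)

    def greater_scalar_ref(x, scalar):
        # e.g. float32 input -> float32 output of 0.0 / 1.0
        return (x > scalar).astype(x.dtype)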
+NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__not_equal_scalar__) +.describe(R"code(Tensor not equal scalar + +)code" NNVM_ADD_FILELINE) +.set_support_level(3) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + Tensor out = topi::cast( + binary_scalar_op(attrs, inputs[0], + [](Expr x, Expr y) { return x != y; }), + out_info[0]->dtype + ); + return Array{ out }; +}) +.set_attr("FGradient", MakeZeroGradNodes); + +NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__greater_scalar__) +.describe(R"code(Tensor greater scalar + +)code" NNVM_ADD_FILELINE) +.set_support_level(3) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + Tensor out = topi::cast( + binary_scalar_op(attrs, inputs[0], + [](Expr x, Expr y) { return x > y; }), + out_info[0]->dtype + ); + return Array{ out }; +}) +.set_attr("FGradient", MakeZeroGradNodes); + +NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__greater_equal_scalar__) +.describe(R"code(Tensor greater equal scalar + +)code" NNVM_ADD_FILELINE) +.set_support_level(3) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + Tensor out = topi::cast( + binary_scalar_op(attrs, inputs[0], + [](Expr x, Expr y) { return x >= y; }), + out_info[0]->dtype + ); + return Array{ out }; +}) +.set_attr("FGradient", MakeZeroGradNodes); + +NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__less_scalar__) +.describe(R"code(Tensor less scalar + +)code" NNVM_ADD_FILELINE) +.set_support_level(3) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + Tensor out = topi::cast( + binary_scalar_op(attrs, inputs[0], + [](Expr x, Expr y) { return x < y; }), + out_info[0]->dtype + ); + return Array{ out }; +}) +.set_attr("FGradient", MakeZeroGradNodes); + +NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__less_equal_scalar__) +.describe(R"code(Tensor less equal scalar + +)code" NNVM_ADD_FILELINE) +.set_support_level(3) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + Tensor out = topi::cast( + binary_scalar_op(attrs, inputs[0], + [](Expr x, Expr y) { return x <= y; }), + out_info[0]->dtype + ); + return Array{ out }; +}) +.set_attr("FGradient", MakeZeroGradNodes); + DMLC_REGISTER_PARAMETER(ElementWiseReduceParam); NNVM_REGISTER_ELEMWISE_REDUCE_OP(elemwise_sum) diff --git a/nnvm/src/top/tensor/transform.cc b/nnvm/src/top/tensor/transform.cc index 9d259ae77d9b..4f09062ac607 100644 --- a/nnvm/src/top/tensor/transform.cc +++ b/nnvm/src/top/tensor/transform.cc @@ -1283,6 +1283,91 @@ NNVM_REGISTER_OP(slice_like) }) .set_support_level(4); +// SliceAxis +DMLC_REGISTER_PARAMETER(SliceAxisParam); + +inline bool SliceAxisShape(const nnvm::NodeAttrs& attrs, + std::vector* in_attrs, + std::vector* out_attrs) { + const SliceAxisParam& param = nnvm::get(attrs.parsed); + const TShape& src_shape = in_attrs->at(0); + int axis = param.axis; + int begin = param.begin; + int end = param.end; + + if (axis < 0) { + axis += src_shape.ndim(); + } + if (begin < 0) { + begin += src_shape[axis]; + } + if (end <= 0) { + end += src_shape[axis]; + } + CHECK_LT(begin, end) + << "Begin index must be smaller than end index: " + << begin << " vs " << end; + + TShape out_shape(src_shape); + out_shape[axis] = end - begin; + NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, out_shape); + return true; +} + +NNVM_REGISTER_OP(slice_axis) +.describe(R"code(Slices along a given axis. 
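(A NumPy analogue of the index handling in SliceAxisShape above — negative begin counts from the end of the axis, and end <= 0 is interpreted as end + dim:)

    def slice_axis_ref(data, axis, begin, end):
        dim = data.shape[axis]
        begin = begin + dim if begin < 0 else begin
        end = end + dim if end <= 0 else end
        index = [slice(None)] * data.ndim
        index[axis] = slice(begin, end)
        return data[tuple(index)]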
+Returns an array slice along a given axis starting from +the begin index to the end index. +)code" NNVM_ADD_FILELINE) +.add_argument("data", "Tensor", "Input data to be sliced.") +.set_num_outputs(1) +.set_num_inputs(1) +.add_arguments(SliceAxisParam::__FIELDS__()) +.set_attr_parser(ParamParser) +.set_attr("FGetAttrDict", ParamGetAttrDict) +.set_attr("FInferShape", SliceAxisShape) +.set_attr("FInferType", ElemwiseType<1, 1>) +.set_attr("FCorrectLayout", ElemwiseArbitraryLayout<1, 1>) +.set_attr( + "FTVMCompute", [](const NodeAttrs& attrs, + const Array& inputs, + const Array& out_info) { + const SliceAxisParam& param = nnvm::get(attrs.parsed); + const Array src_shape = inputs[0]->shape; + Array begin_idx, end_idx, strides; + int axis = param.axis; + int begin = param.begin; + int end = param.end; + + if (axis < 0) { + axis += src_shape.size(); + } + if (begin < 0) { + begin += topi::GetConstInt(src_shape[axis]); + } + if (end <= 0) { + end += topi::GetConstInt(src_shape[axis]); + } + for (size_t i = 0; i < src_shape.size(); ++i) { + begin_idx.push_back(make_const(tvm::Int(32), 0)); + strides.push_back(make_const(tvm::Int(32), 1)); + } + end_idx = Array(src_shape); + begin_idx.Set(axis, make_const(tvm::Int(32), begin)); + end_idx.Set(axis, make_const(tvm::Int(32), end)); + + return Array{ + topi::strided_slice(inputs[0], + GetIntArray(begin_idx), + GetIntArray(end_idx), + GetIntArray(strides)) + }; +}) +.set_attr("FListInputNames", [](const NodeAttrs& attrs) { + return std::vector{"data"}; +}) +.set_support_level(4); + // where inline bool WhereShape(const nnvm::NodeAttrs& attrs, std::vector* in_attrs, diff --git a/nnvm/src/top/vision/nms.cc b/nnvm/src/top/vision/nms.cc index 2680b894255b..a74a135175ba 100644 --- a/nnvm/src/top/vision/nms.cc +++ b/nnvm/src/top/vision/nms.cc @@ -19,6 +19,59 @@ using compiler::FTVMCompute; using tvm::Tensor; using tvm::Array; +DMLC_REGISTER_PARAMETER(GetValidCountsParam); + +bool GetValidCountsShape(const NodeAttrs& attrs, + std::vector *in_attrs, + std::vector *out_attrs) { + TShape dshape = in_attrs->at(0); + TShape vshape = TShape({dshape[0]}); + CHECK_EQ(dshape.ndim(), 3U) << "Input data should be 3-D."; + out_attrs->clear(); + NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, vshape); + NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 1, dshape); + return true; +} + +inline bool GetValidCountsInferType(const NodeAttrs &attrs, + std::vector *in_attrs, + std::vector *out_attrs) { + DTYPE_ASSIGN(out_attrs->at(0), static_cast(kInt32)); + DTYPE_ASSIGN(out_attrs->at(1), in_attrs->at(0)) + return true; +} + +inline bool GetValidCountsInferLayout(const NodeAttrs& attrs, + std::vector *ilayouts, + const std::vector *last_ilayouts, + std::vector *olayouts) { + static const Layout kNCHW("NCHW"); + CHECK_EQ(ilayouts->size(), 1U); + CHECK_EQ(olayouts->size(), 2U); + NNVM_ASSIGN_LAYOUT(*ilayouts, 0, kNCHW); + return true; +} + +NNVM_REGISTER_OP(get_valid_counts) +.describe(R"doc("Get valid count of bounding boxes given +a score threshold. Also moves valid boxes to the top of +input data." 
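(From the Python frontend this op yields the count tensor plus the rearranged data, as the nnvm test below exercises; an illustrative call:)

    import nnvm.symbol as sym

    data = sym.Variable("data")
    valid_counts, inter_data = sym.get_valid_counts(data, score_threshold=0.5)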
+)doc" NNVM_ADD_FILELINE) +.set_num_inputs(1) +.set_num_outputs(2) +.set_attr_parser(ParamParser) +.set_attr("FGetAttrDict", + ParamGetAttrDict) +.add_arguments(GetValidCountsParam::__FIELDS__()) +.add_argument("data", "Tensor", "Input data.") +.set_attr("FListInputNames", [](const NodeAttrs& attrs) { + return std::vector{"data"}; +}) +.set_attr("FInferShape", GetValidCountsShape) +.set_attr("FInferType", GetValidCountsInferType) +.set_attr("FCorrectLayout", GetValidCountsInferLayout) +.set_support_level(4); + DMLC_REGISTER_PARAMETER(NMSParam); bool NMSShape(const NodeAttrs& attrs, diff --git a/nnvm/tests/python/compiler/test_top_level4.py b/nnvm/tests/python/compiler/test_top_level4.py index fc4e62fb7156..aab81565c3ff 100644 --- a/nnvm/tests/python/compiler/test_top_level4.py +++ b/nnvm/tests/python/compiler/test_top_level4.py @@ -573,15 +573,56 @@ def test_multibox_transform_loc(): out = m.get_output(0, tvm.nd.empty(expected_np_out.shape, dtype)) tvm.testing.assert_allclose(out.asnumpy(), expected_np_out, atol=1e-5, rtol=1e-5) +def verify_get_valid_counts(dshape, score_threshold): + dtype = "float32" + batch_size, num_anchor, elem_length = dshape + np_data = np.random.uniform(size=dshape).astype(dtype) + np_out1 = np.zeros(shape=(batch_size,)) + np_out2 = np.zeros(shape=dshape).astype(dtype) + for i in range(batch_size): + np_out1[i] = 0 + inter_idx = 0 + for j in range(num_anchor): + score = np_data[i, j, 1] + if score >= score_threshold: + for k in range(elem_length): + np_out2[i, inter_idx, k] = np_data[i, j, k] + np_out1[i] += 1 + inter_idx += 1 + if j >= np_out1[i]: + for k in range(elem_length): + np_out2[i, j, k] = -1 + + target = "llvm" + ctx = tvm.cpu() + data = sym.Variable("data", dtype=dtype) + valid_counts, inter_data = sym.get_valid_counts(data, score_threshold=score_threshold) + out = sym.Group([valid_counts, inter_data]) + graph, lib, _ = nnvm.compiler.build(out, target, {"data": dshape}) + m = graph_runtime.create(graph, lib, ctx) + m.set_input("data", np_data) + m.run() + out1 = m.get_output(0, tvm.nd.empty(np_out1.shape, "int32")) + out2 = m.get_output(1, tvm.nd.empty(dshape, dtype)) + tvm.testing.assert_allclose(out1.asnumpy(), np_out1, rtol=1e-3) + tvm.testing.assert_allclose(out2.asnumpy(), np_out2, rtol=1e-3) + + +def test_get_valid_counts(): + verify_get_valid_counts((1, 2500, 6), 0) + verify_get_valid_counts((1, 2500, 6), -1) + verify_get_valid_counts((3, 1000, 6), 0.55) + verify_get_valid_counts((16, 500, 6), 0.95) + def test_nms(): dshape = (1, 5, 6) data = sym.Variable("data") valid_count = sym.Variable("valid_count", dtype="int32") - nms_threshold = 0.7 + iou_threshold = 0.7 force_suppress = True - nms_topk = 2 - out = sym.nms(data=data, valid_count=valid_count, nms_threshold=nms_threshold, - force_suppress=force_suppress, nms_topk=nms_topk) + topk = 2 + out = sym.nms(data=data, valid_count=valid_count, iou_threshold=iou_threshold, + force_suppress=force_suppress, topk=topk) np_data = np.array([[[0, 0.8, 1, 20, 25, 45], [1, 0.7, 30, 60, 50, 80], [0, 0.4, 4, 21, 19, 40], [2, 0.9, 35, 61, 52, 79], @@ -656,6 +697,35 @@ def test_slice_like(): axis = (2, 3) verify_slice_like(np_data, np_shape_like, axis) +def verify_slice_axis(dshape, axis, begin, end): + data = sym.Variable("data") + net = sym.slice_axis(data, axis=axis, begin=begin, end=end) + if axis < 0: + axis += len(dshape) + if begin < 0: + begin += dshape[axis] + if end <= 0: + end += dshape[axis] + np_data = np.random.uniform(size=dshape) + slc = [slice(None)] * len(dshape) + slc[axis] = slice(begin, end) 
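+    # e.g. dshape=(20, 30, 40), axis=1, begin=5, end=0 normalizes to
+    # slc = [slice(None), slice(5, 30), slice(None)]; end <= 0 counts from
+    # the end of the axis, with 0 meaning "up to the end".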
+ np_out = np_data[slc] + + dtype = "float32" + for target, ctx in ctx_list(): + graph, lib, _ = nnvm.compiler.build(net, target, {"data": dshape}, dtype=dtype) + m = graph_runtime.create(graph, lib, ctx) + m.set_input("data", np_data) + m.run() + out = m.get_output(0, tvm.nd.empty(np_out.shape, dtype)) + tvm.testing.assert_allclose(out.asnumpy(), np_out, atol=1e-5, rtol=1e-5) + +def test_slice_axis(): + verify_slice_axis((1, 2, 3, 4), 3, 0, 2) + verify_slice_axis((100, 50), -1, 1, -1) + verify_slice_axis((20,), -1, -9, -3) + verify_slice_axis((20, 30, 40), 1, 5, 0) + def verify_where(condition, x, y): dtype = "float32" if len(condition.shape) == 1: @@ -710,6 +780,7 @@ def test_argmax(): np.testing.assert_allclose(out.asnumpy(), np_argmax, atol=1e-5, rtol=1e-5) if __name__ == "__main__": + test_get_valid_counts() test_reshape() test_broadcast() test_reduce() @@ -726,8 +797,10 @@ def test_argmax(): test_flip() test_multibox_prior() test_multibox_transform_loc() + test_get_valid_counts() test_nms() test_slice_like() + test_slice_axis() test_where() test_argmax() print(nnvm.compiler.engine.dump()) diff --git a/topi/python/topi/vision/nms.py b/topi/python/topi/vision/nms.py index 0b113733f8da..f937c8779e2f 100644 --- a/topi/python/topi/vision/nms.py +++ b/topi/python/topi/vision/nms.py @@ -74,14 +74,15 @@ def hybrid_get_valid_counts(data, score_threshold): data.dtype) for i in parallel(batch_size): valid_count[i] = 0 - inter_idx = 0 for j in range(num_anchors): score = data[i, j, 1] if score >= score_threshold: for k in range(box_data_length): - out_tensor[i, inter_idx, k] = data[i, j, k] + out_tensor[i, valid_count[i], k] = data[i, j, k] valid_count[i] += 1 - inter_idx += 1 + if j >= valid_count[i]: + for k in range(box_data_length): + out_tensor[i, j, k] = -1.0 return valid_count, out_tensor @tvm.target.generic_func @@ -168,9 +169,7 @@ def hybrid_nms(data, sorted_index, valid_count, for k in range(valid_count[i]): check_iou = 0 if k > j and output[i, k, 0] >= 0: - if force_suppress: - check_iou = 1 - elif output[i, j, 0] == output[i, k, 0]: + if force_suppress or output[i, j, 0] == output[i, k, 0]: check_iou = 1 if check_iou > 0: batch_idx = i diff --git a/topi/tests/python/test_topi_vision.py b/topi/tests/python/test_topi_vision.py index d77520c60cf8..517d1f7ee80b 100644 --- a/topi/tests/python/test_topi_vision.py +++ b/topi/tests/python/test_topi_vision.py @@ -16,7 +16,7 @@ def verify_get_valid_counts(dshape, score_threshold): batch_size, num_anchor, elem_length = dshape np_data = np.random.uniform(size=dshape).astype(dtype) np_out1 = np.zeros(shape=(batch_size,)) - np_out2 = np.zeros(shape=dshape).astype("float32") + np_out2 = np.zeros(shape=dshape).astype(dtype) for i in range(batch_size): np_out1[i] = 0 inter_idx = 0 @@ -27,6 +27,9 @@ def verify_get_valid_counts(dshape, score_threshold): np_out2[i, inter_idx, k] = np_data[i, j, k] np_out1[i] += 1 inter_idx += 1 + if j >= np_out1[i]: + for k in range(elem_length): + np_out2[i, j, k] = -1.0 def check_device(device): ctx = tvm.context(device, 0) @@ -54,7 +57,7 @@ def check_device(device): def test_get_valid_counts(): verify_get_valid_counts((1, 2500, 6), 0) verify_get_valid_counts((1, 2500, 6), -1) - verify_get_valid_counts((3, 1000, 6), 0.15) + verify_get_valid_counts((3, 1000, 6), 0.55) verify_get_valid_counts((16, 500, 6), 0.95) >>>>>>> Add test for get_valid_counts From 037ca23dc63bcbfdc66b53719ebffe7cd8a19f6c Mon Sep 17 00:00:00 2001 From: Wang Date: Sat, 15 Dec 2018 19:36:37 -0800 Subject: [PATCH 07/43] Add id_index to box_nms op --- 
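For illustration, a minimal sketch of how the new id_index argument flows
through the NNVM symbol interface this series defines (the data and
valid_count variables are placeholders, not part of the patch):

    import nnvm.symbol as sym

    data = sym.Variable("data")                      # [batch, num_anchors, 6]
    valid_count = sym.Variable("valid_count", dtype="int32")
    out = sym.nms(data=data, valid_count=valid_count,
                  iou_threshold=0.5, force_suppress=False, topk=-1,
                  id_index=0, do_rearrange=True)
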
nnvm/include/nnvm/top/nn.h | 3 +++ nnvm/python/nnvm/frontend/mxnet.py | 3 ++- nnvm/python/nnvm/top/vision.py | 4 +++- topi/python/topi/vision/nms.py | 20 +++++++++++++++----- 4 files changed, 23 insertions(+), 7 deletions(-) diff --git a/nnvm/include/nnvm/top/nn.h b/nnvm/include/nnvm/top/nn.h index 543820e724bd..1513be122b41 100644 --- a/nnvm/include/nnvm/top/nn.h +++ b/nnvm/include/nnvm/top/nn.h @@ -455,6 +455,7 @@ struct NMSParam : public dmlc::Parameter { float iou_threshold; bool force_suppress; int topk; + int id_index; bool do_rearrange; DMLC_DECLARE_PARAMETER(NMSParam) { DMLC_DECLARE_FIELD(iou_threshold).set_default(0.5) @@ -463,6 +464,8 @@ struct NMSParam : public dmlc::Parameter { .describe("Suppress all detections regardless of class_id."); DMLC_DECLARE_FIELD(topk).set_default(-1) .describe("Keep maximum top k detections before nms, -1 for no limit."); + DMLC_DECLARE_FIELD(id_index).set_default(0) + .describe("Keep maximum top k detections before nms, -1 for no limit."); DMLC_DECLARE_FIELD(do_rearrange).set_default(false) .describe("Whether to move all valid bounding boxes to the top."); } diff --git a/nnvm/python/nnvm/frontend/mxnet.py b/nnvm/python/nnvm/frontend/mxnet.py index 29a6f65c6eb2..8467beb61e4f 100644 --- a/nnvm/python/nnvm/frontend/mxnet.py +++ b/nnvm/python/nnvm/frontend/mxnet.py @@ -340,7 +340,8 @@ def _contrib_box_nms(inputs, attrs): nms_out = _get_nnvm_op('nms')(inter_out, valid_counts, iou_threshold=overlap_thresh, force_suppress=force_suppress, - topk=topk, do_rearrange=True) + topk=topk, id_index=id_index, + do_rearrange=True) return nms_out def _slice_like(inputs, attrs): diff --git a/nnvm/python/nnvm/top/vision.py b/nnvm/python/nnvm/top/vision.py index 5df1bb34aa3a..f5f41d33e363 100644 --- a/nnvm/python/nnvm/top/vision.py +++ b/nnvm/python/nnvm/top/vision.py @@ -88,9 +88,11 @@ def compute_nms(attrs, inputs, _): iou_threshold = attrs.get_float('iou_threshold') force_suppress = attrs.get_bool('force_suppress') topk = attrs.get_int('topk') + id_index = attrs.get_int('id_index') do_rearrange = attrs.get_bool('do_rearrange') return topi.vision.nms(inputs[0], inputs[1], iou_threshold, - force_suppress, topk, do_rearrange) + force_suppress, topk, id_index, + do_rearrange) reg.register_pattern("nms", OpPattern.OPAQUE) diff --git a/topi/python/topi/vision/nms.py b/topi/python/topi/vision/nms.py index f937c8779e2f..43de07c3dc76 100644 --- a/topi/python/topi/vision/nms.py +++ b/topi/python/topi/vision/nms.py @@ -76,7 +76,7 @@ def hybrid_get_valid_counts(data, score_threshold): valid_count[i] = 0 for j in range(num_anchors): score = data[i, j, 1] - if score >= score_threshold: + if score > score_threshold: for k in range(box_data_length): out_tensor[i, valid_count[i], k] = data[i, j, k] valid_count[i] += 1 @@ -112,7 +112,8 @@ def get_valid_counts(data, score_threshold=0): @hybrid.script def hybrid_nms(data, sorted_index, valid_count, - iou_threshold, force_suppress, topk): + iou_threshold, force_suppress, + topk, id_index): """Hybrid routing for non-maximum suppression. Parameters @@ -137,6 +138,9 @@ def hybrid_nms(data, sorted_index, valid_count, topk : tvm.const Keep maximum top k detections before nms, -1 for no limit. + id_index : tvm.const + index of the class categories, -1 to disable. 
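+        With the [id, score, box coordinates] layout assumed here, the
+        class id sits at index 0, which is also the default.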
+ Returns ------- valid_count : tvm.Tensor or numpy NDArray @@ -169,7 +173,9 @@ def hybrid_nms(data, sorted_index, valid_count, for k in range(valid_count[i]): check_iou = 0 if k > j and output[i, k, 0] >= 0: - if force_suppress or output[i, j, 0] == output[i, k, 0]: + if force_suppress: + check_iou = 1 + elif id_index < 0 or output[i, j, 0] == output[i, k, 0]: check_iou = 1 if check_iou > 0: batch_idx = i @@ -204,7 +210,7 @@ def hybrid_nms(data, sorted_index, valid_count, @tvm.target.generic_func def nms(data, valid_count, iou_threshold=0.5, force_suppress=False, - topk=-1, do_rearrange=False): + topk=-1, id_index=0, do_rearrange=False): """Non-maximum suppression operator for object detection. Parameters @@ -226,6 +232,9 @@ def nms(data, valid_count, iou_threshold=0.5, force_suppress=False, topk : optional, int Keep maximum top k detections before nms, -1 for no limit. + id_index : optional, int + index of the class categories, -1 to disable. + do_rearrange : optional, boolean Whether to move all valid bounding boxes to the top. @@ -282,7 +291,8 @@ def nms(data, valid_count, iou_threshold=0.5, force_suppress=False, out = hybrid_nms(data, sort_tensor, valid_count, tvm.const(iou_threshold, dtype="float32"), tvm.const(force_suppress, dtype="bool"), - tvm.const(topk, dtype="int32")) + tvm.const(topk, dtype="int32"), + tvm.const(id_index, dtype="int32")) if do_rearrange: out = hybrid_rearrange_out(out) From 8f9e9e2c6b52cdf6c2efdbce846d6af2dab7c1b0 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Sun, 16 Dec 2018 07:00:48 +0000 Subject: [PATCH 08/43] Add l2_normalize to from_mxnet --- nnvm/python/nnvm/frontend/mxnet.py | 10 ++++++++++ topi/include/topi/nn/l2_normalize.h | 7 ++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/nnvm/python/nnvm/frontend/mxnet.py b/nnvm/python/nnvm/frontend/mxnet.py index 8467beb61e4f..dffc8d960c88 100644 --- a/nnvm/python/nnvm/frontend/mxnet.py +++ b/nnvm/python/nnvm/frontend/mxnet.py @@ -356,6 +356,15 @@ def _slice_axis(inputs, attrs): new_attrs['end'] = 0 if attrs.get('end') == "None" else attrs.get('end') return _get_nnvm_op(op_name)(inputs[0], **new_attrs) +def _l2_normalize(inputs, attrs): + op_name, new_attrs = 'l2_normalize', {} + mode = attrs.get('mode', 'instance') + if mode != 'channel': + _raise_not_supported('mode: %s' % mode, 'L2Normalization') + new_attrs['eps'] = attrs.get('eps', 1e-10) + new_attrs['axis'] = 1 + return _get_nnvm_op(op_name)(inputs[0], **new_attrs) + _identity_list = ['__add_scalar__', '__add_symbol__', '__div_scalar__', '__div_symbol__', '__mul_scalar__', '__mul_symbol__', '__pow_scalar__', '__rdiv_scalar__', '__rpow_scalar__', @@ -404,6 +413,7 @@ def _slice_axis(inputs, attrs): 'Flatten' : _rename('flatten'), 'FullyConnected': _dense, 'LeakyReLU' : _leaky_relu, + 'L2Normalization' : _l2_normalize, 'Pooling' : _pooling, 'Pooling_v1' : _pooling, 'Reshape' : _reshape, diff --git a/topi/include/topi/nn/l2_normalize.h b/topi/include/topi/nn/l2_normalize.h index a9fd49cbee64..e022d76871a0 100644 --- a/topi/include/topi/nn/l2_normalize.h +++ b/topi/include/topi/nn/l2_normalize.h @@ -30,7 +30,12 @@ inline Tensor l2_normalize(const Tensor& data, const Array& axis, std::string name = "tensor", std::string tag = "l2_normalize") { - CHECK_EQ(data->shape.size(), 4) << "L2 normalization requires 4-D input"; + for (size_t i = 0; i < axis.size(); ++i) { + int ax = topi::detail::GetConstInt(axis[i]); + CHECK_LT(ax, data->shape.size()) << + "Axis " << ax << " exceeds input data dim " << + data->shape.size(); + } auto input_shape = 
data->shape; Tensor dot_value = topi::power(data, static_cast(2.0)); Tensor sum_value = topi::sum(dot_value, axis, true); From 5c5e6f791190c5e8a7109ae8393ba7a7bdc104bb Mon Sep 17 00:00:00 2001 From: Wang Date: Tue, 18 Dec 2018 17:05:42 -0800 Subject: [PATCH 09/43] Modify SSD tutorial --- topi/python/topi/vision/nms.py | 8 +- topi/python/topi/vision/ssd/multibox.py | 226 +++++++++++++----------- tutorials/nnvm/deploy_ssd.py | 2 +- 3 files changed, 123 insertions(+), 113 deletions(-) diff --git a/topi/python/topi/vision/nms.py b/topi/python/topi/vision/nms.py index 43de07c3dc76..1dddffc0a2f4 100644 --- a/topi/python/topi/vision/nms.py +++ b/topi/python/topi/vision/nms.py @@ -21,13 +21,13 @@ def hybrid_rearrange_out(data): Transformed NMS output. 3-D tensor with shape [batch_size, num_anchors, 6]. """ - output = output_tensor((data.shape[0], - data.shape[1], - data.shape[2],), - data.dtype) batch_size = data.shape[0] num_anchors = data.shape[1] elem_length = data.shape[2] + output = output_tensor((batch_size, + num_anchors, + elem_length), + data.dtype) for i in parallel(batch_size): valid_idx = 0 diff --git a/topi/python/topi/vision/ssd/multibox.py b/topi/python/topi/vision/ssd/multibox.py index f1de42430dd6..87a4a84c5ab5 100644 --- a/topi/python/topi/vision/ssd/multibox.py +++ b/topi/python/topi/vision/ssd/multibox.py @@ -1,75 +1,70 @@ -# pylint: disable=invalid-name, no-member, too-many-locals, too-many-arguments +# pylint: disable=invalid-name, no-member, too-many-locals, too-many-arguments, undefined-variable """SSD multibox operators""" from __future__ import absolute_import as _abs -import math import tvm -from tvm import api +from tvm import hybrid +from tvm.intrin import exp, sqrt import topi from ..nms import nms -def multibox_prior_ir(data, out, sizes, ratios, steps, offsets): - """Low level IR routing for multibox_prior operator. +@hybrid.script +def hybrid_multibox_prior(data, sizes, ratios, steps, offsets): + """Hybrid routing for multibox_prior operator. Parameters ---------- - data : Buffer - Input data buffer. + data : tvm.Tensor or numpy NDArray + 4-D tensor with shape [batch, channel, height, width]] - out : Buffer - Output buffer. + sizes : tvm.ndarray + 1-D tensor of sizes for anchor boxes. - sizes : tuple of float - Tuple of sizes for anchor boxes. - - ratios : tuple of float - Tuple of ratios for anchor boxes. + ratios : tvm.ndarray + 1-D tensor of ratios for anchor boxes. - steps : Tuple of float - Priorbox step across y and x, -1 for auto calculation. + steps : tvm.ndarray + 1-D tensor of priorbox step across y and x, -1 for auto calculation. - offsets : tuple of int - Priorbox center offsets, y and x respectively. + offsets : tvm.ndarray + 1-D tensor priorbox center offsets, y and x respectively. Returns ------- - stmt : Stmt - The result IR statement. 
+ output : tvm.Tensor or numpy NDArray + 3-D tensor with shape [1, h_in * w_in * (num_sizes + num_ratios - 1), 4] """ - ib = tvm.ir_builder.create() - p_out = ib.buffer_ptr(out) - in_height = data.shape[2] - in_width = data.shape[3] - num_sizes = len(sizes) - num_ratios = len(ratios) - size_ratio_concat = sizes + ratios + in_height, in_width = data.shape[2], data.shape[3] + num_sizes, num_ratios = sizes.shape[0], ratios.shape[0] + num_boxes = in_height * in_width * (num_sizes + num_ratios - 1) + output = output_tensor((1, num_boxes, 4), data.dtype) steps_h = steps[0] if steps[0] > 0 else 1.0 / in_height steps_w = steps[1] if steps[1] > 0 else 1.0 / in_width offset_h = offsets[0] offset_w = offsets[1] - with ib.for_range(0, in_height, for_type="parallel", name="i") as i: + for i in parallel(in_height): center_h = (i + offset_h) * steps_h - with ib.for_range(0, in_width, name="j") as j: + for j in range(in_width): center_w = (j + offset_w) * steps_w for k in range(num_sizes + num_ratios - 1): - w = tvm.if_then_else(k < num_sizes, - size_ratio_concat[k] * in_height / in_width / 2.0, - size_ratio_concat[0] * in_height / in_width * - math.sqrt(size_ratio_concat[k + 1]) / 2.0) - h = tvm.if_then_else( - k < num_sizes, size_ratio_concat[k] / 2.0, - size_ratio_concat[0] / math.sqrt(size_ratio_concat[k + 1]) / 2.0) - count = (i * in_width * (num_sizes + num_ratios - 1) + - j * (num_sizes + num_ratios - 1) + k) * 4 - p_out[count] = center_w - w - p_out[count + 1] = center_h - h - p_out[count + 2] = center_w + w - p_out[count + 3] = center_h + h - - return ib.get() + if k < num_sizes: + w = sizes[k] * in_height / in_width / 2.0 + h = sizes[k] / 2.0 + else: + w = sizes[0] * in_height / in_width \ + * sqrt(ratios[k - num_sizes + 1]) / 2.0 + h = sizes[0] * sqrt(ratios[k - num_sizes + 1]) / 2.0 + count = i * in_width * (num_sizes + num_ratios - 1) \ + + j * (num_sizes + num_ratios - 1) + k + output[0, count, 0] = center_w - w + output[0, count, 1] = center_h - h + output[0, count, 2] = center_w + w + output[0, count, 3] = center_h + h + + return output @tvm.target.generic_func @@ -101,48 +96,62 @@ def multibox_prior(data, sizes=(1,), ratios=(1,), steps=(-1, -1), offsets=(0.5, out : tvm.Tensor 3-D tensor with shape [1, h_in * w_in * (num_sizes + num_ratios - 1), 4] """ - num_sizes = len(sizes) - num_ratios = len(ratios) - oshape = (1, data.shape[2] * data.shape[3] * (num_sizes + num_ratios - 1), 4) - out = tvm.extern(oshape, [data], lambda ins, outs: - multibox_prior_ir(ins[0], outs[0], sizes, ratios, steps, offsets), - tag="multibox_prior") + out = hybrid_multibox_prior(data, sizes, ratios, steps, offsets) if clip: out = topi.clip(out, 0, 1) return out - -def transform_loc_ir(cls_prob, loc_pred, anchor, valid_count, out, clip, threshold, variances): - """Low level IR routing for transform location in multibox_detection operator. +@hybrid.script +def _hybridy_transform_loc(box, pred_loc, variance, clip): + """Transform prior anchor box to output box through location predictions. 
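+
+    The decoding below takes the anchor corners (al, at, ar, ab), forms the
+    anchor center (ax, ay) and size (aw, ah), and combines them with the
+    predicted offsets (px, py, pw, ph) and variances (vx, vy, vw, vh):
+        ox = px * vx * aw + ax,      oy = py * vy * ah + ay
+        ow = exp(pw * vw) * aw / 2,  oh = exp(ph * vh) * ah / 2
+    (ox, oy) is the output center and (ow, oh) the half width/height; the
+    resulting corners are optionally clipped to [0, 1].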
+ """ + al, at, ar, ab = box[0], box[1], box[2], box[3] + px, py, pw, ph = pred_loc[0], pred_loc[1], \ + pred_loc[2], pred_loc[3] + vx, vy, vw, vh = variance[0], variance[1], \ + variance[2], variance[3] + aw = ar - al + ah = ab - at + ax = (al + ar) / 2.0 + ay = (at + ab) / 2.0 + ox = px * vx * aw + ax + oy = py * vy * ah + ay + ow = exp(pw * vw) * aw / 2.0 + oh = exp(ph * vh) * ah / 2.0 + out_l = max(0, min(1, ox - ow)) if clip else ox - ow + out_t = max(0, min(1, oy - oh)) if clip else oy - oh + out_r = max(0, min(1, ox + ow)) if clip else ox + ow + out_b = max(0, min(1, oy + oh)) if clip else oy + oh + return out_l, out_t, out_r, out_b + +@hybrid.script +def hybrid_multibox_transform_loc(cls_prob, loc_pred, anchor, + clip, threshold, variances): + """Hybrid routing for transform location in multibox_detection operator. Parameters ---------- - cls_prob : Buffer - Buffer of class probabilities. - - loc_pred : Buffer - Buffer of location regression predictions. + cls_prob : tvm.Tensor or numpy NDArray + 3-D tensor of class probabilities. - anchor : Buffer - Buffer of prior anchor boxes. + loc_pred : tvm.Tensor or numpy NDArray + 3-D tensor of location regression predictions. - valid_count : Buffer - Buffer of number of valid output boxes. + anchor : tvm.Tensor or numpy NDArray + 3-D tensor of prior anchor boxes. - out : Buffer - Output buffer. - - clip : boolean + clip : tvm.const Whether to clip out-of-boundary boxes. - threshold : float + threshold : tvm.const Threshold to be a positive prediction. - variances : tuple of float + variances : tvm.ndarray Variances to be decoded from box regression output. Returns ------- +<<<<<<< HEAD stmt : Stmt The result IR statement. """ @@ -169,21 +178,26 @@ def transform_loc(loc, loc_base_idx, anchor, anchor_base_idx, clip, vx, vy, vw, tvm.if_then_else(clip, tvm.max(0, tvm.min(1, oy - oh)), oy - oh), \ tvm.if_then_else(clip, tvm.max(0, tvm.min(1, ox + ow)), ox + ow), \ tvm.if_then_else(clip, tvm.max(0, tvm.min(1, oy + oh)), oy + oh) +======= + out_loc : tvm.Tensor or numpy NDArray + 3-D tensor of transformed location. +>>>>>>> Modify SSD tutorial + valid_count : tvm.Tensor or numpy NDArray + 1_d tensor of valid counts for boxes. 
+ """ batch_size = cls_prob.shape[0] num_classes = cls_prob.shape[1] num_anchors = cls_prob.shape[2] + out_loc = output_tensor((batch_size, num_anchors, 6), + loc_pred.dtype) + valid_count = output_tensor((batch_size,), "int32") - ib = tvm.ir_builder.create() - p_cls_prob = ib.buffer_ptr(cls_prob) - p_loc_pred = ib.buffer_ptr(loc_pred) - p_anchor = ib.buffer_ptr(anchor) - p_valid_count = ib.buffer_ptr(valid_count) - p_out = ib.buffer_ptr(out) - with ib.for_range(0, batch_size, for_type="parallel", name="n") as n: - p_valid_count[n] = 0 - with ib.for_range(0, num_anchors, name="i") as i: + for i in parallel(batch_size): + valid_count[i] = 0 + for j in range(num_anchors): # Find the predicted class id and probability +<<<<<<< HEAD score = ib.allocate('float32', (1,), name="score", scope="local") cls_id = ib.allocate('int32', (1,), name="id", scope="local") score[0] = -1.0 @@ -195,21 +209,30 @@ def transform_loc(loc, loc_base_idx, anchor, anchor_base_idx, clip, vx, vy, vw, score[0] = tvm.max(temp, score[0]) with ib.if_scope(tvm.all(cls_id[0] > 0, score[0] < threshold)): cls_id[0] = 0 +======= + score = -1.0 + cls_id = 0 + for k in range(num_classes): + if k > 0: + temp = cls_prob[i, k, j] + cls_id = j if temp > score else cls_id + score = max(temp, score) + if cls_id > 0 and score < threshold: + cls_id = 0 +>>>>>>> Modify SSD tutorial # [id, prob, xmin, ymin, xmax, ymax] # Remove background, restore original id - with ib.if_scope(cls_id[0] > 0): - out_base_idx = n * num_anchors * 6 + p_valid_count[n] * 6 - p_out[out_base_idx] = cls_id[0] - 1.0 - p_out[out_base_idx + 1] = score[0] - offset = i * 4 - p_out[out_base_idx + 2], p_out[out_base_idx + 3], p_out[out_base_idx + 4], \ - p_out[out_base_idx + 5] = transform_loc(p_loc_pred, n * num_anchors * 4 + offset, - p_anchor, offset, clip, variances[0], - variances[1], variances[2], variances[3]) - p_valid_count[n] += 1 - - return ib.get() - + if cls_id > 0: + out_loc[i, valid_count[i], 0] = cls_id - 1.0 + out_loc[i, valid_count[i], 1] = score + out_coord = _hybridy_transform_loc(anchor[j], loc_pred[i, j], + variances, clip) + out_loc[i, valid_count[i], 2] = out_coord[0] + out_loc[i, valid_count[i], 3] = out_coord[1] + out_loc[i, valid_count[i], 4] = out_coord[2] + out_loc[i, valid_count[i], 5] = out_coord[3] + + return out_loc, valid_count @tvm.target.generic_func def multibox_transform_loc(cls_prob, loc_pred, anchor, clip=True, threshold=0.01, @@ -240,24 +263,11 @@ def multibox_transform_loc(cls_prob, loc_pred, anchor, clip=True, threshold=0.01 ------- ret : tuple of tvm.Tensor """ - batch_size = cls_prob.shape[0] - num_anchors = anchor.shape[1] - oshape = (batch_size, num_anchors, 6) - # Define data alignment for intermediate buffer - valid_count_dtype = "int32" - valid_count_buf = api.decl_buffer((batch_size,), valid_count_dtype, - "valid_count_buf", data_alignment=4) - out_buf = api.decl_buffer(oshape, cls_prob.dtype, "out_buf", data_alignment=8) - valid_count, out = \ - tvm.extern([(batch_size,), oshape], - [cls_prob, loc_pred, anchor], - lambda ins, outs: transform_loc_ir( - ins[0], ins[1], ins[2], outs[0], outs[1], clip, threshold, variances), - dtype=[valid_count_dtype, cls_prob.dtype], - out_buffers=[valid_count_buf, out_buf], - tag="multibox_transform_loc") - return [out, valid_count] - + out, valid_count = hybrid_multibox_transform_loc(cls_prob, loc_pred, anchor, + tvm.const(clip, "bool"), + tvm.const(threshold, "float32"), + variances) + return out, valid_count @tvm.target.generic_func def multibox_detection(cls_prob, loc_pred, anchor, 
clip=True, threshold=0.01, nms_threshold=0.5, diff --git a/tutorials/nnvm/deploy_ssd.py b/tutorials/nnvm/deploy_ssd.py index eadb8fd28e0c..1a71c96eaa0c 100644 --- a/tutorials/nnvm/deploy_ssd.py +++ b/tutorials/nnvm/deploy_ssd.py @@ -61,7 +61,7 @@ image_url = "https://cloud.githubusercontent.com/assets/3307514/20012567/" \ "cbb60336-a27d-11e6-93ff-cbc3f09f5c9e.jpg" inference_symbol_folder = \ -"c1904e900848df4548ce5dfb18c719c7-a28c4856c827fe766aa3da0e35bad41d44f0fb26" + "c1904e900848df4548ce5dfb18c719c7-a28c4856c827fe766aa3da0e35bad41d44f0fb26" inference_symbol_url = "https://gist.github.com/kevinthesun/c1904e900848df4548ce5dfb18c719c7/" \ "archive/a28c4856c827fe766aa3da0e35bad41d44f0fb26.zip" From 3054af5402acb8f3a126a1444215c1aa304e91ce Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Wed, 19 Dec 2018 02:08:44 +0000 Subject: [PATCH 10/43] Fix tutorial --- tutorials/nnvm/deploy_ssd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tutorials/nnvm/deploy_ssd.py b/tutorials/nnvm/deploy_ssd.py index 1a71c96eaa0c..f7e3b19f9767 100644 --- a/tutorials/nnvm/deploy_ssd.py +++ b/tutorials/nnvm/deploy_ssd.py @@ -165,4 +165,4 @@ def display(img, out, thresh=0.5): plt.show() image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) -display(image, tvm_output.asnumpy()[0], thresh=0.45) +display(image, tvm_output.asnumpy()[0], thresh=0.45) \ No newline at end of file From ef21b022fb4fef9c58ddb18e4e6ad9cb8c74b774 Mon Sep 17 00:00:00 2001 From: Wang Date: Mon, 7 Jan 2019 17:41:39 -0800 Subject: [PATCH 11/43] Relay support --- include/tvm/relay/attrs/transform.h | 15 +++ include/tvm/relay/attrs/vision.h | 32 +++-- nnvm/include/nnvm/top/nn.h | 18 +-- nnvm/src/top/vision/nms.cc | 1 - nnvm/tests/python/compiler/test_top_level4.py | 10 +- .../python/frontend/mxnet/test_forward.py | 13 +- python/tvm/relay/frontend/mxnet.py | 43 +++++++ python/tvm/relay/op/_transform.py | 1 + python/tvm/relay/op/transform.py | 28 +++- python/tvm/relay/op/vision/__init__.py | 2 +- .../op/vision/{_multibox.py => _vision.py} | 25 +++- python/tvm/relay/op/vision/nms.py | 41 +++++- src/relay/op/tensor/transform.cc | 121 ++++++++++++++++-- src/relay/op/vision/multibox_op.cc | 6 +- src/relay/op/vision/nms.cc | 61 ++++++++- tests/python/relay/test_op_level4.py | 23 ++++ topi/python/topi/testing/__init__.py | 4 + topi/python/topi/testing/slice_axis_python.py | 34 +++++ topi/tests/python/test_topi_vision.py | 1 - 19 files changed, 425 insertions(+), 54 deletions(-) rename python/tvm/relay/op/vision/{_multibox.py => _vision.py} (72%) create mode 100644 topi/python/topi/testing/slice_axis_python.py diff --git a/include/tvm/relay/attrs/transform.h b/include/tvm/relay/attrs/transform.h index fea2c960d032..44b910aaf0bf 100644 --- a/include/tvm/relay/attrs/transform.h +++ b/include/tvm/relay/attrs/transform.h @@ -171,6 +171,21 @@ struct StridedSliceAttrs : public tvm::AttrsNode { } }; +struct SliceAxisAttrs : public tvm::AttrsNode { + int axis; + int begin; + int end; + + TVM_DECLARE_ATTRS(SliceAxisAttrs, "relay.attrs.SliceAxisAttrs") { + TVM_ATTR_FIELD(axis) + .describe("Axis along which to be sliced."); + TVM_ATTR_FIELD(begin) + .describe("Index for begin of slice"); + TVM_ATTR_FIELD(end).set_default(0) + .describe("Index for end of the slice"); + } +}; + struct SliceLikeAttrs : public tvm::AttrsNode { Array axes; diff --git a/include/tvm/relay/attrs/vision.h b/include/tvm/relay/attrs/vision.h index df059a6238e1..345a67655552 100644 --- a/include/tvm/relay/attrs/vision.h +++ b/include/tvm/relay/attrs/vision.h @@ -58,19 +58,35 @@ struct 
MultiBoxTransformLocAttrs } }; -/*! \brief Attributes used in non_maximum_suppression operators */ +/*! \brief Attributes used in get_valid_counts operator */ +struct GetValidCountsAttrs : public tvm::AttrsNode{ + double score_threshold; + + TVM_DECLARE_ATTRS(GetValidCountsAttrs, "relay.attrs.GetValidCountsAttrs") { + TVM_ATTR_FIELD(score_threshold).set_default(0.0) + .describe("Lower limit of score for valid bounding boxes."); + } +}; + +/*! \brief Attributes used in non_maximum_suppression operator */ struct NMSAttrs : public tvm::AttrsNode{ - double overlap_threshold; + double iou_threshold; bool force_suppress; int topk; + int id_index; + bool do_rearrange; TVM_DECLARE_ATTRS(NMSAttrs, "relay.attrs.NMSAttrs") { - TVM_ATTR_FIELD(overlap_threshold).set_default(0.5) - .describe("Non-maximum suppression threshold."); - TVM_ATTR_FIELD(force_suppress).set_default(false) - .describe("Suppress all detections regardless of class_id."); - TVM_ATTR_FIELD(topk).set_default(-1) - .describe("Keep maximum top k detections before nms, -1 for no limit."); + TVM_ATTR_FIELD(iou_threshold).set_default(0.5) + .describe("Non-maximum suppression threshold."); + TVM_ATTR_FIELD(force_suppress).set_default(false) + .describe("Suppress all detections regardless of class_id."); + TVM_ATTR_FIELD(topk).set_default(-1) + .describe("Keep maximum top k detections before nms, -1 for no limit."); + TVM_ATTR_FIELD(id_index).set_default(0) + .describe("Axis index of id."); + TVM_ATTR_FIELD(do_rearrange).set_default(false) + .describe("Whether to move all valid bounding boxes to the top."); } }; diff --git a/nnvm/include/nnvm/top/nn.h b/nnvm/include/nnvm/top/nn.h index 1513be122b41..82f3230b4931 100644 --- a/nnvm/include/nnvm/top/nn.h +++ b/nnvm/include/nnvm/top/nn.h @@ -402,9 +402,9 @@ struct LayoutTransformParam : public dmlc::Parameter { DMLC_DECLARE_PARAMETER(LayoutTransformParam) { DMLC_DECLARE_FIELD(src_layout).set_default("__undef__") - .describe("Dimension ordering of data"); + .describe("Dimension ordering of data"); DMLC_DECLARE_FIELD(dst_layout).set_default("__undef__") - .describe("Dimension ordering of data."); + .describe("Dimension ordering of data."); } }; @@ -419,13 +419,13 @@ struct MultiBoxPriorParam : public dmlc::Parameter { DMLC_DECLARE_FIELD(sizes).set_default(Tuple({1.0})) .describe("List of sizes of generated MultiBoxPriores."); DMLC_DECLARE_FIELD(ratios).set_default(Tuple({1.0})) - .describe("List of aspect ratios of generated MultiBoxPriores."); + .describe("List of aspect ratios of generated MultiBoxPriores."); DMLC_DECLARE_FIELD(steps).set_default(Tuple({-1.0, -1.0})) - .describe("Priorbox step across y and x, -1 for auto calculation."); + .describe("Priorbox step across y and x, -1 for auto calculation."); DMLC_DECLARE_FIELD(offsets).set_default(Tuple({0.5, 0.5})) - .describe("Priorbox center offsets, y and x respectively."); + .describe("Priorbox center offsets, y and x respectively."); DMLC_DECLARE_FIELD(clip).set_default(false) - .describe("Whether to clip out-of-boundary boxes."); + .describe("Whether to clip out-of-boundary boxes."); } }; @@ -461,11 +461,11 @@ struct NMSParam : public dmlc::Parameter { DMLC_DECLARE_FIELD(iou_threshold).set_default(0.5) .describe("Non-maximum suppression threshold."); DMLC_DECLARE_FIELD(force_suppress).set_default(false) - .describe("Suppress all detections regardless of class_id."); + .describe("Suppress all detections regardless of class_id."); DMLC_DECLARE_FIELD(topk).set_default(-1) - .describe("Keep maximum top k detections before nms, -1 for no 
limit."); - DMLC_DECLARE_FIELD(id_index).set_default(0) .describe("Keep maximum top k detections before nms, -1 for no limit."); + DMLC_DECLARE_FIELD(id_index).set_default(0) + .describe("Axis index for id."); DMLC_DECLARE_FIELD(do_rearrange).set_default(false) .describe("Whether to move all valid bounding boxes to the top."); } diff --git a/nnvm/src/top/vision/nms.cc b/nnvm/src/top/vision/nms.cc index a74a135175ba..71b4c3ff7860 100644 --- a/nnvm/src/top/vision/nms.cc +++ b/nnvm/src/top/vision/nms.cc @@ -11,7 +11,6 @@ #include #include #include "../op_common.h" -#include "../elemwise_op_common.h" namespace nnvm { namespace top { diff --git a/nnvm/tests/python/compiler/test_top_level4.py b/nnvm/tests/python/compiler/test_top_level4.py index aab81565c3ff..b84621128614 100644 --- a/nnvm/tests/python/compiler/test_top_level4.py +++ b/nnvm/tests/python/compiler/test_top_level4.py @@ -700,16 +700,8 @@ def test_slice_like(): def verify_slice_axis(dshape, axis, begin, end): data = sym.Variable("data") net = sym.slice_axis(data, axis=axis, begin=begin, end=end) - if axis < 0: - axis += len(dshape) - if begin < 0: - begin += dshape[axis] - if end <= 0: - end += dshape[axis] np_data = np.random.uniform(size=dshape) - slc = [slice(None)] * len(dshape) - slc[axis] = slice(begin, end) - np_out = np_data[slc] + np_out = topi.testing.slice_axis_python(np_data, axis, begin, end) dtype = "float32" for target, ctx in ctx_list(): diff --git a/nnvm/tests/python/frontend/mxnet/test_forward.py b/nnvm/tests/python/frontend/mxnet/test_forward.py index e046f39f02ca..c9d1c7795489 100644 --- a/nnvm/tests/python/frontend/mxnet/test_forward.py +++ b/nnvm/tests/python/frontend/mxnet/test_forward.py @@ -227,6 +227,7 @@ def test_forward_slice(): mx_sym = mx.sym.slice(data, begin=(-1, 1), end=(-3, 4), step=(-1, 2)) verify_mxnet_frontend_impl(mx_sym, (3, 4), (2, 2)) +<<<<<<< HEAD def test_forward_maximum(): a = mx.sym.var('a') b = mx.sym.var('b') @@ -289,6 +290,15 @@ def test_forward_minimum(): tvm_out = m.get_output(0, tvm.nd.empty(out_shape, dtype)).asnumpy() tvm.testing.assert_allclose(mx_out, tvm_out, rtol=1e-5, atol=1e-5) +def test_forward_slice_axis(): + data = mx.sym.var('data') + mx_sym = mx.sym.slice_axis(data, axis=1, begin=-5) + verify_mxnet_frontend_impl(mx_sym, (1, 10, 6), (1, 5, 6)) + +def test_forward_l2_normalize(): + data = mx.sym.var('data') + mx_sym = mx.sym.L2Normalization(data, mode="channel") + verify_mxnet_frontend_impl(mx_sym, (2, 3, 4, 5), (2, 3, 4, 5)) if __name__ == '__main__': test_forward_mlp() @@ -315,4 +325,5 @@ def test_forward_minimum(): test_forward_slice() test_forward_maximum() test_forward_minimum() - + test_forward_slice_axis() + test_forward_l2_normalize() diff --git a/python/tvm/relay/frontend/mxnet.py b/python/tvm/relay/frontend/mxnet.py index 2e0ccd07fdc1..d53a4f5f75a8 100644 --- a/python/tvm/relay/frontend/mxnet.py +++ b/python/tvm/relay/frontend/mxnet.py @@ -380,6 +380,47 @@ def _mx_proposal(inputs, attrs): return _op.vision.proposal(inputs[0], inputs[1], inputs[2], **new_attrs) +def _mx_box_nms(inputs, attrs): + force_suppress = attrs.get_bool("force_suppress", False) + overlap_thresh = attrs.get_float('overlap_thresh', 0.5) + topk = attrs.get_int('topk', -1) + valid_thresh = attrs.get_float('valid_thresh', 0) + coord_start = attrs.get_int('coord_start', 2) + score_index = attrs.get_int('score_index', 1) + id_index = attrs.get_int('id_index', -1) + in_format = attrs.get_str('in_format', 'corner') + out_format = attrs.get_str('out_format', 'corner') + if coord_start != 2: + raise 
RuntimeError('coord_start %s is not supported.' % coord_start) + if score_index != 1: + raise RuntimeError('score_index %s is not supported.' % score_index) + if id_index != -1 and int(id_index) != 0: + raise RuntimeError('id_index %s is not supported.' % id_index) + if in_format != 'corner': + raise RuntimeError('in_format %s is not supported.' % in_format) + if out_format != 'corner': + raise RuntimeError('out_format %s is not supported.' % out_format) + + valid_counts, inter_out = \ + _op.vision.get_valid_counts(inputs[0], score_threshold=valid_thresh) + nms_out = _op.vision.nms(inter_out, valid_counts, + iou_threshold=overlap_thresh, + force_suppress=force_suppress, + topk=topk, id_index=id_index, + do_rearrange=True) + return nms_out + + +def _mx_l2_normalize(inputs, attrs): + new_attrs = {} + mode = attrs.get_str('mode', 'instance') + if mode != 'channel': + raise RuntimeError('mode %s is not supported.' % mode) + new_attrs['eps'] = attrs.get_float('eps', 1e-10) + new_attrs['axis'] = 1 + return _op.nn.l2_normalize(inputs[0], **new_attrs) + + # Note: due to attribute conversion constraint # ops in the identity set must be attribute free _identity_list = [ @@ -481,7 +522,9 @@ def _mx_proposal(inputs, attrs): "slice" : _mx_slice, "slice_like" : _mx_slice_like, "slice_axis" : _mx_slice_axis, + "L2Normalization" : _mx_l2_normalize,∂ "SliceChannel" : _mx_split, + "slice_axis" : _mx_slice_axis, "split" : _mx_split, "expand_dims" : _mx_expand_dims, "Concat" : _mx_concat, diff --git a/python/tvm/relay/op/_transform.py b/python/tvm/relay/op/_transform.py index 1389f96b8325..83b5ce5a854f 100644 --- a/python/tvm/relay/op/_transform.py +++ b/python/tvm/relay/op/_transform.py @@ -21,6 +21,7 @@ _reg.register_schedule("arange", schedule_injective) _reg.register_schedule("cast", schedule_injective) _reg.register_schedule("strided_slice", schedule_injective) +_reg.register_schedule("slice_axis", schedule_injective) _reg.register_schedule("slice_like", schedule_injective) _reg.register_schedule("split", schedule_injective) _reg.register_schedule("take", schedule_injective) diff --git a/python/tvm/relay/op/transform.py b/python/tvm/relay/op/transform.py index 845ee02b0582..f19aa19772b4 100644 --- a/python/tvm/relay/op/transform.py +++ b/python/tvm/relay/op/transform.py @@ -456,7 +456,7 @@ def strided_slice(data, begin, end, strides=None): The indices to begin with in the slicing. end: list of int - Indicies indicating end of the slice. + Indices indicating end of the slice. strides: list of int, optional Specifies the stride values, it can be negative in that case, @@ -471,6 +471,32 @@ def strided_slice(data, begin, end, strides=None): return _make.strided_slice(data, list(begin), list(end), list(strides)) +def slice_axis(data, axis, begin, end=None): + """Slice input array along specific axis. + + Parameters + ---------- + data : relay.Expr + The source array to be sliced. + + axis : int + Axis to be sliced. + + begin: int + The index to begin with in the slicing. + + end: int, optional + The index indicating end of the slice. + + Returns + ------- + ret : relay.Expr + The computed result. + """ + end = end or 0 + return _make.slice_axis(data, axis, begin, end) + + def slice_like(data, shape_like, axes=None): """Slice the first input with respect to the second input. 
diff --git a/python/tvm/relay/op/vision/__init__.py b/python/tvm/relay/op/vision/__init__.py index 10cf6c2fd3ee..0cee4e4faeec 100644 --- a/python/tvm/relay/op/vision/__init__.py +++ b/python/tvm/relay/op/vision/__init__.py @@ -6,6 +6,6 @@ from .nms import * from .rcnn import * from .yolo import * -from . import _multibox from . import _rcnn from . import _yolo +from . import _vision diff --git a/python/tvm/relay/op/vision/_multibox.py b/python/tvm/relay/op/vision/_vision.py similarity index 72% rename from python/tvm/relay/op/vision/_multibox.py rename to python/tvm/relay/op/vision/_vision.py index e9ef43f7e06f..2d15562995ec 100644 --- a/python/tvm/relay/op/vision/_multibox.py +++ b/python/tvm/relay/op/vision/_vision.py @@ -54,6 +54,23 @@ def compute_multibox_transform_loc(attrs, inputs, _, target): reg.register_pattern("vision.multibox_detection", OpPattern.OPAQUE) +# Get counts of valid boxes +@reg.register_schedule("vision.get_valid_counts") +def schedule_get_valid_counts(_, outs, target): + """Schedule definition of get_valid_counts""" + with target: + return topi.generic.schedule_nms(outs) + + +@reg.register_compute("vision.get_valid_counts") +def compute_get_valid_counts(attrs, inputs, _, target): + """Compute definition of get_valid_counts""" + score_threshold = get_const_float(attrs.score_threshold) + return topi.vision.get_valid_counts(inputs[0], score_threshold) + +reg.register_pattern("vision.get_valid_counts", OpPattern.OPAQUE) + + # non-maximum suppression @reg.register_schedule("vision.nms") def schedule_nms(_, outs, target): @@ -65,12 +82,14 @@ def schedule_nms(_, outs, target): @reg.register_compute("vision.nms") def compute_nms(attrs, inputs, _, target): """Compute definition of nms""" - overlap_threshold = get_const_float(attrs.overlap_threshold) + iou_threshold = get_const_float(attrs.iou_threshold) force_suppress = bool(get_const_int(attrs.force_suppress)) topk = get_const_int(attrs.topk) + id_index = get_const_int(attrs.id_index) + do_rearrange = bool(get_const_int(attrs.do_rearrange)) return [ - topi.vision.nms(inputs[0], inputs[1], overlap_threshold, - force_suppress, topk) + topi.vision.nms(inputs[0], inputs[1], iou_threshold, + force_suppress, topk, id_index, do_rearrange) ] diff --git a/python/tvm/relay/op/vision/nms.py b/python/tvm/relay/op/vision/nms.py index 8035e3030b17..aecc111204b9 100644 --- a/python/tvm/relay/op/vision/nms.py +++ b/python/tvm/relay/op/vision/nms.py @@ -2,11 +2,37 @@ from __future__ import absolute_import as _abs from . import _make +def get_valid_counts(data, + score_threshold): + """Get valid count of bounding boxes given a score threshold. + Also moves valid boxes to the top of input data. + + Parameters + ---------- + data : relay.Expr + Input data. 3-D tensor with shape [batch_size, num_anchors, 6]. + + score_threshold : optional, float + Lower limit of score for valid bounding boxes. + + Returns + ------- + out_tensor : relay.Expr + Rearranged data tensor. + + valid_count : relay.Expr + 1-D tensor for valid number of boxes. + """ + return _make.get_valid_counts(data, score_threshold) + + def nms(data, valid_count, - overlap_threshold=0.5, + iou_threshold=0.5, force_suppress=False, - topk=-1): + topk=-1, + id_index=0, + do_rearrange=False): """Non-maximum suppression operator for object detection. Parameters @@ -19,7 +45,7 @@ def nms(data, valid_count : relay.Expr 1-D tensor for valid number of boxes. - overlap_threshold : float, optional + iou_threshold : float, optional Non-maximum suppression threshold. 
force_suppress : bool, optional @@ -28,9 +54,16 @@ def nms(data, topk : int, optional Keep maximum top k detections before nms, -1 for no limit. + id_index : optional, int + index of the class categories, -1 to disable. + + do_rearrange : optional, boolean + Whether to move all valid bounding boxes to the top. + Returns ------- out : relay.Expr 3-D tensor with shape [batch_size, num_anchors, 6]. """ - return _make.nms(data, valid_count, overlap_threshold, force_suppress, topk) + return _make.nms(data, valid_count, iou_threshold, + force_suppress, topk, id_index, do_rearrange) diff --git a/src/relay/op/tensor/transform.cc b/src/relay/op/tensor/transform.cc index de3ac03977f4..dd0cbacb1e59 100644 --- a/src/relay/op/tensor/transform.cc +++ b/src/relay/op/tensor/transform.cc @@ -1347,6 +1347,118 @@ RELAY_REGISTER_OP("broadcast_to_like") .set_attr("TOpPattern", kBroadcast); +// Adapter function to make int array. +Array GetIntArray(Array arr) { + for (size_t i = 0; i < arr.size(); ++i) { + CHECK(!arr[i].defined() || arr[i].as()) + << "Expect an int array"; + } + return Array(arr.node_); +} + +// slice_axis +TVM_REGISTER_NODE_TYPE(SliceAxisAttrs); + +bool SliceAxisRel(const Array& types, + int num_inputs, + const Attrs& attrs, + const TypeReporter& reporter) { + CHECK_EQ(types.size(), 2); + const auto* data = types[0].as(); + const SliceAxisAttrs *param = attrs.as(); + + auto src_shape = data->shape; + int axis = param->axis; + int begin = param->begin; + int end = param->end; + + if (axis < 0) { + axis += src_shape.size(); + } + if (begin < 0) { + begin += *as_const_int(src_shape[axis]); + } + if (end <= 0) { + end += *as_const_int(src_shape[axis]); + } + CHECK_LT(begin, end) + << "Begin index must be smaller than end index: " + << begin << " vs " << end; + + std::vector&& oshape = AsVector(data->shape); + oshape[axis] = IndexExpr(end - begin); + + // assign output type + reporter->Assign(types[1], TensorTypeNode::make(oshape, data->dtype)); + return true; +} + +Expr MakeSliceAxis(Expr data, + int axis, + int begin, + int end) { + auto attrs = make_node(); + attrs->axis = axis; + attrs->begin = begin; + attrs->end = end; + static const Op& op = Op::Get("slice_axis"); + return CallNode::make(op, {data}, Attrs(attrs), {}); +} + +TVM_REGISTER_API("relay.op._make.slice_axis") +.set_body([](const TVMArgs& args, TVMRetValue* rv) { + runtime::detail::unpack_call(MakeSliceAxis, args, rv); +}); + +Array SliceAxisCompute(const Attrs& attrs, + const Array& inputs, + const Type& out_type, + const Target& target) { + const SliceAxisAttrs *param = attrs.as(); + const Array src_shape = inputs[0]->shape; + Array begin_idx, end_idx, strides; + int axis = param->axis; + int begin = param->begin; + int end = param->end; + + if (axis < 0) { + axis += src_shape.size(); + } + if (begin < 0) { + begin += *as_const_int(src_shape[axis]); + } + if (end <= 0) { + end += *as_const_int(src_shape[axis]); + } + for (size_t i = 0; i < src_shape.size(); ++i) { + begin_idx.push_back(make_const(tvm::Int(32), 0)); + strides.push_back(make_const(tvm::Int(32), 1)); + } + end_idx = Array(src_shape); + begin_idx.Set(axis, make_const(tvm::Int(32), begin)); + end_idx.Set(axis, make_const(tvm::Int(32), end)); + + return Array{ + topi::strided_slice(inputs[0], + GetIntArray(begin_idx), + GetIntArray(end_idx), + GetIntArray(strides)) + }; +} + +RELAY_REGISTER_OP("relay.op._make.slice_axis") +.describe(R"doc(Slices along a given axis. +Returns an array slice along a given axis starting from +the begin index to the end index. 
+)doc" TVM_ADD_FILELINE) +.set_num_inputs(1) +.add_argument("data", "Tensor", "Input data.") +.set_support_level(4) +.add_type_rel("SliceAxis", SliceAxisRel) +.set_attr("FTVMCompute", SliceAxisCompute) +.set_attr("TOpPattern", kInjective); + + // strided_slice TVM_REGISTER_NODE_TYPE(StridedSliceAttrs); bool StridedSliceRel(const Array& types, @@ -1701,15 +1813,6 @@ Expr MakeSliceLike(Expr data, return CallNode::make(op, {data, shape_like}, Attrs(attrs), {}); } -// Adapter function to make int array. -Array GetIntArray(Array arr) { - for (size_t i = 0; i < arr.size(); ++i) { - CHECK(!arr[i].defined() || arr[i].as()) - << "Expect an int array"; - } - return Array(arr.node_); -} - Array SliceLikeCompute(const Attrs& attrs, const Array& inputs, const Type& out_type, diff --git a/src/relay/op/vision/multibox_op.cc b/src/relay/op/vision/multibox_op.cc index 55db8862e849..04f105c44744 100644 --- a/src/relay/op/vision/multibox_op.cc +++ b/src/relay/op/vision/multibox_op.cc @@ -70,8 +70,10 @@ RELAY_REGISTER_OP("vision.multibox_prior") TVM_REGISTER_NODE_TYPE(MultiBoxTransformLocAttrs); -bool MultiBoxTransformLocRel(const Array& types, int num_inputs, - const Attrs& attrs, const TypeReporter& reporter) { +bool MultiBoxTransformLocRel(const Array& types, + int num_inputs, + const Attrs& attrs, + const TypeReporter& reporter) { CHECK_EQ(types.size(), 4); const auto* cls_prob = types[0].as(); diff --git a/src/relay/op/vision/nms.cc b/src/relay/op/vision/nms.cc index 3e3f73bc6cb4..c284be7c3441 100644 --- a/src/relay/op/vision/nms.cc +++ b/src/relay/op/vision/nms.cc @@ -9,6 +9,53 @@ namespace tvm { namespace relay { +TVM_REGISTER_NODE_TYPE(GetValidCountsAttrs); + +bool GetValidCountRel(const Array& types, + int num_inputs, + const Attrs& attrs, + const TypeReporter& reporter) { + CHECK_EQ(types.size(), 2); + const auto* data = types[0].as(); + const auto& dshape = data->shape; + CHECK_EQ(dshape.size(), 3) << "Input data should be 3-D."; + + std::vector oshape({data->shape[0]}); + std::vector fields; + fields.push_back(TensorTypeNode::make(data->shape, data->dtype)); + fields.push_back(TensorTypeNode::make(oshape, Int(32))); + + // assign output type + reporter->Assign(types[1], TupleTypeNode::make(Array(fields))); + return true; +} + +Expr MakeGetValidCounts(Expr data, + double score_threshold) { + auto attrs = make_node(); + attrs->score_threshold = score_threshold; + static const Op& op = Op::Get("vision.nms"); + return CallNode::make(op, {data}, Attrs(attrs), {}); +} + + +TVM_REGISTER_API("relay.op.vision._make.get_valid_counts") +.set_body([](const TVMArgs& args, TVMRetValue* rv) { + runtime::detail::unpack_call(MakeGetValidCounts, args, rv); +}); + + +RELAY_REGISTER_OP("vision.get_valid_counts") +.describe(R"doc(Get valid count of bounding boxes given +a score threshold. Also moves valid boxes to the top of +input data. 
+)doc" TVM_ADD_FILELINE) +.set_num_inputs(1) +.add_argument("data", "Tensor", "Input data.") +.set_support_level(5) +.add_type_rel("GetValidCount", GetValidCountRel); + + TVM_REGISTER_NODE_TYPE(NMSAttrs); bool NMSRel(const Array& types, @@ -31,13 +78,17 @@ bool NMSRel(const Array& types, Expr MakeNMS(Expr data, Expr valid_count, - double overlap_threshold, + double iou_threshold, bool force_suppress, - int topk) { + int topk, + int id_index, + bool do_rearrange) { auto attrs = make_node(); - attrs->overlap_threshold = overlap_threshold; + attrs->iou_threshold = iou_threshold; attrs->force_suppress = force_suppress; attrs->topk = topk; + attrs->id_index = id_index; + attrs->do_rearrange = do_rearrange; static const Op& op = Op::Get("vision.nms"); return CallNode::make(op, {data, valid_count}, Attrs(attrs), {}); } @@ -45,12 +96,12 @@ Expr MakeNMS(Expr data, TVM_REGISTER_API("relay.op.vision._make.nms") .set_body([](const TVMArgs& args, TVMRetValue* rv) { - runtime::detail::unpack_call(MakeNMS, args, rv); + runtime::detail::unpack_call(MakeNMS, args, rv); }); RELAY_REGISTER_OP("vision.nms") -.describe(R"doc("Non-maximum suppression." +.describe(R"doc(Non-maximum suppression. )doc" TVM_ADD_FILELINE) .set_num_inputs(2) .add_argument("data", "Tensor", "Input data.") diff --git a/tests/python/relay/test_op_level4.py b/tests/python/relay/test_op_level4.py index ae7fe320940a..76ca67f56398 100644 --- a/tests/python/relay/test_op_level4.py +++ b/tests/python/relay/test_op_level4.py @@ -196,6 +196,29 @@ def _wrapper(data, axis=None, keepdims=False): verify_reduce(func, (128, 24, 128), (0, 2), True, False, (1, 24, 1)) +def test_slice_axis(): + def verify(dshape, axis, begin, end): + x = relay.var("x", relay.TensorType(dshape, "float32")) + z = relay.slice_axis(x, axis=axis, begin=begin, end=end) + func = relay.Function([x], z) + func = relay.ir_pass.infer_type(func) + text = func.astext() + assert "begin=" in text + assert "end=" in text + x_data = np.random.uniform(size=dshape).astype("float32") + ref_res = topi.testing.slice_axis_python( + x_data, axis, begin, end) + for target, ctx in ctx_list(): + intrp = relay.create_executor("graph", ctx=ctx, target=target) + op_res = intrp.evaluate(func)(x_data) + tvm.testing.assert_allclose(op_res.asnumpy(), ref_res) + + verify((1, 2, 3, 4), 3, 0, 2) + verify((100, 50), -1, 1, -1) + verify((20,), -1, -9, -3) + verify((20, 30, 40), 1, 5, 0) + + def test_strided_slice(): def verify(dshape, begin, end, strides, output, test_ref=True): x = relay.var("x", relay.TensorType(dshape, "float32")) diff --git a/topi/python/topi/testing/__init__.py b/topi/python/topi/testing/__init__.py index 0ccc422010c1..90b8e8e0e58c 100644 --- a/topi/python/topi/testing/__init__.py +++ b/topi/python/topi/testing/__init__.py @@ -19,4 +19,8 @@ from .l2_normalize_python import l2_normalize_python from .gather_nd_python import gather_nd_python from .strided_slice_python import strided_slice_python +<<<<<<< HEAD from .batch_matmul import batch_matmul +======= +from .slice_axis_python import slice_axis_python +>>>>>>> Relay support diff --git a/topi/python/topi/testing/slice_axis_python.py b/topi/python/topi/testing/slice_axis_python.py new file mode 100644 index 000000000000..2db646c9e3a8 --- /dev/null +++ b/topi/python/topi/testing/slice_axis_python.py @@ -0,0 +1,34 @@ +"""Slice axis in python""" + +def slice_axis_python(data, axis, begin, end=None): + """Slice input array along specific axis. + + Parameters + ---------- + data : numpy.ndarray + The source array to be sliced. 
+ + axis : int + Axis to be sliced. + + begin: int + The index to begin with in the slicing. + + end: int, optional + The index indicating end of the slice. + + Returns + ------- + ret : numpy.ndarray + The computed result. + """ + dshape = data.shape + if axis < 0: + axis += len(dshape) + if begin < 0: + begin += dshape[axis] + if end <= 0: + end += dshape[axis] + slc = [slice(None)] * len(dshape) + slc[axis] = slice(begin, end) + return data[slc] diff --git a/topi/tests/python/test_topi_vision.py b/topi/tests/python/test_topi_vision.py index 517d1f7ee80b..9c14c2c95051 100644 --- a/topi/tests/python/test_topi_vision.py +++ b/topi/tests/python/test_topi_vision.py @@ -59,7 +59,6 @@ def test_get_valid_counts(): verify_get_valid_counts((1, 2500, 6), -1) verify_get_valid_counts((3, 1000, 6), 0.55) verify_get_valid_counts((16, 500, 6), 0.95) ->>>>>>> Add test for get_valid_counts def test_nms(): From 31f09279e2c1c1086caa9fad6f5f490ed67ef25a Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 8 Jan 2019 22:30:17 +0000 Subject: [PATCH 12/43] Add more ops to from_mxnet --- nnvm/tests/python/frontend/mxnet/test_forward.py | 2 +- python/tvm/relay/frontend/mxnet.py | 9 ++++----- python/tvm/relay/op/transform.py | 3 +-- python/tvm/relay/op/vision/nms.py | 9 +++++---- src/relay/op/vision/nms.cc | 4 ++-- tests/python/relay/test_op_level4.py | 1 + tests/python/relay/test_op_level5.py | 2 +- topi/python/topi/vision/nms.py | 2 +- 8 files changed, 16 insertions(+), 16 deletions(-) diff --git a/nnvm/tests/python/frontend/mxnet/test_forward.py b/nnvm/tests/python/frontend/mxnet/test_forward.py index c9d1c7795489..67f1ad5ff27d 100644 --- a/nnvm/tests/python/frontend/mxnet/test_forward.py +++ b/nnvm/tests/python/frontend/mxnet/test_forward.py @@ -292,7 +292,7 @@ def test_forward_minimum(): def test_forward_slice_axis(): data = mx.sym.var('data') - mx_sym = mx.sym.slice_axis(data, axis=1, begin=-5) + mx_sym = mx.sym.slice_axis(data, axis=1, begin=-5, end=None) verify_mxnet_frontend_impl(mx_sym, (1, 10, 6), (1, 5, 6)) def test_forward_l2_normalize(): diff --git a/python/tvm/relay/frontend/mxnet.py b/python/tvm/relay/frontend/mxnet.py index d53a4f5f75a8..50e77e08809d 100644 --- a/python/tvm/relay/frontend/mxnet.py +++ b/python/tvm/relay/frontend/mxnet.py @@ -401,9 +401,8 @@ def _mx_box_nms(inputs, attrs): if out_format != 'corner': raise RuntimeError('out_format %s is not supported.' % out_format) - valid_counts, inter_out = \ - _op.vision.get_valid_counts(inputs[0], score_threshold=valid_thresh) - nms_out = _op.vision.nms(inter_out, valid_counts, + ret =_op.vision.get_valid_counts(inputs[0], score_threshold=valid_thresh) + nms_out = _op.vision.nms(ret[1], ret[0], iou_threshold=overlap_thresh, force_suppress=force_suppress, topk=topk, id_index=id_index, @@ -522,9 +521,8 @@ def _mx_l2_normalize(inputs, attrs): "slice" : _mx_slice, "slice_like" : _mx_slice_like, "slice_axis" : _mx_slice_axis, - "L2Normalization" : _mx_l2_normalize,∂ + "L2Normalization" : _mx_l2_normalize, "SliceChannel" : _mx_split, - "slice_axis" : _mx_slice_axis, "split" : _mx_split, "expand_dims" : _mx_expand_dims, "Concat" : _mx_concat, @@ -541,6 +539,7 @@ def _mx_l2_normalize(inputs, attrs): "_contrib_ROIAlign" : _mx_roi_align, "_contrib_Proposal" : _mx_proposal, "_contrib_MultiProposal" : _mx_proposal, + "_contrib_box_nms" : _mx_box_nms, # List of missing operators that are present in NNVMv1 # TODO(tvm-tvm): support all operators. 
# diff --git a/python/tvm/relay/op/transform.py b/python/tvm/relay/op/transform.py index f19aa19772b4..9a6f308d341a 100644 --- a/python/tvm/relay/op/transform.py +++ b/python/tvm/relay/op/transform.py @@ -471,7 +471,7 @@ def strided_slice(data, begin, end, strides=None): return _make.strided_slice(data, list(begin), list(end), list(strides)) -def slice_axis(data, axis, begin, end=None): +def slice_axis(data, axis, begin, end=0): """Slice input array along specific axis. Parameters @@ -493,7 +493,6 @@ def slice_axis(data, axis, begin, end=None): ret : relay.Expr The computed result. """ - end = end or 0 return _make.slice_axis(data, axis, begin, end) diff --git a/python/tvm/relay/op/vision/nms.py b/python/tvm/relay/op/vision/nms.py index aecc111204b9..157008ec2174 100644 --- a/python/tvm/relay/op/vision/nms.py +++ b/python/tvm/relay/op/vision/nms.py @@ -1,6 +1,7 @@ """Non-maximum suppression operations.""" from __future__ import absolute_import as _abs from . import _make +from ...expr import TupleWrapper def get_valid_counts(data, score_threshold): @@ -17,13 +18,13 @@ def get_valid_counts(data, Returns ------- - out_tensor : relay.Expr - Rearranged data tensor. - valid_count : relay.Expr 1-D tensor for valid number of boxes. + + out_tensor : relay.Expr + Rearranged data tensor. """ - return _make.get_valid_counts(data, score_threshold) + return TupleWrapper(_make.get_valid_counts(data, score_threshold), 2) def nms(data, diff --git a/src/relay/op/vision/nms.cc b/src/relay/op/vision/nms.cc index c284be7c3441..e8a84734b3d7 100644 --- a/src/relay/op/vision/nms.cc +++ b/src/relay/op/vision/nms.cc @@ -22,8 +22,8 @@ bool GetValidCountRel(const Array& types, std::vector oshape({data->shape[0]}); std::vector fields; - fields.push_back(TensorTypeNode::make(data->shape, data->dtype)); fields.push_back(TensorTypeNode::make(oshape, Int(32))); + fields.push_back(TensorTypeNode::make(data->shape, data->dtype)); // assign output type reporter->Assign(types[1], TupleTypeNode::make(Array(fields))); @@ -34,7 +34,7 @@ Expr MakeGetValidCounts(Expr data, double score_threshold) { auto attrs = make_node(); attrs->score_threshold = score_threshold; - static const Op& op = Op::Get("vision.nms"); + static const Op& op = Op::Get("vision.get_valid_counts"); return CallNode::make(op, {data}, Attrs(attrs), {}); } diff --git a/tests/python/relay/test_op_level4.py b/tests/python/relay/test_op_level4.py index 76ca67f56398..dcb5f985fcb2 100644 --- a/tests/python/relay/test_op_level4.py +++ b/tests/python/relay/test_op_level4.py @@ -260,3 +260,4 @@ def verify(dshape, begin, end, strides, output, test_ref=True): test_binary_int_broadcast() test_where() test_reduce_functions() + test_slice_axis() diff --git a/tests/python/relay/test_op_level5.py b/tests/python/relay/test_op_level5.py index 003318f01a2f..c6cbfd921257 100644 --- a/tests/python/relay/test_op_level5.py +++ b/tests/python/relay/test_op_level5.py @@ -142,7 +142,7 @@ def verify_nms(x0_data, x1_data, dshape, ref_res, valid_count, x0 = relay.var("x0", relay.ty.TensorType(dshape, "float32")) x1 = relay.var("x1", relay.ty.TensorType((dshape[0],), "int")) z = relay.vision.nms(x0, x1, overlap_threshold, force_suppress, topk) - assert "overlap_threshold" in z.astext() + assert "iou_threshold" in z.astext() zz = relay.ir_pass.infer_type(z) assert zz.checked_type == relay.ty.TensorType(dshape, "float32") diff --git a/topi/python/topi/vision/nms.py b/topi/python/topi/vision/nms.py index 1dddffc0a2f4..66a4b0df9c30 100644 --- a/topi/python/topi/vision/nms.py +++ 
b/topi/python/topi/vision/nms.py @@ -166,7 +166,7 @@ def hybrid_nms(data, sorted_index, valid_count, if 0 < topk < valid_count[i]: for j in range(valid_count[i] - nkeep): for k in range(box_data_length): - output[i, j + nkeep, k] = data[i, j + nkeep, k] + output[i, j + nkeep, k] = -1.0 # Apply nms for j in range(valid_count[i]): if output[i, j, 0] >= 0: From fb43612cb1fe6cbc7eddf55302b1b02c3449ccec Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 10 Jan 2019 18:26:58 -0800 Subject: [PATCH 13/43] Support multibox op with hybrid script --- include/tvm/relay/attrs/transform.h | 8 +-- src/relay/op/tensor/transform.cc | 28 ++++----- topi/python/topi/cuda/nms.py | 35 ++++++----- topi/python/topi/vision/ssd/multibox.py | 79 ++++++++++++++++--------- topi/tests/python/test_topi_vision.py | 2 +- 5 files changed, 89 insertions(+), 63 deletions(-) diff --git a/include/tvm/relay/attrs/transform.h b/include/tvm/relay/attrs/transform.h index 44b910aaf0bf..d179ae46fade 100644 --- a/include/tvm/relay/attrs/transform.h +++ b/include/tvm/relay/attrs/transform.h @@ -172,16 +172,16 @@ struct StridedSliceAttrs : public tvm::AttrsNode { }; struct SliceAxisAttrs : public tvm::AttrsNode { - int axis; - int begin; - int end; + Integer axis; + Integer begin; + Integer end; TVM_DECLARE_ATTRS(SliceAxisAttrs, "relay.attrs.SliceAxisAttrs") { TVM_ATTR_FIELD(axis) .describe("Axis along which to be sliced."); TVM_ATTR_FIELD(begin) .describe("Index for begin of slice"); - TVM_ATTR_FIELD(end).set_default(0) + TVM_ATTR_FIELD(end) .describe("Index for end of the slice"); } }; diff --git a/src/relay/op/tensor/transform.cc b/src/relay/op/tensor/transform.cc index dd0cbacb1e59..abdd41e306ab 100644 --- a/src/relay/op/tensor/transform.cc +++ b/src/relay/op/tensor/transform.cc @@ -1368,9 +1368,9 @@ bool SliceAxisRel(const Array& types, const SliceAxisAttrs *param = attrs.as(); auto src_shape = data->shape; - int axis = param->axis; - int begin = param->begin; - int end = param->end; + int64_t axis = param->axis; + int64_t begin = param->begin; + int64_t end = param->end; if (axis < 0) { axis += src_shape.size(); @@ -1386,7 +1386,7 @@ bool SliceAxisRel(const Array& types, << begin << " vs " << end; std::vector&& oshape = AsVector(data->shape); - oshape[axis] = IndexExpr(end - begin); + oshape[axis] = make_const(Int(64), end - begin); // assign output type reporter->Assign(types[1], TensorTypeNode::make(oshape, data->dtype)); @@ -1394,9 +1394,9 @@ bool SliceAxisRel(const Array& types, } Expr MakeSliceAxis(Expr data, - int axis, - int begin, - int end) { + Integer axis, + Integer begin, + Integer end) { auto attrs = make_node(); attrs->axis = axis; attrs->begin = begin; @@ -1417,9 +1417,9 @@ Array SliceAxisCompute(const Attrs& attrs, const SliceAxisAttrs *param = attrs.as(); const Array src_shape = inputs[0]->shape; Array begin_idx, end_idx, strides; - int axis = param->axis; - int begin = param->begin; - int end = param->end; + int64_t axis = param->axis; + int64_t begin = param->begin; + int64_t end = param->end; if (axis < 0) { axis += src_shape.size(); @@ -1431,12 +1431,12 @@ Array SliceAxisCompute(const Attrs& attrs, end += *as_const_int(src_shape[axis]); } for (size_t i = 0; i < src_shape.size(); ++i) { - begin_idx.push_back(make_const(tvm::Int(32), 0)); - strides.push_back(make_const(tvm::Int(32), 1)); + begin_idx.push_back(make_const(Int(64), 0)); + strides.push_back(make_const(Int(64), 1)); } end_idx = Array(src_shape); - begin_idx.Set(axis, make_const(tvm::Int(32), begin)); - end_idx.Set(axis, make_const(tvm::Int(32), end)); + 
begin_idx.Set(axis, make_const(Int(64), begin)); + end_idx.Set(axis, make_const(Int(64), end)); return Array{ topi::strided_slice(inputs[0], diff --git a/topi/python/topi/cuda/nms.py b/topi/python/topi/cuda/nms.py index 89c0da381aae..900a8e856ddb 100644 --- a/topi/python/topi/cuda/nms.py +++ b/topi/python/topi/cuda/nms.py @@ -182,13 +182,13 @@ def calculate_overlap(out_tensor, box_a_idx, box_b_idx): @nms.register(["cuda", "gpu"]) -def nms_gpu(data, valid_count, nms_threshold=0.5, force_suppress=False, nms_topk=-1, - do_rearrange=False): +def nms_gpu(data, valid_count, iou_threshold=0.5, force_suppress=False, + topk=-1, id_index=0, do_rearrange=False): """Non-maximum suppression operator for object detection. Parameters ---------- - data: tvm.Tensor + data : tvm.Tensor 3-D tensor with shape [batch_size, num_anchors, 6]. The last dimension should be in format of [class_id, score, box_left, box_top, box_right, box_bottom]. @@ -196,15 +196,21 @@ def nms_gpu(data, valid_count, nms_threshold=0.5, force_suppress=False, nms_topk valid_count : tvm.Tensor 1-D tensor for valid number of boxes. - nms_threshold : float + iou_threshold : optional, float Non-maximum suppression threshold. - force_suppress : boolean + force_suppress : optional, boolean Whether to suppress all detections regardless of class_id. - nms_topk : int + topk : optional, int Keep maximum top k detections before nms, -1 for no limit. + id_index : optional, int + index of the class categories, -1 to disable. + + do_rearrange : optional, boolean + Whether to move all valid bounding boxes to the top. + Returns ------- out : tvm.Tensor @@ -217,14 +223,13 @@ def nms_gpu(data, valid_count, nms_threshold=0.5, force_suppress=False, nms_topk # An example to use nms dshape = (1, 5, 6) data = tvm.placeholder(dshape, name="data") - valid_count = tvm.placeholder( - (dshape[0],), dtype="int32", name="valid_count") - nms_threshold = 0.7 + valid_count = tvm.placeholder((dshape[0],), dtype="int32", name="valid_count") + iou_threshold = 0.7 force_suppress = True - nms_topk = -1 - out = nms(data, valid_count, nms_threshold, force_suppress, nms_topk) - np_data = np.random.uniform(size=dshape).astype("float32") - np_valid_count = np.array([4]).astype("int32") + topk = -1 + out = nms(data, valid_count, iou_threshold, force_suppress, topk) + np_data = np.random.uniform(dshape) + np_valid_count = np.array([4]) s = topi.generic.schedule_nms(out) f = tvm.build(s, [data, valid_count, out], "llvm") ctx = tvm.cpu() @@ -264,8 +269,8 @@ def nms_gpu(data, valid_count, nms_threshold=0.5, force_suppress=False, nms_topk tvm.extern(data.shape, [data, sort_tensor, valid_count], lambda ins, outs: nms_ir( - ins[0], ins[1], ins[2], outs[0], nms_threshold, - force_suppress, nms_topk), + ins[0], ins[1], ins[2], outs[0], iou_threshold, + force_suppress, topk), dtype="float32", in_buffers=[data_buf, sort_tensor_buf, valid_count_buf], tag="nms") diff --git a/topi/python/topi/vision/ssd/multibox.py b/topi/python/topi/vision/ssd/multibox.py index 87a4a84c5ab5..907f6ac40346 100644 --- a/topi/python/topi/vision/ssd/multibox.py +++ b/topi/python/topi/vision/ssd/multibox.py @@ -19,29 +19,31 @@ def hybrid_multibox_prior(data, sizes, ratios, steps, offsets): data : tvm.Tensor or numpy NDArray 4-D tensor with shape [batch, channel, height, width]] - sizes : tvm.ndarray - 1-D tensor of sizes for anchor boxes. + sizes : tvm ConsExpr + Sizes for anchor boxes. - ratios : tvm.ndarray - 1-D tensor of ratios for anchor boxes. + ratios : tvm ConsExpr + Ratios for anchor boxes. 
- steps : tvm.ndarray - 1-D tensor of priorbox step across y and x, -1 for auto calculation. + steps : tvm ConsExpr + Priorbox step across y and x, -1 for auto calculation. - offsets : tvm.ndarray - 1-D tensor priorbox center offsets, y and x respectively. + offsets : tvm ConsExpr + Priorbox center offsets, y and x respectively. Returns ------- output : tvm.Tensor or numpy NDArray 3-D tensor with shape [1, h_in * w_in * (num_sizes + num_ratios - 1), 4] """ - in_height, in_width = data.shape[2], data.shape[3] - num_sizes, num_ratios = sizes.shape[0], ratios.shape[0] + in_height = data.shape[2] + in_width = data.shape[3] + num_sizes = len(sizes) + num_ratios = len(ratios) num_boxes = in_height * in_width * (num_sizes + num_ratios - 1) - output = output_tensor((1, num_boxes, 4), data.dtype) - steps_h = steps[0] if steps[0] > 0 else 1.0 / in_height - steps_w = steps[1] if steps[1] > 0 else 1.0 / in_width + output = output_tensor((1, num_boxes, 4), "float32") + steps_h = steps[0] * 1.0 if steps[0] > 0 else 1.0 / in_height + steps_w = steps[1] * 1.0 if steps[1] > 0 else 1.0 / in_width offset_h = offsets[0] offset_w = offsets[1] @@ -49,7 +51,7 @@ def hybrid_multibox_prior(data, sizes, ratios, steps, offsets): center_h = (i + offset_h) * steps_h for j in range(in_width): center_w = (j + offset_w) * steps_w - for k in range(num_sizes + num_ratios - 1): + for k in const_range(num_sizes + num_ratios - 1): if k < num_sizes: w = sizes[k] * in_height / in_width / 2.0 h = sizes[k] / 2.0 @@ -96,7 +98,8 @@ def multibox_prior(data, sizes=(1,), ratios=(1,), steps=(-1, -1), offsets=(0.5, out : tvm.Tensor 3-D tensor with shape [1, h_in * w_in * (num_sizes + num_ratios - 1), 4] """ - out = hybrid_multibox_prior(data, sizes, ratios, steps, offsets) + out = hybrid_multibox_prior(data, tvm.convert(sizes), tvm.convert(ratios), + tvm.convert(steps), tvm.convert(offsets)) if clip: out = topi.clip(out, 0, 1) return out @@ -105,11 +108,23 @@ def multibox_prior(data, sizes=(1,), ratios=(1,), steps=(-1, -1), offsets=(0.5, def _hybridy_transform_loc(box, pred_loc, variance, clip): """Transform prior anchor box to output box through location predictions. """ - al, at, ar, ab = box[0], box[1], box[2], box[3] - px, py, pw, ph = pred_loc[0], pred_loc[1], \ - pred_loc[2], pred_loc[3] - vx, vy, vw, vh = variance[0], variance[1], \ - variance[2], variance[3] + al = box[0] + at = box[1] + ar = box[2] + ab = box[3] + + px = pred_loc[0] + py = pred_loc[1] + pw = pred_loc[2] + ph = pred_loc[3] + + vx = variance[0] + vy = variance[1] + vw = variance[2] + vh = variance[3] + + output = output_tensor((4,), pred_loc.dtype) + aw = ar - al ah = ab - at ax = (al + ar) / 2.0 @@ -118,11 +133,11 @@ def _hybridy_transform_loc(box, pred_loc, variance, clip): oy = py * vy * ah + ay ow = exp(pw * vw) * aw / 2.0 oh = exp(ph * vh) * ah / 2.0 - out_l = max(0, min(1, ox - ow)) if clip else ox - ow - out_t = max(0, min(1, oy - oh)) if clip else oy - oh - out_r = max(0, min(1, ox + ow)) if clip else ox + ow - out_b = max(0, min(1, oy + oh)) if clip else oy + oh - return out_l, out_t, out_r, out_b + output[0] = max(0.0, min(1.0, ox - ow)) if clip else ox - ow + output[1] = max(0.0, min(1.0, oy - oh)) if clip else oy - oh + output[2] = max(0.0, min(1.0, ox + ow)) if clip else ox + ow + output[3] = max(0.0, min(1.0, oy + oh)) if clip else oy + oh + return output @hybrid.script def hybrid_multibox_transform_loc(cls_prob, loc_pred, anchor, @@ -135,7 +150,7 @@ def hybrid_multibox_transform_loc(cls_prob, loc_pred, anchor, 3-D tensor of class probabilities. 
loc_pred : tvm.Tensor or numpy NDArray - 3-D tensor of location regression predictions. + 2-D tensor of location regression predictions. anchor : tvm.Tensor or numpy NDArray 3-D tensor of prior anchor boxes. @@ -189,6 +204,8 @@ def transform_loc(loc, loc_base_idx, anchor, anchor_base_idx, clip, vx, vy, vw, batch_size = cls_prob.shape[0] num_classes = cls_prob.shape[1] num_anchors = cls_prob.shape[2] + box_coord = allocate((4,), loc_pred.dtype) + pred_coord = allocate((4,), loc_pred.dtype) out_loc = output_tensor((batch_size, num_anchors, 6), loc_pred.dtype) valid_count = output_tensor((batch_size,), "int32") @@ -215,7 +232,7 @@ def transform_loc(loc, loc_base_idx, anchor, anchor_base_idx, clip, vx, vy, vw, for k in range(num_classes): if k > 0: temp = cls_prob[i, k, j] - cls_id = j if temp > score else cls_id + cls_id = k if temp > score else cls_id score = max(temp, score) if cls_id > 0 and score < threshold: cls_id = 0 @@ -225,12 +242,16 @@ def transform_loc(loc, loc_base_idx, anchor, anchor_base_idx, clip, vx, vy, vw, if cls_id > 0: out_loc[i, valid_count[i], 0] = cls_id - 1.0 out_loc[i, valid_count[i], 1] = score - out_coord = _hybridy_transform_loc(anchor[j], loc_pred[i, j], + for l in range(4): + box_coord[l] = anchor[0, j, l] + pred_coord[l] = loc_pred[i, j * 4 + l] + out_coord = _hybridy_transform_loc(box_coord, pred_coord, variances, clip) out_loc[i, valid_count[i], 2] = out_coord[0] out_loc[i, valid_count[i], 3] = out_coord[1] out_loc[i, valid_count[i], 4] = out_coord[2] out_loc[i, valid_count[i], 5] = out_coord[3] + valid_count[i] += 1 return out_loc, valid_count @@ -266,7 +287,7 @@ def multibox_transform_loc(cls_prob, loc_pred, anchor, clip=True, threshold=0.01 out, valid_count = hybrid_multibox_transform_loc(cls_prob, loc_pred, anchor, tvm.const(clip, "bool"), tvm.const(threshold, "float32"), - variances) + tvm.convert(variances)) return out, valid_count @tvm.target.generic_func diff --git a/topi/tests/python/test_topi_vision.py b/topi/tests/python/test_topi_vision.py index 9c14c2c95051..fd52f13795a1 100644 --- a/topi/tests/python/test_topi_vision.py +++ b/topi/tests/python/test_topi_vision.py @@ -74,7 +74,7 @@ def test_nms(): [1, 0.5, 100, 60, 70, 110]]]).astype(data.dtype) np_valid_count = np.array([4]).astype(valid_count.dtype) np_result = np.array([[[2, 0.9, 35, 61, 52, 79], [0, 0.8, 1, 20, 25, 45], - [0, 0.4, 4, 21, 19, 40], [-1, 0.9, 35, 61, 52, 79], + [-1, -1, -1, -1, -1, -1], [-1, -1, -1, -1, -1, -1], [-1, -1, -1, -1, -1, -1]]]) def check_device(device): From 717e61c6cec1d91d4ddaae77c2d7952c8e6a06bd Mon Sep 17 00:00:00 2001 From: Wang Date: Fri, 11 Jan 2019 17:18:18 -0800 Subject: [PATCH 14/43] Fix slice_axis relay register issue --- include/tvm/relay/attrs/transform.h | 6 ++--- src/relay/op/tensor/transform.cc | 24 +++++++++---------- tests/python/relay/test_op_level4.py | 4 ++-- topi/python/topi/testing/slice_axis_python.py | 2 +- 4 files changed, 18 insertions(+), 18 deletions(-) diff --git a/include/tvm/relay/attrs/transform.h b/include/tvm/relay/attrs/transform.h index d179ae46fade..720d6b9d3690 100644 --- a/include/tvm/relay/attrs/transform.h +++ b/include/tvm/relay/attrs/transform.h @@ -172,9 +172,9 @@ struct StridedSliceAttrs : public tvm::AttrsNode { }; struct SliceAxisAttrs : public tvm::AttrsNode { - Integer axis; - Integer begin; - Integer end; + int axis; + int begin; + int end; TVM_DECLARE_ATTRS(SliceAxisAttrs, "relay.attrs.SliceAxisAttrs") { TVM_ATTR_FIELD(axis) diff --git a/src/relay/op/tensor/transform.cc b/src/relay/op/tensor/transform.cc index 
abdd41e306ab..73b74a60d756 100644 --- a/src/relay/op/tensor/transform.cc +++ b/src/relay/op/tensor/transform.cc @@ -1368,9 +1368,9 @@ bool SliceAxisRel(const Array& types, const SliceAxisAttrs *param = attrs.as(); auto src_shape = data->shape; - int64_t axis = param->axis; - int64_t begin = param->begin; - int64_t end = param->end; + int axis = param->axis; + int begin = param->begin; + int end = param->end; if (axis < 0) { axis += src_shape.size(); @@ -1386,7 +1386,7 @@ bool SliceAxisRel(const Array& types, << begin << " vs " << end; std::vector&& oshape = AsVector(data->shape); - oshape[axis] = make_const(Int(64), end - begin); + oshape[axis] = make_const(Int(32), end - begin); // assign output type reporter->Assign(types[1], TensorTypeNode::make(oshape, data->dtype)); @@ -1417,9 +1417,9 @@ Array SliceAxisCompute(const Attrs& attrs, const SliceAxisAttrs *param = attrs.as(); const Array src_shape = inputs[0]->shape; Array begin_idx, end_idx, strides; - int64_t axis = param->axis; - int64_t begin = param->begin; - int64_t end = param->end; + int axis = param->axis; + int begin = param->begin; + int end = param->end; if (axis < 0) { axis += src_shape.size(); @@ -1431,12 +1431,12 @@ Array SliceAxisCompute(const Attrs& attrs, end += *as_const_int(src_shape[axis]); } for (size_t i = 0; i < src_shape.size(); ++i) { - begin_idx.push_back(make_const(Int(64), 0)); - strides.push_back(make_const(Int(64), 1)); + begin_idx.push_back(make_const(Int(32), 0)); + strides.push_back(make_const(Int(32), 1)); } end_idx = Array(src_shape); - begin_idx.Set(axis, make_const(Int(64), begin)); - end_idx.Set(axis, make_const(Int(64), end)); + begin_idx.Set(axis, make_const(Int(32), begin)); + end_idx.Set(axis, make_const(Int(32), end)); return Array{ topi::strided_slice(inputs[0], @@ -1446,7 +1446,7 @@ Array SliceAxisCompute(const Attrs& attrs, }; } -RELAY_REGISTER_OP("relay.op._make.slice_axis") +RELAY_REGISTER_OP("slice_axis") .describe(R"doc(Slices along a given axis. Returns an array slice along a given axis starting from the begin index to the end index. 
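The shape rule implemented by SliceAxisRel can be restated as a small python sketch, which may help when reading the Int(64) to Int(32) changes above. This is illustrative only; a negative axis or begin and a non-positive end wrap around exactly as in the C++ relation.

def slice_axis_out_shape(src_shape, axis, begin, end):
    # Normalize the indices the same way SliceAxisRel does, then compute
    # the length of the sliced axis.
    ndim = len(src_shape)
    if axis < 0:
        axis += ndim
    if begin < 0:
        begin += src_shape[axis]
    if end <= 0:
        end += src_shape[axis]
    assert begin < end, "begin should be smaller than end"
    oshape = list(src_shape)
    oshape[axis] = end - begin
    return oshape

print(slice_axis_out_shape((1, 10, 6), axis=1, begin=-5, end=0))  # [1, 5, 6]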
diff --git a/tests/python/relay/test_op_level4.py b/tests/python/relay/test_op_level4.py index dcb5f985fcb2..4ba7e8cd1e72 100644 --- a/tests/python/relay/test_op_level4.py +++ b/tests/python/relay/test_op_level4.py @@ -203,8 +203,8 @@ def verify(dshape, axis, begin, end): func = relay.Function([x], z) func = relay.ir_pass.infer_type(func) text = func.astext() - assert "begin=" in text - assert "end=" in text + assert "begin" in text + assert "end" in text x_data = np.random.uniform(size=dshape).astype("float32") ref_res = topi.testing.slice_axis_python( x_data, axis, begin, end) diff --git a/topi/python/topi/testing/slice_axis_python.py b/topi/python/topi/testing/slice_axis_python.py index 2db646c9e3a8..589e5914a36c 100644 --- a/topi/python/topi/testing/slice_axis_python.py +++ b/topi/python/topi/testing/slice_axis_python.py @@ -31,4 +31,4 @@ def slice_axis_python(data, axis, begin, end=None): end += dshape[axis] slc = [slice(None)] * len(dshape) slc[axis] = slice(begin, end) - return data[slc] + return data[tuple(slc)] From b9681eeed67faa85f1827441a8df0de6b4ddb187 Mon Sep 17 00:00:00 2001 From: Wang Date: Sun, 13 Jan 2019 11:21:09 -0800 Subject: [PATCH 15/43] Add get_valid_counts relay test --- nnvm/tests/python/compiler/test_top_level4.py | 2 +- tests/python/relay/test_op_level5.py | 56 ++++++++++++++++--- topi/python/topi/vision/ssd/multibox.py | 52 +++-------------- 3 files changed, 55 insertions(+), 55 deletions(-) diff --git a/nnvm/tests/python/compiler/test_top_level4.py b/nnvm/tests/python/compiler/test_top_level4.py index b84621128614..9e44fa0e5b1c 100644 --- a/nnvm/tests/python/compiler/test_top_level4.py +++ b/nnvm/tests/python/compiler/test_top_level4.py @@ -629,7 +629,7 @@ def test_nms(): [1, 0.5, 100, 60, 70, 110]]]).astype("float32") np_valid_count = np.array([4]).astype("int32") np_result = np.array([[[2, 0.9, 35, 61, 52, 79], [0, 0.8, 1, 20, 25, 45], - [0, 0.4, 4, 21, 19, 40], [-1, 0.9, 35, 61, 52, 79], + [-1, -1, -1, -1, -1, -1], [-1, -1, -1, -1, -1, -1], [-1, -1, -1, -1, -1, -1]]]) target = "llvm" diff --git a/tests/python/relay/test_op_level5.py b/tests/python/relay/test_op_level5.py index c6cbfd921257..1e7fe76c0a9e 100644 --- a/tests/python/relay/test_op_level5.py +++ b/tests/python/relay/test_op_level5.py @@ -135,8 +135,47 @@ def verify_multibox_prior(x, dshape, ref_res, sizes=(1.0,), verify_multibox_prior(x, dshape, ref_res, clip=False, check_type_only=True) +def test_get_valid_counts(): + def verify_get_valid_counts(dshape, score_threshold): + dtype = "float32" + batch_size, num_anchor, elem_length = dshape + np_data = np.random.uniform(size=dshape).astype(dtype) + np_out1 = np.zeros(shape=(batch_size,)) + np_out2 = np.zeros(shape=dshape).astype(dtype) + for i in range(batch_size): + np_out1[i] = 0 + inter_idx = 0 + for j in range(num_anchor): + score = np_data[i, j, 1] + if score >= score_threshold: + for k in range(elem_length): + np_out2[i, inter_idx, k] = np_data[i, j, k] + np_out1[i] += 1 + inter_idx += 1 + if j >= np_out1[i]: + for k in range(elem_length): + np_out2[i, j, k] = -1 + + x = relay.var("x", relay.ty.TensorType(dshape, dtype)) + z = relay.vision.get_valid_counts(x, score_threshold) + assert "score_threshold" in z.astext() + func = relay.Function([x], z.astuple()) + func = relay.ir_pass.infer_type(func) + ctx_list = [("llvm", tvm.cpu(0))] + for target, ctx in ctx_list: + intrp = relay.create_executor("debug", ctx=ctx, target=target) + out = intrp.evaluate(func)(np_data) + tvm.testing.assert_allclose(out[0].asnumpy(), np_out1, rtol=1e-3) + 
tvm.testing.assert_allclose(out[1].asnumpy(), np_out2, rtol=1e-3) + + verify_get_valid_counts((1, 2500, 6), 0) + verify_get_valid_counts((1, 2500, 6), -1) + verify_get_valid_counts((3, 1000, 6), 0.55) + verify_get_valid_counts((16, 500, 6), 0.95) + + def test_nms(): - def verify_nms(x0_data, x1_data, dshape, ref_res, valid_count, + def verify_nms(x0_data, x1_data, dshape, ref_res, overlap_threshold=0.5, force_suppress=False, topk=-1, check_type_only=False): x0 = relay.var("x0", relay.ty.TensorType(dshape, "float32")) @@ -165,26 +204,24 @@ def verify_nms(x0_data, x1_data, dshape, ref_res, valid_count, [1, 0.5, 100, 60, 70, 110]]]).astype("float32") np_valid_count = np.array([4]).astype("int32") np_result = np.array([[[2, 0.9, 35, 61, 52, 79], [0, 0.8, 1, 20, 25, 45], - [0, 0.4, 4, 21, 19, 40], [-1, 0.9, 35, 61, 52, 79], + [-1, -1, -1, -1, -1, -1], [-1, -1, -1, -1, -1, -1], [-1, -1, -1, -1, -1, -1]]]) num_anchors = 5 dshape = (tvm.var("n"), num_anchors, 6) - verify_nms(np_data, np_valid_count, dshape, np_result, dshape[0], + verify_nms(np_data, np_valid_count, dshape, np_result, force_suppress=True, topk=2, check_type_only=True) dshape = (1, num_anchors, 6) - verify_nms(np_data, np_valid_count, dshape, np_result, dshape[0], + verify_nms(np_data, np_valid_count, dshape, np_result, force_suppress=True, topk=2, check_type_only=False) np_result = np.array([[[2, 0.9, 35, 61, 52, 79], [0, 0.8, 1, 20, 25, 45], - [1, 0.7, 30, 60, 50, 80], [-1, 0.9, 35, 61, 52, 79], + [1, 0.7, 30, 60, 50, 80], [-1, -1, -1, -1, -1, -1], [-1, -1, -1, -1, -1, -1]]]) dshape = (tvm.var("n"), num_anchors, 6) - verify_nms(np_data, np_valid_count, dshape, np_result, dshape[0], - check_type_only=True) + verify_nms(np_data, np_valid_count, dshape, np_result, check_type_only=True) dshape = (1, num_anchors, 6) - verify_nms(np_data, np_valid_count, dshape, np_result, dshape[0], - topk=3) + verify_nms(np_data, np_valid_count, dshape, np_result, topk=3) def test_multibox_transform_loc(): @@ -411,6 +448,7 @@ def verify_yolo_reorg(shape, stride): test_resize() test_multibox_prior() test_multibox_transform_loc() + test_get_valid_counts() test_nms() test_roi_align() test_proposal() diff --git a/topi/python/topi/vision/ssd/multibox.py b/topi/python/topi/vision/ssd/multibox.py index 907f6ac40346..c63874750e04 100644 --- a/topi/python/topi/vision/ssd/multibox.py +++ b/topi/python/topi/vision/ssd/multibox.py @@ -47,6 +47,10 @@ def hybrid_multibox_prior(data, sizes, ratios, steps, offsets): offset_h = offsets[0] offset_w = offsets[1] + # Need to define var out of const_range + if + w = 0.0 + h = 0.0 + for i in parallel(in_height): center_h = (i + offset_h) * steps_h for j in range(in_width): @@ -57,8 +61,8 @@ def hybrid_multibox_prior(data, sizes, ratios, steps, offsets): h = sizes[k] / 2.0 else: w = sizes[0] * in_height / in_width \ - * sqrt(ratios[k - num_sizes + 1]) / 2.0 - h = sizes[0] * sqrt(ratios[k - num_sizes + 1]) / 2.0 + * sqrt(ratios[k - num_sizes + 1] * 1.0) / 2.0 + h = sizes[0] / sqrt(ratios[k - num_sizes + 1] * 1.0) / 2.0 count = i * in_width * (num_sizes + num_ratios - 1) \ + j * (num_sizes + num_ratios - 1) + k output[0, count, 0] = center_w - w @@ -104,6 +108,7 @@ def multibox_prior(data, sizes=(1,), ratios=(1,), steps=(-1, -1), offsets=(0.5, out = topi.clip(out, 0, 1) return out + @hybrid.script def _hybridy_transform_loc(box, pred_loc, variance, clip): """Transform prior anchor box to output box through location predictions. 
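The decoding that _hybridy_transform_loc performs can be summarized with a short numpy sketch; corner-format anchors and the (vx, vy, vw, vh) variances are assumed from the hybrid implementation earlier in this series, and the names below are illustrative only.

import numpy as np

def transform_loc_ref(box, pred, var, clip=True):
    al, at, ar, ab = box   # anchor corners: left, top, right, bottom
    px, py, pw, ph = pred  # predicted offsets
    vx, vy, vw, vh = var   # variances
    aw, ah = ar - al, ab - at
    ax, ay = (al + ar) / 2.0, (at + ab) / 2.0
    ox = px * vx * aw + ax
    oy = py * vy * ah + ay
    ow = np.exp(pw * vw) * aw / 2.0
    oh = np.exp(ph * vh) * ah / 2.0
    out = np.array([ox - ow, oy - oh, ox + ow, oy + oh])
    return np.clip(out, 0.0, 1.0) if clip else out

# With zero offsets the anchor is returned unchanged.
print(transform_loc_ref([0.1, 0.1, 0.5, 0.5], [0.0, 0.0, 0.0, 0.0],
                        [0.1, 0.1, 0.2, 0.2]))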
@@ -166,37 +171,8 @@ def hybrid_multibox_transform_loc(cls_prob, loc_pred, anchor, Returns ------- -<<<<<<< HEAD - stmt : Stmt - The result IR statement. - """ - def transform_loc(loc, loc_base_idx, anchor, anchor_base_idx, clip, vx, vy, vw, vh): - """Transform prior anchor box to output box through location predictions. - """ - al = anchor[anchor_base_idx] - at = anchor[anchor_base_idx + 1] - ar = anchor[anchor_base_idx + 2] - ab = anchor[anchor_base_idx + 3] - aw = ar - al - ah = ab - at - ax = (al + ar) / 2.0 - ay = (at + ab) / 2.0 - px = loc[loc_base_idx] - py = loc[loc_base_idx + 1] - pw = loc[loc_base_idx + 2] - ph = loc[loc_base_idx + 3] - ox = px * vx * aw + ax - oy = py * vy * ah + ay - ow = tvm.exp(pw * vw) * aw / 2.0 - oh = tvm.exp(ph * vh) * ah / 2.0 - return tvm.if_then_else(clip, tvm.max(0, tvm.min(1, ox - ow)), ox - ow), \ - tvm.if_then_else(clip, tvm.max(0, tvm.min(1, oy - oh)), oy - oh), \ - tvm.if_then_else(clip, tvm.max(0, tvm.min(1, ox + ow)), ox + ow), \ - tvm.if_then_else(clip, tvm.max(0, tvm.min(1, oy + oh)), oy + oh) -======= out_loc : tvm.Tensor or numpy NDArray 3-D tensor of transformed location. ->>>>>>> Modify SSD tutorial valid_count : tvm.Tensor or numpy NDArray 1_d tensor of valid counts for boxes. @@ -214,19 +190,6 @@ def transform_loc(loc, loc_base_idx, anchor, anchor_base_idx, clip, vx, vy, vw, valid_count[i] = 0 for j in range(num_anchors): # Find the predicted class id and probability -<<<<<<< HEAD - score = ib.allocate('float32', (1,), name="score", scope="local") - cls_id = ib.allocate('int32', (1,), name="id", scope="local") - score[0] = -1.0 - cls_id[0] = 0 - with ib.for_range(0, num_classes, name="j") as j: - with ib.if_scope(j > 0): - temp = p_cls_prob[n * num_anchors * num_classes + j * num_anchors + i] - cls_id[0] = tvm.if_then_else(temp > score[0], j, cls_id[0]) - score[0] = tvm.max(temp, score[0]) - with ib.if_scope(tvm.all(cls_id[0] > 0, score[0] < threshold)): - cls_id[0] = 0 -======= score = -1.0 cls_id = 0 for k in range(num_classes): @@ -236,7 +199,6 @@ def transform_loc(loc, loc_base_idx, anchor, anchor_base_idx, clip, vx, vy, vw, score = max(temp, score) if cls_id > 0 and score < threshold: cls_id = 0 ->>>>>>> Modify SSD tutorial # [id, prob, xmin, ymin, xmax, ymax] # Remove background, restore original id if cls_id > 0: From e277a552c03b378e00c7f2286efa44914201dac7 Mon Sep 17 00:00:00 2001 From: Wang Date: Sun, 13 Jan 2019 22:06:46 -0800 Subject: [PATCH 16/43] Fix multibox_transform_loc --- nnvm/python/nnvm/top/vision.py | 2 +- nnvm/tests/python/compiler/test_top_level4.py | 3 ++- topi/python/topi/vision/ssd/multibox.py | 9 ++++----- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/nnvm/python/nnvm/top/vision.py b/nnvm/python/nnvm/top/vision.py index f5f41d33e363..cb69b897a7be 100644 --- a/nnvm/python/nnvm/top/vision.py +++ b/nnvm/python/nnvm/top/vision.py @@ -58,7 +58,7 @@ def compute_multibox_transform_loc(attrs, inputs, _): return topi.vision.ssd.multibox_transform_loc(inputs[0], inputs[1], inputs[2], clip, threshold, variance) -reg.register_pattern("multibox_detection", OpPattern.OPAQUE) +reg.register_pattern("multibox_transform_loc", OpPattern.OPAQUE) # Get valid number of anchor boxes @reg.register_schedule("get_valid_counts") diff --git a/nnvm/tests/python/compiler/test_top_level4.py b/nnvm/tests/python/compiler/test_top_level4.py index 9e44fa0e5b1c..38646b01a4c9 100644 --- a/nnvm/tests/python/compiler/test_top_level4.py +++ b/nnvm/tests/python/compiler/test_top_level4.py @@ -3,6 +3,7 @@ import tvm from tvm.contrib 
import graph_runtime import topi +import topi.testing import nnvm.symbol as sym import nnvm.compiler from nnvm.testing.config import ctx_list @@ -657,7 +658,7 @@ def np_slice_like(np_data, np_shape_like, axis=[]): slice_idx = [] for b, e in zip(begin_idx, end_idx): slice_idx.append(slice(b, e)) - np_result = np_data[slice_idx] + np_result = np_data[tuple(slice_idx)] return np_result def verify_slice_like(np_data, np_shape_like, axis=[]): diff --git a/topi/python/topi/vision/ssd/multibox.py b/topi/python/topi/vision/ssd/multibox.py index c63874750e04..f20a286960cc 100644 --- a/topi/python/topi/vision/ssd/multibox.py +++ b/topi/python/topi/vision/ssd/multibox.py @@ -246,11 +246,10 @@ def multibox_transform_loc(cls_prob, loc_pred, anchor, clip=True, threshold=0.01 ------- ret : tuple of tvm.Tensor """ - out, valid_count = hybrid_multibox_transform_loc(cls_prob, loc_pred, anchor, - tvm.const(clip, "bool"), - tvm.const(threshold, "float32"), - tvm.convert(variances)) - return out, valid_count + return hybrid_multibox_transform_loc(cls_prob, loc_pred, anchor, + tvm.const(clip, "bool"), + tvm.const(threshold, "float32"), + tvm.convert(variances)) @tvm.target.generic_func def multibox_detection(cls_prob, loc_pred, anchor, clip=True, threshold=0.01, nms_threshold=0.5, From ef00b7fd7c762a6be3c21d96f7a55d5d4e908f16 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 14 Jan 2019 20:58:50 +0000 Subject: [PATCH 17/43] Fix relay from_mxnet --- python/tvm/relay/frontend/mxnet.py | 2 ++ python/tvm/relay/op/transform.py | 3 ++- python/tvm/relay/op/vision/_vision.py | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/python/tvm/relay/frontend/mxnet.py b/python/tvm/relay/frontend/mxnet.py index 50e77e08809d..6f6639c5df9a 100644 --- a/python/tvm/relay/frontend/mxnet.py +++ b/python/tvm/relay/frontend/mxnet.py @@ -682,6 +682,8 @@ def from_mxnet(symbol, params[k] = _nd.array(v.data().asnumpy()) data = mx.sym.Variable("data") sym = symbol(data) + if isinstance(sym, (list, tuple)): + sym = mx.sym.Group(sym) shape, dtype = _update_shape_dtype(shape, dtype, params) sym = _from_mxnet_impl(sym, shape, dtype) elif isinstance(symbol, mx.gluon.Block): diff --git a/python/tvm/relay/op/transform.py b/python/tvm/relay/op/transform.py index 9a6f308d341a..f19aa19772b4 100644 --- a/python/tvm/relay/op/transform.py +++ b/python/tvm/relay/op/transform.py @@ -471,7 +471,7 @@ def strided_slice(data, begin, end, strides=None): return _make.strided_slice(data, list(begin), list(end), list(strides)) -def slice_axis(data, axis, begin, end=0): +def slice_axis(data, axis, begin, end=None): """Slice input array along specific axis. Parameters @@ -493,6 +493,7 @@ def slice_axis(data, axis, begin, end=0): ret : relay.Expr The computed result. 
""" + end = end or 0 return _make.slice_axis(data, axis, begin, end) diff --git a/python/tvm/relay/op/vision/_vision.py b/python/tvm/relay/op/vision/_vision.py index 2d15562995ec..57ca6f2da2ce 100644 --- a/python/tvm/relay/op/vision/_vision.py +++ b/python/tvm/relay/op/vision/_vision.py @@ -59,7 +59,7 @@ def compute_multibox_transform_loc(attrs, inputs, _, target): def schedule_get_valid_counts(_, outs, target): """Schedule definition of get_valid_counts""" with target: - return topi.generic.schedule_nms(outs) + return topi.generic.schedule_get_valid_counts(outs) @reg.register_compute("vision.get_valid_counts") From fa89a2ab6a69e4305f51b45229d6eef0f5d00cd9 Mon Sep 17 00:00:00 2001 From: Wang Date: Mon, 14 Jan 2019 14:18:27 -0800 Subject: [PATCH 18/43] Fix l2_normalize --- python/tvm/relay/frontend/mxnet.py | 2 +- tests/python/frontend/mxnet/test_forward.py | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/python/tvm/relay/frontend/mxnet.py b/python/tvm/relay/frontend/mxnet.py index 6f6639c5df9a..c5a6ae8e8b4d 100644 --- a/python/tvm/relay/frontend/mxnet.py +++ b/python/tvm/relay/frontend/mxnet.py @@ -416,7 +416,7 @@ def _mx_l2_normalize(inputs, attrs): if mode != 'channel': raise RuntimeError('mode %s is not supported.' % mode) new_attrs['eps'] = attrs.get_float('eps', 1e-10) - new_attrs['axis'] = 1 + new_attrs['axis'] = [1] return _op.nn.l2_normalize(inputs[0], **new_attrs) diff --git a/tests/python/frontend/mxnet/test_forward.py b/tests/python/frontend/mxnet/test_forward.py index 2dfe20c503e6..b6646b3c2a2f 100644 --- a/tests/python/frontend/mxnet/test_forward.py +++ b/tests/python/frontend/mxnet/test_forward.py @@ -257,6 +257,7 @@ def verify(start, stop, step): verify(20, 1, -1) verify(20, 1, -1.5) +<<<<<<< HEAD def _mx_symbol(F, op_name, inputs): op = getattr(F, op_name) return op(*inputs) @@ -374,6 +375,11 @@ def verify(x_shape, y_shape, axes): verify((3, 4), (2, 3), (0)) verify((3, 4), (2, 3), (-1)) +def test_forward_l2_normalize(): + data = mx.sym.var('data') + mx_sym = mx.sym.L2Normalization(data, mode="channel") + verify_mxnet_frontend_impl(mx_sym, (2, 3, 4, 5), (2, 3, 4, 5)) + if __name__ == '__main__': test_forward_mlp() @@ -401,5 +407,6 @@ def verify(x_shape, y_shape, axes): test_forward_broadcast_ops() test_forward_elemwise_ops() test_forward_scalar_ops() - test_forward_slice_axis() test_forward_slice_like() + test_forward_slice_axis() + test_forward_l2_normalize() From 925c14021a995cdc1e7c367a9514006778bd86ab Mon Sep 17 00:00:00 2001 From: Wang Date: Mon, 14 Jan 2019 16:44:08 -0800 Subject: [PATCH 19/43] Fix lint --- nnvm/src/top/tensor/elemwise.cc | 19 ++++++------------- python/tvm/relay/frontend/mxnet.py | 2 +- topi/include/topi/nn/l2_normalize.h | 2 +- 3 files changed, 8 insertions(+), 15 deletions(-) diff --git a/nnvm/src/top/tensor/elemwise.cc b/nnvm/src/top/tensor/elemwise.cc index 9c1687beab35..5a39f3ecc392 100644 --- a/nnvm/src/top/tensor/elemwise.cc +++ b/nnvm/src/top/tensor/elemwise.cc @@ -815,12 +815,10 @@ NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__equal_scalar__) "FTVMCompute", [](const NodeAttrs& attrs, const Array& inputs, const Array& out_info) { - Tensor out = topi::cast( binary_scalar_op(attrs, inputs[0], [](Expr x, Expr y) { return x == y; }), - out_info[0]->dtype - ); + out_info[0]->dtype); return Array{ out }; }) .set_attr("FGradient", MakeZeroGradNodes); @@ -837,8 +835,7 @@ NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__not_equal_scalar__) Tensor out = topi::cast( binary_scalar_op(attrs, inputs[0], [](Expr x, Expr y) { return x != y; }), - 
out_info[0]->dtype - ); + out_info[0]->dtype); return Array{ out }; }) .set_attr("FGradient", MakeZeroGradNodes); @@ -855,8 +852,7 @@ NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__greater_scalar__) Tensor out = topi::cast( binary_scalar_op(attrs, inputs[0], [](Expr x, Expr y) { return x > y; }), - out_info[0]->dtype - ); + out_info[0]->dtype); return Array{ out }; }) .set_attr("FGradient", MakeZeroGradNodes); @@ -873,8 +869,7 @@ NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__greater_equal_scalar__) Tensor out = topi::cast( binary_scalar_op(attrs, inputs[0], [](Expr x, Expr y) { return x >= y; }), - out_info[0]->dtype - ); + out_info[0]->dtype); return Array{ out }; }) .set_attr("FGradient", MakeZeroGradNodes); @@ -891,8 +886,7 @@ NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__less_scalar__) Tensor out = topi::cast( binary_scalar_op(attrs, inputs[0], [](Expr x, Expr y) { return x < y; }), - out_info[0]->dtype - ); + out_info[0]->dtype); return Array{ out }; }) .set_attr("FGradient", MakeZeroGradNodes); @@ -909,8 +903,7 @@ NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__less_equal_scalar__) Tensor out = topi::cast( binary_scalar_op(attrs, inputs[0], [](Expr x, Expr y) { return x <= y; }), - out_info[0]->dtype - ); + out_info[0]->dtype); return Array{ out }; }) .set_attr("FGradient", MakeZeroGradNodes); diff --git a/python/tvm/relay/frontend/mxnet.py b/python/tvm/relay/frontend/mxnet.py index c5a6ae8e8b4d..727fa5828aef 100644 --- a/python/tvm/relay/frontend/mxnet.py +++ b/python/tvm/relay/frontend/mxnet.py @@ -401,7 +401,7 @@ def _mx_box_nms(inputs, attrs): if out_format != 'corner': raise RuntimeError('out_format %s is not supported.' % out_format) - ret =_op.vision.get_valid_counts(inputs[0], score_threshold=valid_thresh) + ret = _op.vision.get_valid_counts(inputs[0], score_threshold=valid_thresh) nms_out = _op.vision.nms(ret[1], ret[0], iou_threshold=overlap_thresh, force_suppress=force_suppress, diff --git a/topi/include/topi/nn/l2_normalize.h b/topi/include/topi/nn/l2_normalize.h index e022d76871a0..4f9bdb61ab70 100644 --- a/topi/include/topi/nn/l2_normalize.h +++ b/topi/include/topi/nn/l2_normalize.h @@ -33,7 +33,7 @@ inline Tensor l2_normalize(const Tensor& data, for (size_t i = 0; i < axis.size(); ++i) { int ax = topi::detail::GetConstInt(axis[i]); CHECK_LT(ax, data->shape.size()) << - "Axis " << ax << " exceeds input data dim " << + "Axis " << ax << " exceeds input data dim " << data->shape.size(); } auto input_shape = data->shape; From 6819dc38a5f915850b434b2a1b852a382380c069 Mon Sep 17 00:00:00 2001 From: Wang Date: Mon, 14 Jan 2019 17:43:59 -0800 Subject: [PATCH 20/43] Add cuda schedule for get_valid_counts --- topi/python/topi/cuda/vision.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/topi/python/topi/cuda/vision.py b/topi/python/topi/cuda/vision.py index 17497abc0d8b..e3bc0fb9d547 100644 --- a/topi/python/topi/cuda/vision.py +++ b/topi/python/topi/cuda/vision.py @@ -162,3 +162,20 @@ def traverse(op): scheduled_ops.append(op) traverse(outs[0].op) return s + +@generic.schedule_get_valid_counts.register(["cuda", "gpu"]) +def schedule_get_valid_counts(outs): + """Schedule for get_valid_counts operator. + + Parameters + ---------- + outs: Array of Tensor + The computation graph description of get_valid_counts + in the format of an array of tensors. + + Returns + ------- + s: Schedule + The computation schedule for the op. 
+ """ + return _default_schedule(outs) From 8eaff5c82e732d83bfe416f81b6cccd24e04d5e7 Mon Sep 17 00:00:00 2001 From: Wang Date: Tue, 15 Jan 2019 10:10:20 -0800 Subject: [PATCH 21/43] Fix tutorial --- tutorials/nnvm/deploy_ssd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tutorials/nnvm/deploy_ssd.py b/tutorials/nnvm/deploy_ssd.py index f7e3b19f9767..1a71c96eaa0c 100644 --- a/tutorials/nnvm/deploy_ssd.py +++ b/tutorials/nnvm/deploy_ssd.py @@ -165,4 +165,4 @@ def display(img, out, thresh=0.5): plt.show() image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) -display(image, tvm_output.asnumpy()[0], thresh=0.45) \ No newline at end of file +display(image, tvm_output.asnumpy()[0], thresh=0.45) From 742376221ba4a79730e834a6a4b4023033e5b9da Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 17 Jan 2019 22:16:24 -0800 Subject: [PATCH 22/43] Fix ctx_list --- nnvm/tests/python/compiler/test_top_level4.py | 82 ++++++++++--------- tutorials/nnvm/deploy_ssd.py | 2 +- 2 files changed, 43 insertions(+), 41 deletions(-) diff --git a/nnvm/tests/python/compiler/test_top_level4.py b/nnvm/tests/python/compiler/test_top_level4.py index 38646b01a4c9..87620c8b3acf 100644 --- a/nnvm/tests/python/compiler/test_top_level4.py +++ b/nnvm/tests/python/compiler/test_top_level4.py @@ -528,14 +528,13 @@ def verify_multibox_prior(dshape, sizes=(1,), ratios=(1,), steps=(-1, -1), if clip: np_out = np.clip(np_out, 0, 1) - target = "llvm" - ctx = tvm.cpu() - graph, lib, _ = nnvm.compiler.build(out, target, {"data": dshape}) - m = graph_runtime.create(graph, lib, ctx) - m.set_input("data", np.random.uniform(size=dshape).astype(dtype)) - m.run() - out = m.get_output(0, tvm.nd.empty(np_out.shape, dtype)) - tvm.testing.assert_allclose(out.asnumpy(), np_out, atol=1e-5, rtol=1e-5) + for target, ctx in ctx_list(): + graph, lib, _ = nnvm.compiler.build(out, target, {"data": dshape}) + m = graph_runtime.create(graph, lib, ctx) + m.set_input("data", np.random.uniform(size=dshape).astype(dtype)) + m.run() + out = m.get_output(0, tvm.nd.empty(np_out.shape, dtype)) + tvm.testing.assert_allclose(out.asnumpy(), np_out, atol=1e-5, rtol=1e-5) def test_multibox_prior(): verify_multibox_prior((1, 3, 50, 50)) @@ -562,17 +561,18 @@ def test_multibox_transform_loc(): [0, 0.44999999, 1, 1, 1, 1], [0, 0.30000001, 0, 0, 0.22903419, 0.20435292]]]) - target = "llvm" dtype = "float32" - ctx = tvm.cpu() - graph, lib, _ = nnvm.compiler.build(out, target, {"cls_prob": (batch_size, num_anchors, num_classes), - "loc_preds": (batch_size, num_anchors * 4), - "anchors": (1, num_anchors, 4)}) - m = graph_runtime.create(graph, lib, ctx) - m.set_input(**{"cls_prob": np_cls_prob.astype(dtype), "loc_preds": np_loc_preds.astype(dtype), "anchors": np_anchors.astype(dtype)}) - m.run() - out = m.get_output(0, tvm.nd.empty(expected_np_out.shape, dtype)) - tvm.testing.assert_allclose(out.asnumpy(), expected_np_out, atol=1e-5, rtol=1e-5) + for target, ctx in ctx_list(): + if target == "cuda": + continue + graph, lib, _ = nnvm.compiler.build(out, target, {"cls_prob": (batch_size, num_anchors, num_classes), + "loc_preds": (batch_size, num_anchors * 4), + "anchors": (1, num_anchors, 4)}) + m = graph_runtime.create(graph, lib, ctx) + m.set_input(**{"cls_prob": np_cls_prob.astype(dtype), "loc_preds": np_loc_preds.astype(dtype), "anchors": np_anchors.astype(dtype)}) + m.run() + out = m.get_output(0, tvm.nd.empty(expected_np_out.shape, dtype)) + tvm.testing.assert_allclose(out.asnumpy(), expected_np_out, atol=1e-5, rtol=1e-5) def verify_get_valid_counts(dshape, 
score_threshold): dtype = "float32" @@ -594,19 +594,20 @@ def verify_get_valid_counts(dshape, score_threshold): for k in range(elem_length): np_out2[i, j, k] = -1 - target = "llvm" - ctx = tvm.cpu() - data = sym.Variable("data", dtype=dtype) - valid_counts, inter_data = sym.get_valid_counts(data, score_threshold=score_threshold) - out = sym.Group([valid_counts, inter_data]) - graph, lib, _ = nnvm.compiler.build(out, target, {"data": dshape}) - m = graph_runtime.create(graph, lib, ctx) - m.set_input("data", np_data) - m.run() - out1 = m.get_output(0, tvm.nd.empty(np_out1.shape, "int32")) - out2 = m.get_output(1, tvm.nd.empty(dshape, dtype)) - tvm.testing.assert_allclose(out1.asnumpy(), np_out1, rtol=1e-3) - tvm.testing.assert_allclose(out2.asnumpy(), np_out2, rtol=1e-3) + for target, ctx in ctx_list(): + if target == "cuda": + continue + data = sym.Variable("data", dtype=dtype) + valid_counts, inter_data = sym.get_valid_counts(data, score_threshold=score_threshold) + out = sym.Group([valid_counts, inter_data]) + graph, lib, _ = nnvm.compiler.build(out, target, {"data": dshape}) + m = graph_runtime.create(graph, lib, ctx) + m.set_input("data", np_data) + m.run() + out1 = m.get_output(0, tvm.nd.empty(np_out1.shape, "int32")) + out2 = m.get_output(1, tvm.nd.empty(dshape, dtype)) + tvm.testing.assert_allclose(out1.asnumpy(), np_out1, rtol=1e-3) + tvm.testing.assert_allclose(out2.asnumpy(), np_out2, rtol=1e-3) def test_get_valid_counts(): @@ -633,15 +634,16 @@ def test_nms(): [-1, -1, -1, -1, -1, -1], [-1, -1, -1, -1, -1, -1], [-1, -1, -1, -1, -1, -1]]]) - target = "llvm" - ctx = tvm.cpu() - graph, lib, _ = nnvm.compiler.build(out, target, {"data": dshape, "valid_count": (dshape[0],)}, - dtype={"data": "float32", "valid_count": "int32"}) - m = graph_runtime.create(graph, lib, ctx) - m.set_input(**{"data": np_data, "valid_count": np_valid_count}) - m.run() - out = m.get_output(0, tvm.nd.empty(np_result.shape, "float32")) - tvm.testing.assert_allclose(out.asnumpy(), np_result, atol=1e-5, rtol=1e-5) + for target, ctx in ctx_list(): + if target == "cuda": + continue + graph, lib, _ = nnvm.compiler.build(out, target, {"data": dshape, "valid_count": (dshape[0],)}, + dtype={"data": "float32", "valid_count": "int32"}) + m = graph_runtime.create(graph, lib, ctx) + m.set_input(**{"data": np_data, "valid_count": np_valid_count}) + m.run() + out = m.get_output(0, tvm.nd.empty(np_result.shape, "float32")) + tvm.testing.assert_allclose(out.asnumpy(), np_result, atol=1e-5, rtol=1e-5) def np_slice_like(np_data, np_shape_like, axis=[]): begin_idx = [0 for _ in np_data.shape] diff --git a/tutorials/nnvm/deploy_ssd.py b/tutorials/nnvm/deploy_ssd.py index 1a71c96eaa0c..f7e3b19f9767 100644 --- a/tutorials/nnvm/deploy_ssd.py +++ b/tutorials/nnvm/deploy_ssd.py @@ -165,4 +165,4 @@ def display(img, out, thresh=0.5): plt.show() image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) -display(image, tvm_output.asnumpy()[0], thresh=0.45) +display(image, tvm_output.asnumpy()[0], thresh=0.45) \ No newline at end of file From b3c8a7c32a34b34883d32978b38ef758cd76fbea Mon Sep 17 00:00:00 2001 From: Wang Date: Fri, 18 Jan 2019 10:27:49 -0800 Subject: [PATCH 23/43] Add install gluoncv --- nnvm/tests/python/compiler/test_top_level4.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/nnvm/tests/python/compiler/test_top_level4.py b/nnvm/tests/python/compiler/test_top_level4.py index 87620c8b3acf..f3c297e5f6e9 100644 --- a/nnvm/tests/python/compiler/test_top_level4.py +++ b/nnvm/tests/python/compiler/test_top_level4.py @@ -529,6 +529,8 @@ 
def verify_multibox_prior(dshape, sizes=(1,), ratios=(1,), steps=(-1, -1), np_out = np.clip(np_out, 0, 1) for target, ctx in ctx_list(): + if target == "cuda": + continue graph, lib, _ = nnvm.compiler.build(out, target, {"data": dshape}) m = graph_runtime.create(graph, lib, ctx) m.set_input("data", np.random.uniform(size=dshape).astype(dtype)) From 986c4f7b4cf02b2317dcea4c8a5c9b194e45515f Mon Sep 17 00:00:00 2001 From: Wang Date: Sun, 27 Jan 2019 21:38:50 -0800 Subject: [PATCH 24/43] Disable box_nms in frontend test --- tests/python/frontend/mxnet/test_forward.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/python/frontend/mxnet/test_forward.py b/tests/python/frontend/mxnet/test_forward.py index b6646b3c2a2f..fb975c11add0 100644 --- a/tests/python/frontend/mxnet/test_forward.py +++ b/tests/python/frontend/mxnet/test_forward.py @@ -257,6 +257,7 @@ def verify(start, stop, step): verify(20, 1, -1) verify(20, 1, -1.5) +<<<<<<< HEAD <<<<<<< HEAD def _mx_symbol(F, op_name, inputs): op = getattr(F, op_name) From e7df94c3d024fe842bfea63af8c265a20592e600 Mon Sep 17 00:00:00 2001 From: Wang Date: Mon, 28 Jan 2019 11:59:24 -0800 Subject: [PATCH 25/43] Fix test get_valid_counts numpy result --- topi/tests/python/test_topi_vision.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/topi/tests/python/test_topi_vision.py b/topi/tests/python/test_topi_vision.py index fd52f13795a1..d7dbab0bca4e 100644 --- a/topi/tests/python/test_topi_vision.py +++ b/topi/tests/python/test_topi_vision.py @@ -22,7 +22,7 @@ def verify_get_valid_counts(dshape, score_threshold): inter_idx = 0 for j in range(num_anchor): score = np_data[i, j, 1] - if score >= score_threshold: + if score > score_threshold: for k in range(elem_length): np_out2[i, inter_idx, k] = np_data[i, j, k] np_out1[i] += 1 From 45b6aacf3b9ed6229105d2df31edb97b2dc8fa8a Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 31 Jan 2019 12:07:48 -0800 Subject: [PATCH 26/43] Rename ssd tutorial --- tutorials/nnvm/{deploy_ssd.py => deploy_ssd_gluoncv.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tutorials/nnvm/{deploy_ssd.py => deploy_ssd_gluoncv.py} (100%) diff --git a/tutorials/nnvm/deploy_ssd.py b/tutorials/nnvm/deploy_ssd_gluoncv.py similarity index 100% rename from tutorials/nnvm/deploy_ssd.py rename to tutorials/nnvm/deploy_ssd_gluoncv.py From 26ece34043173adf184591c6409d4d1f2b14847a Mon Sep 17 00:00:00 2001 From: Wang Date: Thu, 31 Jan 2019 12:14:58 -0800 Subject: [PATCH 27/43] Fix rebase --- tutorials/nnvm/deploy_ssd_gluoncv.py | 200 ++++++++++----------------- tutorials/nnvm/deploy_ssd_mxnet.py | 168 ++++++++++++++++++++++ 2 files changed, 240 insertions(+), 128 deletions(-) create mode 100644 tutorials/nnvm/deploy_ssd_mxnet.py diff --git a/tutorials/nnvm/deploy_ssd_gluoncv.py b/tutorials/nnvm/deploy_ssd_gluoncv.py index f7e3b19f9767..d83d1f86b75e 100644 --- a/tutorials/nnvm/deploy_ssd_gluoncv.py +++ b/tutorials/nnvm/deploy_ssd_gluoncv.py @@ -1,26 +1,20 @@ """ Deploy Single Shot Multibox Detector(SSD) model =============================================== -**Author**: `Yao Wang `_, \ -`Leyuan Wang `_ +**Author**: `Yao Wang `_ This article is an introductory tutorial to deploy SSD models with TVM. -We will use mxnet pretrained SSD model with Resnet50 as body network and -convert it to NNVM graph; +We will use GluonCV pre-trained SSD model and convert it to NNVM graph. 
""" -import os -import zipfile import tvm -import mxnet as mx -import cv2 -import numpy as np +from matplotlib import pyplot as plt from nnvm import compiler from nnvm.frontend import from_mxnet +from nnvm.testing.config import ctx_list from tvm import relay -from tvm.contrib.download import download from tvm.contrib import graph_runtime -from mxnet.model import load_checkpoint +from gluoncv import model_zoo, data, utils ###################################################################### @@ -33,136 +27,86 @@ # echo "set(USE_SORT ON)" > config.mk # make -j8 # - -model_name = "ssd_resnet50_512" -model_file = "%s.zip" % model_name -test_image = "dog.jpg" +# .. note:: +# +# Currently we support compiling SSD on CPU only. +# GPU support is in progress. +# +# To get best inference performance on CPU, change +# target argument according to your device and +# follow the :ref:`tune_nnvm_x86` to tune x86 CPU and +# :ref:`tune_nnvm_arm` for arm cpu. +# +# SSD with VGG as body network is not supported yet since +# x86 conv2d schedule doesn't support dilation. + +supported_model = [ + 'ssd_512_resnet18_v1_voc', + 'ssd_512_resnet18_v1_coco', + 'ssd_512_resnet50_v1_voc', + 'ssd_512_resnet50_v1_coco', + 'ssd_512_resnet101_v2_voc', + 'ssd_512_mobilenet1_0_voc', + 'ssd_512_mobilenet1_0_coco', +] + +model_name = "ssd_512_resnet50_v1_voc" dshape = (1, 3, 512, 512) dtype = "float32" - -# Target settings -# Use these commented settings to build for cuda. -#target = 'cuda' -#ctx = tvm.gpu(0) -# Use these commented settings to build for opencl. -#target = 'opencl' -#ctx = tvm.opencl(0) -target = "llvm" -ctx = tvm.cpu() +target_list = ctx_list() +frontend_list = ["nnvm", "relay"] ###################################################################### -# Download MXNet SSD pre-trained model and demo image -# --------------------------------------------------- -# Pre-trained model available at -# https://github.com/apache/incubator-\mxnet/tree/master/example/ssd - -model_url = "https://github.com/zhreshold/mxnet-ssd/releases/download/v0.6/" \ - "resnet50_ssd_512_voc0712_trainval.zip" -image_url = "https://cloud.githubusercontent.com/assets/3307514/20012567/" \ - "cbb60336-a27d-11e6-93ff-cbc3f09f5c9e.jpg" -inference_symbol_folder = \ - "c1904e900848df4548ce5dfb18c719c7-a28c4856c827fe766aa3da0e35bad41d44f0fb26" -inference_symbol_url = "https://gist.github.com/kevinthesun/c1904e900848df4548ce5dfb18c719c7/" \ - "archive/a28c4856c827fe766aa3da0e35bad41d44f0fb26.zip" - -dir = "ssd_model" -if not os.path.exists(dir): - os.makedirs(dir) -model_file_path = "%s/%s" % (dir, model_file) -test_image_path = "%s/%s" % (dir, test_image) -inference_symbol_path = "%s/inference_model.zip" % dir -download(model_url, model_file_path) -download(image_url, test_image_path) -download(inference_symbol_url, inference_symbol_path) - -zip_ref = zipfile.ZipFile(model_file_path, 'r') -zip_ref.extractall(dir) -zip_ref.close() -zip_ref = zipfile.ZipFile(inference_symbol_path) -zip_ref.extractall(dir) -zip_ref.close() +# Download and pre-process demo image + +im_fname = utils.download('https://github.com/dmlc/web-data/blob/master/' + + 'gluoncv/detection/street_small.jpg?raw=true', + path='street_small.jpg') +x, img = data.transforms.presets.ssd.load_test(im_fname, short=512) ###################################################################### # Convert and compile model with NNVM or Relay for CPU. 
-sym = mx.sym.load("%s/%s/ssd_resnet50_inference.json" % (dir, inference_symbol_folder)) -_, arg_params, aux_params = load_checkpoint("%s/%s" % (dir, model_name), 0) - -import argparse -parser = argparse.ArgumentParser() -parser.add_argument( - "-f", "--frontend", - help="Frontend for compilation, nnvm or relay", - type=str, - default="nnvm") -args = parser.parse_args() -if args.frontend == "relay": - net, params = relay.frontend.from_mxnet(sym, {"data": dshape}, arg_params=arg_params, \ - aux_params=aux_params) - with relay.build_config(opt_level=3): - graph, lib, params = relay.build(net, target, params=params) -elif args.frontend == "nnvm": - net, params = from_mxnet(sym, arg_params, aux_params) - with compiler.build_config(opt_level=3): - graph, lib, params = compiler.build( - net, target, {"data": dshape}, params=params) -else: - parser.print_help() - parser.exit() +block = model_zoo.get_model(model_name, pretrained=True) + +def compile(frontend, target): + if frontend == "relay": + net, params = relay.frontend.from_mxnet(block, {"data": dshape}) + with relay.build_config(opt_level=3): + graph, lib, params = relay.build(net, target, params=params) + else: + net, params = from_mxnet(block) + with compiler.build_config(opt_level=3): + graph, lib, params = compiler.build( + net, target, {"data": dshape}, params=params) + return graph, lib, params ###################################################################### # Create TVM runtime and do inference -# Preprocess image -image = cv2.imread(test_image_path) -img_data = cv2.resize(image, (dshape[2], dshape[3])) -img_data = img_data[:, :, (2, 1, 0)].astype(np.float32) -img_data -= np.array([123, 117, 104]) -img_data = np.transpose(np.array(img_data), (2, 0, 1)) -img_data = np.expand_dims(img_data, axis=0) -# Build TVM runtime -m = graph_runtime.create(graph, lib, ctx) -m.set_input('data', tvm.nd.array(img_data.astype(dtype))) -m.set_input(**params) -# execute -m.run() -# get outputs -tvm_output = m.get_output(0) - +def run(graph, lib, params, ctx): + # Build TVM runtime + m = graph_runtime.create(graph, lib, ctx) + tvm_input = tvm.nd.array(x.asnumpy(), ctx=ctx) + m.set_input('data', tvm_input) + m.set_input(**params) + # execute + m.run() + # get outputs + class_IDs, scores, bounding_boxs = m.get_output(0), m.get_output(1), m.get_output(2) + return class_IDs, scores, bounding_boxs + +for target, ctx in target_list: + if target == "cuda": + print("GPU not supported yet, skip.") + continue + for frontend in frontend_list: + graph, lib, params = compile(frontend, target) + class_IDs, scores, bounding_boxs = run(graph, lib, params, ctx) ###################################################################### # Display result -class_names = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", - "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", - "sheep", "sofa", "train", "tvmonitor"] -def display(img, out, thresh=0.5): - import random - import matplotlib as mpl - import matplotlib.pyplot as plt - mpl.rcParams['figure.figsize'] = (10, 10) - pens = dict() - plt.clf() - plt.imshow(img) - for det in out: - cid = int(det[0]) - if cid < 0: - continue - score = det[1] - if score < thresh: - continue - if cid not in pens: - pens[cid] = (random.random(), random.random(), random.random()) - scales = [img.shape[1], img.shape[0]] * 2 - xmin, ymin, xmax, ymax = [int(p * s) for p, s in zip(det[2:6].tolist(), scales)] - rect = plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, fill=False, - 
edgecolor=pens[cid], linewidth=3) - plt.gca().add_patch(rect) - text = class_names[cid] - plt.gca().text(xmin, ymin-2, '{:s} {:.3f}'.format(text, score), - bbox=dict(facecolor=pens[cid], alpha=0.5), - fontsize=12, color='white') - plt.show() - -image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) -display(image, tvm_output.asnumpy()[0], thresh=0.45) \ No newline at end of file +ax = utils.viz.plot_bbox(img, bounding_boxs.asnumpy()[0], scores.asnumpy()[0], + class_IDs.asnumpy()[0], class_names=block.classes) +plt.show() diff --git a/tutorials/nnvm/deploy_ssd_mxnet.py b/tutorials/nnvm/deploy_ssd_mxnet.py new file mode 100644 index 000000000000..1a71c96eaa0c --- /dev/null +++ b/tutorials/nnvm/deploy_ssd_mxnet.py @@ -0,0 +1,168 @@ +""" +Deploy Single Shot Multibox Detector(SSD) model +=============================================== +**Author**: `Yao Wang `_, \ +`Leyuan Wang `_ + +This article is an introductory tutorial to deploy SSD models with TVM. +We will use mxnet pretrained SSD model with Resnet50 as body network and +convert it to NNVM graph; +""" +import os +import zipfile +import tvm +import mxnet as mx +import cv2 +import numpy as np + +from nnvm import compiler +from nnvm.frontend import from_mxnet +from tvm import relay +from tvm.contrib.download import download +from tvm.contrib import graph_runtime +from mxnet.model import load_checkpoint + + +###################################################################### +# Preliminary and Set parameters +# ------------------------------ +# We should build TVM with sort support, in TVM root directory +# +# .. code-block:: bash +# +# echo "set(USE_SORT ON)" > config.mk +# make -j8 +# + +model_name = "ssd_resnet50_512" +model_file = "%s.zip" % model_name +test_image = "dog.jpg" +dshape = (1, 3, 512, 512) +dtype = "float32" + +# Target settings +# Use these commented settings to build for cuda. +#target = 'cuda' +#ctx = tvm.gpu(0) +# Use these commented settings to build for opencl. +#target = 'opencl' +#ctx = tvm.opencl(0) +target = "llvm" +ctx = tvm.cpu() + +###################################################################### +# Download MXNet SSD pre-trained model and demo image +# --------------------------------------------------- +# Pre-trained model available at +# https://github.com/apache/incubator-\mxnet/tree/master/example/ssd + +model_url = "https://github.com/zhreshold/mxnet-ssd/releases/download/v0.6/" \ + "resnet50_ssd_512_voc0712_trainval.zip" +image_url = "https://cloud.githubusercontent.com/assets/3307514/20012567/" \ + "cbb60336-a27d-11e6-93ff-cbc3f09f5c9e.jpg" +inference_symbol_folder = \ + "c1904e900848df4548ce5dfb18c719c7-a28c4856c827fe766aa3da0e35bad41d44f0fb26" +inference_symbol_url = "https://gist.github.com/kevinthesun/c1904e900848df4548ce5dfb18c719c7/" \ + "archive/a28c4856c827fe766aa3da0e35bad41d44f0fb26.zip" + +dir = "ssd_model" +if not os.path.exists(dir): + os.makedirs(dir) +model_file_path = "%s/%s" % (dir, model_file) +test_image_path = "%s/%s" % (dir, test_image) +inference_symbol_path = "%s/inference_model.zip" % dir +download(model_url, model_file_path) +download(image_url, test_image_path) +download(inference_symbol_url, inference_symbol_path) + +zip_ref = zipfile.ZipFile(model_file_path, 'r') +zip_ref.extractall(dir) +zip_ref.close() +zip_ref = zipfile.ZipFile(inference_symbol_path) +zip_ref.extractall(dir) +zip_ref.close() + +###################################################################### +# Convert and compile model with NNVM or Relay for CPU. 
+ +sym = mx.sym.load("%s/%s/ssd_resnet50_inference.json" % (dir, inference_symbol_folder)) +_, arg_params, aux_params = load_checkpoint("%s/%s" % (dir, model_name), 0) + +import argparse +parser = argparse.ArgumentParser() +parser.add_argument( + "-f", "--frontend", + help="Frontend for compilation, nnvm or relay", + type=str, + default="nnvm") +args = parser.parse_args() +if args.frontend == "relay": + net, params = relay.frontend.from_mxnet(sym, {"data": dshape}, arg_params=arg_params, \ + aux_params=aux_params) + with relay.build_config(opt_level=3): + graph, lib, params = relay.build(net, target, params=params) +elif args.frontend == "nnvm": + net, params = from_mxnet(sym, arg_params, aux_params) + with compiler.build_config(opt_level=3): + graph, lib, params = compiler.build( + net, target, {"data": dshape}, params=params) +else: + parser.print_help() + parser.exit() + +###################################################################### +# Create TVM runtime and do inference + +# Preprocess image +image = cv2.imread(test_image_path) +img_data = cv2.resize(image, (dshape[2], dshape[3])) +img_data = img_data[:, :, (2, 1, 0)].astype(np.float32) +img_data -= np.array([123, 117, 104]) +img_data = np.transpose(np.array(img_data), (2, 0, 1)) +img_data = np.expand_dims(img_data, axis=0) +# Build TVM runtime +m = graph_runtime.create(graph, lib, ctx) +m.set_input('data', tvm.nd.array(img_data.astype(dtype))) +m.set_input(**params) +# execute +m.run() +# get outputs +tvm_output = m.get_output(0) + + +###################################################################### +# Display result + +class_names = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", + "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", + "sheep", "sofa", "train", "tvmonitor"] +def display(img, out, thresh=0.5): + import random + import matplotlib as mpl + import matplotlib.pyplot as plt + mpl.rcParams['figure.figsize'] = (10, 10) + pens = dict() + plt.clf() + plt.imshow(img) + for det in out: + cid = int(det[0]) + if cid < 0: + continue + score = det[1] + if score < thresh: + continue + if cid not in pens: + pens[cid] = (random.random(), random.random(), random.random()) + scales = [img.shape[1], img.shape[0]] * 2 + xmin, ymin, xmax, ymax = [int(p * s) for p, s in zip(det[2:6].tolist(), scales)] + rect = plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, fill=False, + edgecolor=pens[cid], linewidth=3) + plt.gca().add_patch(rect) + text = class_names[cid] + plt.gca().text(xmin, ymin-2, '{:s} {:.3f}'.format(text, score), + bbox=dict(facecolor=pens[cid], alpha=0.5), + fontsize=12, color='white') + plt.show() + +image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) +display(image, tvm_output.asnumpy()[0], thresh=0.45) From 2f8aef8653e3afec343dbf729b579ea8fbc6f04f Mon Sep 17 00:00:00 2001 From: Wang Date: Tue, 19 Feb 2019 13:01:45 +0800 Subject: [PATCH 28/43] Refactor nms --- topi/python/topi/cuda/nms.py | 9 ++++-- topi/python/topi/vision/nms.py | 39 ++++++++++++++++--------- topi/python/topi/vision/ssd/multibox.py | 2 +- topi/tests/python/test_topi_vision.py | 15 ++++++++-- 4 files changed, 46 insertions(+), 19 deletions(-) diff --git a/topi/python/topi/cuda/nms.py b/topi/python/topi/cuda/nms.py index 900a8e856ddb..26dc5704d9d9 100644 --- a/topi/python/topi/cuda/nms.py +++ b/topi/python/topi/cuda/nms.py @@ -182,8 +182,8 @@ def calculate_overlap(out_tensor, box_a_idx, box_b_idx): @nms.register(["cuda", "gpu"]) -def nms_gpu(data, valid_count, iou_threshold=0.5, 
force_suppress=False, - topk=-1, id_index=0, do_rearrange=False): +def nms_gpu(data, valid_count, return_indices, iou_threshold=0.5, force_suppress=False, + topk=-1, id_index=0, invalid_to_bottom=False): """Non-maximum suppression operator for object detection. Parameters @@ -196,6 +196,9 @@ def nms_gpu(data, valid_count, iou_threshold=0.5, force_suppress=False, valid_count : tvm.Tensor 1-D tensor for valid number of boxes. + return_indices : boolean + Whether to return box indices in input data. + iou_threshold : optional, float Non-maximum suppression threshold. @@ -208,7 +211,7 @@ def nms_gpu(data, valid_count, iou_threshold=0.5, force_suppress=False, id_index : optional, int index of the class categories, -1 to disable. - do_rearrange : optional, boolean + invalid_to_bottom : optional, boolean Whether to move all valid bounding boxes to the top. Returns diff --git a/topi/python/topi/vision/nms.py b/topi/python/topi/vision/nms.py index 66a4b0df9c30..9c8fae23fa93 100644 --- a/topi/python/topi/vision/nms.py +++ b/topi/python/topi/vision/nms.py @@ -143,16 +143,21 @@ def hybrid_nms(data, sorted_index, valid_count, Returns ------- - valid_count : tvm.Tensor or numpy NDArray - 1-D tensor for valid number of boxes. + output : tvm.Tensor + 3-D tensor with shape [batch_size, num_anchors, 6]. + + box_indices: tvm.Tensor + 2-D tensor with shape [batch_size, num_anchors]. """ batch_size = data.shape[0] num_anchors = data.shape[1] box_data_length = data.shape[2] + box_indices = output_tensor((batch_size, num_anchors), "int32") output = output_tensor((batch_size, num_anchors, box_data_length,), data.dtype) + for i in parallel(batch_size): if iou_threshold > 0: if valid_count[i] > 0: @@ -163,10 +168,12 @@ def hybrid_nms(data, sorted_index, valid_count, for j in range(nkeep): for k in range(box_data_length): output[i, j, k] = data[i, sorted_index[i, j], k] + box_indices[i, j] = sorted_index[i, j] if 0 < topk < valid_count[i]: for j in range(valid_count[i] - nkeep): for k in range(box_data_length): output[i, j + nkeep, k] = -1.0 + box_indices[i, j + nkeep] = -1 # Apply nms for j in range(valid_count[i]): if output[i, j, 0] >= 0: @@ -197,20 +204,23 @@ def hybrid_nms(data, sorted_index, valid_count, iou = 0.0 if u <= 0.0 else area / u if iou >= iou_threshold: output[i, k, 0] = -1.0 + box_indices[i, k] = -1 else: for j in range(valid_count[i]): for k in range(box_data_length): output[i, j, k] = data[i, j, k] + box_indices[i, j] = j # Set invalid entry to be -1 for j in range(num_anchors - valid_count[i]): for k in range(box_data_length): output[i, j + valid_count[i], k] = -1.0 - return output + box_indices[i, j + valid_count[i]] = -1 + return output, box_indices @tvm.target.generic_func -def nms(data, valid_count, iou_threshold=0.5, force_suppress=False, - topk=-1, id_index=0, do_rearrange=False): +def nms(data, valid_count, return_indices, iou_threshold=0.5, force_suppress=False, + topk=-1, id_index=0, invalid_to_bottom=False): """Non-maximum suppression operator for object detection. Parameters @@ -223,6 +233,9 @@ def nms(data, valid_count, iou_threshold=0.5, force_suppress=False, valid_count : tvm.Tensor 1-D tensor for valid number of boxes. + return_indices : boolean + Whether to return box indices in input data. + iou_threshold : optional, float Non-maximum suppression threshold. @@ -235,7 +248,7 @@ def nms(data, valid_count, iou_threshold=0.5, force_suppress=False, id_index : optional, int index of the class categories, -1 to disable. 
- do_rearrange : optional, boolean + invalid_to_bottom : optional, boolean Whether to move all valid bounding boxes to the top. Returns @@ -288,12 +301,12 @@ def nms(data, valid_count, iou_threshold=0.5, force_suppress=False, in_buffers=[score_tensor_buf, valid_count_buf], out_buffers=sort_tensor_buf, name="nms_sort") - out = hybrid_nms(data, sort_tensor, valid_count, - tvm.const(iou_threshold, dtype="float32"), - tvm.const(force_suppress, dtype="bool"), - tvm.const(topk, dtype="int32"), - tvm.const(id_index, dtype="int32")) - if do_rearrange: + out, box_indices = hybrid_nms(data, sort_tensor, valid_count, + tvm.const(iou_threshold, dtype="float32"), + tvm.const(force_suppress, dtype="bool"), + tvm.const(topk, dtype="int32"), + tvm.const(id_index, dtype="int32")) + if not return_indices and invalid_to_bottom: out = hybrid_rearrange_out(out) - return out + return box_indices if return_indices else out diff --git a/topi/python/topi/vision/ssd/multibox.py b/topi/python/topi/vision/ssd/multibox.py index f20a286960cc..c663d3873587 100644 --- a/topi/python/topi/vision/ssd/multibox.py +++ b/topi/python/topi/vision/ssd/multibox.py @@ -292,5 +292,5 @@ def multibox_detection(cls_prob, loc_pred, anchor, clip=True, threshold=0.01, nm """ inter_out = multibox_transform_loc(cls_prob, loc_pred, anchor, clip, threshold, variances) - out = nms(inter_out[0], inter_out[1], nms_threshold, force_suppress, nms_topk) + out = nms(inter_out[0], inter_out[1], False, nms_threshold, force_suppress, nms_topk) return out diff --git a/topi/tests/python/test_topi_vision.py b/topi/tests/python/test_topi_vision.py index d7dbab0bca4e..1f50199fcfba 100644 --- a/topi/tests/python/test_topi_vision.py +++ b/topi/tests/python/test_topi_vision.py @@ -63,6 +63,7 @@ def test_get_valid_counts(): def test_nms(): dshape = (1, 5, 6) + indices_dshape = (1, 5) data = tvm.placeholder(dshape, name="data") valid_count = tvm.placeholder((dshape[0],), dtype="int32", name="valid_count") nms_threshold = 0.7 @@ -76,6 +77,7 @@ def test_nms(): np_result = np.array([[[2, 0.9, 35, 61, 52, 79], [0, 0.8, 1, 20, 25, 45], [-1, -1, -1, -1, -1, -1], [-1, -1, -1, -1, -1, -1], [-1, -1, -1, -1, -1, -1]]]) + np_indices_result = np.array([[3, 0, -1, -1, -1]]) def check_device(device): ctx = tvm.context(device, 0) @@ -85,18 +87,27 @@ def check_device(device): print("Running on target: %s" % device) with tvm.target.create(device): if device == 'llvm': - out = nms(data, valid_count, nms_threshold, force_suppress, nms_topk) + out = nms(data, valid_count, False, nms_threshold, force_suppress, nms_topk) + indices_out = nms(data, valid_count, True, nms_threshold, force_suppress, nms_topk) else: - out = topi.cuda.nms(data, valid_count, nms_threshold, force_suppress, nms_topk) + out = topi.cuda.nms(data, valid_count, False, nms_threshold, force_suppress, nms_topk) + indices_out = topi.cuda.nms(data, valid_count, True, nms_threshold, force_suppress, nms_topk) s = topi.generic.schedule_nms(out) + indices_s = topi.generic.schedule_nms(indices_out) tvm_data = tvm.nd.array(np_data, ctx) tvm_valid_count = tvm.nd.array(np_valid_count, ctx) + tvm_out = tvm.nd.array(np.zeros(dshape, dtype=data.dtype), ctx) f = tvm.build(s, [data, valid_count, out], device) f(tvm_data, tvm_valid_count, tvm_out) tvm.testing.assert_allclose(tvm_out.asnumpy(), np_result, rtol=1e-4) + tvm_indices_out = tvm.nd.array(np.zeros(indices_dshape, dtype="int32"), ctx) + f = tvm.build(indices_s, [data, valid_count, indices_out], device) + f(tvm_data, tvm_valid_count, tvm_indices_out) + 
tvm.testing.assert_allclose(tvm_indices_out.asnumpy(), np_indices_result, rtol=1e-4) + for device in ['llvm']: check_device(device) From d30be86f743208839ef4bcd1918bce9198a0e6bb Mon Sep 17 00:00:00 2001 From: Wang Date: Tue, 19 Feb 2019 14:11:43 +0800 Subject: [PATCH 29/43] Rollback nnvm --- nnvm/include/nnvm/top/nn.h | 33 ++--- nnvm/include/nnvm/top/tensor.h | 15 -- nnvm/python/nnvm/frontend/mxnet.py | 77 ++-------- nnvm/python/nnvm/top/transform.py | 4 - nnvm/python/nnvm/top/vision.py | 35 ++--- nnvm/src/top/tensor/elemwise.cc | 102 ------------- nnvm/src/top/tensor/transform.cc | 85 ----------- nnvm/src/top/vision/nms.cc | 66 ++------- nnvm/tests/python/compiler/test_top_level4.py | 136 +++++------------- .../python/frontend/mxnet/test_forward.py | 1 + topi/python/topi/cuda/nms.py | 4 +- topi/python/topi/cuda/ssd/multibox.py | 4 +- topi/python/topi/vision/nms.py | 4 +- topi/python/topi/vision/ssd/multibox.py | 5 +- topi/tests/python/test_topi_vision.py | 14 +- 15 files changed, 89 insertions(+), 496 deletions(-) diff --git a/nnvm/include/nnvm/top/nn.h b/nnvm/include/nnvm/top/nn.h index 82f3230b4931..69d81a98cb4c 100644 --- a/nnvm/include/nnvm/top/nn.h +++ b/nnvm/include/nnvm/top/nn.h @@ -402,9 +402,9 @@ struct LayoutTransformParam : public dmlc::Parameter { DMLC_DECLARE_PARAMETER(LayoutTransformParam) { DMLC_DECLARE_FIELD(src_layout).set_default("__undef__") - .describe("Dimension ordering of data"); + .describe("Dimension ordering of data"); DMLC_DECLARE_FIELD(dst_layout).set_default("__undef__") - .describe("Dimension ordering of data."); + .describe("Dimension ordering of data."); } }; @@ -419,13 +419,13 @@ struct MultiBoxPriorParam : public dmlc::Parameter { DMLC_DECLARE_FIELD(sizes).set_default(Tuple({1.0})) .describe("List of sizes of generated MultiBoxPriores."); DMLC_DECLARE_FIELD(ratios).set_default(Tuple({1.0})) - .describe("List of aspect ratios of generated MultiBoxPriores."); + .describe("List of aspect ratios of generated MultiBoxPriores."); DMLC_DECLARE_FIELD(steps).set_default(Tuple({-1.0, -1.0})) - .describe("Priorbox step across y and x, -1 for auto calculation."); + .describe("Priorbox step across y and x, -1 for auto calculation."); DMLC_DECLARE_FIELD(offsets).set_default(Tuple({0.5, 0.5})) - .describe("Priorbox center offsets, y and x respectively."); + .describe("Priorbox center offsets, y and x respectively."); DMLC_DECLARE_FIELD(clip).set_default(false) - .describe("Whether to clip out-of-boundary boxes."); + .describe("Whether to clip out-of-boundary boxes."); } }; @@ -443,31 +443,20 @@ struct MultiBoxTransformLocParam : public dmlc::Parameter { - float score_threshold; - DMLC_DECLARE_PARAMETER(GetValidCountsParam) { - DMLC_DECLARE_FIELD(score_threshold).set_default(0.0) - .describe("Lower limit of score for valid bounding boxes."); - } -}; - struct NMSParam : public dmlc::Parameter { + bool return_indices; float iou_threshold; bool force_suppress; int topk; - int id_index; - bool do_rearrange; DMLC_DECLARE_PARAMETER(NMSParam) { + DMLC_DECLARE_FIELD(return_indices) + .describe("Whether to return box indices in input data."); DMLC_DECLARE_FIELD(iou_threshold).set_default(0.5) .describe("Non-maximum suppression threshold."); DMLC_DECLARE_FIELD(force_suppress).set_default(false) - .describe("Suppress all detections regardless of class_id."); + .describe("Suppress all detections regardless of class_id."); DMLC_DECLARE_FIELD(topk).set_default(-1) - .describe("Keep maximum top k detections before nms, -1 for no limit."); - DMLC_DECLARE_FIELD(id_index).set_default(0) - 
.describe("Axis index for id."); - DMLC_DECLARE_FIELD(do_rearrange).set_default(false) - .describe("Whether to move all valid bounding boxes to the top."); + .describe("Keep maximum top k detections before nms, -1 for no limit."); } }; diff --git a/nnvm/include/nnvm/top/tensor.h b/nnvm/include/nnvm/top/tensor.h index dc3c23a6198b..bed1b05984da 100644 --- a/nnvm/include/nnvm/top/tensor.h +++ b/nnvm/include/nnvm/top/tensor.h @@ -74,21 +74,6 @@ struct StridedSliceParam : public dmlc::Parameter { } }; -struct SliceAxisParam : public dmlc::Parameter { - int axis; - int begin; - int end; - - DMLC_DECLARE_PARAMETER(SliceAxisParam) { - DMLC_DECLARE_FIELD(axis) - .describe("Axis along which to be sliced."); - DMLC_DECLARE_FIELD(begin) - .describe("Index for begin of slice"); - DMLC_DECLARE_FIELD(end).set_default(0) - .describe("Index for end of the slice"); - } -}; - enum TypeFlag { kFloat32 = 0, kFloat64 = 1, diff --git a/nnvm/python/nnvm/frontend/mxnet.py b/nnvm/python/nnvm/frontend/mxnet.py index dffc8d960c88..2df67d9967ca 100644 --- a/nnvm/python/nnvm/frontend/mxnet.py +++ b/nnvm/python/nnvm/frontend/mxnet.py @@ -238,18 +238,18 @@ def _clip(inputs, attrs): def _contrib_multibox_detection(inputs, attrs): clip = _parse_bool_str(attrs, 'clip', default='True') - threshold = attrs.get('threshold', 0.01) - iou_threshold = attrs.get('nms_threshold', 0.5) + threshold = attrs.get('threshold') or 0.01 + nms_threshold = attrs.get('nms_threshold') or 0.5 force_suppress = _parse_bool_str(attrs, 'force_suppress', default='False') variances = tuple([float(x.strip()) for x in attrs.get('variances').strip('()').split(',')]) \ if attrs.get('variances') is not None else (0.1, 0.1, 0.2, 0.2) - topk = attrs.get('nms_topk', -1) + nms_topk = attrs.get('nms_topk') or -1 new_attrs0 = {'clip': clip, 'threshold': float(threshold), 'variances': variances} - new_attrs1 = {'iou_threshold': float(iou_threshold), 'force_suppress': force_suppress, - 'topk': int(topk)} + new_attrs1 = {'return_indices': False, 'iou_threshold': float(nms_threshold), + 'force_suppress': force_suppress, 'topk': int(nms_topk)} data, valid_count = _get_nnvm_op('multibox_transform_loc')(inputs[0], inputs[1], inputs[2], **new_attrs0) - return _get_nnvm_op('nms')(data, valid_count, **new_attrs1) + return _get_nnvm_op('non_max_suppression')(data, valid_count, **new_attrs1) def _elemwise_sum(inputs, _): new_attrs = {'num_args':len(inputs)} @@ -314,57 +314,6 @@ def _argmin(inputs, attrs): new_attrs['keepdims'] = _parse_bool_str(attrs, 'keepdims', default="False") return _get_nnvm_op(op_name)(*inputs, **new_attrs) -def _contrib_box_nms(inputs, attrs): - force_suppress = _parse_bool_str(attrs, 'force_suppress', default="False") - overlap_thresh = attrs.get('overlap_thresh', 0.5) - topk = attrs.get('topk', -1) - valid_thresh = attrs.get('valid_thresh', 0) - coord_start = attrs.get('coord_start', 2) - score_index = attrs.get('score_index', 1) - id_index = attrs.get('id_index', -1) - in_format = attrs.get('in_format', 'corner') - out_format = attrs.get('out_format', 'corner') - if int(coord_start) != 2: - _raise_not_supported('coord_start: %s' % coord_start, 'box_nms') - if int(score_index) != 1: - _raise_not_supported('score_index: %s' % score_index, 'box_nms') - if int(id_index) != -1 and int(id_index) != 0: - _raise_not_supported('id_index: %s' % id_index, 'box_nms') - if in_format != 'corner': - _raise_not_supported('in_format: %s' % in_format, 'box_nms') - if out_format != 'corner': - _raise_not_supported('out_format: %s' % out_format, 'box_nms') - - 
valid_counts, inter_out = \ - _get_nnvm_op('get_valid_counts')(inputs[0], score_threshold=valid_thresh) - nms_out = _get_nnvm_op('nms')(inter_out, valid_counts, - iou_threshold=overlap_thresh, - force_suppress=force_suppress, - topk=topk, id_index=id_index, - do_rearrange=True) - return nms_out - -def _slice_like(inputs, attrs): - op_name = 'slice_like' - axis = attrs.get('axes', ()) - return _get_nnvm_op(op_name)(inputs[0], inputs[1], axis=axis) - -def _slice_axis(inputs, attrs): - op_name, new_attrs = 'slice_axis', {} - new_attrs['axis'] = attrs.get('axis') - new_attrs['begin'] = attrs.get('begin') - new_attrs['end'] = 0 if attrs.get('end') == "None" else attrs.get('end') - return _get_nnvm_op(op_name)(inputs[0], **new_attrs) - -def _l2_normalize(inputs, attrs): - op_name, new_attrs = 'l2_normalize', {} - mode = attrs.get('mode', 'instance') - if mode != 'channel': - _raise_not_supported('mode: %s' % mode, 'L2Normalization') - new_attrs['eps'] = attrs.get('eps', 1e-10) - new_attrs['axis'] = 1 - return _get_nnvm_op(op_name)(inputs[0], **new_attrs) - _identity_list = ['__add_scalar__', '__add_symbol__', '__div_scalar__', '__div_symbol__', '__mul_scalar__', '__mul_symbol__', '__pow_scalar__', '__rdiv_scalar__', '__rpow_scalar__', @@ -373,9 +322,9 @@ def _l2_normalize(inputs, attrs): 'broadcast_sub', 'broadcast_to', 'cast', 'elemwise_add', 'elemwise_div', 'elemwise_mul', 'elemwise_sub', 'exp', 'flatten', 'log', 'log_softmax', 'max', 'min', 'negative', - 'ones_like', 'relu', 'sigmoid', 'softmax', + 'ones_like', 'relu', 'sigmoid', 'slice_like', 'softmax', 'sum', 'tanh', 'transpose', 'zeros_like', 'gather_nd', - 'reshape_like', 'where'] + 'reshape_like'] _convert_map = { '_copy' : _rename('copy'), @@ -385,13 +334,6 @@ def _l2_normalize(inputs, attrs): '_plus_scalar' : _rename('__add_scalar__'), '_rdiv_scalar' : _rename('__rdiv_scalar__'), '_rminus_scalar': _rename('__rsub_scalar__'), - '_equal_scalar' : _rename('__equal_scalar__'), - '_not_equal_scalar': _rename('__not_equal_scalar__'), - '_greater_scalar': _rename('__greater_scalar__'), - '_greater_equal_scalar': _rename('__greater_equal_scalar__'), - '_less_scalar': _rename('__less_scalar__'), - '_less_equal_scalar': _rename('__less_equal_scalar__'), - '_contrib_box_nms' : _contrib_box_nms, '_contrib_MultiBoxPrior' : _rename('multibox_prior'), '_contrib_MultiBoxDetection' : _contrib_multibox_detection, '_minimum' : _minimum, @@ -413,14 +355,11 @@ def _l2_normalize(inputs, attrs): 'Flatten' : _rename('flatten'), 'FullyConnected': _dense, 'LeakyReLU' : _leaky_relu, - 'L2Normalization' : _l2_normalize, 'Pooling' : _pooling, 'Pooling_v1' : _pooling, 'Reshape' : _reshape, 'slice' : _slice, 'SliceChannel' : _split, - 'slice_axis' : _slice_axis, - 'slice_like' : _slice_like, 'split' : _split, 'Softmax' : _rename('softmax'), 'SoftmaxActivation' : _softmax_activation, diff --git a/nnvm/python/nnvm/top/transform.py b/nnvm/python/nnvm/top/transform.py index d6c85ea283d3..8fde9632a8af 100644 --- a/nnvm/python/nnvm/top/transform.py +++ b/nnvm/python/nnvm/top/transform.py @@ -83,10 +83,6 @@ def schedule_concatenate(_, outs, target): reg.register_pattern("slice_like", OpPattern.INJECTIVE) reg.register_schedule("slice_like", _fschedule_injective) -# slice_axis -reg.register_pattern("slice_axis", OpPattern.INJECTIVE) -reg.register_schedule("slice_axis", _fschedule_injective) - # where reg.register_pattern("where", OpPattern.INJECTIVE) reg.register_schedule("where", _fschedule_injective) diff --git a/nnvm/python/nnvm/top/vision.py 
b/nnvm/python/nnvm/top/vision.py index cb69b897a7be..c369fee5f6e7 100644 --- a/nnvm/python/nnvm/top/vision.py +++ b/nnvm/python/nnvm/top/vision.py @@ -58,41 +58,24 @@ def compute_multibox_transform_loc(attrs, inputs, _): return topi.vision.ssd.multibox_transform_loc(inputs[0], inputs[1], inputs[2], clip, threshold, variance) -reg.register_pattern("multibox_transform_loc", OpPattern.OPAQUE) - -# Get valid number of anchor boxes -@reg.register_schedule("get_valid_counts") -def schedule_get_valid_counts(_, outs, target): - """Schedule definition of get_valid_counts""" - with tvm.target.create(target): - return topi.generic.schedule_get_valid_counts(outs) - -@reg.register_compute("get_valid_counts") -def compute_get_valid_counts(attrs, inputs, _): - """Compute definition of get_valid_counts""" - score_threshold = attrs.get_float("score_threshold") - return topi.vision.get_valid_counts(inputs[0], score_threshold) - -reg.register_pattern("get_valid_counts", OpPattern.OPAQUE) +reg.register_pattern("multibox_detection", OpPattern.OPAQUE) # non-maximum suppression -@reg.register_schedule("nms") +@reg.register_schedule("non_max_suppression") def schedule_nms(_, outs, target): - """Schedule definition of nms""" + """Schedule definition of non_max_suppression""" with tvm.target.create(target): return topi.generic.schedule_nms(outs) -@reg.register_compute("nms") +@reg.register_compute("non_max_suppression") def compute_nms(attrs, inputs, _): - """Compute definition of nms""" + """Compute definition of non_max_suppression""" + return_indices = attrs.get_bool('return_indices') iou_threshold = attrs.get_float('iou_threshold') force_suppress = attrs.get_bool('force_suppress') topk = attrs.get_int('topk') - id_index = attrs.get_int('id_index') - do_rearrange = attrs.get_bool('do_rearrange') - return topi.vision.nms(inputs[0], inputs[1], iou_threshold, - force_suppress, topk, id_index, - do_rearrange) + return topi.vision.non_max_suppression(inputs[0], inputs[1], return_indices, + iou_threshold, force_suppress, topk) -reg.register_pattern("nms", OpPattern.OPAQUE) +reg.register_pattern("non_max_suppression", OpPattern.OPAQUE) diff --git a/nnvm/src/top/tensor/elemwise.cc b/nnvm/src/top/tensor/elemwise.cc index 5a39f3ecc392..2d9813e22131 100644 --- a/nnvm/src/top/tensor/elemwise.cc +++ b/nnvm/src/top/tensor/elemwise.cc @@ -806,108 +806,6 @@ NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__rpow_scalar__) }; }); -NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__equal_scalar__) -.describe(R"code(Tensor equal scalar - -)code" NNVM_ADD_FILELINE) -.set_support_level(3) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - Tensor out = topi::cast( - binary_scalar_op(attrs, inputs[0], - [](Expr x, Expr y) { return x == y; }), - out_info[0]->dtype); - return Array{ out }; -}) -.set_attr("FGradient", MakeZeroGradNodes); - -NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__not_equal_scalar__) -.describe(R"code(Tensor not equal scalar - -)code" NNVM_ADD_FILELINE) -.set_support_level(3) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - Tensor out = topi::cast( - binary_scalar_op(attrs, inputs[0], - [](Expr x, Expr y) { return x != y; }), - out_info[0]->dtype); - return Array{ out }; -}) -.set_attr("FGradient", MakeZeroGradNodes); - -NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__greater_scalar__) -.describe(R"code(Tensor greater scalar - -)code" NNVM_ADD_FILELINE) -.set_support_level(3) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const 
Array& inputs, - const Array& out_info) { - Tensor out = topi::cast( - binary_scalar_op(attrs, inputs[0], - [](Expr x, Expr y) { return x > y; }), - out_info[0]->dtype); - return Array{ out }; -}) -.set_attr("FGradient", MakeZeroGradNodes); - -NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__greater_equal_scalar__) -.describe(R"code(Tensor greater equal scalar - -)code" NNVM_ADD_FILELINE) -.set_support_level(3) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - Tensor out = topi::cast( - binary_scalar_op(attrs, inputs[0], - [](Expr x, Expr y) { return x >= y; }), - out_info[0]->dtype); - return Array{ out }; -}) -.set_attr("FGradient", MakeZeroGradNodes); - -NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__less_scalar__) -.describe(R"code(Tensor less scalar - -)code" NNVM_ADD_FILELINE) -.set_support_level(3) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - Tensor out = topi::cast( - binary_scalar_op(attrs, inputs[0], - [](Expr x, Expr y) { return x < y; }), - out_info[0]->dtype); - return Array{ out }; -}) -.set_attr("FGradient", MakeZeroGradNodes); - -NNVM_REGISTER_ELEMWISE_BINARY_SCALAR(__less_equal_scalar__) -.describe(R"code(Tensor less equal scalar - -)code" NNVM_ADD_FILELINE) -.set_support_level(3) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - Tensor out = topi::cast( - binary_scalar_op(attrs, inputs[0], - [](Expr x, Expr y) { return x <= y; }), - out_info[0]->dtype); - return Array{ out }; -}) -.set_attr("FGradient", MakeZeroGradNodes); - DMLC_REGISTER_PARAMETER(ElementWiseReduceParam); NNVM_REGISTER_ELEMWISE_REDUCE_OP(elemwise_sum) diff --git a/nnvm/src/top/tensor/transform.cc b/nnvm/src/top/tensor/transform.cc index 4f09062ac607..9d259ae77d9b 100644 --- a/nnvm/src/top/tensor/transform.cc +++ b/nnvm/src/top/tensor/transform.cc @@ -1283,91 +1283,6 @@ NNVM_REGISTER_OP(slice_like) }) .set_support_level(4); -// SliceAxis -DMLC_REGISTER_PARAMETER(SliceAxisParam); - -inline bool SliceAxisShape(const nnvm::NodeAttrs& attrs, - std::vector* in_attrs, - std::vector* out_attrs) { - const SliceAxisParam& param = nnvm::get(attrs.parsed); - const TShape& src_shape = in_attrs->at(0); - int axis = param.axis; - int begin = param.begin; - int end = param.end; - - if (axis < 0) { - axis += src_shape.ndim(); - } - if (begin < 0) { - begin += src_shape[axis]; - } - if (end <= 0) { - end += src_shape[axis]; - } - CHECK_LT(begin, end) - << "Begin index must be smaller than end index: " - << begin << " vs " << end; - - TShape out_shape(src_shape); - out_shape[axis] = end - begin; - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, out_shape); - return true; -} - -NNVM_REGISTER_OP(slice_axis) -.describe(R"code(Slices along a given axis. -Returns an array slice along a given axis starting from -the begin index to the end index. 
-)code" NNVM_ADD_FILELINE) -.add_argument("data", "Tensor", "Input data to be sliced.") -.set_num_outputs(1) -.set_num_inputs(1) -.add_arguments(SliceAxisParam::__FIELDS__()) -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", ParamGetAttrDict) -.set_attr("FInferShape", SliceAxisShape) -.set_attr("FInferType", ElemwiseType<1, 1>) -.set_attr("FCorrectLayout", ElemwiseArbitraryLayout<1, 1>) -.set_attr( - "FTVMCompute", [](const NodeAttrs& attrs, - const Array& inputs, - const Array& out_info) { - const SliceAxisParam& param = nnvm::get(attrs.parsed); - const Array src_shape = inputs[0]->shape; - Array begin_idx, end_idx, strides; - int axis = param.axis; - int begin = param.begin; - int end = param.end; - - if (axis < 0) { - axis += src_shape.size(); - } - if (begin < 0) { - begin += topi::GetConstInt(src_shape[axis]); - } - if (end <= 0) { - end += topi::GetConstInt(src_shape[axis]); - } - for (size_t i = 0; i < src_shape.size(); ++i) { - begin_idx.push_back(make_const(tvm::Int(32), 0)); - strides.push_back(make_const(tvm::Int(32), 1)); - } - end_idx = Array(src_shape); - begin_idx.Set(axis, make_const(tvm::Int(32), begin)); - end_idx.Set(axis, make_const(tvm::Int(32), end)); - - return Array{ - topi::strided_slice(inputs[0], - GetIntArray(begin_idx), - GetIntArray(end_idx), - GetIntArray(strides)) - }; -}) -.set_attr("FListInputNames", [](const NodeAttrs& attrs) { - return std::vector{"data"}; -}) -.set_support_level(4); - // where inline bool WhereShape(const nnvm::NodeAttrs& attrs, std::vector* in_attrs, diff --git a/nnvm/src/top/vision/nms.cc b/nnvm/src/top/vision/nms.cc index 71b4c3ff7860..315e06e22ee5 100644 --- a/nnvm/src/top/vision/nms.cc +++ b/nnvm/src/top/vision/nms.cc @@ -11,6 +11,7 @@ #include #include #include "../op_common.h" +#include "../elemwise_op_common.h" namespace nnvm { namespace top { @@ -18,64 +19,12 @@ using compiler::FTVMCompute; using tvm::Tensor; using tvm::Array; -DMLC_REGISTER_PARAMETER(GetValidCountsParam); - -bool GetValidCountsShape(const NodeAttrs& attrs, - std::vector *in_attrs, - std::vector *out_attrs) { - TShape dshape = in_attrs->at(0); - TShape vshape = TShape({dshape[0]}); - CHECK_EQ(dshape.ndim(), 3U) << "Input data should be 3-D."; - out_attrs->clear(); - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, vshape); - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 1, dshape); - return true; -} - -inline bool GetValidCountsInferType(const NodeAttrs &attrs, - std::vector *in_attrs, - std::vector *out_attrs) { - DTYPE_ASSIGN(out_attrs->at(0), static_cast(kInt32)); - DTYPE_ASSIGN(out_attrs->at(1), in_attrs->at(0)) - return true; -} - -inline bool GetValidCountsInferLayout(const NodeAttrs& attrs, - std::vector *ilayouts, - const std::vector *last_ilayouts, - std::vector *olayouts) { - static const Layout kNCHW("NCHW"); - CHECK_EQ(ilayouts->size(), 1U); - CHECK_EQ(olayouts->size(), 2U); - NNVM_ASSIGN_LAYOUT(*ilayouts, 0, kNCHW); - return true; -} - -NNVM_REGISTER_OP(get_valid_counts) -.describe(R"doc("Get valid count of bounding boxes given -a score threshold. Also moves valid boxes to the top of -input data." 
-)doc" NNVM_ADD_FILELINE) -.set_num_inputs(1) -.set_num_outputs(2) -.set_attr_parser(ParamParser) -.set_attr("FGetAttrDict", - ParamGetAttrDict) -.add_arguments(GetValidCountsParam::__FIELDS__()) -.add_argument("data", "Tensor", "Input data.") -.set_attr("FListInputNames", [](const NodeAttrs& attrs) { - return std::vector{"data"}; -}) -.set_attr("FInferShape", GetValidCountsShape) -.set_attr("FInferType", GetValidCountsInferType) -.set_attr("FCorrectLayout", GetValidCountsInferLayout) -.set_support_level(4); - DMLC_REGISTER_PARAMETER(NMSParam); bool NMSShape(const NodeAttrs& attrs, std::vector *in_attrs, std::vector *out_attrs) { + const NMSParam& param = nnvm::get(attrs.parsed); CHECK_EQ(in_attrs->size(), 2U) << "Inputs: [data, valid_count]"; TShape dshape = in_attrs->at(0); TShape vshape = in_attrs->at(1); @@ -85,7 +34,14 @@ bool NMSShape(const NodeAttrs& attrs, "(batch_size, num_anchors, 6)."; CHECK_EQ(dshape[0], vshape[0]) << "batch_size mismatch."; out_attrs->clear(); - NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, dshape); + if (param.return_indices) { + TShape oshape = TShape(2); + oshape[0] = dshape[0]; + oshape[1] = dshape[1]; + NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, oshape); + } else { + NNVM_ASSIGN_OUTPUT_SHAPE(attrs, *out_attrs, 0, dshape); + } return true; } @@ -108,7 +64,7 @@ inline bool NMSInferLayout(const NodeAttrs& attrs, return true; } -NNVM_REGISTER_OP(nms) +NNVM_REGISTER_OP(non_max_suppression) .describe(R"doc("Non-maximum suppression." )doc" NNVM_ADD_FILELINE) .set_num_inputs(2) diff --git a/nnvm/tests/python/compiler/test_top_level4.py b/nnvm/tests/python/compiler/test_top_level4.py index f3c297e5f6e9..991b9c2b15be 100644 --- a/nnvm/tests/python/compiler/test_top_level4.py +++ b/nnvm/tests/python/compiler/test_top_level4.py @@ -3,7 +3,6 @@ import tvm from tvm.contrib import graph_runtime import topi -import topi.testing import nnvm.symbol as sym import nnvm.compiler from nnvm.testing.config import ctx_list @@ -528,15 +527,14 @@ def verify_multibox_prior(dshape, sizes=(1,), ratios=(1,), steps=(-1, -1), if clip: np_out = np.clip(np_out, 0, 1) - for target, ctx in ctx_list(): - if target == "cuda": - continue - graph, lib, _ = nnvm.compiler.build(out, target, {"data": dshape}) - m = graph_runtime.create(graph, lib, ctx) - m.set_input("data", np.random.uniform(size=dshape).astype(dtype)) - m.run() - out = m.get_output(0, tvm.nd.empty(np_out.shape, dtype)) - tvm.testing.assert_allclose(out.asnumpy(), np_out, atol=1e-5, rtol=1e-5) + target = "llvm" + ctx = tvm.cpu() + graph, lib, _ = nnvm.compiler.build(out, target, {"data": dshape}) + m = graph_runtime.create(graph, lib, ctx) + m.set_input("data", np.random.uniform(size=dshape).astype(dtype)) + m.run() + out = m.get_output(0, tvm.nd.empty(np_out.shape, dtype)) + tvm.testing.assert_allclose(out.asnumpy(), np_out, atol=1e-5, rtol=1e-5) def test_multibox_prior(): verify_multibox_prior((1, 3, 50, 50)) @@ -552,7 +550,7 @@ def test_multibox_transform_loc(): anchors = sym.Variable("anchors") transform_loc_data, valid_count = sym.multibox_transform_loc(cls_prob=cls_prob, loc_pred=loc_preds, anchor=anchors) - out = sym.nms(data=transform_loc_data, valid_count=valid_count) + out = sym.non_max_suppression(data=transform_loc_data, valid_count=valid_count, return_indices=False) # Manually create test case np_cls_prob = np.array([[[0.2, 0.5, 0.3], [0.25, 0.3, 0.45], [0.7, 0.1, 0.2]]]) @@ -563,70 +561,27 @@ def test_multibox_transform_loc(): [0, 0.44999999, 1, 1, 1, 1], [0, 0.30000001, 0, 0, 0.22903419, 0.20435292]]]) + target 
= "llvm" dtype = "float32" - for target, ctx in ctx_list(): - if target == "cuda": - continue - graph, lib, _ = nnvm.compiler.build(out, target, {"cls_prob": (batch_size, num_anchors, num_classes), - "loc_preds": (batch_size, num_anchors * 4), - "anchors": (1, num_anchors, 4)}) - m = graph_runtime.create(graph, lib, ctx) - m.set_input(**{"cls_prob": np_cls_prob.astype(dtype), "loc_preds": np_loc_preds.astype(dtype), "anchors": np_anchors.astype(dtype)}) - m.run() - out = m.get_output(0, tvm.nd.empty(expected_np_out.shape, dtype)) - tvm.testing.assert_allclose(out.asnumpy(), expected_np_out, atol=1e-5, rtol=1e-5) - -def verify_get_valid_counts(dshape, score_threshold): - dtype = "float32" - batch_size, num_anchor, elem_length = dshape - np_data = np.random.uniform(size=dshape).astype(dtype) - np_out1 = np.zeros(shape=(batch_size,)) - np_out2 = np.zeros(shape=dshape).astype(dtype) - for i in range(batch_size): - np_out1[i] = 0 - inter_idx = 0 - for j in range(num_anchor): - score = np_data[i, j, 1] - if score >= score_threshold: - for k in range(elem_length): - np_out2[i, inter_idx, k] = np_data[i, j, k] - np_out1[i] += 1 - inter_idx += 1 - if j >= np_out1[i]: - for k in range(elem_length): - np_out2[i, j, k] = -1 - - for target, ctx in ctx_list(): - if target == "cuda": - continue - data = sym.Variable("data", dtype=dtype) - valid_counts, inter_data = sym.get_valid_counts(data, score_threshold=score_threshold) - out = sym.Group([valid_counts, inter_data]) - graph, lib, _ = nnvm.compiler.build(out, target, {"data": dshape}) - m = graph_runtime.create(graph, lib, ctx) - m.set_input("data", np_data) - m.run() - out1 = m.get_output(0, tvm.nd.empty(np_out1.shape, "int32")) - out2 = m.get_output(1, tvm.nd.empty(dshape, dtype)) - tvm.testing.assert_allclose(out1.asnumpy(), np_out1, rtol=1e-3) - tvm.testing.assert_allclose(out2.asnumpy(), np_out2, rtol=1e-3) - - -def test_get_valid_counts(): - verify_get_valid_counts((1, 2500, 6), 0) - verify_get_valid_counts((1, 2500, 6), -1) - verify_get_valid_counts((3, 1000, 6), 0.55) - verify_get_valid_counts((16, 500, 6), 0.95) + ctx = tvm.cpu() + graph, lib, _ = nnvm.compiler.build(out, target, {"cls_prob": (batch_size, num_anchors, num_classes), + "loc_preds": (batch_size, num_anchors * 4), + "anchors": (1, num_anchors, 4)}) + m = graph_runtime.create(graph, lib, ctx) + m.set_input(**{"cls_prob": np_cls_prob.astype(dtype), "loc_preds": np_loc_preds.astype(dtype), "anchors": np_anchors.astype(dtype)}) + m.run() + out = m.get_output(0, tvm.nd.empty(expected_np_out.shape, dtype)) + tvm.testing.assert_allclose(out.asnumpy(), expected_np_out, atol=1e-5, rtol=1e-5) -def test_nms(): +def test_non_max_suppression(): dshape = (1, 5, 6) data = sym.Variable("data") valid_count = sym.Variable("valid_count", dtype="int32") iou_threshold = 0.7 force_suppress = True topk = 2 - out = sym.nms(data=data, valid_count=valid_count, iou_threshold=iou_threshold, - force_suppress=force_suppress, topk=topk) + out = sym.non_max_suppression(data=data, valid_count=valid_count, return_indices=False, + iou_threshold=iou_threshold, force_suppress=force_suppress, topk=topk) np_data = np.array([[[0, 0.8, 1, 20, 25, 45], [1, 0.7, 30, 60, 50, 80], [0, 0.4, 4, 21, 19, 40], [2, 0.9, 35, 61, 52, 79], @@ -636,16 +591,15 @@ def test_nms(): [-1, -1, -1, -1, -1, -1], [-1, -1, -1, -1, -1, -1], [-1, -1, -1, -1, -1, -1]]]) - for target, ctx in ctx_list(): - if target == "cuda": - continue - graph, lib, _ = nnvm.compiler.build(out, target, {"data": dshape, "valid_count": (dshape[0],)}, - dtype={"data": 
"float32", "valid_count": "int32"}) - m = graph_runtime.create(graph, lib, ctx) - m.set_input(**{"data": np_data, "valid_count": np_valid_count}) - m.run() - out = m.get_output(0, tvm.nd.empty(np_result.shape, "float32")) - tvm.testing.assert_allclose(out.asnumpy(), np_result, atol=1e-5, rtol=1e-5) + target = "llvm" + ctx = tvm.cpu() + graph, lib, _ = nnvm.compiler.build(out, target, {"data": dshape, "valid_count": (dshape[0],)}, + dtype={"data": "float32", "valid_count": "int32"}) + m = graph_runtime.create(graph, lib, ctx) + m.set_input(**{"data": np_data, "valid_count": np_valid_count}) + m.run() + out = m.get_output(0, tvm.nd.empty(np_result.shape, "float32")) + tvm.testing.assert_allclose(out.asnumpy(), np_result, atol=1e-5, rtol=1e-5) def np_slice_like(np_data, np_shape_like, axis=[]): begin_idx = [0 for _ in np_data.shape] @@ -662,7 +616,7 @@ def np_slice_like(np_data, np_shape_like, axis=[]): slice_idx = [] for b, e in zip(begin_idx, end_idx): slice_idx.append(slice(b, e)) - np_result = np_data[tuple(slice_idx)] + np_result = np_data[slice_idx] return np_result def verify_slice_like(np_data, np_shape_like, axis=[]): @@ -702,27 +656,6 @@ def test_slice_like(): axis = (2, 3) verify_slice_like(np_data, np_shape_like, axis) -def verify_slice_axis(dshape, axis, begin, end): - data = sym.Variable("data") - net = sym.slice_axis(data, axis=axis, begin=begin, end=end) - np_data = np.random.uniform(size=dshape) - np_out = topi.testing.slice_axis_python(np_data, axis, begin, end) - - dtype = "float32" - for target, ctx in ctx_list(): - graph, lib, _ = nnvm.compiler.build(net, target, {"data": dshape}, dtype=dtype) - m = graph_runtime.create(graph, lib, ctx) - m.set_input("data", np_data) - m.run() - out = m.get_output(0, tvm.nd.empty(np_out.shape, dtype)) - tvm.testing.assert_allclose(out.asnumpy(), np_out, atol=1e-5, rtol=1e-5) - -def test_slice_axis(): - verify_slice_axis((1, 2, 3, 4), 3, 0, 2) - verify_slice_axis((100, 50), -1, 1, -1) - verify_slice_axis((20,), -1, -9, -3) - verify_slice_axis((20, 30, 40), 1, 5, 0) - def verify_where(condition, x, y): dtype = "float32" if len(condition.shape) == 1: @@ -777,7 +710,6 @@ def test_argmax(): np.testing.assert_allclose(out.asnumpy(), np_argmax, atol=1e-5, rtol=1e-5) if __name__ == "__main__": - test_get_valid_counts() test_reshape() test_broadcast() test_reduce() @@ -794,10 +726,8 @@ def test_argmax(): test_flip() test_multibox_prior() test_multibox_transform_loc() - test_get_valid_counts() - test_nms() + test_non_max_suppression() test_slice_like() - test_slice_axis() test_where() test_argmax() print(nnvm.compiler.engine.dump()) diff --git a/nnvm/tests/python/frontend/mxnet/test_forward.py b/nnvm/tests/python/frontend/mxnet/test_forward.py index 67f1ad5ff27d..8992799528e7 100644 --- a/nnvm/tests/python/frontend/mxnet/test_forward.py +++ b/nnvm/tests/python/frontend/mxnet/test_forward.py @@ -300,6 +300,7 @@ def test_forward_l2_normalize(): mx_sym = mx.sym.L2Normalization(data, mode="channel") verify_mxnet_frontend_impl(mx_sym, (2, 3, 4, 5), (2, 3, 4, 5)) + if __name__ == '__main__': test_forward_mlp() test_forward_vgg() diff --git a/topi/python/topi/cuda/nms.py b/topi/python/topi/cuda/nms.py index 26dc5704d9d9..5f79de25e835 100644 --- a/topi/python/topi/cuda/nms.py +++ b/topi/python/topi/cuda/nms.py @@ -4,7 +4,7 @@ import tvm from tvm import api -from topi.vision import nms +from topi.vision import non_max_suppression from ..util import get_const_tuple def sort_ir(data, index, output): @@ -181,7 +181,7 @@ def calculate_overlap(out_tensor, 
box_a_idx, box_b_idx): return body -@nms.register(["cuda", "gpu"]) +@non_max_suppression.register(["cuda", "gpu"]) def nms_gpu(data, valid_count, return_indices, iou_threshold=0.5, force_suppress=False, topk=-1, id_index=0, invalid_to_bottom=False): """Non-maximum suppression operator for object detection. diff --git a/topi/python/topi/cuda/ssd/multibox.py b/topi/python/topi/cuda/ssd/multibox.py index 746be092ebbe..11062824deb0 100644 --- a/topi/python/topi/cuda/ssd/multibox.py +++ b/topi/python/topi/cuda/ssd/multibox.py @@ -11,7 +11,7 @@ from topi.vision.ssd import multibox_prior from topi.vision.ssd import multibox_detection from topi.vision.ssd import multibox_transform_loc -from ..nms import nms +from ..nms import non_max_suppression def multibox_prior_ir(data, out, sizes, ratios, steps, offsets): @@ -437,6 +437,6 @@ def multibox_detection_gpu(cls_prob, loc_pred, anchor, clip=True, threshold=0.01 """ inter_out = multibox_transform_loc(cls_prob, loc_pred, anchor, clip, threshold, variances) - out = nms( + out = non_max_suppression( inter_out[0], inter_out[1], nms_threshold, force_suppress, nms_topk) return out diff --git a/topi/python/topi/vision/nms.py b/topi/python/topi/vision/nms.py index 9c8fae23fa93..60715c91f0dc 100644 --- a/topi/python/topi/vision/nms.py +++ b/topi/python/topi/vision/nms.py @@ -219,8 +219,8 @@ def hybrid_nms(data, sorted_index, valid_count, @tvm.target.generic_func -def nms(data, valid_count, return_indices, iou_threshold=0.5, force_suppress=False, - topk=-1, id_index=0, invalid_to_bottom=False): +def non_max_suppression(data, valid_count, return_indices, iou_threshold=0.5, + force_suppress=False, topk=-1, id_index=0, invalid_to_bottom=False): """Non-maximum suppression operator for object detection. Parameters diff --git a/topi/python/topi/vision/ssd/multibox.py b/topi/python/topi/vision/ssd/multibox.py index c663d3873587..3c8cc6c07417 100644 --- a/topi/python/topi/vision/ssd/multibox.py +++ b/topi/python/topi/vision/ssd/multibox.py @@ -8,7 +8,7 @@ import topi -from ..nms import nms +from ..nms import non_max_suppression @hybrid.script def hybrid_multibox_prior(data, sizes, ratios, steps, offsets): @@ -292,5 +292,6 @@ def multibox_detection(cls_prob, loc_pred, anchor, clip=True, threshold=0.01, nm """ inter_out = multibox_transform_loc(cls_prob, loc_pred, anchor, clip, threshold, variances) - out = nms(inter_out[0], inter_out[1], False, nms_threshold, force_suppress, nms_topk) + out = non_max_suppression(inter_out[0], inter_out[1], False, nms_threshold, + force_suppress, nms_topk) return out diff --git a/topi/tests/python/test_topi_vision.py b/topi/tests/python/test_topi_vision.py index 1f50199fcfba..489f2abb92fd 100644 --- a/topi/tests/python/test_topi_vision.py +++ b/topi/tests/python/test_topi_vision.py @@ -8,7 +8,7 @@ from tvm.contrib.pickle_memoize import memoize from topi.util import get_const_tuple -from topi.vision import ssd, nms, get_valid_counts +from topi.vision import ssd, non_max_suppression, get_valid_counts def verify_get_valid_counts(dshape, score_threshold): @@ -61,7 +61,7 @@ def test_get_valid_counts(): verify_get_valid_counts((16, 500, 6), 0.95) -def test_nms(): +def test_non_max_suppression(): dshape = (1, 5, 6) indices_dshape = (1, 5) data = tvm.placeholder(dshape, name="data") @@ -87,11 +87,11 @@ def check_device(device): print("Running on target: %s" % device) with tvm.target.create(device): if device == 'llvm': - out = nms(data, valid_count, False, nms_threshold, force_suppress, nms_topk) - indices_out = nms(data, valid_count, True, 
nms_threshold, force_suppress, nms_topk) + out = non_max_suppression(data, valid_count, False, nms_threshold, force_suppress, nms_topk) + indices_out = non_max_suppression(data, valid_count, True, nms_threshold, force_suppress, nms_topk) else: - out = topi.cuda.nms(data, valid_count, False, nms_threshold, force_suppress, nms_topk) - indices_out = topi.cuda.nms(data, valid_count, True, nms_threshold, force_suppress, nms_topk) + out = topi.cuda.non_max_suppression(data, valid_count, False, nms_threshold, force_suppress, nms_topk) + indices_out = topi.cuda.non_max_suppression(data, valid_count, True, nms_threshold, force_suppress, nms_topk) s = topi.generic.schedule_nms(out) indices_s = topi.generic.schedule_nms(indices_out) @@ -336,7 +336,7 @@ def test_proposal(): if __name__ == "__main__": test_get_valid_counts() - test_nms() + test_non_max_suppression() test_multibox_prior() test_multibox_detection() test_roi_align() From 6b1fd7a5e95bb69495ababaaaf2ff23f698fd07e Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 20 Feb 2019 17:31:00 +0800 Subject: [PATCH 30/43] Refactor relay nms --- include/tvm/relay/attrs/vision.h | 9 +++-- nnvm/include/nnvm/top/nn.h | 10 ++++-- nnvm/python/nnvm/top/vision.py | 5 ++- python/tvm/relay/frontend/mxnet.py | 20 ++++++----- python/tvm/relay/op/_transform.py | 2 +- python/tvm/relay/op/transform.py | 2 +- python/tvm/relay/op/vision/_vision.py | 14 ++++---- python/tvm/relay/op/vision/nms.py | 25 ++++++++------ src/relay/op/tensor/transform.cc | 8 ++--- src/relay/op/vision/nms.cc | 22 ++++++++---- tests/python/relay/test_op_level10.py | 24 +++++++++++++ tests/python/relay/test_op_level4.py | 24 ------------- tests/python/relay/test_op_level5.py | 34 +++++++++++++------ .../{nnvm => relay}/deploy_ssd_gluoncv.py | 28 ++++++--------- 14 files changed, 132 insertions(+), 95 deletions(-) rename tutorials/{nnvm => relay}/deploy_ssd_gluoncv.py (76%) diff --git a/include/tvm/relay/attrs/vision.h b/include/tvm/relay/attrs/vision.h index 345a67655552..12523bd0c8ea 100644 --- a/include/tvm/relay/attrs/vision.h +++ b/include/tvm/relay/attrs/vision.h @@ -70,13 +70,16 @@ struct GetValidCountsAttrs : public tvm::AttrsNode{ /*! 
\brief Attributes used in non_maximum_suppression operator */ struct NMSAttrs : public tvm::AttrsNode{ + bool return_indices; double iou_threshold; bool force_suppress; int topk; int id_index; - bool do_rearrange; + bool invalid_to_bottom; TVM_DECLARE_ATTRS(NMSAttrs, "relay.attrs.NMSAttrs") { + TVM_ATTR_FIELD(return_indices) + .describe("Whether to return box indices in input data."); TVM_ATTR_FIELD(iou_threshold).set_default(0.5) .describe("Non-maximum suppression threshold."); TVM_ATTR_FIELD(force_suppress).set_default(false) @@ -85,8 +88,8 @@ struct NMSAttrs : public tvm::AttrsNode{ .describe("Keep maximum top k detections before nms, -1 for no limit."); TVM_ATTR_FIELD(id_index).set_default(0) .describe("Axis index of id."); - TVM_ATTR_FIELD(do_rearrange).set_default(false) - .describe("Whether to move all valid bounding boxes to the top."); + TVM_ATTR_FIELD(invalid_to_bottom).set_default(false) + .describe("Whether to move all invalid bounding boxes to the bottom."); } }; diff --git a/nnvm/include/nnvm/top/nn.h b/nnvm/include/nnvm/top/nn.h index 69d81a98cb4c..6fd283aeb14e 100644 --- a/nnvm/include/nnvm/top/nn.h +++ b/nnvm/include/nnvm/top/nn.h @@ -448,15 +448,21 @@ struct NMSParam : public dmlc::Parameter { float iou_threshold; bool force_suppress; int topk; + int id_index; + bool invalid_to_bottom; DMLC_DECLARE_PARAMETER(NMSParam) { DMLC_DECLARE_FIELD(return_indices) .describe("Whether to return box indices in input data."); DMLC_DECLARE_FIELD(iou_threshold).set_default(0.5) .describe("Non-maximum suppression threshold."); DMLC_DECLARE_FIELD(force_suppress).set_default(false) - .describe("Suppress all detections regardless of class_id."); + .describe("Suppress all detections regardless of class_id."); DMLC_DECLARE_FIELD(topk).set_default(-1) - .describe("Keep maximum top k detections before nms, -1 for no limit."); + .describe("Keep maximum top k detections before nms, -1 for no limit."); + DMLC_DECLARE_FIELD(id_index).set_default(0) + .describe("Axis index of id."); + DMLC_DECLARE_FIELD(invalid_to_bottom).set_default(false) + .describe("Whether to move all invalid bounding boxes to the bottom."); } }; diff --git a/nnvm/python/nnvm/top/vision.py b/nnvm/python/nnvm/top/vision.py index c369fee5f6e7..1beb9be2564b 100644 --- a/nnvm/python/nnvm/top/vision.py +++ b/nnvm/python/nnvm/top/vision.py @@ -74,8 +74,11 @@ def compute_nms(attrs, inputs, _): iou_threshold = attrs.get_float('iou_threshold') force_suppress = attrs.get_bool('force_suppress') topk = attrs.get_int('topk') + id_index = attrs.get_int('id_index') + invalid_to_bottom = attrs.get_bool('invalid_to_bottom') return topi.vision.non_max_suppression(inputs[0], inputs[1], return_indices, - iou_threshold, force_suppress, topk) + iou_threshold, force_suppress, topk, + id_index, invalid_to_bottom) reg.register_pattern("non_max_suppression", OpPattern.OPAQUE) diff --git a/python/tvm/relay/frontend/mxnet.py b/python/tvm/relay/frontend/mxnet.py index 727fa5828aef..f00a7f551605 100644 --- a/python/tvm/relay/frontend/mxnet.py +++ b/python/tvm/relay/frontend/mxnet.py @@ -324,13 +324,14 @@ def _mx_multibox_detection(inputs, attrs): 0.2, 0.2)) new_attrs1 = {} - new_attrs1["overlap_threshold"] = attrs.get_float("nms_threshold", 0.5) + new_attrs1["return_indices"] = False + new_attrs1["iou_threshold"] = attrs.get_float("nms_threshold", 0.5) new_attrs1["force_suppress"] = attrs.get_bool("force_suppress", False) new_attrs1["topk"] = attrs.get_int("nms_topk", -1) ret = _op.vision.multibox_transform_loc(inputs[0], inputs[1], inputs[2], **new_attrs0) 
- return _op.vision.nms(ret[0], ret[1], **new_attrs1) + return _op.vision.non_max_suppression(ret[0], ret[1], **new_attrs1) def _mx_batch_dot(inputs, attrs): @@ -382,7 +383,7 @@ def _mx_proposal(inputs, attrs): def _mx_box_nms(inputs, attrs): force_suppress = attrs.get_bool("force_suppress", False) - overlap_thresh = attrs.get_float('overlap_thresh', 0.5) + iou_thresh = attrs.get_float('overlap_thresh', 0.5) topk = attrs.get_int('topk', -1) valid_thresh = attrs.get_float('valid_thresh', 0) coord_start = attrs.get_int('coord_start', 2) @@ -402,11 +403,14 @@ def _mx_box_nms(inputs, attrs): raise RuntimeError('out_format %s is not supported.' % out_format) ret = _op.vision.get_valid_counts(inputs[0], score_threshold=valid_thresh) - nms_out = _op.vision.nms(ret[1], ret[0], - iou_threshold=overlap_thresh, - force_suppress=force_suppress, - topk=topk, id_index=id_index, - do_rearrange=True) + nms_out = _op.vision.non_max_suppression(ret[1], + ret[0], + return_indices=False, + iou_threshold=iou_thresh, + force_suppress=force_suppress, + topk=topk, + id_index=id_index, + invalid_to_bottom=True) return nms_out diff --git a/python/tvm/relay/op/_transform.py b/python/tvm/relay/op/_transform.py index 83b5ce5a854f..315b448cdc6e 100644 --- a/python/tvm/relay/op/_transform.py +++ b/python/tvm/relay/op/_transform.py @@ -21,7 +21,7 @@ _reg.register_schedule("arange", schedule_injective) _reg.register_schedule("cast", schedule_injective) _reg.register_schedule("strided_slice", schedule_injective) -_reg.register_schedule("slice_axis", schedule_injective) +_reg.register_schedule("_contrib_slice_axis", schedule_injective) _reg.register_schedule("slice_like", schedule_injective) _reg.register_schedule("split", schedule_injective) _reg.register_schedule("take", schedule_injective) diff --git a/python/tvm/relay/op/transform.py b/python/tvm/relay/op/transform.py index f19aa19772b4..9dc42861a4cf 100644 --- a/python/tvm/relay/op/transform.py +++ b/python/tvm/relay/op/transform.py @@ -494,7 +494,7 @@ def slice_axis(data, axis, begin, end=None): The computed result. 
""" end = end or 0 - return _make.slice_axis(data, axis, begin, end) + return _make._contrib_slice_axis(data, axis, begin, end) def slice_like(data, shape_like, axes=None): diff --git a/python/tvm/relay/op/vision/_vision.py b/python/tvm/relay/op/vision/_vision.py index 57ca6f2da2ce..45569a9814af 100644 --- a/python/tvm/relay/op/vision/_vision.py +++ b/python/tvm/relay/op/vision/_vision.py @@ -72,25 +72,27 @@ def compute_get_valid_counts(attrs, inputs, _, target): # non-maximum suppression -@reg.register_schedule("vision.nms") +@reg.register_schedule("vision.non_max_suppression") def schedule_nms(_, outs, target): """Schedule definition of nms""" with target: return topi.generic.schedule_nms(outs) -@reg.register_compute("vision.nms") +@reg.register_compute("vision.non_max_suppression") def compute_nms(attrs, inputs, _, target): """Compute definition of nms""" + return_indices = bool(get_const_int(attrs.return_indices)) iou_threshold = get_const_float(attrs.iou_threshold) force_suppress = bool(get_const_int(attrs.force_suppress)) topk = get_const_int(attrs.topk) id_index = get_const_int(attrs.id_index) - do_rearrange = bool(get_const_int(attrs.do_rearrange)) + invalid_to_bottom = bool(get_const_int(attrs.invalid_to_bottom)) return [ - topi.vision.nms(inputs[0], inputs[1], iou_threshold, - force_suppress, topk, id_index, do_rearrange) + topi.vision.non_max_suppression(inputs[0], inputs[1], return_indices, + iou_threshold, force_suppress, topk, + id_index, invalid_to_bottom) ] -reg.register_pattern("vision.nms", OpPattern.OPAQUE) +reg.register_pattern("vision.non_max_suppression", OpPattern.OPAQUE) diff --git a/python/tvm/relay/op/vision/nms.py b/python/tvm/relay/op/vision/nms.py index 157008ec2174..5dd2c5a74555 100644 --- a/python/tvm/relay/op/vision/nms.py +++ b/python/tvm/relay/op/vision/nms.py @@ -27,13 +27,14 @@ def get_valid_counts(data, return TupleWrapper(_make.get_valid_counts(data, score_threshold), 2) -def nms(data, - valid_count, - iou_threshold=0.5, - force_suppress=False, - topk=-1, - id_index=0, - do_rearrange=False): +def non_max_suppression(data, + valid_count, + return_indices, + iou_threshold=0.5, + force_suppress=False, + topk=-1, + id_index=0, + invalid_to_bottom=False): """Non-maximum suppression operator for object detection. Parameters @@ -46,6 +47,9 @@ def nms(data, valid_count : relay.Expr 1-D tensor for valid number of boxes. + return_indices : bool + Whether to return box indices in input data. + iou_threshold : float, optional Non-maximum suppression threshold. @@ -58,7 +62,7 @@ def nms(data, id_index : optional, int index of the class categories, -1 to disable. - do_rearrange : optional, boolean + invalid_to_bottom : optional, boolean Whether to move all valid bounding boxes to the top. Returns @@ -66,5 +70,6 @@ def nms(data, out : relay.Expr 3-D tensor with shape [batch_size, num_anchors, 6]. 
""" - return _make.nms(data, valid_count, iou_threshold, - force_suppress, topk, id_index, do_rearrange) + return _make.non_max_suppression(data, valid_count, return_indices, + iou_threshold, force_suppress, topk, + id_index, invalid_to_bottom) diff --git a/src/relay/op/tensor/transform.cc b/src/relay/op/tensor/transform.cc index 73b74a60d756..c0f279a6b72c 100644 --- a/src/relay/op/tensor/transform.cc +++ b/src/relay/op/tensor/transform.cc @@ -1401,11 +1401,11 @@ Expr MakeSliceAxis(Expr data, attrs->axis = axis; attrs->begin = begin; attrs->end = end; - static const Op& op = Op::Get("slice_axis"); + static const Op& op = Op::Get("_contrib_slice_axis"); return CallNode::make(op, {data}, Attrs(attrs), {}); } -TVM_REGISTER_API("relay.op._make.slice_axis") +TVM_REGISTER_API("relay.op._make._contrib_slice_axis") .set_body([](const TVMArgs& args, TVMRetValue* rv) { runtime::detail::unpack_call(MakeSliceAxis, args, rv); }); @@ -1446,14 +1446,14 @@ Array SliceAxisCompute(const Attrs& attrs, }; } -RELAY_REGISTER_OP("slice_axis") +RELAY_REGISTER_OP("_contrib_slice_axis") .describe(R"doc(Slices along a given axis. Returns an array slice along a given axis starting from the begin index to the end index. )doc" TVM_ADD_FILELINE) .set_num_inputs(1) .add_argument("data", "Tensor", "Input data.") -.set_support_level(4) +.set_support_level(10) .add_type_rel("SliceAxis", SliceAxisRel) .set_attr("FTVMCompute", SliceAxisCompute) .set_attr("TOpPattern", kInjective); diff --git a/src/relay/op/vision/nms.cc b/src/relay/op/vision/nms.cc index e8a84734b3d7..229983158262 100644 --- a/src/relay/op/vision/nms.cc +++ b/src/relay/op/vision/nms.cc @@ -65,42 +65,50 @@ bool NMSRel(const Array& types, CHECK_EQ(types.size(), 3); const auto* data = types[0].as(); const auto* valid_count = types[1].as(); + const NMSAttrs* param = attrs.as(); const auto& dshape = data->shape; const auto& vshape = valid_count->shape; CHECK_EQ(dshape.size(), 3) << "Input data should be 3-D."; CHECK_EQ(vshape.size(), 1) << "Input valid count should be 1-D."; // assign output type - reporter->Assign(types[2], TensorTypeNode::make(dshape, data->dtype)); + if (param->return_indices) { + std::vector oshape({dshape[0], dshape[1]}); + reporter->Assign(types[2], TensorTypeNode::make(oshape, Int(32))); + } else { + reporter->Assign(types[2], TensorTypeNode::make(dshape, data->dtype)); + } return true; } Expr MakeNMS(Expr data, Expr valid_count, + bool return_indices, double iou_threshold, bool force_suppress, int topk, int id_index, - bool do_rearrange) { + bool invalid_to_bottom) { auto attrs = make_node(); + attrs->return_indices = return_indices; attrs->iou_threshold = iou_threshold; attrs->force_suppress = force_suppress; attrs->topk = topk; attrs->id_index = id_index; - attrs->do_rearrange = do_rearrange; - static const Op& op = Op::Get("vision.nms"); + attrs->invalid_to_bottom = invalid_to_bottom; + static const Op& op = Op::Get("vision.non_max_suppression"); return CallNode::make(op, {data, valid_count}, Attrs(attrs), {}); } -TVM_REGISTER_API("relay.op.vision._make.nms") +TVM_REGISTER_API("relay.op.vision._make.non_max_suppression") .set_body([](const TVMArgs& args, TVMRetValue* rv) { - runtime::detail::unpack_call(MakeNMS, args, rv); + runtime::detail::unpack_call(MakeNMS, args, rv); }); -RELAY_REGISTER_OP("vision.nms") +RELAY_REGISTER_OP("vision.non_max_suppression") .describe(R"doc(Non-maximum suppression. 
)doc" TVM_ADD_FILELINE) .set_num_inputs(2) diff --git a/tests/python/relay/test_op_level10.py b/tests/python/relay/test_op_level10.py index 34285d2b18dd..e3c331c6a1d0 100644 --- a/tests/python/relay/test_op_level10.py +++ b/tests/python/relay/test_op_level10.py @@ -2,6 +2,7 @@ """ import numpy as np import tvm +import topi.testing from tvm import relay from tvm.relay.testing import ctx_list import topi @@ -145,6 +146,7 @@ def verify_reverse_reshape(shape, newshape, oshape): verify_reverse_reshape((2, 3, 4), (-1, 0), (6, 4)) verify_reverse_reshape((2, 3, 4), (0, -3), (2, 12)) +<<<<<<< HEAD def verify_batch_matmul(x_shape, y_shape, out_shape, dtype="float32"): x = relay.var("x", relay.TensorType(x_shape, dtype)) y = relay.var("y", relay.TensorType(y_shape, dtype)) @@ -176,6 +178,27 @@ def test_batch_matmul(): verify_batch_matmul((5, 16, 32), (5, 20, 32), (5, 16, 20)) verify_batch_matmul((30, 16, 32), (30, 20, 32), (30, 16, 20)) +def test_contrib_slice_axis(): + def verify(dshape, axis, begin, end): + x = relay.var("x", relay.TensorType(dshape, "float32")) + z = relay.slice_axis(x, axis=axis, begin=begin, end=end) + func = relay.Function([x], z) + func = relay.ir_pass.infer_type(func) + text = func.astext() + assert "begin" in text + assert "end" in text + x_data = np.random.uniform(size=dshape).astype("float32") + ref_res = topi.testing.slice_axis_python( + x_data, axis, begin, end) + for target, ctx in ctx_list(): + intrp = relay.create_executor("graph", ctx=ctx, target=target) + op_res = intrp.evaluate(func)(x_data) + tvm.testing.assert_allclose(op_res.asnumpy(), ref_res) + + verify((1, 2, 3, 4), 3, 0, 2) + verify((100, 50), -1, 1, -1) + verify((20,), -1, -9, -3) + verify((20, 30, 40), 1, 5, 0) if __name__ == "__main__": test_collapse_sum_like() @@ -183,3 +206,4 @@ def test_batch_matmul(): test_slice_like() test_reverse_reshape() test_batch_matmul() + test_contrib_slice_axis() diff --git a/tests/python/relay/test_op_level4.py b/tests/python/relay/test_op_level4.py index 4ba7e8cd1e72..ae7fe320940a 100644 --- a/tests/python/relay/test_op_level4.py +++ b/tests/python/relay/test_op_level4.py @@ -196,29 +196,6 @@ def _wrapper(data, axis=None, keepdims=False): verify_reduce(func, (128, 24, 128), (0, 2), True, False, (1, 24, 1)) -def test_slice_axis(): - def verify(dshape, axis, begin, end): - x = relay.var("x", relay.TensorType(dshape, "float32")) - z = relay.slice_axis(x, axis=axis, begin=begin, end=end) - func = relay.Function([x], z) - func = relay.ir_pass.infer_type(func) - text = func.astext() - assert "begin" in text - assert "end" in text - x_data = np.random.uniform(size=dshape).astype("float32") - ref_res = topi.testing.slice_axis_python( - x_data, axis, begin, end) - for target, ctx in ctx_list(): - intrp = relay.create_executor("graph", ctx=ctx, target=target) - op_res = intrp.evaluate(func)(x_data) - tvm.testing.assert_allclose(op_res.asnumpy(), ref_res) - - verify((1, 2, 3, 4), 3, 0, 2) - verify((100, 50), -1, 1, -1) - verify((20,), -1, -9, -3) - verify((20, 30, 40), 1, 5, 0) - - def test_strided_slice(): def verify(dshape, begin, end, strides, output, test_ref=True): x = relay.var("x", relay.TensorType(dshape, "float32")) @@ -260,4 +237,3 @@ def verify(dshape, begin, end, strides, output, test_ref=True): test_binary_int_broadcast() test_where() test_reduce_functions() - test_slice_axis() diff --git a/tests/python/relay/test_op_level5.py b/tests/python/relay/test_op_level5.py index 1e7fe76c0a9e..09a5d37d51f8 100644 --- a/tests/python/relay/test_op_level5.py +++ 
b/tests/python/relay/test_op_level5.py @@ -174,30 +174,40 @@ def verify_get_valid_counts(dshape, score_threshold): verify_get_valid_counts((16, 500, 6), 0.95) -def test_nms(): - def verify_nms(x0_data, x1_data, dshape, ref_res, - overlap_threshold=0.5, force_suppress=False, topk=-1, +def test_non_max_suppression(): + def verify_nms(x0_data, x1_data, dshape, ref_res, ref_indices_res, + iou_threshold=0.5, force_suppress=False, topk=-1, check_type_only=False): x0 = relay.var("x0", relay.ty.TensorType(dshape, "float32")) x1 = relay.var("x1", relay.ty.TensorType((dshape[0],), "int")) - z = relay.vision.nms(x0, x1, overlap_threshold, force_suppress, topk) + z = relay.vision.non_max_suppression(x0, x1, False, iou_threshold, force_suppress, topk) + z_indices = relay.vision.non_max_suppression(x0, x1, True, iou_threshold, force_suppress, topk) assert "iou_threshold" in z.astext() + assert "iou_threshold" in z_indices.astext() zz = relay.ir_pass.infer_type(z) + zz_indices = relay.ir_pass.infer_type(z_indices) assert zz.checked_type == relay.ty.TensorType(dshape, "float32") + assert zz_indices.checked_type == relay.ty.TensorType((dshape[0], dshape[1]), "int32") if check_type_only: return func = relay.Function([x0, x1], z) func = relay.ir_pass.infer_type(func) + func_indices = relay.Function([x0, x1], z_indices) + func_indices = relay.ir_pass.infer_type(func_indices) ctx_list = [("llvm", tvm.cpu(0))] for target, ctx in ctx_list: intrp1 = relay.create_executor("graph", ctx=ctx, target=target) op_res1 = intrp1.evaluate(func)(x0_data, x1_data) + op_indices_res1 = intrp1.evaluate(func_indices)(x0_data, x1_data) tvm.testing.assert_allclose(op_res1.asnumpy(), ref_res, rtol=1e-5) + tvm.testing.assert_allclose(op_indices_res1.asnumpy(), ref_indices_res, rtol=1e-5) intrp2 = relay.create_executor("debug", ctx=ctx, target=target) op_res2 = intrp2.evaluate(func)(x0_data, x1_data) + op_indices_res2 = intrp2.evaluate(func_indices)(x0_data, x1_data) tvm.testing.assert_allclose(op_res2.asnumpy(), ref_res, rtol=1e-5) + tvm.testing.assert_allclose(op_indices_res2.asnumpy(), ref_indices_res, rtol=1e-5) np_data = np.array([[[0, 0.8, 1, 20, 25, 45], [1, 0.7, 30, 60, 50, 80], [0, 0.4, 4, 21, 19, 40], [2, 0.9, 35, 61, 52, 79], @@ -206,22 +216,26 @@ def verify_nms(x0_data, x1_data, dshape, ref_res, np_result = np.array([[[2, 0.9, 35, 61, 52, 79], [0, 0.8, 1, 20, 25, 45], [-1, -1, -1, -1, -1, -1], [-1, -1, -1, -1, -1, -1], [-1, -1, -1, -1, -1, -1]]]) + np_indices_result = np.array([[3, 0, -1, -1, -1]]) num_anchors = 5 dshape = (tvm.var("n"), num_anchors, 6) - verify_nms(np_data, np_valid_count, dshape, np_result, + verify_nms(np_data, np_valid_count, dshape, np_result, np_indices_result, force_suppress=True, topk=2, check_type_only=True) dshape = (1, num_anchors, 6) - verify_nms(np_data, np_valid_count, dshape, np_result, + verify_nms(np_data, np_valid_count, dshape, np_result, np_indices_result, force_suppress=True, topk=2, check_type_only=False) np_result = np.array([[[2, 0.9, 35, 61, 52, 79], [0, 0.8, 1, 20, 25, 45], [1, 0.7, 30, 60, 50, 80], [-1, -1, -1, -1, -1, -1], [-1, -1, -1, -1, -1, -1]]]) + np_indices_result = np.array([[3, 0, 1, -1, -1]]) dshape = (tvm.var("n"), num_anchors, 6) - verify_nms(np_data, np_valid_count, dshape, np_result, check_type_only=True) + verify_nms(np_data, np_valid_count, dshape, np_result, + np_indices_result, check_type_only=True) dshape = (1, num_anchors, 6) - verify_nms(np_data, np_valid_count, dshape, np_result, topk=3) + verify_nms(np_data, np_valid_count, dshape, np_result, + 
np_indices_result, topk=3) def test_multibox_transform_loc(): @@ -263,7 +277,7 @@ def test_default_value(): assert ret.checked_type == ref_type - nms = relay.vision.nms(mtl[0], mtl[1]) + nms = relay.vision.non_max_suppression(mtl[0], mtl[1], False) func = relay.Function([cls_prob, loc_pred, anchors], nms) func = relay.ir_pass.infer_type(func) ctx_list = [("llvm", tvm.cpu(0))] @@ -449,8 +463,8 @@ def verify_yolo_reorg(shape, stride): test_multibox_prior() test_multibox_transform_loc() test_get_valid_counts() - test_nms() test_roi_align() test_proposal() test_yolo_reorg_infer_shape() test_yolo_reorg() + test_non_max_suppression() diff --git a/tutorials/nnvm/deploy_ssd_gluoncv.py b/tutorials/relay/deploy_ssd_gluoncv.py similarity index 76% rename from tutorials/nnvm/deploy_ssd_gluoncv.py rename to tutorials/relay/deploy_ssd_gluoncv.py index d83d1f86b75e..6a5d63b9f8cf 100644 --- a/tutorials/nnvm/deploy_ssd_gluoncv.py +++ b/tutorials/relay/deploy_ssd_gluoncv.py @@ -4,7 +4,7 @@ **Author**: `Yao Wang `_ This article is an introductory tutorial to deploy SSD models with TVM. -We will use GluonCV pre-trained SSD model and convert it to NNVM graph. +We will use GluonCV pre-trained SSD model and convert it to Relay IR """ import tvm @@ -34,8 +34,8 @@ # # To get best inference performance on CPU, change # target argument according to your device and -# follow the :ref:`tune_nnvm_x86` to tune x86 CPU and -# :ref:`tune_nnvm_arm` for arm cpu. +# follow the :ref:`tune_relay_x86` to tune x86 CPU and +# :ref:`tune_relay_arm` for arm cpu. # # SSD with VGG as body network is not supported yet since # x86 conv2d schedule doesn't support dilation. @@ -54,7 +54,6 @@ dshape = (1, 3, 512, 512) dtype = "float32" target_list = ctx_list() -frontend_list = ["nnvm", "relay"] ###################################################################### # Download and pre-process demo image @@ -65,20 +64,14 @@ x, img = data.transforms.presets.ssd.load_test(im_fname, short=512) ###################################################################### -# Convert and compile model with NNVM or Relay for CPU. +# Convert and compile model for CPU. 
block = model_zoo.get_model(model_name, pretrained=True) -def compile(frontend, target): - if frontend == "relay": - net, params = relay.frontend.from_mxnet(block, {"data": dshape}) - with relay.build_config(opt_level=3): - graph, lib, params = relay.build(net, target, params=params) - else: - net, params = from_mxnet(block) - with compiler.build_config(opt_level=3): - graph, lib, params = compiler.build( - net, target, {"data": dshape}, params=params) +def compile(target): + net, params = relay.frontend.from_mxnet(block, {"data": dshape}) + with relay.build_config(opt_level=3): + graph, lib, params = relay.build(net, target, params=params) return graph, lib, params ###################################################################### @@ -100,9 +93,8 @@ def run(graph, lib, params, ctx): if target == "cuda": print("GPU not supported yet, skip.") continue - for frontend in frontend_list: - graph, lib, params = compile(frontend, target) - class_IDs, scores, bounding_boxs = run(graph, lib, params, ctx) + graph, lib, params = compile(target) + class_IDs, scores, bounding_boxs = run(graph, lib, params, ctx) ###################################################################### # Display result From 42571cf4b4098b5775d7bcfcbc73338ec6a3c4ff Mon Sep 17 00:00:00 2001 From: Wang Date: Fri, 22 Feb 2019 14:03:17 +0800 Subject: [PATCH 31/43] Add max_output_size arg --- include/tvm/relay/attrs/vision.h | 4 ++++ nnvm/include/nnvm/top/nn.h | 4 ++++ nnvm/python/nnvm/top/vision.py | 5 +++-- python/tvm/relay/op/vision/_vision.py | 5 +++-- python/tvm/relay/op/vision/nms.py | 9 ++++++-- src/relay/op/vision/nms.cc | 4 +++- tests/python/relay/test_op_level5.py | 4 ++-- topi/python/topi/vision/nms.py | 29 +++++++++++++++++++++---- topi/python/topi/vision/ssd/multibox.py | 4 ++-- topi/tests/python/test_topi_vision.py | 8 +++---- 10 files changed, 57 insertions(+), 19 deletions(-) diff --git a/include/tvm/relay/attrs/vision.h b/include/tvm/relay/attrs/vision.h index 12523bd0c8ea..d5aad412a9dc 100644 --- a/include/tvm/relay/attrs/vision.h +++ b/include/tvm/relay/attrs/vision.h @@ -71,6 +71,7 @@ struct GetValidCountsAttrs : public tvm::AttrsNode{ /*! \brief Attributes used in non_maximum_suppression operator */ struct NMSAttrs : public tvm::AttrsNode{ bool return_indices; + int max_output_size; double iou_threshold; bool force_suppress; int topk; @@ -80,6 +81,9 @@ struct NMSAttrs : public tvm::AttrsNode{ TVM_DECLARE_ATTRS(NMSAttrs, "relay.attrs.NMSAttrs") { TVM_ATTR_FIELD(return_indices) .describe("Whether to return box indices in input data."); + TVM_ATTR_FIELD(max_output_size).set_default(-1) + .describe("Max number of output valid boxes for each instance." + "By default all valid boxes are returned."); TVM_ATTR_FIELD(iou_threshold).set_default(0.5) .describe("Non-maximum suppression threshold."); TVM_ATTR_FIELD(force_suppress).set_default(false) diff --git a/nnvm/include/nnvm/top/nn.h b/nnvm/include/nnvm/top/nn.h index 6fd283aeb14e..b4ec5950964b 100644 --- a/nnvm/include/nnvm/top/nn.h +++ b/nnvm/include/nnvm/top/nn.h @@ -445,6 +445,7 @@ struct MultiBoxTransformLocParam : public dmlc::Parameter { bool return_indices; + int max_output_size; float iou_threshold; bool force_suppress; int topk; @@ -453,6 +454,9 @@ struct NMSParam : public dmlc::Parameter { DMLC_DECLARE_PARAMETER(NMSParam) { DMLC_DECLARE_FIELD(return_indices) .describe("Whether to return box indices in input data."); + DMLC_DECLARE_FIELD(max_output_size).set_default(-1) + .describe("Max number of output valid boxes for each instance." 
+ "By default all valid boxes are returned."); DMLC_DECLARE_FIELD(iou_threshold).set_default(0.5) .describe("Non-maximum suppression threshold."); DMLC_DECLARE_FIELD(force_suppress).set_default(false) diff --git a/nnvm/python/nnvm/top/vision.py b/nnvm/python/nnvm/top/vision.py index 1beb9be2564b..1f166e2a00d7 100644 --- a/nnvm/python/nnvm/top/vision.py +++ b/nnvm/python/nnvm/top/vision.py @@ -71,6 +71,7 @@ def schedule_nms(_, outs, target): def compute_nms(attrs, inputs, _): """Compute definition of non_max_suppression""" return_indices = attrs.get_bool('return_indices') + max_output_size = attrs.get_int('max_output_size') iou_threshold = attrs.get_float('iou_threshold') force_suppress = attrs.get_bool('force_suppress') topk = attrs.get_int('topk') @@ -78,7 +79,7 @@ def compute_nms(attrs, inputs, _): invalid_to_bottom = attrs.get_bool('invalid_to_bottom') return topi.vision.non_max_suppression(inputs[0], inputs[1], return_indices, - iou_threshold, force_suppress, topk, - id_index, invalid_to_bottom) + max_output_size, iou_threshold, force_suppress, + topk, id_index, invalid_to_bottom) reg.register_pattern("non_max_suppression", OpPattern.OPAQUE) diff --git a/python/tvm/relay/op/vision/_vision.py b/python/tvm/relay/op/vision/_vision.py index 45569a9814af..40a47eb7b366 100644 --- a/python/tvm/relay/op/vision/_vision.py +++ b/python/tvm/relay/op/vision/_vision.py @@ -83,6 +83,7 @@ def schedule_nms(_, outs, target): def compute_nms(attrs, inputs, _, target): """Compute definition of nms""" return_indices = bool(get_const_int(attrs.return_indices)) + max_output_size = get_const_int(attrs.max_output_size) iou_threshold = get_const_float(attrs.iou_threshold) force_suppress = bool(get_const_int(attrs.force_suppress)) topk = get_const_int(attrs.topk) @@ -90,8 +91,8 @@ def compute_nms(attrs, inputs, _, target): invalid_to_bottom = bool(get_const_int(attrs.invalid_to_bottom)) return [ topi.vision.non_max_suppression(inputs[0], inputs[1], return_indices, - iou_threshold, force_suppress, topk, - id_index, invalid_to_bottom) + max_output_size, iou_threshold, force_suppress, + topk, id_index, invalid_to_bottom) ] diff --git a/python/tvm/relay/op/vision/nms.py b/python/tvm/relay/op/vision/nms.py index 5dd2c5a74555..a8a9f677872f 100644 --- a/python/tvm/relay/op/vision/nms.py +++ b/python/tvm/relay/op/vision/nms.py @@ -30,6 +30,7 @@ def get_valid_counts(data, def non_max_suppression(data, valid_count, return_indices, + max_output_size=-1, iou_threshold=0.5, force_suppress=False, topk=-1, @@ -47,6 +48,10 @@ def non_max_suppression(data, valid_count : relay.Expr 1-D tensor for valid number of boxes. + max_output_size : int, optional + Max number of output valid boxes for each instance. + By default all valid boxes are returned. + return_indices : bool Whether to return box indices in input data. @@ -71,5 +76,5 @@ def non_max_suppression(data, 3-D tensor with shape [batch_size, num_anchors, 6]. 
""" return _make.non_max_suppression(data, valid_count, return_indices, - iou_threshold, force_suppress, topk, - id_index, invalid_to_bottom) + max_output_size, iou_threshold, force_suppress, + topk, id_index, invalid_to_bottom) diff --git a/src/relay/op/vision/nms.cc b/src/relay/op/vision/nms.cc index 229983158262..a35394cfc216 100644 --- a/src/relay/op/vision/nms.cc +++ b/src/relay/op/vision/nms.cc @@ -85,6 +85,7 @@ bool NMSRel(const Array& types, Expr MakeNMS(Expr data, Expr valid_count, bool return_indices, + int max_output_size, double iou_threshold, bool force_suppress, int topk, @@ -92,6 +93,7 @@ Expr MakeNMS(Expr data, bool invalid_to_bottom) { auto attrs = make_node(); attrs->return_indices = return_indices; + attrs->max_output_size = max_output_size; attrs->iou_threshold = iou_threshold; attrs->force_suppress = force_suppress; attrs->topk = topk; @@ -104,7 +106,7 @@ Expr MakeNMS(Expr data, TVM_REGISTER_API("relay.op.vision._make.non_max_suppression") .set_body([](const TVMArgs& args, TVMRetValue* rv) { - runtime::detail::unpack_call(MakeNMS, args, rv); + runtime::detail::unpack_call(MakeNMS, args, rv); }); diff --git a/tests/python/relay/test_op_level5.py b/tests/python/relay/test_op_level5.py index 09a5d37d51f8..ce7bcd1d9abc 100644 --- a/tests/python/relay/test_op_level5.py +++ b/tests/python/relay/test_op_level5.py @@ -180,8 +180,8 @@ def verify_nms(x0_data, x1_data, dshape, ref_res, ref_indices_res, check_type_only=False): x0 = relay.var("x0", relay.ty.TensorType(dshape, "float32")) x1 = relay.var("x1", relay.ty.TensorType((dshape[0],), "int")) - z = relay.vision.non_max_suppression(x0, x1, False, iou_threshold, force_suppress, topk) - z_indices = relay.vision.non_max_suppression(x0, x1, True, iou_threshold, force_suppress, topk) + z = relay.vision.non_max_suppression(x0, x1, False, -1, iou_threshold, force_suppress, topk) + z_indices = relay.vision.non_max_suppression(x0, x1, True, -1, iou_threshold, force_suppress, topk) assert "iou_threshold" in z.astext() assert "iou_threshold" in z_indices.astext() zz = relay.ir_pass.infer_type(z) diff --git a/topi/python/topi/vision/nms.py b/topi/python/topi/vision/nms.py index 60715c91f0dc..daac25114663 100644 --- a/topi/python/topi/vision/nms.py +++ b/topi/python/topi/vision/nms.py @@ -1,4 +1,4 @@ -# pylint: disable=invalid-name, no-member, too-many-locals, too-many-arguments, undefined-variable, too-many-nested-blocks, too-many-branches +# pylint: disable=invalid-name, no-member, too-many-locals, too-many-arguments, undefined-variable, too-many-nested-blocks, too-many-branches, too-many-statements """Non-maximum suppression operator""" import tvm @@ -112,7 +112,7 @@ def get_valid_counts(data, score_threshold=0): @hybrid.script def hybrid_nms(data, sorted_index, valid_count, - iou_threshold, force_suppress, + max_output_size, iou_threshold, force_suppress, topk, id_index): """Hybrid routing for non-maximum suppression. @@ -129,6 +129,10 @@ def hybrid_nms(data, sorted_index, valid_count, valid_count : tvm.Tensor or numpy NDArray 1-D tensor for valid number of boxes. + max_output_size : tvm.const + Max number of output valid boxes for each instance. + By default all valid boxes are returned. + iou_threshold : tvm.const Overlapping(IoU) threshold to suppress object with smaller score. 
@@ -215,12 +219,24 @@ def hybrid_nms(data, sorted_index, valid_count, for k in range(box_data_length): output[i, j + valid_count[i], k] = -1.0 box_indices[i, j + valid_count[i]] = -1 + # Only return max_output_size valid boxes + num_valid_boxes = 0 + if max_output_size > 0: + for j in range(valid_count[i]): + if output[i, j, 0] >= 0: + if num_valid_boxes == max_output_size: + for k in range(box_data_length): + output[i, j, k] = -1.0 + box_indices[i, j] = -1 + else: + num_valid_boxes += 1 return output, box_indices @tvm.target.generic_func -def non_max_suppression(data, valid_count, return_indices, iou_threshold=0.5, - force_suppress=False, topk=-1, id_index=0, invalid_to_bottom=False): +def non_max_suppression(data, valid_count, return_indices, max_output_size=-1, + iou_threshold=0.5, force_suppress=False, topk=-1, + id_index=0, invalid_to_bottom=False): """Non-maximum suppression operator for object detection. Parameters @@ -236,6 +252,10 @@ def non_max_suppression(data, valid_count, return_indices, iou_threshold=0.5, return_indices : boolean Whether to return box indices in input data. + max_output_size : optional, int + Max number of output valid boxes for each instance. + By default all valid boxes are returned. + iou_threshold : optional, float Non-maximum suppression threshold. @@ -302,6 +322,7 @@ def non_max_suppression(data, valid_count, return_indices, iou_threshold=0.5, out_buffers=sort_tensor_buf, name="nms_sort") out, box_indices = hybrid_nms(data, sort_tensor, valid_count, + tvm.const(max_output_size, dtype="int32"), tvm.const(iou_threshold, dtype="float32"), tvm.const(force_suppress, dtype="bool"), tvm.const(topk, dtype="int32"), diff --git a/topi/python/topi/vision/ssd/multibox.py b/topi/python/topi/vision/ssd/multibox.py index 3c8cc6c07417..64a4a94f7f06 100644 --- a/topi/python/topi/vision/ssd/multibox.py +++ b/topi/python/topi/vision/ssd/multibox.py @@ -292,6 +292,6 @@ def multibox_detection(cls_prob, loc_pred, anchor, clip=True, threshold=0.01, nm """ inter_out = multibox_transform_loc(cls_prob, loc_pred, anchor, clip, threshold, variances) - out = non_max_suppression(inter_out[0], inter_out[1], False, nms_threshold, - force_suppress, nms_topk) + out = non_max_suppression(inter_out[0], inter_out[1], False, -1, + nms_threshold, force_suppress, nms_topk) return out diff --git a/topi/tests/python/test_topi_vision.py b/topi/tests/python/test_topi_vision.py index 489f2abb92fd..337e5e5e665c 100644 --- a/topi/tests/python/test_topi_vision.py +++ b/topi/tests/python/test_topi_vision.py @@ -87,11 +87,11 @@ def check_device(device): print("Running on target: %s" % device) with tvm.target.create(device): if device == 'llvm': - out = non_max_suppression(data, valid_count, False, nms_threshold, force_suppress, nms_topk) - indices_out = non_max_suppression(data, valid_count, True, nms_threshold, force_suppress, nms_topk) + out = non_max_suppression(data, valid_count, False, -1, nms_threshold, force_suppress, nms_topk) + indices_out = non_max_suppression(data, valid_count, True, -1, nms_threshold, force_suppress, nms_topk) else: - out = topi.cuda.non_max_suppression(data, valid_count, False, nms_threshold, force_suppress, nms_topk) - indices_out = topi.cuda.non_max_suppression(data, valid_count, True, nms_threshold, force_suppress, nms_topk) + out = topi.cuda.non_max_suppression(data, valid_count, False, -1, nms_threshold, force_suppress, nms_topk) + indices_out = topi.cuda.non_max_suppression(data, valid_count, -1, True, nms_threshold, force_suppress, nms_topk) s = 
topi.generic.schedule_nms(out) indices_s = topi.generic.schedule_nms(indices_out) From f8fecec5539df37d4dadc6ba0f18cd3bc5d8c280 Mon Sep 17 00:00:00 2001 From: Wang Date: Sun, 24 Feb 2019 19:58:58 +0800 Subject: [PATCH 32/43] Make return_indices optional --- include/tvm/relay/attrs/vision.h | 6 +++--- nnvm/include/nnvm/top/nn.h | 6 +++--- nnvm/python/nnvm/top/vision.py | 6 +++--- python/tvm/relay/frontend/mxnet.py | 2 +- python/tvm/relay/op/vision/_vision.py | 6 +++--- python/tvm/relay/op/vision/nms.py | 18 +++++++++--------- src/relay/op/vision/nms.cc | 8 +++++--- tests/python/relay/test_op_level5.py | 6 +++--- topi/python/topi/vision/nms.py | 10 +++++----- topi/python/topi/vision/ssd/multibox.py | 5 +++-- topi/tests/python/test_topi_vision.py | 8 ++++---- 11 files changed, 42 insertions(+), 39 deletions(-) diff --git a/include/tvm/relay/attrs/vision.h b/include/tvm/relay/attrs/vision.h index d5aad412a9dc..2733e8554e9b 100644 --- a/include/tvm/relay/attrs/vision.h +++ b/include/tvm/relay/attrs/vision.h @@ -70,17 +70,15 @@ struct GetValidCountsAttrs : public tvm::AttrsNode{ /*! \brief Attributes used in non_maximum_suppression operator */ struct NMSAttrs : public tvm::AttrsNode{ - bool return_indices; int max_output_size; double iou_threshold; bool force_suppress; int topk; int id_index; + bool return_indices; bool invalid_to_bottom; TVM_DECLARE_ATTRS(NMSAttrs, "relay.attrs.NMSAttrs") { - TVM_ATTR_FIELD(return_indices) - .describe("Whether to return box indices in input data."); TVM_ATTR_FIELD(max_output_size).set_default(-1) .describe("Max number of output valid boxes for each instance." "By default all valid boxes are returned."); @@ -92,6 +90,8 @@ struct NMSAttrs : public tvm::AttrsNode{ .describe("Keep maximum top k detections before nms, -1 for no limit."); TVM_ATTR_FIELD(id_index).set_default(0) .describe("Axis index of id."); + TVM_ATTR_FIELD(return_indices).set_default(true) + .describe("Whether to return box indices in input data."); TVM_ATTR_FIELD(invalid_to_bottom).set_default(false) .describe("Whether to move all invalid bounding boxes to the bottom."); } diff --git a/nnvm/include/nnvm/top/nn.h b/nnvm/include/nnvm/top/nn.h index b4ec5950964b..0f75096eb75d 100644 --- a/nnvm/include/nnvm/top/nn.h +++ b/nnvm/include/nnvm/top/nn.h @@ -445,15 +445,13 @@ struct MultiBoxTransformLocParam : public dmlc::Parameter { bool return_indices; - int max_output_size; float iou_threshold; bool force_suppress; int topk; int id_index; + int max_output_size; bool invalid_to_bottom; DMLC_DECLARE_PARAMETER(NMSParam) { - DMLC_DECLARE_FIELD(return_indices) - .describe("Whether to return box indices in input data."); DMLC_DECLARE_FIELD(max_output_size).set_default(-1) .describe("Max number of output valid boxes for each instance." 
"By default all valid boxes are returned."); @@ -465,6 +463,8 @@ struct NMSParam : public dmlc::Parameter { .describe("Keep maximum top k detections before nms, -1 for no limit."); DMLC_DECLARE_FIELD(id_index).set_default(0) .describe("Axis index of id."); + DMLC_DECLARE_FIELD(return_indices).set_default(true) + .describe("Whether to return box indices in input data."); DMLC_DECLARE_FIELD(invalid_to_bottom).set_default(false) .describe("Whether to move all invalid bounding boxes to the bottom."); } diff --git a/nnvm/python/nnvm/top/vision.py b/nnvm/python/nnvm/top/vision.py index 1f166e2a00d7..8bec66d7b8f4 100644 --- a/nnvm/python/nnvm/top/vision.py +++ b/nnvm/python/nnvm/top/vision.py @@ -78,8 +78,8 @@ def compute_nms(attrs, inputs, _): id_index = attrs.get_int('id_index') invalid_to_bottom = attrs.get_bool('invalid_to_bottom') - return topi.vision.non_max_suppression(inputs[0], inputs[1], return_indices, - max_output_size, iou_threshold, force_suppress, - topk, id_index, invalid_to_bottom) + return topi.vision.non_max_suppression(inputs[0], inputs[1], max_output_size, + iou_threshold, force_suppress, topk, + id_index, return_indices, invalid_to_bottom) reg.register_pattern("non_max_suppression", OpPattern.OPAQUE) diff --git a/python/tvm/relay/frontend/mxnet.py b/python/tvm/relay/frontend/mxnet.py index f00a7f551605..47d14cc395aa 100644 --- a/python/tvm/relay/frontend/mxnet.py +++ b/python/tvm/relay/frontend/mxnet.py @@ -405,11 +405,11 @@ def _mx_box_nms(inputs, attrs): ret = _op.vision.get_valid_counts(inputs[0], score_threshold=valid_thresh) nms_out = _op.vision.non_max_suppression(ret[1], ret[0], - return_indices=False, iou_threshold=iou_thresh, force_suppress=force_suppress, topk=topk, id_index=id_index, + return_indices=False, invalid_to_bottom=True) return nms_out diff --git a/python/tvm/relay/op/vision/_vision.py b/python/tvm/relay/op/vision/_vision.py index 40a47eb7b366..0205d6f3c2b6 100644 --- a/python/tvm/relay/op/vision/_vision.py +++ b/python/tvm/relay/op/vision/_vision.py @@ -90,9 +90,9 @@ def compute_nms(attrs, inputs, _, target): id_index = get_const_int(attrs.id_index) invalid_to_bottom = bool(get_const_int(attrs.invalid_to_bottom)) return [ - topi.vision.non_max_suppression(inputs[0], inputs[1], return_indices, - max_output_size, iou_threshold, force_suppress, - topk, id_index, invalid_to_bottom) + topi.vision.non_max_suppression(inputs[0], inputs[1], max_output_size, + iou_threshold, force_suppress, topk, + id_index, return_indices, invalid_to_bottom) ] diff --git a/python/tvm/relay/op/vision/nms.py b/python/tvm/relay/op/vision/nms.py index a8a9f677872f..e8586866c025 100644 --- a/python/tvm/relay/op/vision/nms.py +++ b/python/tvm/relay/op/vision/nms.py @@ -29,12 +29,12 @@ def get_valid_counts(data, def non_max_suppression(data, valid_count, - return_indices, max_output_size=-1, iou_threshold=0.5, force_suppress=False, topk=-1, id_index=0, + return_indices=True, invalid_to_bottom=False): """Non-maximum suppression operator for object detection. @@ -52,9 +52,6 @@ def non_max_suppression(data, Max number of output valid boxes for each instance. By default all valid boxes are returned. - return_indices : bool - Whether to return box indices in input data. - iou_threshold : float, optional Non-maximum suppression threshold. @@ -64,10 +61,13 @@ def non_max_suppression(data, topk : int, optional Keep maximum top k detections before nms, -1 for no limit. - id_index : optional, int + id_index : int, optional index of the class categories, -1 to disable. 
- invalid_to_bottom : optional, boolean + return_indices : bool, optional + Whether to return box indices in input data. + + invalid_to_bottom : bool, optional Whether to move all valid bounding boxes to the top. Returns @@ -75,6 +75,6 @@ def non_max_suppression(data, out : relay.Expr 3-D tensor with shape [batch_size, num_anchors, 6]. """ - return _make.non_max_suppression(data, valid_count, return_indices, - max_output_size, iou_threshold, force_suppress, - topk, id_index, invalid_to_bottom) + return _make.non_max_suppression(data, valid_count, max_output_size, + iou_threshold, force_suppress, topk, + id_index, return_indices, invalid_to_bottom) diff --git a/src/relay/op/vision/nms.cc b/src/relay/op/vision/nms.cc index a35394cfc216..fca40f9426db 100644 --- a/src/relay/op/vision/nms.cc +++ b/src/relay/op/vision/nms.cc @@ -84,20 +84,20 @@ bool NMSRel(const Array& types, Expr MakeNMS(Expr data, Expr valid_count, - bool return_indices, int max_output_size, double iou_threshold, bool force_suppress, int topk, int id_index, + bool return_indices, bool invalid_to_bottom) { auto attrs = make_node(); - attrs->return_indices = return_indices; attrs->max_output_size = max_output_size; attrs->iou_threshold = iou_threshold; attrs->force_suppress = force_suppress; attrs->topk = topk; attrs->id_index = id_index; + attrs->return_indices = return_indices; attrs->invalid_to_bottom = invalid_to_bottom; static const Op& op = Op::Get("vision.non_max_suppression"); return CallNode::make(op, {data, valid_count}, Attrs(attrs), {}); @@ -111,7 +111,9 @@ TVM_REGISTER_API("relay.op.vision._make.non_max_suppression") RELAY_REGISTER_OP("vision.non_max_suppression") -.describe(R"doc(Non-maximum suppression. +.describe(R"doc(Non-maximum suppression. The input boxes should +be in the format of [class_id, score, left, top, right, bottom]. +Set id_index to be -1 to ignore class_id axis. 
)doc" TVM_ADD_FILELINE) .set_num_inputs(2) .add_argument("data", "Tensor", "Input data.") diff --git a/tests/python/relay/test_op_level5.py b/tests/python/relay/test_op_level5.py index ce7bcd1d9abc..6e027ff232f1 100644 --- a/tests/python/relay/test_op_level5.py +++ b/tests/python/relay/test_op_level5.py @@ -180,8 +180,8 @@ def verify_nms(x0_data, x1_data, dshape, ref_res, ref_indices_res, check_type_only=False): x0 = relay.var("x0", relay.ty.TensorType(dshape, "float32")) x1 = relay.var("x1", relay.ty.TensorType((dshape[0],), "int")) - z = relay.vision.non_max_suppression(x0, x1, False, -1, iou_threshold, force_suppress, topk) - z_indices = relay.vision.non_max_suppression(x0, x1, True, -1, iou_threshold, force_suppress, topk) + z = relay.vision.non_max_suppression(x0, x1, -1, iou_threshold, force_suppress, topk, return_indices=False) + z_indices = relay.vision.non_max_suppression(x0, x1, -1, iou_threshold, force_suppress, topk) assert "iou_threshold" in z.astext() assert "iou_threshold" in z_indices.astext() zz = relay.ir_pass.infer_type(z) @@ -277,7 +277,7 @@ def test_default_value(): assert ret.checked_type == ref_type - nms = relay.vision.non_max_suppression(mtl[0], mtl[1], False) + nms = relay.vision.non_max_suppression(mtl[0], mtl[1], return_indices=False) func = relay.Function([cls_prob, loc_pred, anchors], nms) func = relay.ir_pass.infer_type(func) ctx_list = [("llvm", tvm.cpu(0))] diff --git a/topi/python/topi/vision/nms.py b/topi/python/topi/vision/nms.py index daac25114663..36ab0ed00510 100644 --- a/topi/python/topi/vision/nms.py +++ b/topi/python/topi/vision/nms.py @@ -234,9 +234,9 @@ def hybrid_nms(data, sorted_index, valid_count, @tvm.target.generic_func -def non_max_suppression(data, valid_count, return_indices, max_output_size=-1, +def non_max_suppression(data, valid_count, max_output_size=-1, iou_threshold=0.5, force_suppress=False, topk=-1, - id_index=0, invalid_to_bottom=False): + id_index=0, return_indices=True, invalid_to_bottom=False): """Non-maximum suppression operator for object detection. Parameters @@ -249,9 +249,6 @@ def non_max_suppression(data, valid_count, return_indices, max_output_size=-1, valid_count : tvm.Tensor 1-D tensor for valid number of boxes. - return_indices : boolean - Whether to return box indices in input data. - max_output_size : optional, int Max number of output valid boxes for each instance. By default all valid boxes are returned. @@ -268,6 +265,9 @@ def non_max_suppression(data, valid_count, return_indices, max_output_size=-1, id_index : optional, int index of the class categories, -1 to disable. + return_indices : optional, boolean + Whether to return box indices in input data. + invalid_to_bottom : optional, boolean Whether to move all valid bounding boxes to the top. 
diff --git a/topi/python/topi/vision/ssd/multibox.py b/topi/python/topi/vision/ssd/multibox.py index 64a4a94f7f06..2de1723dbd7b 100644 --- a/topi/python/topi/vision/ssd/multibox.py +++ b/topi/python/topi/vision/ssd/multibox.py @@ -292,6 +292,7 @@ def multibox_detection(cls_prob, loc_pred, anchor, clip=True, threshold=0.01, nm """ inter_out = multibox_transform_loc(cls_prob, loc_pred, anchor, clip, threshold, variances) - out = non_max_suppression(inter_out[0], inter_out[1], False, -1, - nms_threshold, force_suppress, nms_topk) + out = non_max_suppression(inter_out[0], inter_out[1], -1, + nms_threshold, force_suppress, nms_topk, + return_indices=False) return out diff --git a/topi/tests/python/test_topi_vision.py b/topi/tests/python/test_topi_vision.py index 337e5e5e665c..02e04212b63e 100644 --- a/topi/tests/python/test_topi_vision.py +++ b/topi/tests/python/test_topi_vision.py @@ -87,11 +87,11 @@ def check_device(device): print("Running on target: %s" % device) with tvm.target.create(device): if device == 'llvm': - out = non_max_suppression(data, valid_count, False, -1, nms_threshold, force_suppress, nms_topk) - indices_out = non_max_suppression(data, valid_count, True, -1, nms_threshold, force_suppress, nms_topk) + out = non_max_suppression(data, valid_count, -1, nms_threshold, force_suppress, nms_topk, return_indices=False) + indices_out = non_max_suppression(data, valid_count, -1, nms_threshold, force_suppress, nms_topk) else: - out = topi.cuda.non_max_suppression(data, valid_count, False, -1, nms_threshold, force_suppress, nms_topk) - indices_out = topi.cuda.non_max_suppression(data, valid_count, -1, True, nms_threshold, force_suppress, nms_topk) + out = topi.cuda.non_max_suppression(data, valid_count, -1, nms_threshold, force_suppress, nms_topk, return_indices=False) + indices_out = topi.cuda.non_max_suppression(data, valid_count, -1, nms_threshold, force_suppress, nms_topk) s = topi.generic.schedule_nms(out) indices_s = topi.generic.schedule_nms(indices_out) From 11c8bba7e6cc2f54ff65046088fe9d3b88d7952b Mon Sep 17 00:00:00 2001 From: Wang Date: Sun, 24 Feb 2019 20:05:12 +0800 Subject: [PATCH 33/43] Minor fix --- nnvm/tests/python/frontend/mxnet/test_forward.py | 1 - 1 file changed, 1 deletion(-) diff --git a/nnvm/tests/python/frontend/mxnet/test_forward.py b/nnvm/tests/python/frontend/mxnet/test_forward.py index 8992799528e7..67f1ad5ff27d 100644 --- a/nnvm/tests/python/frontend/mxnet/test_forward.py +++ b/nnvm/tests/python/frontend/mxnet/test_forward.py @@ -300,7 +300,6 @@ def test_forward_l2_normalize(): mx_sym = mx.sym.L2Normalization(data, mode="channel") verify_mxnet_frontend_impl(mx_sym, (2, 3, 4, 5), (2, 3, 4, 5)) - if __name__ == '__main__': test_forward_mlp() test_forward_vgg() From 908eedba34760394fbe9b1d52c8b8e187108aa6b Mon Sep 17 00:00:00 2001 From: Wang Date: Sun, 24 Feb 2019 21:29:12 +0800 Subject: [PATCH 34/43] Resolve conflict --- nnvm/python/nnvm/frontend/mxnet.py | 2 +- nnvm/tests/python/frontend/mxnet/test_forward.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/nnvm/python/nnvm/frontend/mxnet.py b/nnvm/python/nnvm/frontend/mxnet.py index 2df67d9967ca..56505cfabb69 100644 --- a/nnvm/python/nnvm/frontend/mxnet.py +++ b/nnvm/python/nnvm/frontend/mxnet.py @@ -324,7 +324,7 @@ def _argmin(inputs, attrs): 'flatten', 'log', 'log_softmax', 'max', 'min', 'negative', 'ones_like', 'relu', 'sigmoid', 'slice_like', 'softmax', 'sum', 'tanh', 'transpose', 'zeros_like', 'gather_nd', - 'reshape_like'] + 'reshape_like', 'where'] _convert_map = { '_copy' : 
_rename('copy'), diff --git a/nnvm/tests/python/frontend/mxnet/test_forward.py b/nnvm/tests/python/frontend/mxnet/test_forward.py index 67f1ad5ff27d..8992799528e7 100644 --- a/nnvm/tests/python/frontend/mxnet/test_forward.py +++ b/nnvm/tests/python/frontend/mxnet/test_forward.py @@ -300,6 +300,7 @@ def test_forward_l2_normalize(): mx_sym = mx.sym.L2Normalization(data, mode="channel") verify_mxnet_frontend_impl(mx_sym, (2, 3, 4, 5), (2, 3, 4, 5)) + if __name__ == '__main__': test_forward_mlp() test_forward_vgg() From 9743f15c34d5c7d684902cfe8a0cddf7cf317b02 Mon Sep 17 00:00:00 2001 From: Wang Date: Mon, 25 Feb 2019 17:05:23 +0800 Subject: [PATCH 35/43] Rename topk to top_k --- include/tvm/relay/attrs/vision.h | 4 ++-- nnvm/include/nnvm/top/nn.h | 4 ++-- nnvm/python/nnvm/frontend/mxnet.py | 2 +- nnvm/python/nnvm/top/vision.py | 4 ++-- nnvm/tests/python/compiler/test_top_level4.py | 4 ++-- python/tvm/relay/frontend/mxnet.py | 6 +++--- python/tvm/relay/op/vision/_vision.py | 4 ++-- python/tvm/relay/op/vision/nms.py | 6 +++--- src/relay/op/vision/nms.cc | 4 ++-- tests/python/relay/test_op_level5.py | 12 +++++------ topi/python/topi/vision/nms.py | 20 +++++++++---------- 11 files changed, 35 insertions(+), 35 deletions(-) diff --git a/include/tvm/relay/attrs/vision.h b/include/tvm/relay/attrs/vision.h index 2733e8554e9b..1b2fb6d9c997 100644 --- a/include/tvm/relay/attrs/vision.h +++ b/include/tvm/relay/attrs/vision.h @@ -73,7 +73,7 @@ struct NMSAttrs : public tvm::AttrsNode{ int max_output_size; double iou_threshold; bool force_suppress; - int topk; + int top_k; int id_index; bool return_indices; bool invalid_to_bottom; @@ -86,7 +86,7 @@ struct NMSAttrs : public tvm::AttrsNode{ .describe("Non-maximum suppression threshold."); TVM_ATTR_FIELD(force_suppress).set_default(false) .describe("Suppress all detections regardless of class_id."); - TVM_ATTR_FIELD(topk).set_default(-1) + TVM_ATTR_FIELD(top_k).set_default(-1) .describe("Keep maximum top k detections before nms, -1 for no limit."); TVM_ATTR_FIELD(id_index).set_default(0) .describe("Axis index of id."); diff --git a/nnvm/include/nnvm/top/nn.h b/nnvm/include/nnvm/top/nn.h index 0f75096eb75d..806e497727c4 100644 --- a/nnvm/include/nnvm/top/nn.h +++ b/nnvm/include/nnvm/top/nn.h @@ -447,7 +447,7 @@ struct NMSParam : public dmlc::Parameter { bool return_indices; float iou_threshold; bool force_suppress; - int topk; + int top_k; int id_index; int max_output_size; bool invalid_to_bottom; @@ -459,7 +459,7 @@ struct NMSParam : public dmlc::Parameter { .describe("Non-maximum suppression threshold."); DMLC_DECLARE_FIELD(force_suppress).set_default(false) .describe("Suppress all detections regardless of class_id."); - DMLC_DECLARE_FIELD(topk).set_default(-1) + DMLC_DECLARE_FIELD(top_k).set_default(-1) .describe("Keep maximum top k detections before nms, -1 for no limit."); DMLC_DECLARE_FIELD(id_index).set_default(0) .describe("Axis index of id."); diff --git a/nnvm/python/nnvm/frontend/mxnet.py b/nnvm/python/nnvm/frontend/mxnet.py index 56505cfabb69..47d7ede96e5f 100644 --- a/nnvm/python/nnvm/frontend/mxnet.py +++ b/nnvm/python/nnvm/frontend/mxnet.py @@ -246,7 +246,7 @@ def _contrib_multibox_detection(inputs, attrs): nms_topk = attrs.get('nms_topk') or -1 new_attrs0 = {'clip': clip, 'threshold': float(threshold), 'variances': variances} new_attrs1 = {'return_indices': False, 'iou_threshold': float(nms_threshold), - 'force_suppress': force_suppress, 'topk': int(nms_topk)} + 'force_suppress': force_suppress, 'top_k': int(nms_topk)} data, valid_count = 
_get_nnvm_op('multibox_transform_loc')(inputs[0], inputs[1], inputs[2], **new_attrs0) return _get_nnvm_op('non_max_suppression')(data, valid_count, **new_attrs1) diff --git a/nnvm/python/nnvm/top/vision.py b/nnvm/python/nnvm/top/vision.py index 8bec66d7b8f4..ab32838e10ff 100644 --- a/nnvm/python/nnvm/top/vision.py +++ b/nnvm/python/nnvm/top/vision.py @@ -74,12 +74,12 @@ def compute_nms(attrs, inputs, _): max_output_size = attrs.get_int('max_output_size') iou_threshold = attrs.get_float('iou_threshold') force_suppress = attrs.get_bool('force_suppress') - topk = attrs.get_int('topk') + top_k = attrs.get_int('top_k') id_index = attrs.get_int('id_index') invalid_to_bottom = attrs.get_bool('invalid_to_bottom') return topi.vision.non_max_suppression(inputs[0], inputs[1], max_output_size, - iou_threshold, force_suppress, topk, + iou_threshold, force_suppress, top_k, id_index, return_indices, invalid_to_bottom) reg.register_pattern("non_max_suppression", OpPattern.OPAQUE) diff --git a/nnvm/tests/python/compiler/test_top_level4.py b/nnvm/tests/python/compiler/test_top_level4.py index 991b9c2b15be..6a42047151e5 100644 --- a/nnvm/tests/python/compiler/test_top_level4.py +++ b/nnvm/tests/python/compiler/test_top_level4.py @@ -579,9 +579,9 @@ def test_non_max_suppression(): valid_count = sym.Variable("valid_count", dtype="int32") iou_threshold = 0.7 force_suppress = True - topk = 2 + top_k = 2 out = sym.non_max_suppression(data=data, valid_count=valid_count, return_indices=False, - iou_threshold=iou_threshold, force_suppress=force_suppress, topk=topk) + iou_threshold=iou_threshold, force_suppress=force_suppress, top_k=top_k) np_data = np.array([[[0, 0.8, 1, 20, 25, 45], [1, 0.7, 30, 60, 50, 80], [0, 0.4, 4, 21, 19, 40], [2, 0.9, 35, 61, 52, 79], diff --git a/python/tvm/relay/frontend/mxnet.py b/python/tvm/relay/frontend/mxnet.py index 47d14cc395aa..0dfab69340b4 100644 --- a/python/tvm/relay/frontend/mxnet.py +++ b/python/tvm/relay/frontend/mxnet.py @@ -327,7 +327,7 @@ def _mx_multibox_detection(inputs, attrs): new_attrs1["return_indices"] = False new_attrs1["iou_threshold"] = attrs.get_float("nms_threshold", 0.5) new_attrs1["force_suppress"] = attrs.get_bool("force_suppress", False) - new_attrs1["topk"] = attrs.get_int("nms_topk", -1) + new_attrs1["top_k"] = attrs.get_int("nms_topk", -1) ret = _op.vision.multibox_transform_loc(inputs[0], inputs[1], inputs[2], **new_attrs0) @@ -384,7 +384,7 @@ def _mx_proposal(inputs, attrs): def _mx_box_nms(inputs, attrs): force_suppress = attrs.get_bool("force_suppress", False) iou_thresh = attrs.get_float('overlap_thresh', 0.5) - topk = attrs.get_int('topk', -1) + top_k = attrs.get_int('topk', -1) valid_thresh = attrs.get_float('valid_thresh', 0) coord_start = attrs.get_int('coord_start', 2) score_index = attrs.get_int('score_index', 1) @@ -407,7 +407,7 @@ def _mx_box_nms(inputs, attrs): ret[0], iou_threshold=iou_thresh, force_suppress=force_suppress, - topk=topk, + top_k=top_k, id_index=id_index, return_indices=False, invalid_to_bottom=True) diff --git a/python/tvm/relay/op/vision/_vision.py b/python/tvm/relay/op/vision/_vision.py index 0205d6f3c2b6..c887076e6af8 100644 --- a/python/tvm/relay/op/vision/_vision.py +++ b/python/tvm/relay/op/vision/_vision.py @@ -86,12 +86,12 @@ def compute_nms(attrs, inputs, _, target): max_output_size = get_const_int(attrs.max_output_size) iou_threshold = get_const_float(attrs.iou_threshold) force_suppress = bool(get_const_int(attrs.force_suppress)) - topk = get_const_int(attrs.topk) + top_k = get_const_int(attrs.top_k) id_index = 
get_const_int(attrs.id_index) invalid_to_bottom = bool(get_const_int(attrs.invalid_to_bottom)) return [ topi.vision.non_max_suppression(inputs[0], inputs[1], max_output_size, - iou_threshold, force_suppress, topk, + iou_threshold, force_suppress, top_k, id_index, return_indices, invalid_to_bottom) ] diff --git a/python/tvm/relay/op/vision/nms.py b/python/tvm/relay/op/vision/nms.py index e8586866c025..0124ee29ab9e 100644 --- a/python/tvm/relay/op/vision/nms.py +++ b/python/tvm/relay/op/vision/nms.py @@ -32,7 +32,7 @@ def non_max_suppression(data, max_output_size=-1, iou_threshold=0.5, force_suppress=False, - topk=-1, + top_k=-1, id_index=0, return_indices=True, invalid_to_bottom=False): @@ -58,7 +58,7 @@ def non_max_suppression(data, force_suppress : bool, optional Suppress all detections regardless of class_id. - topk : int, optional + top_k : int, optional Keep maximum top k detections before nms, -1 for no limit. id_index : int, optional @@ -76,5 +76,5 @@ def non_max_suppression(data, 3-D tensor with shape [batch_size, num_anchors, 6]. """ return _make.non_max_suppression(data, valid_count, max_output_size, - iou_threshold, force_suppress, topk, + iou_threshold, force_suppress, top_k, id_index, return_indices, invalid_to_bottom) diff --git a/src/relay/op/vision/nms.cc b/src/relay/op/vision/nms.cc index fca40f9426db..6ebc2ca49b4d 100644 --- a/src/relay/op/vision/nms.cc +++ b/src/relay/op/vision/nms.cc @@ -87,7 +87,7 @@ Expr MakeNMS(Expr data, int max_output_size, double iou_threshold, bool force_suppress, - int topk, + int top_k, int id_index, bool return_indices, bool invalid_to_bottom) { @@ -95,7 +95,7 @@ Expr MakeNMS(Expr data, attrs->max_output_size = max_output_size; attrs->iou_threshold = iou_threshold; attrs->force_suppress = force_suppress; - attrs->topk = topk; + attrs->top_k = top_k; attrs->id_index = id_index; attrs->return_indices = return_indices; attrs->invalid_to_bottom = invalid_to_bottom; diff --git a/tests/python/relay/test_op_level5.py b/tests/python/relay/test_op_level5.py index 6e027ff232f1..eceedc760d4b 100644 --- a/tests/python/relay/test_op_level5.py +++ b/tests/python/relay/test_op_level5.py @@ -176,12 +176,12 @@ def verify_get_valid_counts(dshape, score_threshold): def test_non_max_suppression(): def verify_nms(x0_data, x1_data, dshape, ref_res, ref_indices_res, - iou_threshold=0.5, force_suppress=False, topk=-1, + iou_threshold=0.5, force_suppress=False, top_k=-1, check_type_only=False): x0 = relay.var("x0", relay.ty.TensorType(dshape, "float32")) x1 = relay.var("x1", relay.ty.TensorType((dshape[0],), "int")) - z = relay.vision.non_max_suppression(x0, x1, -1, iou_threshold, force_suppress, topk, return_indices=False) - z_indices = relay.vision.non_max_suppression(x0, x1, -1, iou_threshold, force_suppress, topk) + z = relay.vision.non_max_suppression(x0, x1, -1, iou_threshold, force_suppress, top_k, return_indices=False) + z_indices = relay.vision.non_max_suppression(x0, x1, -1, iou_threshold, force_suppress, top_k) assert "iou_threshold" in z.astext() assert "iou_threshold" in z_indices.astext() zz = relay.ir_pass.infer_type(z) @@ -221,10 +221,10 @@ def verify_nms(x0_data, x1_data, dshape, ref_res, ref_indices_res, dshape = (tvm.var("n"), num_anchors, 6) verify_nms(np_data, np_valid_count, dshape, np_result, np_indices_result, - force_suppress=True, topk=2, check_type_only=True) + force_suppress=True, top_k=2, check_type_only=True) dshape = (1, num_anchors, 6) verify_nms(np_data, np_valid_count, dshape, np_result, np_indices_result, - force_suppress=True, 
topk=2, check_type_only=False) + force_suppress=True, top_k=2, check_type_only=False) np_result = np.array([[[2, 0.9, 35, 61, 52, 79], [0, 0.8, 1, 20, 25, 45], [1, 0.7, 30, 60, 50, 80], [-1, -1, -1, -1, -1, -1], @@ -235,7 +235,7 @@ def verify_nms(x0_data, x1_data, dshape, ref_res, ref_indices_res, np_indices_result, check_type_only=True) dshape = (1, num_anchors, 6) verify_nms(np_data, np_valid_count, dshape, np_result, - np_indices_result, topk=3) + np_indices_result, top_k=3) def test_multibox_transform_loc(): diff --git a/topi/python/topi/vision/nms.py b/topi/python/topi/vision/nms.py index 36ab0ed00510..af982cba8652 100644 --- a/topi/python/topi/vision/nms.py +++ b/topi/python/topi/vision/nms.py @@ -113,7 +113,7 @@ def get_valid_counts(data, score_threshold=0): @hybrid.script def hybrid_nms(data, sorted_index, valid_count, max_output_size, iou_threshold, force_suppress, - topk, id_index): + top_k, id_index): """Hybrid routing for non-maximum suppression. Parameters @@ -139,7 +139,7 @@ def hybrid_nms(data, sorted_index, valid_count, force_suppress : tvm.const Whether to suppress all detections regardless of class_id. - topk : tvm.const + top_k : tvm.const Keep maximum top k detections before nms, -1 for no limit. id_index : tvm.const @@ -167,13 +167,13 @@ def hybrid_nms(data, sorted_index, valid_count, if valid_count[i] > 0: # Reorder output nkeep = valid_count[i] - if 0 < topk < nkeep: - nkeep = topk + if 0 < top_k < nkeep: + nkeep = top_k for j in range(nkeep): for k in range(box_data_length): output[i, j, k] = data[i, sorted_index[i, j], k] box_indices[i, j] = sorted_index[i, j] - if 0 < topk < valid_count[i]: + if 0 < top_k < valid_count[i]: for j in range(valid_count[i] - nkeep): for k in range(box_data_length): output[i, j + nkeep, k] = -1.0 @@ -235,7 +235,7 @@ def hybrid_nms(data, sorted_index, valid_count, @tvm.target.generic_func def non_max_suppression(data, valid_count, max_output_size=-1, - iou_threshold=0.5, force_suppress=False, topk=-1, + iou_threshold=0.5, force_suppress=False, top_k=-1, id_index=0, return_indices=True, invalid_to_bottom=False): """Non-maximum suppression operator for object detection. @@ -259,7 +259,7 @@ def non_max_suppression(data, valid_count, max_output_size=-1, force_suppress : optional, boolean Whether to suppress all detections regardless of class_id. - topk : optional, int + top_k : optional, int Keep maximum top k detections before nms, -1 for no limit. 
id_index : optional, int @@ -286,8 +286,8 @@ def non_max_suppression(data, valid_count, max_output_size=-1, valid_count = tvm.placeholder((dshape[0],), dtype="int32", name="valid_count") iou_threshold = 0.7 force_suppress = True - topk = -1 - out = nms(data, valid_count, iou_threshold, force_suppress, topk) + top_k = -1 + out = nms(data, valid_count, iou_threshold, force_suppress, top_k) np_data = np.random.uniform(dshape) np_valid_count = np.array([4]) s = topi.generic.schedule_nms(out) @@ -325,7 +325,7 @@ def non_max_suppression(data, valid_count, max_output_size=-1, tvm.const(max_output_size, dtype="int32"), tvm.const(iou_threshold, dtype="float32"), tvm.const(force_suppress, dtype="bool"), - tvm.const(topk, dtype="int32"), + tvm.const(top_k, dtype="int32"), tvm.const(id_index, dtype="int32")) if not return_indices and invalid_to_bottom: out = hybrid_rearrange_out(out) From 4bd6fecb4dd05379529ed3f61dfd28b42603d040 Mon Sep 17 00:00:00 2001 From: Wang Date: Wed, 27 Feb 2019 18:49:17 +0800 Subject: [PATCH 36/43] Fix example code --- topi/python/topi/vision/nms.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/topi/python/topi/vision/nms.py b/topi/python/topi/vision/nms.py index af982cba8652..169daea2d4d3 100644 --- a/topi/python/topi/vision/nms.py +++ b/topi/python/topi/vision/nms.py @@ -280,14 +280,15 @@ def non_max_suppression(data, valid_count, max_output_size=-1, -------- .. code-block:: python - # An example to use nms + # An example to use non_max_suppression dshape = (1, 5, 6) data = tvm.placeholder(dshape, name="data") valid_count = tvm.placeholder((dshape[0],), dtype="int32", name="valid_count") iou_threshold = 0.7 force_suppress = True top_k = -1 - out = nms(data, valid_count, iou_threshold, force_suppress, top_k) + out = non_max_suppression(data, valid_count, iou_threshold=iou_threshold, + force_suppress=force_suppress, top_k=top_k) np_data = np.random.uniform(dshape) np_valid_count = np.array([4]) s = topi.generic.schedule_nms(out) From d1e95f959d8a68612042fe86a07ddf46f0e2ff79 Mon Sep 17 00:00:00 2001 From: Wang Date: Sat, 2 Mar 2019 21:12:07 -0800 Subject: [PATCH 37/43] Fix lint --- nnvm/tests/python/frontend/mxnet/test_forward.py | 1 - tests/python/relay/test_op_level10.py | 1 - topi/python/topi/testing/__init__.py | 3 --- 3 files changed, 5 deletions(-) diff --git a/nnvm/tests/python/frontend/mxnet/test_forward.py b/nnvm/tests/python/frontend/mxnet/test_forward.py index 8992799528e7..8a0b10f0eb2a 100644 --- a/nnvm/tests/python/frontend/mxnet/test_forward.py +++ b/nnvm/tests/python/frontend/mxnet/test_forward.py @@ -227,7 +227,6 @@ def test_forward_slice(): mx_sym = mx.sym.slice(data, begin=(-1, 1), end=(-3, 4), step=(-1, 2)) verify_mxnet_frontend_impl(mx_sym, (3, 4), (2, 2)) -<<<<<<< HEAD def test_forward_maximum(): a = mx.sym.var('a') b = mx.sym.var('b') diff --git a/tests/python/relay/test_op_level10.py b/tests/python/relay/test_op_level10.py index e3c331c6a1d0..f5c9410f132a 100644 --- a/tests/python/relay/test_op_level10.py +++ b/tests/python/relay/test_op_level10.py @@ -146,7 +146,6 @@ def verify_reverse_reshape(shape, newshape, oshape): verify_reverse_reshape((2, 3, 4), (-1, 0), (6, 4)) verify_reverse_reshape((2, 3, 4), (0, -3), (2, 12)) -<<<<<<< HEAD def verify_batch_matmul(x_shape, y_shape, out_shape, dtype="float32"): x = relay.var("x", relay.TensorType(x_shape, dtype)) y = relay.var("y", relay.TensorType(y_shape, dtype)) diff --git a/topi/python/topi/testing/__init__.py b/topi/python/topi/testing/__init__.py index 90b8e8e0e58c..1743de13fd85 
100644 --- a/topi/python/topi/testing/__init__.py +++ b/topi/python/topi/testing/__init__.py @@ -19,8 +19,5 @@ from .l2_normalize_python import l2_normalize_python from .gather_nd_python import gather_nd_python from .strided_slice_python import strided_slice_python -<<<<<<< HEAD from .batch_matmul import batch_matmul -======= from .slice_axis_python import slice_axis_python ->>>>>>> Relay support From 292130d89308903cea611cfe36f503303805f86c Mon Sep 17 00:00:00 2001 From: Wang Date: Sat, 2 Mar 2019 23:01:01 -0800 Subject: [PATCH 38/43] Minor fix --- nnvm/tests/python/frontend/mxnet/test_forward.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/nnvm/tests/python/frontend/mxnet/test_forward.py b/nnvm/tests/python/frontend/mxnet/test_forward.py index 8a0b10f0eb2a..581ae75a4bbc 100644 --- a/nnvm/tests/python/frontend/mxnet/test_forward.py +++ b/nnvm/tests/python/frontend/mxnet/test_forward.py @@ -289,16 +289,6 @@ def test_forward_minimum(): tvm_out = m.get_output(0, tvm.nd.empty(out_shape, dtype)).asnumpy() tvm.testing.assert_allclose(mx_out, tvm_out, rtol=1e-5, atol=1e-5) -def test_forward_slice_axis(): - data = mx.sym.var('data') - mx_sym = mx.sym.slice_axis(data, axis=1, begin=-5, end=None) - verify_mxnet_frontend_impl(mx_sym, (1, 10, 6), (1, 5, 6)) - -def test_forward_l2_normalize(): - data = mx.sym.var('data') - mx_sym = mx.sym.L2Normalization(data, mode="channel") - verify_mxnet_frontend_impl(mx_sym, (2, 3, 4, 5), (2, 3, 4, 5)) - if __name__ == '__main__': test_forward_mlp() @@ -325,5 +315,3 @@ def test_forward_l2_normalize(): test_forward_slice() test_forward_maximum() test_forward_minimum() - test_forward_slice_axis() - test_forward_l2_normalize() From fb94ffeaf5a7582c74ab93b9a0aa1b9adfa55f40 Mon Sep 17 00:00:00 2001 From: Wang Date: Mon, 4 Mar 2019 12:46:51 -0800 Subject: [PATCH 39/43] Remove contrib_slice_axis --- include/tvm/relay/attrs/transform.h | 15 --- python/tvm/relay/op/_transform.py | 1 - python/tvm/relay/op/transform.py | 26 ----- src/relay/op/tensor/transform.cc | 102 -------------------- tests/python/frontend/mxnet/test_forward.py | 2 - tests/python/relay/test_op_level10.py | 22 ----- 6 files changed, 168 deletions(-) diff --git a/include/tvm/relay/attrs/transform.h b/include/tvm/relay/attrs/transform.h index 720d6b9d3690..fea2c960d032 100644 --- a/include/tvm/relay/attrs/transform.h +++ b/include/tvm/relay/attrs/transform.h @@ -171,21 +171,6 @@ struct StridedSliceAttrs : public tvm::AttrsNode { } }; -struct SliceAxisAttrs : public tvm::AttrsNode { - int axis; - int begin; - int end; - - TVM_DECLARE_ATTRS(SliceAxisAttrs, "relay.attrs.SliceAxisAttrs") { - TVM_ATTR_FIELD(axis) - .describe("Axis along which to be sliced."); - TVM_ATTR_FIELD(begin) - .describe("Index for begin of slice"); - TVM_ATTR_FIELD(end) - .describe("Index for end of the slice"); - } -}; - struct SliceLikeAttrs : public tvm::AttrsNode { Array axes; diff --git a/python/tvm/relay/op/_transform.py b/python/tvm/relay/op/_transform.py index 315b448cdc6e..1389f96b8325 100644 --- a/python/tvm/relay/op/_transform.py +++ b/python/tvm/relay/op/_transform.py @@ -21,7 +21,6 @@ _reg.register_schedule("arange", schedule_injective) _reg.register_schedule("cast", schedule_injective) _reg.register_schedule("strided_slice", schedule_injective) -_reg.register_schedule("_contrib_slice_axis", schedule_injective) _reg.register_schedule("slice_like", schedule_injective) _reg.register_schedule("split", schedule_injective) _reg.register_schedule("take", schedule_injective) diff --git 
From fb94ffeaf5a7582c74ab93b9a0aa1b9adfa55f40 Mon Sep 17 00:00:00 2001
From: Wang
Date: Mon, 4 Mar 2019 12:46:51 -0800
Subject: [PATCH 39/43] Remove contrib_slice_axis

---
 include/tvm/relay/attrs/transform.h         |  15 ---
 python/tvm/relay/op/_transform.py           |   1 -
 python/tvm/relay/op/transform.py            |  26 -----
 src/relay/op/tensor/transform.cc            | 102 --------------------
 tests/python/frontend/mxnet/test_forward.py |   2 -
 tests/python/relay/test_op_level10.py       |  22 -----
 6 files changed, 168 deletions(-)

diff --git a/include/tvm/relay/attrs/transform.h b/include/tvm/relay/attrs/transform.h
index 720d6b9d3690..fea2c960d032 100644
--- a/include/tvm/relay/attrs/transform.h
+++ b/include/tvm/relay/attrs/transform.h
@@ -171,21 +171,6 @@ struct StridedSliceAttrs : public tvm::AttrsNode<StridedSliceAttrs> {
   }
 };

-struct SliceAxisAttrs : public tvm::AttrsNode<SliceAxisAttrs> {
-  int axis;
-  int begin;
-  int end;
-
-  TVM_DECLARE_ATTRS(SliceAxisAttrs, "relay.attrs.SliceAxisAttrs") {
-    TVM_ATTR_FIELD(axis)
-      .describe("Axis along which to be sliced.");
-    TVM_ATTR_FIELD(begin)
-      .describe("Index for begin of slice");
-    TVM_ATTR_FIELD(end)
-      .describe("Index for end of the slice");
-  }
-};
-
 struct SliceLikeAttrs : public tvm::AttrsNode<SliceLikeAttrs> {
   Array<Integer> axes;
diff --git a/python/tvm/relay/op/_transform.py b/python/tvm/relay/op/_transform.py
index 315b448cdc6e..1389f96b8325 100644
--- a/python/tvm/relay/op/_transform.py
+++ b/python/tvm/relay/op/_transform.py
@@ -21,7 +21,6 @@
 _reg.register_schedule("arange", schedule_injective)
 _reg.register_schedule("cast", schedule_injective)
 _reg.register_schedule("strided_slice", schedule_injective)
-_reg.register_schedule("_contrib_slice_axis", schedule_injective)
 _reg.register_schedule("slice_like", schedule_injective)
 _reg.register_schedule("split", schedule_injective)
 _reg.register_schedule("take", schedule_injective)
diff --git a/python/tvm/relay/op/transform.py b/python/tvm/relay/op/transform.py
index 9dc42861a4cf..725f57f54bd8 100644
--- a/python/tvm/relay/op/transform.py
+++ b/python/tvm/relay/op/transform.py
@@ -471,32 +471,6 @@ def strided_slice(data, begin, end, strides=None):
     return _make.strided_slice(data, list(begin), list(end), list(strides))


-def slice_axis(data, axis, begin, end=None):
-    """Slice input array along specific axis.
-
-    Parameters
-    ----------
-    data : relay.Expr
-        The source array to be sliced.
-
-    axis : int
-        Axis to be sliced.
-
-    begin: int
-        The index to begin with in the slicing.
-
-    end: int, optional
-        The index indicating end of the slice.
-
-    Returns
-    -------
-    ret : relay.Expr
-        The computed result.
-    """
-    end = end or 0
-    return _make._contrib_slice_axis(data, axis, begin, end)
-
-
 def slice_like(data, shape_like, axes=None):
     """Slice the first input with respect to the second input.

diff --git a/src/relay/op/tensor/transform.cc b/src/relay/op/tensor/transform.cc
index c0f279a6b72c..0c26e3da742e 100644
--- a/src/relay/op/tensor/transform.cc
+++ b/src/relay/op/tensor/transform.cc
@@ -1356,108 +1356,6 @@ Array<Integer> GetIntArray(Array<IndexExpr> arr) {
   return Array<Integer>(arr.node_);
 }

-// slice_axis
-TVM_REGISTER_NODE_TYPE(SliceAxisAttrs);
-
-bool SliceAxisRel(const Array<Type>& types,
-                  int num_inputs,
-                  const Attrs& attrs,
-                  const TypeReporter& reporter) {
-  CHECK_EQ(types.size(), 2);
-  const auto* data = types[0].as<TensorTypeNode>();
-  const SliceAxisAttrs *param = attrs.as<SliceAxisAttrs>();
-
-  auto src_shape = data->shape;
-  int axis = param->axis;
-  int begin = param->begin;
-  int end = param->end;
-
-  if (axis < 0) {
-    axis += src_shape.size();
-  }
-  if (begin < 0) {
-    begin += *as_const_int(src_shape[axis]);
-  }
-  if (end <= 0) {
-    end += *as_const_int(src_shape[axis]);
-  }
-  CHECK_LT(begin, end)
-    << "Begin index must be smaller than end index: "
-    << begin << " vs " << end;
-
-  std::vector<IndexExpr>&& oshape = AsVector(data->shape);
-  oshape[axis] = make_const(Int(32), end - begin);
-
-  // assign output type
-  reporter->Assign(types[1], TensorTypeNode::make(oshape, data->dtype));
-  return true;
-}
-
-Expr MakeSliceAxis(Expr data,
-                   Integer axis,
-                   Integer begin,
-                   Integer end) {
-  auto attrs = make_node<SliceAxisAttrs>();
-  attrs->axis = axis;
-  attrs->begin = begin;
-  attrs->end = end;
-  static const Op& op = Op::Get("_contrib_slice_axis");
-  return CallNode::make(op, {data}, Attrs(attrs), {});
-}
-
-TVM_REGISTER_API("relay.op._make._contrib_slice_axis")
-.set_body([](const TVMArgs& args, TVMRetValue* rv) {
-    runtime::detail::unpack_call<Expr, 4>(MakeSliceAxis, args, rv);
-});
-
-Array<Tensor> SliceAxisCompute(const Attrs& attrs,
-                               const Array<Tensor>& inputs,
-                               const Type& out_type,
-                               const Target& target) {
-  const SliceAxisAttrs *param = attrs.as<SliceAxisAttrs>();
-  const Array<Expr> src_shape = inputs[0]->shape;
-  Array<Expr> begin_idx, end_idx, strides;
-  int axis = param->axis;
-  int begin = param->begin;
-  int end = param->end;
-
-  if (axis < 0) {
-    axis += src_shape.size();
-  }
-  if (begin < 0) {
-    begin += *as_const_int(src_shape[axis]);
-  }
-  if (end <= 0) {
-    end += *as_const_int(src_shape[axis]);
-  }
-  for (size_t i = 0; i < src_shape.size(); ++i) {
-    begin_idx.push_back(make_const(Int(32), 0));
-    strides.push_back(make_const(Int(32), 1));
-  }
-  end_idx = Array<Expr>(src_shape);
-  begin_idx.Set(axis, make_const(Int(32), begin));
-  end_idx.Set(axis, make_const(Int(32), end));
-
-  return Array<Tensor>{
-    topi::strided_slice(inputs[0],
-                        GetIntArray(begin_idx),
-                        GetIntArray(end_idx),
-                        GetIntArray(strides))
-  };
-}
-
-RELAY_REGISTER_OP("_contrib_slice_axis")
-.describe(R"doc(Slices along a given axis.
-Returns an array slice along a given axis starting from
-the begin index to the end index.
-)doc" TVM_ADD_FILELINE)
-.set_num_inputs(1)
-.add_argument("data", "Tensor", "Input data.")
-.set_support_level(10)
-.add_type_rel("SliceAxis", SliceAxisRel)
-.set_attr<FTVMCompute>("FTVMCompute", SliceAxisCompute)
-.set_attr<TOpPattern>("TOpPattern", kInjective);
-
 // strided_slice
 TVM_REGISTER_NODE_TYPE(StridedSliceAttrs);
diff --git a/tests/python/frontend/mxnet/test_forward.py b/tests/python/frontend/mxnet/test_forward.py
index fb975c11add0..4679876c181b 100644
--- a/tests/python/frontend/mxnet/test_forward.py
+++ b/tests/python/frontend/mxnet/test_forward.py
@@ -257,8 +257,6 @@ def verify(start, stop, step):
     verify(20, 1, -1)
     verify(20, 1, -1.5)

-<<<<<<< HEAD
-<<<<<<< HEAD
 def _mx_symbol(F, op_name, inputs):
     op = getattr(F, op_name)
     return op(*inputs)
diff --git a/tests/python/relay/test_op_level10.py b/tests/python/relay/test_op_level10.py
index f5c9410f132a..7237cfbc3b87 100644
--- a/tests/python/relay/test_op_level10.py
+++ b/tests/python/relay/test_op_level10.py
@@ -177,27 +177,6 @@ def test_batch_matmul():
     verify_batch_matmul((5, 16, 32), (5, 20, 32), (5, 16, 20))
     verify_batch_matmul((30, 16, 32), (30, 20, 32), (30, 16, 20))

-def test_contrib_slice_axis():
-    def verify(dshape, axis, begin, end):
-        x = relay.var("x", relay.TensorType(dshape, "float32"))
-        z = relay.slice_axis(x, axis=axis, begin=begin, end=end)
-        func = relay.Function([x], z)
-        func = relay.ir_pass.infer_type(func)
-        text = func.astext()
-        assert "begin" in text
-        assert "end" in text
-        x_data = np.random.uniform(size=dshape).astype("float32")
-        ref_res = topi.testing.slice_axis_python(
-            x_data, axis, begin, end)
-        for target, ctx in ctx_list():
-            intrp = relay.create_executor("graph", ctx=ctx, target=target)
-            op_res = intrp.evaluate(func)(x_data)
-            tvm.testing.assert_allclose(op_res.asnumpy(), ref_res)
-
-    verify((1, 2, 3, 4), 3, 0, 2)
-    verify((100, 50), -1, 1, -1)
-    verify((20,), -1, -9, -3)
-    verify((20, 30, 40), 1, 5, 0)

 if __name__ == "__main__":
     test_collapse_sum_like()
@@ -205,4 +184,3 @@ def verify(dshape, axis, begin, end):
     test_slice_like()
     test_reverse_reshape()
     test_batch_matmul()
-    test_contrib_slice_axis()
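With the dedicated _contrib_slice_axis operator removed, a single-axis slice can be expressed directly with relay.strided_slice, mirroring the negative-index normalization that the deleted SliceAxisCompute performed. The helper below is an illustrative sketch rather than code from the patch; the helper name and the use of a known static shape are assumptions.

    from tvm import relay

    def slice_one_axis(data, shape, axis, begin, end=None):
        # Normalize negative axis/begin/end against the static shape,
        # the same way the removed C++ compute did.
        ndim = len(shape)
        if axis < 0:
            axis += ndim
        if begin < 0:
            begin += shape[axis]
        if end is None:
            end = 0
        if end <= 0:
            end += shape[axis]
        b = [0] * ndim
        e = list(shape)
        b[axis] = begin
        e[axis] = end
        return relay.strided_slice(data, begin=b, end=e)

    x = relay.var("x", shape=(1, 10, 6))
    y = slice_one_axis(x, (1, 10, 6), axis=1, begin=-5)  # keeps rows 5..9 along axis 1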
From 5c0cee95acd7ad8f4d415d6687a602045344c3c8 Mon Sep 17 00:00:00 2001
From: Wang
Date: Mon, 4 Mar 2019 14:15:57 -0800
Subject: [PATCH 40/43] Resolve conflict

---
 python/tvm/relay/frontend/mxnet.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/python/tvm/relay/frontend/mxnet.py b/python/tvm/relay/frontend/mxnet.py
index 0dfab69340b4..58de937fdbb8 100644
--- a/python/tvm/relay/frontend/mxnet.py
+++ b/python/tvm/relay/frontend/mxnet.py
@@ -434,6 +434,7 @@ def _mx_l2_normalize(inputs, attrs):
     "exp",
     "negative",
     "reshape_like",
+    "slice_like",
     "zeros_like",
     "ones_like",
     "where",
@@ -522,10 +523,10 @@ def _mx_l2_normalize(inputs, attrs):
     "BatchNorm"       : _mx_batch_norm,
     "BatchNorm_v1"    : _mx_batch_norm,
     "LRN"             : _mx_lrn,
+    "L2Normalization" : _mx_l2_normalize,
     "slice"           : _mx_slice,
     "slice_like"      : _mx_slice_like,
     "slice_axis"      : _mx_slice_axis,
-    "L2Normalization" : _mx_l2_normalize,
     "SliceChannel"    : _mx_split,
     "split"           : _mx_split,
     "expand_dims"     : _mx_expand_dims,
From 28d479a2db98ef7bd2d4db6b3aa7dd352a6fffd5 Mon Sep 17 00:00:00 2001
From: Wang
Date: Mon, 4 Mar 2019 17:14:14 -0800
Subject: [PATCH 41/43] Address minor comments

---
 include/tvm/relay/attrs/vision.h |  6 +++---
 nnvm/include/nnvm/top/nn.h       |  4 ++--
 nnvm/src/top/vision/nms.cc       | 11 ++++++-----
 src/relay/op/vision/nms.cc       |  7 ++++---
 4 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/include/tvm/relay/attrs/vision.h b/include/tvm/relay/attrs/vision.h
index 1b2fb6d9c997..20b80f33a2a3 100644
--- a/include/tvm/relay/attrs/vision.h
+++ b/include/tvm/relay/attrs/vision.h
@@ -59,7 +59,7 @@ struct MultiBoxTransformLocAttrs
 };

 /*! \brief Attributes used in get_valid_counts operator */
-struct GetValidCountsAttrs : public tvm::AttrsNode<GetValidCountsAttrs>{
+struct GetValidCountsAttrs : public tvm::AttrsNode<GetValidCountsAttrs> {
   double score_threshold;

   TVM_DECLARE_ATTRS(GetValidCountsAttrs, "relay.attrs.GetValidCountsAttrs") {
@@ -69,7 +69,7 @@ struct GetValidCountsAttrs : public tvm::AttrsNode<GetValidCountsAttrs>{
 };

 /*! \brief Attributes used in non_maximum_suppression operator */
-struct NMSAttrs : public tvm::AttrsNode<NMSAttrs>{
+struct NonMaximumSuppressionAttrs : public tvm::AttrsNode<NonMaximumSuppressionAttrs> {
   int max_output_size;
   double iou_threshold;
   bool force_suppress;
@@ -78,7 +78,7 @@ struct NMSAttrs : public tvm::AttrsNode<NMSAttrs>{
   bool return_indices;
   bool invalid_to_bottom;

-  TVM_DECLARE_ATTRS(NMSAttrs, "relay.attrs.NMSAttrs") {
+  TVM_DECLARE_ATTRS(NonMaximumSuppressionAttrs, "relay.attrs.NonMaximumSuppressionAttrs") {
     TVM_ATTR_FIELD(max_output_size).set_default(-1)
       .describe("Max number of output valid boxes for each instance."
                 "By default all valid boxes are returned.");
diff --git a/nnvm/include/nnvm/top/nn.h b/nnvm/include/nnvm/top/nn.h
index 806e497727c4..578f928c5b9f 100644
--- a/nnvm/include/nnvm/top/nn.h
+++ b/nnvm/include/nnvm/top/nn.h
@@ -443,7 +443,7 @@ struct MultiBoxTransformLocParam : public dmlc::Parameter<MultiBoxTransformLocParam> {
-struct NMSParam : public dmlc::Parameter<NMSParam> {
+struct NonMaximumSuppressionParam : public dmlc::Parameter<NonMaximumSuppressionParam> {
   bool return_indices;
   float iou_threshold;
   bool force_suppress;
@@ -451,7 +451,7 @@ struct NMSParam : public dmlc::Parameter<NMSParam> {
   int id_index;
   int max_output_size;
   bool invalid_to_bottom;
-  DMLC_DECLARE_PARAMETER(NMSParam) {
+  DMLC_DECLARE_PARAMETER(NonMaximumSuppressionParam) {
     DMLC_DECLARE_FIELD(max_output_size).set_default(-1)
       .describe("Max number of output valid boxes for each instance."
                 "By default all valid boxes are returned.");
diff --git a/nnvm/src/top/vision/nms.cc b/nnvm/src/top/vision/nms.cc
index 315e06e22ee5..e69a7cb2f036 100644
--- a/nnvm/src/top/vision/nms.cc
+++ b/nnvm/src/top/vision/nms.cc
@@ -19,12 +19,13 @@ using compiler::FTVMCompute;
 using tvm::Tensor;
 using tvm::Array;

-DMLC_REGISTER_PARAMETER(NMSParam);
+DMLC_REGISTER_PARAMETER(NonMaximumSuppressionParam);

 bool NMSShape(const NodeAttrs& attrs,
               std::vector<TShape> *in_attrs,
               std::vector<TShape> *out_attrs) {
-  const NMSParam& param = nnvm::get<NMSParam>(attrs.parsed);
+  const NonMaximumSuppressionParam& param =
+    nnvm::get<NonMaximumSuppressionParam>(attrs.parsed);
   CHECK_EQ(in_attrs->size(), 2U) << "Inputs: [data, valid_count]";
   TShape dshape = in_attrs->at(0);
   TShape vshape = in_attrs->at(1);
@@ -69,10 +70,10 @@ NNVM_REGISTER_OP(non_max_suppression)
 )doc" NNVM_ADD_FILELINE)
 .set_num_inputs(2)
 .set_num_outputs(1)
-.set_attr_parser(ParamParser<NMSParam>)
+.set_attr_parser(ParamParser<NonMaximumSuppressionParam>)
 .set_attr<FGetAttrDict>("FGetAttrDict",
-                        ParamGetAttrDict<NMSParam>)
-.add_arguments(NMSParam::__FIELDS__())
+                        ParamGetAttrDict<NonMaximumSuppressionParam>)
+.add_arguments(NonMaximumSuppressionParam::__FIELDS__())
 .add_argument("data", "Tensor", "Input data.")
 .add_argument("valid_count", "Tensor", "Number of valid anchor boxes.")
 .set_attr<nnvm::FListInputNames>("FListInputNames", [](const NodeAttrs& attrs) {
diff --git a/src/relay/op/vision/nms.cc b/src/relay/op/vision/nms.cc
index 6ebc2ca49b4d..6a94da032196 100644
--- a/src/relay/op/vision/nms.cc
+++ b/src/relay/op/vision/nms.cc
@@ -56,7 +56,7 @@ input data.
 .add_type_rel("GetValidCount", GetValidCountRel);

-TVM_REGISTER_NODE_TYPE(NMSAttrs);
+TVM_REGISTER_NODE_TYPE(NonMaximumSuppressionAttrs);

 bool NMSRel(const Array<Type>& types,
             int num_inputs,
@@ -65,7 +65,8 @@ bool NMSRel(const Array<Type>& types,
   CHECK_EQ(types.size(), 3);
   const auto* data = types[0].as<TensorTypeNode>();
   const auto* valid_count = types[1].as<TensorTypeNode>();
-  const NMSAttrs* param = attrs.as<NMSAttrs>();
+  const NonMaximumSuppressionAttrs* param =
+    attrs.as<NonMaximumSuppressionAttrs>();
   const auto& dshape = data->shape;
   const auto& vshape = valid_count->shape;
   CHECK_EQ(dshape.size(), 3) << "Input data should be 3-D.";
@@ -91,7 +92,7 @@ Expr MakeNMS(Expr data,
              int id_index,
              bool return_indices,
              bool invalid_to_bottom) {
-  auto attrs = make_node<NMSAttrs>();
+  auto attrs = make_node<NonMaximumSuppressionAttrs>();
   attrs->max_output_size = max_output_size;
   attrs->iou_threshold = iou_threshold;
   attrs->force_suppress = force_suppress;
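The renamed NonMaximumSuppressionAttrs fields above map one-to-one onto the keyword arguments of the Relay operator. A hedged sketch of a call site follows; the keyword names come from the attrs declarations in this patch, while the default values shown here are illustrative assumptions, not quoted defaults.

    from tvm import relay

    data = relay.var("data", shape=(1, 5, 6))  # [class_id, score, l, t, r, b] per anchor
    valid_count = relay.var("valid_count", shape=(1,), dtype="int32")
    out = relay.vision.non_max_suppression(
        data, valid_count,
        max_output_size=-1,    # keep every surviving box
        iou_threshold=0.5,     # overlap threshold for suppression
        force_suppress=False,  # suppress only within the same class_id
        top_k=-1,              # no pre-NMS truncation
        id_index=0,
        return_indices=False,
        invalid_to_bottom=True)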
From c2e02e415e7ce9c7cdfc57023ce3e733150284ba Mon Sep 17 00:00:00 2001
From: Wang
Date: Thu, 7 Mar 2019 14:56:58 -0800
Subject: [PATCH 42/43] Move tutorial

---
 tutorials/{relay => frontend}/deploy_ssd_gluoncv.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename tutorials/{relay => frontend}/deploy_ssd_gluoncv.py (100%)

diff --git a/tutorials/relay/deploy_ssd_gluoncv.py b/tutorials/frontend/deploy_ssd_gluoncv.py
similarity index 100%
rename from tutorials/relay/deploy_ssd_gluoncv.py
rename to tutorials/frontend/deploy_ssd_gluoncv.py

From d20024c2a35802c5f0ab1717a8d32851123857ad Mon Sep 17 00:00:00 2001
From: Wang
Date: Thu, 7 Mar 2019 16:54:21 -0800
Subject: [PATCH 43/43] Resolve conflict

---
 python/tvm/relay/frontend/mxnet.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/python/tvm/relay/frontend/mxnet.py b/python/tvm/relay/frontend/mxnet.py
index 58de937fdbb8..cdfa75e50419 100644
--- a/python/tvm/relay/frontend/mxnet.py
+++ b/python/tvm/relay/frontend/mxnet.py
@@ -434,7 +434,6 @@ def _mx_l2_normalize(inputs, attrs):
     "exp",
     "negative",
     "reshape_like",
-    "slice_like",
     "zeros_like",
     "ones_like",
     "where",
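Patch 42 only relocates the GluonCV SSD tutorial, which is the main end-to-end user of the NMS work in this series. A rough sketch of the flow that tutorial covers is included below for orientation; the model name and the exact return values of from_mxnet are assumptions here and have varied across TVM releases.

    from tvm import relay
    from gluoncv import model_zoo

    # Load a pretrained SSD model and convert it through the MXNet frontend.
    block = model_zoo.get_model("ssd_512_resnet50_v1_voc", pretrained=True)
    dshape = (1, 3, 512, 512)
    net, params = relay.frontend.from_mxnet(block, shape={"data": dshape})

    # Compile for CPU; non_max_suppression ends up as an op in the compiled graph.
    with relay.build_config(opt_level=3):
        graph, lib, params = relay.build(net, "llvm", params=params)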