-
Notifications
You must be signed in to change notification settings - Fork 54
/
box_utils.py
executable file
·214 lines (172 loc) · 6.55 KB
/
box_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
import tensorflow as tf
def compute_area(top_left, bot_right, max_size=512.0):
    """ Compute area given top_left and bottom_right coordinates
    Args:
        top_left: tensor (..., 2), e.g. (num_boxes, 2)
        bot_right: tensor (..., 2), same shape as top_left
        max_size: upper bound applied to each side length before the
                  product is taken (defaults to the previously
                  hard-coded 512.0)
    Returns:
        area: tensor (...,), the last axis of the inputs is reduced away
    """
    # Negative extents (bot_right before top_left) are clamped to 0 so that
    # non-overlapping or inverted boxes yield a zero area.
    side_lengths = tf.clip_by_value(bot_right - top_left, 0.0, max_size)
    return side_lengths[..., 0] * side_lengths[..., 1]
def compute_iou(boxes_a, boxes_b):
    """ Pairwise intersection-over-union between two sets of boxes
    Args:
        boxes_a: tensor (num_boxes_a, 4)
        boxes_b: tensor (num_boxes_b, 4)
    Returns:
        overlap: tensor (num_boxes_a, num_boxes_b)
    """
    # Insert singleton axes so that every box in a is broadcast
    # against every box in b: (num_a, 1, 4) vs (1, num_b, 4).
    a = tf.expand_dims(boxes_a, 1)
    b = tf.expand_dims(boxes_b, 0)

    # Corners of the intersection rectangle.
    inter_top_left = tf.math.maximum(a[..., :2], b[..., :2])
    inter_bot_right = tf.math.minimum(a[..., 2:], b[..., 2:])
    intersection = compute_area(inter_top_left, inter_bot_right)

    size_a = compute_area(a[..., :2], a[..., 2:])
    size_b = compute_area(b[..., :2], b[..., 2:])

    # NOTE: no guard against a zero union (two zero-area boxes).
    return intersection / (size_a + size_b - intersection)
def compute_target(default_boxes, gt_boxes, gt_labels, iou_threshold=0.5):
    """ Compute regression and classification targets
    Args:
        default_boxes: tensor (num_default, 4)
                       of format (cx, cy, w, h)
        gt_boxes: tensor (num_gt, 4)
                  of format (xmin, ymin, xmax, ymax)
        gt_labels: tensor (num_gt,)
        iou_threshold: default boxes whose matched IoU is below this
                       value receive classification target 0
    Returns:
        gt_confs: classification targets, tensor (num_default,)
        gt_locs: regression targets, tensor (num_default, 4)
    """
    # Convert default boxes to format (xmin, ymin, xmax, ymax)
    # in order to compute overlap with gt boxes
    corner_default_boxes = transform_center_to_corner(default_boxes)
    iou = compute_iou(corner_default_boxes, gt_boxes)  # (num_default, num_gt)

    # For every default box: its highest IoU and the gt box achieving it.
    best_gt_iou = tf.math.reduce_max(iou, 1)
    best_gt_idx = tf.math.argmax(iou, 1)
    # For every gt box: the default box that overlaps it most.
    best_default_idx = tf.math.argmax(iou, 0)

    # Force each gt box onto its best default box. This is the vectorized
    # form of looping over enumerate(best_default_idx) and writing gt_idx
    # at position default_idx.
    # The gt count is read with tf.shape (dynamic) rather than .shape
    # (static), so this also works when num_gt is unknown at trace time.
    # NOTE(review): if several gt boxes share the same best default box the
    # scatter receives duplicate indices; confirm which update wins.
    forced_positions = tf.expand_dims(best_default_idx, 1)
    num_gt = tf.shape(best_default_idx, out_type=tf.int64)[0]
    best_gt_idx = tf.tensor_scatter_nd_update(
        best_gt_idx,
        forced_positions,
        tf.range(num_gt, dtype=tf.int64))
    # Give the forced matches an IoU of 1 so they always pass the threshold.
    best_gt_iou = tf.tensor_scatter_nd_update(
        best_gt_iou,
        forced_positions,
        tf.ones_like(best_default_idx, dtype=tf.float32))

    # Label of the matched gt box, replaced by 0 where the match is too weak.
    gt_confs = tf.gather(gt_labels, best_gt_idx)
    gt_confs = tf.where(
        tf.less(best_gt_iou, iou_threshold),
        tf.zeros_like(gt_confs),
        gt_confs)

    # Regression targets are computed for every default box against its
    # matched gt box, including those whose label was zeroed above.
    matched_gt_boxes = tf.gather(gt_boxes, best_gt_idx)
    gt_locs = encode(default_boxes, matched_gt_boxes)

    return gt_confs, gt_locs
def encode(default_boxes, boxes, variance=(0.1, 0.2)):
    """ Compute regression values
    Args:
        default_boxes: tensor (num_default, 4)
                       of format (cx, cy, w, h)
        boxes: tensor (num_default, 4)
               of format (xmin, ymin, xmax, ymax)
        variance: two-element sequence; variance[0] scales the center
                  offsets and variance[1] scales the log size ratios
    Returns:
        locs: regression values, tensor (num_default, 4)
    """
    # Convert boxes to (cx, cy, w, h) format
    center_boxes = transform_corner_to_center(boxes)

    default_centers = default_boxes[:, :2]
    default_sizes = default_boxes[:, 2:]

    # Center offset, normalized by the default box size.
    center_offsets = (
        center_boxes[..., :2] - default_centers
    ) / (default_sizes * variance[0])
    # Log ratio between the box size and the default box size.
    size_offsets = tf.math.log(
        center_boxes[..., 2:] / default_sizes) / variance[1]

    return tf.concat([center_offsets, size_offsets], axis=-1)
def decode(default_boxes, locs, variance=(0.1, 0.2)):
    """ Decode regression values back to coordinates
    Args:
        default_boxes: tensor (num_default, 4)
                       of format (cx, cy, w, h)
        locs: tensor (batch_size, num_default, 4) of regression values:
              the first two channels are center offsets, the last two
              are log size ratios (the inverse of what encode produces)
        variance: two-element sequence; variance[0] scales the center
                  offsets and variance[1] scales the log size ratios
    Returns:
        boxes: tensor with the same leading dimensions as locs and a
               last axis of 4, of format (xmin, ymin, xmax, ymax)
    """
    default_centers = default_boxes[:, :2]
    default_sizes = default_boxes[:, 2:]

    # Undo the center normalization: offset * variance * size + center.
    centers = locs[..., :2] * variance[0] * default_sizes + default_centers
    # Undo the log size ratio.
    sizes = tf.math.exp(locs[..., 2:] * variance[1]) * default_sizes

    center_boxes = tf.concat([centers, sizes], axis=-1)
    return transform_center_to_corner(center_boxes)
def transform_corner_to_center(boxes):
    """ Convert corner-format boxes to center-format boxes
    Args:
        boxes: tensor (num_boxes, 4)
               of format (xmin, ymin, xmax, ymax)
    Returns:
        boxes: tensor (num_boxes, 4)
               of format (cx, cy, w, h)
    """
    mins = boxes[..., :2]
    maxs = boxes[..., 2:]

    # Center is the midpoint of the two corners, size is their difference.
    centers = (mins + maxs) / 2
    sizes = maxs - mins

    return tf.concat([centers, sizes], axis=-1)
def transform_center_to_corner(boxes):
    """ Convert center-format boxes to corner-format boxes
    Args:
        boxes: tensor (num_boxes, 4)
               of format (cx, cy, w, h)
    Returns:
        boxes: tensor (num_boxes, 4)
               of format (xmin, ymin, xmax, ymax)
    """
    centers = boxes[..., :2]
    sizes = boxes[..., 2:]

    # Each corner lies half a box size away from the center.
    mins = centers - sizes / 2
    maxs = centers + sizes / 2

    return tf.concat([mins, maxs], axis=-1)
def compute_nms(boxes, scores, nms_threshold, limit=200):
    """ Non Maximum Suppression: keep high scoring boxes and drop
        the ones that overlap an already kept box too much
    Args:
        boxes: tensor (num_boxes, 4)
               of format (xmin, ymin, xmax, ymax)
        scores: tensor (num_boxes,)
        nms_threshold: boxes whose IoU with a kept box exceeds this
                       value are suppressed
        limit: number of top scoring boxes considered as candidates
    Returns:
        idx: indices (into the input boxes) of kept boxes
    """
    if boxes.shape[0] == 0:
        return tf.constant([], dtype=tf.int32)

    # Work on the `limit` best scoring boxes, ordered by decreasing score.
    order = tf.argsort(scores, direction='DESCENDING')
    order = order[:limit]
    ranked_boxes = tf.gather(boxes, order)
    pairwise_iou = compute_iou(ranked_boxes, ranked_boxes)

    # Positions (within the ranked list) of kept boxes; the top scoring
    # box is always kept.
    kept = [0]
    while True:
        last_kept_row = pairwise_iou[kept[-1]]
        # Boxes that overlap the last kept box little enough to survive.
        candidates = last_kept_row <= nms_threshold
        # Overwrite the columns of all non-candidates with 1.0 so they are
        # rejected in every later iteration as well
        # (equivalent to iou[:, ~candidates] = 1.0).
        rejected = tf.math.logical_not(candidates)
        pairwise_iou = tf.where(
            tf.expand_dims(rejected, 0),
            tf.ones_like(pairwise_iou, dtype=tf.float32),
            pairwise_iou)

        if not tf.math.reduce_any(candidates):
            break

        # Sorting the 0/1 mask in descending order puts a surviving
        # candidate first; .numpy() means this only runs eagerly.
        candidate_ranking = tf.argsort(
            tf.dtypes.cast(candidates, tf.int32), direction='DESCENDING')
        kept.append(candidate_ranking[0].numpy())

    return tf.gather(order, kept)