This repository has been archived by the owner on Nov 21, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 5.5k
/
boxes.py
338 lines (282 loc) · 12.6 KB
/
boxes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################
#
# Based on:
# --------------------------------------------------------
# Fast/er R-CNN
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# --------------------------------------------------------
"""Box manipulation functions. The internal Detectron box format is
[x1, y1, x2, y2] where (x1, y1) specify the top-left box corner and (x2, y2)
specify the bottom-right box corner. Boxes from external sources, e.g.,
datasets, may be in other formats (such as [x, y, w, h]) and require conversion.
This module uses a convention that may seem strange at first: the width of a box
is computed as x2 - x1 + 1 (likewise for height). The "+ 1" dates back to old
object detection days when the coordinates were integer pixel indices, rather
than floating point coordinates in a subpixel coordinate frame. A box with x2 =
x1 and y2 = y1 was taken to include a single pixel, having a width of 1, and
hence requiring the "+ 1". Now, most datasets will likely provide boxes with
floating point coordinates and the width should be more reasonably computed as
x2 - x1.
In practice, as long as a model is trained and tested with a consistent
convention either decision seems to be ok (at least in our experience on COCO).
Since we have a long history of training models with the "+ 1" convention, we
are reluctant to change it even if our modern tastes prefer not to use it.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import numpy as np
from detectron.core.config import cfg
import detectron.utils.cython_bbox as cython_bbox
import detectron.utils.cython_nms as cython_nms
bbox_overlaps = cython_bbox.bbox_overlaps
def boxes_area(boxes):
"""Compute the area of an array of boxes."""
w = (boxes[:, 2] - boxes[:, 0] + 1)
h = (boxes[:, 3] - boxes[:, 1] + 1)
areas = w * h
assert np.all(areas >= 0), 'Negative areas founds'
return areas
def unique_boxes(boxes, scale=1.0):
"""Return indices of unique boxes."""
v = np.array([1, 1e3, 1e6, 1e9])
hashes = np.round(boxes * scale).dot(v)
_, index = np.unique(hashes, return_index=True)
return np.sort(index)
def xywh_to_xyxy(xywh):
"""Convert [x1 y1 w h] box format to [x1 y1 x2 y2] format."""
if isinstance(xywh, (list, tuple)):
# Single box given as a list of coordinates
assert len(xywh) == 4
x1, y1 = xywh[0], xywh[1]
x2 = x1 + np.maximum(0., xywh[2] - 1.)
y2 = y1 + np.maximum(0., xywh[3] - 1.)
return (x1, y1, x2, y2)
elif isinstance(xywh, np.ndarray):
# Multiple boxes given as a 2D ndarray
return np.hstack(
(xywh[:, 0:2], xywh[:, 0:2] + np.maximum(0, xywh[:, 2:4] - 1))
)
else:
raise TypeError('Argument xywh must be a list, tuple, or numpy array.')
def xyxy_to_xywh(xyxy):
"""Convert [x1 y1 x2 y2] box format to [x1 y1 w h] format."""
if isinstance(xyxy, (list, tuple)):
# Single box given as a list of coordinates
assert len(xyxy) == 4
x1, y1 = xyxy[0], xyxy[1]
w = xyxy[2] - x1 + 1
h = xyxy[3] - y1 + 1
return (x1, y1, w, h)
elif isinstance(xyxy, np.ndarray):
# Multiple boxes given as a 2D ndarray
return np.hstack((xyxy[:, 0:2], xyxy[:, 2:4] - xyxy[:, 0:2] + 1))
else:
raise TypeError('Argument xyxy must be a list, tuple, or numpy array.')
def filter_small_boxes(boxes, min_size):
"""Keep boxes with width and height both greater than min_size."""
w = boxes[:, 2] - boxes[:, 0] + 1
h = boxes[:, 3] - boxes[:, 1] + 1
keep = np.where((w > min_size) & (h > min_size))[0]
return keep
def clip_boxes_to_image(boxes, height, width):
"""Clip an array of boxes to an image with the given height and width."""
boxes[:, [0, 2]] = np.minimum(width - 1., np.maximum(0., boxes[:, [0, 2]]))
boxes[:, [1, 3]] = np.minimum(height - 1., np.maximum(0., boxes[:, [1, 3]]))
return boxes
def clip_xyxy_to_image(x1, y1, x2, y2, height, width):
"""Clip coordinates to an image with the given height and width."""
x1 = np.minimum(width - 1., np.maximum(0., x1))
y1 = np.minimum(height - 1., np.maximum(0., y1))
x2 = np.minimum(width - 1., np.maximum(0., x2))
y2 = np.minimum(height - 1., np.maximum(0., y2))
return x1, y1, x2, y2
def clip_tiled_boxes(boxes, im_shape):
"""Clip boxes to image boundaries. im_shape is [height, width] and boxes
has shape (N, 4 * num_tiled_boxes)."""
assert boxes.shape[1] % 4 == 0, \
'boxes.shape[1] is {:d}, but must be divisible by 4.'.format(
boxes.shape[1]
)
# x1 >= 0
boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)
# y1 >= 0
boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)
# x2 < im_shape[1]
boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)
# y2 < im_shape[0]
boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)
return boxes
def bbox_transform(boxes, deltas, weights=(1.0, 1.0, 1.0, 1.0)):
"""Forward transform that maps proposal boxes to predicted ground-truth
boxes using bounding-box regression deltas. See bbox_transform_inv for a
description of the weights argument.
"""
if boxes.shape[0] == 0:
return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)
boxes = boxes.astype(deltas.dtype, copy=False)
widths = boxes[:, 2] - boxes[:, 0] + 1.0
heights = boxes[:, 3] - boxes[:, 1] + 1.0
ctr_x = boxes[:, 0] + 0.5 * widths
ctr_y = boxes[:, 1] + 0.5 * heights
wx, wy, ww, wh = weights
dx = deltas[:, 0::4] / wx
dy = deltas[:, 1::4] / wy
dw = deltas[:, 2::4] / ww
dh = deltas[:, 3::4] / wh
# Prevent sending too large values into np.exp()
dw = np.minimum(dw, cfg.BBOX_XFORM_CLIP)
dh = np.minimum(dh, cfg.BBOX_XFORM_CLIP)
pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
pred_w = np.exp(dw) * widths[:, np.newaxis]
pred_h = np.exp(dh) * heights[:, np.newaxis]
pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
# x1
pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w
# y1
pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h
# x2 (note: "- 1" is correct; don't be fooled by the asymmetry)
pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w - 1
# y2 (note: "- 1" is correct; don't be fooled by the asymmetry)
pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h - 1
return pred_boxes
def bbox_transform_inv(boxes, gt_boxes, weights=(1.0, 1.0, 1.0, 1.0)):
"""Inverse transform that computes target bounding-box regression deltas
given proposal boxes and ground-truth boxes. The weights argument should be
a 4-tuple of multiplicative weights that are applied to the regression
target.
In older versions of this code (and in py-faster-rcnn), the weights were set
such that the regression deltas would have unit standard deviation on the
training dataset. Presently, rather than computing these statistics exactly,
we use a fixed set of weights (10., 10., 5., 5.) by default. These are
approximately the weights one would get from COCO using the previous unit
stdev heuristic.
"""
ex_widths = boxes[:, 2] - boxes[:, 0] + 1.0
ex_heights = boxes[:, 3] - boxes[:, 1] + 1.0
ex_ctr_x = boxes[:, 0] + 0.5 * ex_widths
ex_ctr_y = boxes[:, 1] + 0.5 * ex_heights
gt_widths = gt_boxes[:, 2] - gt_boxes[:, 0] + 1.0
gt_heights = gt_boxes[:, 3] - gt_boxes[:, 1] + 1.0
gt_ctr_x = gt_boxes[:, 0] + 0.5 * gt_widths
gt_ctr_y = gt_boxes[:, 1] + 0.5 * gt_heights
wx, wy, ww, wh = weights
targets_dx = wx * (gt_ctr_x - ex_ctr_x) / ex_widths
targets_dy = wy * (gt_ctr_y - ex_ctr_y) / ex_heights
targets_dw = ww * np.log(gt_widths / ex_widths)
targets_dh = wh * np.log(gt_heights / ex_heights)
targets = np.vstack((targets_dx, targets_dy, targets_dw,
targets_dh)).transpose()
return targets
def expand_boxes(boxes, scale):
"""Expand an array of boxes by a given scale."""
w_half = (boxes[:, 2] - boxes[:, 0]) * .5
h_half = (boxes[:, 3] - boxes[:, 1]) * .5
x_c = (boxes[:, 2] + boxes[:, 0]) * .5
y_c = (boxes[:, 3] + boxes[:, 1]) * .5
w_half *= scale
h_half *= scale
boxes_exp = np.zeros(boxes.shape)
boxes_exp[:, 0] = x_c - w_half
boxes_exp[:, 2] = x_c + w_half
boxes_exp[:, 1] = y_c - h_half
boxes_exp[:, 3] = y_c + h_half
return boxes_exp
def flip_boxes(boxes, im_width):
"""Flip boxes horizontally."""
boxes_flipped = boxes.copy()
boxes_flipped[:, 0::4] = im_width - boxes[:, 2::4] - 1
boxes_flipped[:, 2::4] = im_width - boxes[:, 0::4] - 1
return boxes_flipped
def aspect_ratio(boxes, aspect_ratio):
"""Perform width-relative aspect ratio transformation."""
boxes_ar = boxes.copy()
boxes_ar[:, 0::4] = aspect_ratio * boxes[:, 0::4]
boxes_ar[:, 2::4] = aspect_ratio * boxes[:, 2::4]
return boxes_ar
def box_voting(top_dets, all_dets, thresh, scoring_method='ID', beta=1.0):
"""Apply bounding-box voting to refine `top_dets` by voting with `all_dets`.
See: https://arxiv.org/abs/1505.01749. Optional score averaging (not in the
referenced paper) can be applied by setting `scoring_method` appropriately.
"""
# top_dets is [N, 5] each row is [x1 y1 x2 y2, sore]
# all_dets is [N, 5] each row is [x1 y1 x2 y2, sore]
top_dets_out = top_dets.copy()
top_boxes = top_dets[:, :4]
all_boxes = all_dets[:, :4]
all_scores = all_dets[:, 4]
top_to_all_overlaps = bbox_overlaps(top_boxes, all_boxes)
for k in range(top_dets_out.shape[0]):
inds_to_vote = np.where(top_to_all_overlaps[k] >= thresh)[0]
boxes_to_vote = all_boxes[inds_to_vote, :]
ws = all_scores[inds_to_vote]
top_dets_out[k, :4] = np.average(boxes_to_vote, axis=0, weights=ws)
if scoring_method == 'ID':
# Identity, nothing to do
pass
elif scoring_method == 'TEMP_AVG':
# Average probabilities (considered as P(detected class) vs.
# P(not the detected class)) after smoothing with a temperature
# hyperparameter.
P = np.vstack((ws, 1.0 - ws))
P_max = np.max(P, axis=0)
X = np.log(P / P_max)
X_exp = np.exp(X / beta)
P_temp = X_exp / np.sum(X_exp, axis=0)
P_avg = P_temp[0].mean()
top_dets_out[k, 4] = P_avg
elif scoring_method == 'AVG':
# Combine new probs from overlapping boxes
top_dets_out[k, 4] = ws.mean()
elif scoring_method == 'IOU_AVG':
P = ws
ws = top_to_all_overlaps[k, inds_to_vote]
P_avg = np.average(P, weights=ws)
top_dets_out[k, 4] = P_avg
elif scoring_method == 'GENERALIZED_AVG':
P_avg = np.mean(ws**beta)**(1.0 / beta)
top_dets_out[k, 4] = P_avg
elif scoring_method == 'QUASI_SUM':
top_dets_out[k, 4] = ws.sum() / float(len(ws))**beta
else:
raise NotImplementedError(
'Unknown scoring method {}'.format(scoring_method)
)
return top_dets_out
def nms(dets, thresh):
"""Apply classic DPM-style greedy NMS."""
if dets.shape[0] == 0:
return []
return cython_nms.nms(dets, thresh)
def soft_nms(
dets, sigma=0.5, overlap_thresh=0.3, score_thresh=0.001, method='linear'
):
"""Apply the soft NMS algorithm from https://arxiv.org/abs/1704.04503."""
if dets.shape[0] == 0:
return dets, []
methods = {'hard': 0, 'linear': 1, 'gaussian': 2}
assert method in methods, 'Unknown soft_nms method: {}'.format(method)
dets, keep = cython_nms.soft_nms(
np.ascontiguousarray(dets, dtype=np.float32),
np.float32(sigma),
np.float32(overlap_thresh),
np.float32(score_thresh),
np.uint8(methods[method])
)
return dets, keep