"""
Opearations
Fred Zhang <[email protected]>
The Australian National University
Australian Centre for Robotic Vision
"""
import torch
import torchvision.ops.boxes as box_ops
from torch import Tensor
from typing import List, Tuple

def compute_spatial_encodings(
    boxes_1: List[Tensor], boxes_2: List[Tensor],
    shapes: List[Tuple[int, int]], eps: float = 1e-10
) -> Tensor:
    """
    Parameters:
    -----------
    boxes_1: List[Tensor]
        First set of bounding boxes (M, 4)
    boxes_2: List[Tensor]
        Second set of bounding boxes (M, 4)
    shapes: List[Tuple[int, int]]
        Image shapes, heights followed by widths
    eps: float
        A small constant used for numerical stability

    Returns:
    --------
    Tensor
        Computed spatial encodings between the boxes (N, 36)
    """
    features = []
    for b1, b2, shape in zip(boxes_1, boxes_2, shapes):
        h, w = shape

        c1_x = (b1[:, 0] + b1[:, 2]) / 2; c1_y = (b1[:, 1] + b1[:, 3]) / 2
        c2_x = (b2[:, 0] + b2[:, 2]) / 2; c2_y = (b2[:, 1] + b2[:, 3]) / 2

        b1_w = b1[:, 2] - b1[:, 0]; b1_h = b1[:, 3] - b1[:, 1]
        b2_w = b2[:, 2] - b2[:, 0]; b2_h = b2[:, 3] - b2[:, 1]

        d_x = torch.abs(c2_x - c1_x) / (b1_w + eps)
        d_y = torch.abs(c2_y - c1_y) / (b1_h + eps)

        iou = torch.diag(box_ops.box_iou(b1, b2))

        # Construct spatial encoding
        f = torch.stack([
            # Relative position of box centre
            c1_x / w, c1_y / h, c2_x / w, c2_y / h,
            # Relative box width and height
            b1_w / w, b1_h / h, b2_w / w, b2_h / h,
            # Relative box area
            b1_w * b1_h / (h * w), b2_w * b2_h / (h * w),
            b2_w * b2_h / (b1_w * b1_h + eps),
            # Box aspect ratio
            b1_w / (b1_h + eps), b2_w / (b2_h + eps),
            # Intersection over union
            iou,
            # Relative distance and direction of the object w.r.t. the person
            (c2_x > c1_x).float() * d_x,
            (c2_x < c1_x).float() * d_x,
            (c2_y > c1_y).float() * d_y,
            (c2_y < c1_y).float() * d_y,
        ], 1)

        features.append(
            torch.cat([f, torch.log(f + eps)], 1)
        )
    return torch.cat(features)
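
# A minimal usage sketch (not part of the original file): pair one person box
# with one object box on a hypothetical image of height 480 and width 640.
# The box coordinates below are made-up values for illustration only. The 18
# raw features and their logarithms give the 36-dimensional encoding.
#
#   >>> b1 = [torch.tensor([[ 50.,  60., 200., 300.]])]
#   >>> b2 = [torch.tensor([[120., 150., 400., 420.]])]
#   >>> compute_spatial_encodings(b1, b2, [(480, 640)]).shape
#   torch.Size([1, 36])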

def binary_focal_loss(
    x: Tensor, y: Tensor,
    alpha: float = 0.5,
    gamma: float = 2.0,
    reduction: str = 'mean',
    eps: float = 1e-6
) -> Tensor:
    """
    Focal loss by Lin et al.
    https://arxiv.org/pdf/1708.02002.pdf

    L = - |1-y-alpha| * |y-x|^{gamma} * log(|1-y-x|)

    Parameters:
    -----------
    x: Tensor[N, K]
        Post-normalisation scores
    y: Tensor[N, K]
        Binary labels
    alpha: float
        Hyper-parameter that balances between positive and negative examples
    gamma: float
        Hyper-parameter that suppresses well-classified examples
    reduction: str
        Reduction method ('mean', 'sum' or 'none')
    eps: float
        A small constant to avoid NaN values from 'PowBackward'

    Returns:
    --------
    loss: Tensor
        Computed loss tensor
    """
    loss = (1 - y - alpha).abs() * ((y - x).abs() + eps) ** gamma * \
        torch.nn.functional.binary_cross_entropy(
            x, y, reduction='none'
        )

    if reduction == 'mean':
        return loss.mean()
    elif reduction == 'sum':
        return loss.sum()
    elif reduction == 'none':
        return loss
    else:
        raise ValueError("Unsupported reduction method {}".format(reduction))