Skip to content

Commit

Permalink
Fixed issue when converting but not saving bbox formats
Browse files Browse the repository at this point in the history
- allowed changing of State.bbox under some circumstances
- added some better explanations to the pose_similarity.py functions
  • Loading branch information
bmmtstb committed Jul 17, 2024
1 parent d04e2e6 commit 0450085
Show file tree
Hide file tree
Showing 10 changed files with 61 additions and 37 deletions.
2 changes: 1 addition & 1 deletion dgs/default_values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ submission:
file: "./submission.json"

MOT:
bbox_decimals: 0
bbox_decimals: 3
seqinfo_key: "Sequence"

# ############## #
Expand Down
2 changes: 1 addition & 1 deletion dgs/models/dataset/keypoint_rcnn.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def images_to_states(self, images: Images) -> list[State]:
# make sure all images are float
images = [tvte.Image(to_dtype(img, dtype=t.float32, scale=True)) for img in images]

# predicts a list of {boxes: XYWH[N], labels: Int64[N], scores: [N], keypoints: Float[N,J,(x|y|vis)]}
# predicts a list of {boxes: XYXY[N], labels: Int64[N], scores: [N], keypoints: Float[N,J,(x|y|vis)]}
# every image in images can have multiple predictions
outputs: list[dict[str, t.Tensor]] = self.model(images)

Expand Down
2 changes: 1 addition & 1 deletion dgs/models/dataset/posetrack21.py
Original file line number Diff line number Diff line change
Expand Up @@ -723,7 +723,7 @@ def _get_anno_data(
return (
t.empty((0, 17, 2)),
t.empty((0, 17, 1)),
tvte.BoundingBoxes(t.empty((0, 4)), canvas_size=(0, 0), format="XYXY"),
tvte.BoundingBoxes(t.empty((0, 4)), canvas_size=(0, 0), format="XYWH"),
(),
)
return collate_tensors(keypoints), collate_tensors(visibilities), collate_bboxes(bboxes), tuple(crop_paths)
Expand Down
56 changes: 35 additions & 21 deletions dgs/models/similarity/pose_similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@


class ObjectKeypointSimilarity(SimilarityModule):
"""Compute the object key-point similarity (OKS) between two batches of poses.
"""Compute the object key-point similarity (OKS) between two batches of poses / States.
Params
------
Expand Down Expand Up @@ -57,7 +57,7 @@ def __init__(self, config: Config, path: NodePath):
"eps", torch.tensor(torch.finfo(self.precision).eps, device=self.device, dtype=self.precision)
)
# Set up a transform function to convert the bounding boxes if they have the wrong format
self.transform = ConvertBoundingBoxFormat("XYXY")
self.transf_bbox_to_xyxy = ConvertBoundingBoxFormat("XYXY")

# Set up softmax function if requested
self.softmax = nn.Sequential()
Expand All @@ -81,7 +81,7 @@ def get_data(self, ds: State) -> tuple[torch.Tensor, torch.Tensor]:
elif bboxes.format == BoundingBoxFormat.XYWH:
area = bboxes[:, -2] * bboxes[:, -1] # w * h
else:
bboxes = self.transform(bboxes)
bboxes = self.transf_bbox_to_xyxy(bboxes)
area = box_area(bboxes).float()

return kps, area
Expand Down Expand Up @@ -121,36 +121,39 @@ def forward(self, data: State, target: State) -> torch.Tensor:
* 2 = labeled and visible
* :math:`s` the scale of the ground truth object, with :math:`s^2` becoming the object's segmented area
Fixme: exclude ignore regions from image_shape ?
Args:
data: A :class:`State` object containing at least the key points and the bounding box.
target: A :class:`State` containing at least the target key points.
data: A :class:`State` object containing at least the key points and the bounding box. Shape ``N``.
target: A :class:`State` containing at least the target key points. Shape ``T``.
Returns:
A (Float)Tensor of shape ``[N x T]`` with values in ``[0..1]``.
If requested, the softmax is computed along the -1 dimension,
resulting in probability distributions for each value of the input data.
"""
# get predicted key-points as [B1 x J x 2] and bbox area as [B1]
# get predicted key-points as [N x J x 2] and bbox area as [N]
pred_kps, bbox_area = self.get_data(ds=data)
# get ground-truth key-points as [B2 x J x 2] and visibility as [B2 x J]
# get ground-truth key-points as [T x J x 2] and visibility as [T x J]
gt_kps, gt_vis = self.get_target(ds=target)
assert pred_kps.size(-1) == gt_kps.size(-1), "Key-points should have the same number of dimensions"
# Compute d = Euclidean dist, but don't compute the sqrt, because only d^2 is required.
# A little tensor magic, because if B1 != B2 and B1 != 1 and B2 != 1, regular subtraction will fail!
# Therefore, modify the tensors to have shape [B1 x J x 2 x 1], [(1 x) J x 2 x B2].
# The output has shape [B1 x J x 2 x B2], then square and sum over the number of dimensions (-2).
# A little tensor magic, because if N != T and N != 1 and T != 1, regular subtraction will fail!
# Therefore, modify the tensors to have shape [N x J x 2 x 1], [(1 x) J x 2 x T].
# The output has shape [N x J x 2 x T], then square and sum over the number of dimensions (-2).
d2 = torch.sum(
torch.sub(pred_kps.unsqueeze(-1), gt_kps.permute(1, 2, 0)).square(),
dim=-2,
) # -> [B1 x J x B2]
) # -> [N x J x T]
# Ground truth scale as bounding box area in relation to the image area it lies within.
# Keep area s^2, because s is never used.
s2 = bbox_area.flatten() # [B1]
s2 = bbox_area.flatten() # [N]
# Keypoint similarity for every key-point pair of ground truth and detected.
# Use outer product to combine s^2 [B1] with k^2 [J] and add epsilon to make sure to have non-zero values.
# Use outer product to combine s^2 [N] with k^2 [J] and add epsilon to make sure to have non-zero values.
# Again, modify the tensor shapes to match for division.
# Shapes: d2 [B1 x J x B2], new_outer [B1 x J x 1]
ks = torch.exp(-torch.div(d2, (2 * torch.outer(s2, self.k2) + self.eps).unsqueeze(-1))) # -> [B1 x J x B2]
# Shapes: d2 [N x J x T], new_outer [N x J x 1]
ks = torch.exp(-torch.div(d2, (2 * torch.outer(s2, self.k2) + self.eps).unsqueeze(-1))) # -> [N x J x T]
# The count of non-zero visibilities in the ground-truth
count = torch.count_nonzero(gt_vis, dim=-1) # [B2]
# for every pair in B, sum over all J
count = torch.count_nonzero(gt_vis, dim=-1) # [T]
# with ks [N x J x T], sum over all J and divide by the number of non-zero ground-truth visibilities
return self.softmax(torch.div(torch.where(gt_vis.T, ks, 0).sum(dim=-2), count).nan_to_num_(nan=0.0, posinf=0.0))


Expand Down Expand Up @@ -178,7 +181,7 @@ def __init__(self, config: Config, path: NodePath):

def get_data(self, ds: State) -> BoundingBoxes:
"""Given a :class:`State` obtain the ground-truth bounding-boxes as
:class:`torchvision.tv_tensors.BoundingBoxes` object of size ``[B1 x 4]``.
:class:`torchvision.tv_tensors.BoundingBoxes` object of size ``[N x 4]``.
Notes:
The box_iou function expects that the bounding boxes are in the 'XYXY' format.
Expand All @@ -190,7 +193,7 @@ def get_data(self, ds: State) -> BoundingBoxes:

def get_target(self, ds: State) -> BoundingBoxes:
"""Given a :class:`State` obtain the ground-truth bounding-boxes as
:class:`torchvision.tv_tensors.BoundingBoxes` object of size ``[B2 x 4]``.
:class:`torchvision.tv_tensors.BoundingBoxes` object of size ``[T x 4]``.
Notes:
The function :func:`box_iou` expects that the bounding boxes are in the 'XYXY' format.
Expand All @@ -201,4 +204,15 @@ def get_target(self, ds: State) -> BoundingBoxes:
return bboxes

def forward(self, data: State, target: State) -> torch.Tensor:
"""Given two states containing bounding-boxes, compute the intersection over union between each pair.
Args:
data: A :class:`State` object containing the detected bounding-boxes. Size ``N``
target: A :class:`State` object containing the target bounding-boxes. Size ``T``
Returns:
A (Float)Tensor of shape ``[N x T]`` with values in ``[0..1]``.
If requested, the softmax is computed along the -1 dimension,
resulting in probability distributions for each value of the input data.
"""
return self.softmax(box_iou(self.get_data(ds=data), self.get_target(ds=target)))
4 changes: 3 additions & 1 deletion dgs/models/similarity/torchreid.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,11 +129,13 @@ def forward(self, data: State, target: State) -> torch.Tensor:
Returns:
A similarity matrix containing values describing the similarity between every current- and target-embedding.
The similarity should be (Float)Tensor of shape ``[a x b]`` with values in ``[0..1]``.
The similarity is a (Float)Tensor of shape ``[a x b]`` with values in ``[0..1]``.
If the provided metric does not return a probability distribution,
you might want to change the metric or set the 'softmax' parameter of this module,
or within the :class:`DGSModule` if this is a submodule.
Computing the softmax ensures better / correct behavior when combining this similarity with others.
If requested, the softmax is computed along the -1 dimension,
resulting in probability distributions for each value of the input data.
"""
pred_embeds = self.get_data(ds=data)
targ_embeds = self.get_target(ds=target)
Expand Down
3 changes: 2 additions & 1 deletion dgs/models/submission/MOT.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ def _get_bbox_value(_s: State, idx: int) -> str:

# convert bbox format to receive the height and width more easily later on
if s.bbox.format != tvte.BoundingBoxFormat.XYWH:
convert_bounding_box_format(s.bbox, new_format=tvte.BoundingBoxFormat.XYWH)
s.bbox = convert_bounding_box_format(s.bbox, new_format=tvte.BoundingBoxFormat.XYWH)
assert s.bbox.format == tvte.BoundingBoxFormat.XYWH, f"got format: {s.bbox.format}"
detections = s.split()
for det in detections:
tid = det["pred_tid"].item() + 1 # MOT is 1-indexed, but State is 0-indexed
Expand Down
7 changes: 5 additions & 2 deletions dgs/models/submission/posetrack21.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,10 @@ def get_anno_data(s: State) -> list[dict[str, any]]:

# get the annotation data
anno_data = []
bboxes = convert_bounding_box_format(s.bbox, new_format=tvte.BoundingBoxFormat.XYWH)
if s.bbox.format != tvte.BoundingBoxFormat.XYWH:
s.bbox = convert_bounding_box_format(s.bbox, new_format=tvte.BoundingBoxFormat.XYWH)
assert s.bbox.format == tvte.BoundingBoxFormat.XYWH, f"got format: {s.bbox.format}"

for i in range(s.B):
kps = t.cat([s.keypoints[i], s.joint_weight[i]], dim=-1)
scores: list[float]
Expand All @@ -134,7 +137,7 @@ def get_anno_data(s: State) -> list[dict[str, any]]:

anno_data.append(
{
"bboxes": bboxes[i].flatten().tolist(),
"bboxes": s.bbox[i].flatten().tolist(),
"keypoints": kps.flatten().tolist(),
"scores": scores,
"score": (
Expand Down
9 changes: 5 additions & 4 deletions dgs/utils/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,10 +201,11 @@ def bbox(self) -> tv_tensors.BoundingBoxes:

@bbox.setter
def bbox(self, bbox: tv_tensors) -> None:
raise NotImplementedError(
"It is not allowed to change the bounding box of an already existing State object. "
"Create a new object instead!"
)
if not isinstance(bbox, tv_tensors.BoundingBoxes):
raise TypeError(f"Expected bounding box, got {type(bbox)}")
if bbox.shape != self.bbox.shape:
raise ValueError(f"Can't switch bbox shape. Expected {self.bbox.shape} but got {bbox.shape}")
self.data["bbox"] = bbox.to(device=self.bbox.device)

@property
def device(self):
Expand Down
2 changes: 1 addition & 1 deletion scripts/helpers/extract_bboxes_MOT.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

CONFIG_FILE: str = "./configs/helpers/predict_rcnn.yaml"

SCORE_THRESHS: list[float] = [0.85, 0.90, 0.95]
SCORE_THRESHS: list[float] = [0.85, 0.90, 0.95, 0.99]
IOU_THRESHS: list[float] = [1.0] # basically deactivate IoU thresh

RCNN_DL_KEYS: list[str] = [
Expand Down
11 changes: 7 additions & 4 deletions tests/utils/state/test__state.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,11 +136,14 @@ def test_keypoints(self):
ds = State(**{"bbox": DUMMY_BBOX, scope: DUMMY_KP, "validate": validate})
setattr(ds, scopes[(i + 1) % 2], DUMMY_KP)

def test_setting_bbox_fails(self):
def test_setting_bbox_exceptions(self):
ds = State(**DUMMY_DATA)
with self.assertRaises(NotImplementedError) as e:
ds.bbox = DUMMY_BBOX
self.assertTrue("not allowed to change the bounding box of an already" in str(e.exception), msg=e.exception)
with self.assertRaises(TypeError) as e:
ds.bbox = DUMMY_BBOX_TENSOR
self.assertTrue("Expected bounding box, got" in str(e.exception), msg=e.exception)
with self.assertRaises(ValueError) as e:
ds.bbox = DUMMY_BBOX_BATCH
self.assertTrue("Can't switch bbox shape. Expected" in str(e.exception), msg=e.exception)

def test_filepath(self):
for validate in [True, False]:
Expand Down

0 comments on commit 0450085

Please sign in to comment.