Skip to content

Commit

Permalink
draft of similarity_engine.py _get_train_loss
Browse files Browse the repository at this point in the history
  • Loading branch information
bmmtstb committed Aug 26, 2024
1 parent fc0bee6 commit 121f3d3
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 34 deletions.
1 change: 1 addition & 0 deletions dgs/default_values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ similarity:

oks:
softmax: false
kp_dim: 2

iou:
softmax: false
Expand Down
3 changes: 2 additions & 1 deletion dgs/models/dgs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
from .dgs import DGSModule

DGS_MODULES: dict[str, Type[DGSModule]] = {
"DGS": DGSModule,
"DGSModule": DGSModule,
"DGS": DGSModule, # shorthand
}


Expand Down
11 changes: 9 additions & 2 deletions dgs/models/dgs/dgs.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,12 @@ class DGSModule(BaseModule, nn.Module):
Default ``DEF_VAL.dgs.similarity_softmax``.
"""

sim_mods: nn.ModuleList
similarity_softmax: nn.Sequential
combine: CombineSimilaritiesModule
combined_softmax: nn.Sequential
new_track_weight: t.Tensor

def __init__(self, config: Config, path: NodePath):
BaseModule.__init__(self, config=config, path=path)
nn.Module.__init__(self)
Expand Down Expand Up @@ -85,6 +91,7 @@ def __init__(self, config: Config, path: NodePath):
self.configure_torch_module(self.combine)

# if wanted, compute the softmax after the similarities have been summed up / combined
# but before the new tracks are added
combined_softmax = nn.Sequential()
if self.params.get("combined_softmax", DEF_VAL["dgs"]["combined_softmax"]):
combined_softmax.append(nn.Softmax(dim=-1))
Expand All @@ -101,7 +108,7 @@ def __init__(self, config: Config, path: NodePath):
def __call__(self, *args, **kwargs) -> any: # pragma: no cover
return self.forward(*args, **kwargs)

def forward(self, ds: State, target: State) -> t.Tensor:
def forward(self, ds: State, target: State, **_kwargs) -> t.Tensor:
"""Given a State containing the current detections and a target, compute the similarity between every pair.
Returns:
Expand All @@ -113,7 +120,7 @@ def forward(self, ds: State, target: State) -> t.Tensor:
results = [self.similarity_softmax(m(ds, target)) for m in self.sim_mods]

# combine and possibly compute softmax
combined: t.Tensor = self.combined_softmax(self.combine(*results))
combined: t.Tensor = self.combined_softmax(self.combine(*results, **_kwargs))
del results

# add a number of columns for the empty / new tracks equal to the length of the input
Expand Down
54 changes: 35 additions & 19 deletions dgs/models/engine/similarity_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@


class SimilarityEngine(EngineModule):
"""An engine for training and testing similarity models independently.
"""An engine for training and testing similarity models independently and in combinations.
For this model:
Expand Down Expand Up @@ -102,19 +102,23 @@ def __init__(

# Params - Train

def get_data(self, ds: State) -> t.Tensor:
"""Use the similarity model to obtain the similarity data of the current detections."""
return self.model.get_data(ds)
def get_data(self, ds: State) -> list[t.Tensor]:
"""Use the similarity models of the DGS module to obtain the similarity data of the current detections.
def get_target(self, ds: State) -> tuple[any, t.Tensor]:
"""Get the target data.
For the similarity engine, the data consists of a list of all the input data for the similarities.
This means that for the visual similarity the embedding is returned,
and for the IoU or OKS similarities, the bbox and key point data is returned.
The :func:`get_data` function will be called twice, once for the current time-step and once for the previous.
"""
return [sm.get_data(ds) for sm in self.model.sim_mods]

For the similarity engine, the target data consists of two parts:
def get_target(self, ds: State) -> t.Tensor:
"""Get the target data.
- the target IDs
- the similarity data of the previous step
For the similarity engine, the target data consists of the target- / class-id.
The :func:`get_target` function will be called twice, once for the current time-step and once for the previous.
"""
return ds.class_id, ...
return ds.class_id

def test(self) -> Results:
r"""Test whether the predicted alpha probability (:math:`\alpha_{\mathrm{pred}}`)
Expand Down Expand Up @@ -169,17 +173,29 @@ def predict(self) -> any:
def _get_train_loss(self, data: list[State], _curr_iter: int) -> t.Tensor:
"""Calculate the loss for the current frame.

Args:
    data: A list containing exactly two States, unpacked as ``(data_old, data_new)``.
        ``data_new`` is used as the current detections and ``data_old`` as the target of the model.
    _curr_iter: Not used by this method.

Returns:
    The result of ``self.loss``, called with the output of the alpha model of the combine module
    and the entries of the similarity matrix selected by the matching indices.
"""
assert isinstance(data, list) and len(data) == 2, "Data must be a list of length 2."
# data_old, data_new = data
# old_ids = data_old.class_id
# new_ids = data_new.class_id
# target_ids = ...
#
# curr_sim_data = self.get_data(data_new)
# prev_sim_data = self.get_data(data_old)

# NOTE(review): placeholder call whose result is overwritten by the final ``self.loss`` call below;
# it looks like a leftover of the previous draft - confirm that it can be removed
loss = self.loss(..., ...)
data_old: State
data_new: State

data_old, data_new = data

old_ids = self.get_target(data_old) # [T]
new_ids = self.get_target(data_new) # [D]
# append every ID of new_ids that is not already present in old_ids to the end of old_ids
combined_ids = t.cat((old_ids, new_ids[~(new_ids.reshape((-1, 1)) == old_ids.reshape((1, -1))).max(dim=1)[0]]))
# the (row, column) positions at which a new ID equals an entry of combined_ids,
# i.e., the matching old ID or, if there is no old ID to match to, the appended new ID
indices = t.where(new_ids.reshape(-1, 1) == combined_ids.reshape(1, -1))

# get the input data of the similarity modules for the current step
curr_sim_data = self.get_data(data_new) # [D]
alpha = self.model.combine.alpha_model(curr_sim_data)

# get the similarity matrices as [D x (T + D)]
similarity = self.model.forward(ds=data_new, target=data_old, alpha_inputs=curr_sim_data)

loss = self.loss(alpha, similarity[indices])
return loss

def terminate(self) -> None:
"""Terminate the model, delete the engine's reference to it, and then call the parent's ``terminate``."""
self.model.terminate()
del self.model
super().terminate()
32 changes: 20 additions & 12 deletions dgs/models/similarity/pose_similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
"format": [str, ("in", list(OKS_SIGMAS.keys()))],
# optional
"softmax": ["optional", bool],
"keypoint_dim": ["optional", int, ("within", (1, 3))],
}

iou_validations: Validations = {
Expand All @@ -35,10 +36,16 @@ class ObjectKeypointSimilarity(SimilarityModule):
format (str):
The key point format, e.g., 'coco', 'coco-whole', ... has to be in OKS_SIGMAS.keys().
Optional Params
---------------
softmax (bool, optional):
Whether to compute the softmax of the result.
All values will lie in the range :math:`[0, 1]`, with softmax, they additionally sum to one.
Default ``DEF_VAL.similarity.oks.softmax``.
keypoint_dim (int, optional):
The dimensionality of the key points, i.e., whether 2D or 3D key points are expected.
Default ``DEF_VAL.similarity.oks.kp_dim``.
"""

def __init__(self, config: Config, path: NodePath):
Expand All @@ -62,16 +69,16 @@ def __init__(self, config: Config, path: NodePath):
if self.params.get("softmax", DEF_VAL["similarity"]["oks"]["softmax"]):
self.softmax.append(nn.Softmax(dim=-1))

def get_data(self, ds: State) -> tuple[t.Tensor, t.Tensor]:
"""Given a :class:`State`, compute the detected / predicted key points with shape ``[B1 x J x 2]``
and the areas of the respective ground-truth bounding-boxes with shape ``[B1]``.
self.kp_dim: int = self.params.get("keypoint_dim", DEF_VAL["similarity"]["oks"]["kp_dim"])

Notes:
To compute the bbox area, it is possible to use the :class:`~torchvision.ops.box_area` function.
The box_area function expects that the bounding boxes are given in 'XYXY' format.
def get_data(self, ds: State) -> t.Tensor:
"""Given a :class:`State`, get the detected / predicted key points as a float tensor
with shape ``[B1 x J x 2|3]``, where the size of the last dimension is ``self.kp_dim``.

The areas of the bounding boxes are not part of the result, they are computed by :func:`get_area`.
"""
kps = ds.keypoints.float().view(ds.B, -1, 2)
return ds.keypoints.float().view(ds.B, -1, self.kp_dim)

def get_area(self, ds: State) -> t.Tensor:
"""Given a :class:`State`, compute the area of the bounding box."""
bboxes = ds.bbox

if bboxes.format == BoundingBoxFormat.XYXY:
Expand All @@ -82,14 +89,14 @@ def get_data(self, ds: State) -> tuple[t.Tensor, t.Tensor]:
bboxes = self.transf_bbox_to_xyxy(bboxes)
area = box_area(bboxes).float()

return kps, area
return area

def get_target(self, ds: State) -> tuple[t.Tensor, t.Tensor]:
"""Given a :class:`State` obtain the ground truth key points and the key-point-visibility.
Both are tensors, the key points are a FloatTensor of shape ``[B2 x J x 2]``
Both are tensors, the key points are a FloatTensor of shape ``[B2 x J x 2|3]``
and the visibility is a BoolTensor of shape ``[B2 x J]``.
"""
kps = ds.keypoints.float().view(ds.B, -1, 2)
kps = ds.keypoints.float().view(ds.B, -1, self.kp_dim)
vis = ds.cast_joint_weight(dtype=t.bool).squeeze(-1).view(ds.B, -1)
return kps, vis

Expand Down Expand Up @@ -129,7 +136,8 @@ def forward(self, data: State, target: State) -> t.Tensor:
resulting in probability distributions for each value of the input data.
"""
# get predicted key-points as [N x J x 2] and bbox area as [N]
pred_kps, bbox_area = self.get_data(ds=data)
pred_kps = self.get_data(ds=data)
bbox_area = self.get_area(ds=data)
# get ground-truth key-points as [T x J x 2] and visibility as [T x J]
gt_kps, gt_vis = self.get_target(ds=target)
assert pred_kps.size(-1) == gt_kps.size(-1), "Key-points should have the same number of dimensions"
Expand Down Expand Up @@ -191,7 +199,7 @@ def get_data(self, ds: State) -> BoundingBoxes:

def get_target(self, ds: State) -> BoundingBoxes:
"""Given a :class:`State` obtain the ground-truth bounding-boxes as
:class:`torchvision.tv_etnsors.BoundingBoxes` object of size ``[T x 4]``.
:class:`torchvision.tv_tensors.BoundingBoxes` object of size ``[T x 4]``.
Notes:
The function :func:`box_iou` expects that the bounding boxes are in the 'XYXY' format.
Expand Down

0 comments on commit 121f3d3

Please sign in to comment.