draft of similarity_engine.py _get_train_loss

bmmtstb · Aug 26, 2024 · 121f3d3 · 121f3d3
1 parent fc0bee6
commit 121f3d3
Show file tree

Hide file tree

Showing 5 changed files with 67 additions and 34 deletions.
diff --git a/dgs/default_values.yaml b/dgs/default_values.yaml
@@ -126,6 +126,7 @@ similarity:
 
     oks:
         softmax: false
+        kp_dim: 2
 
     iou:
         softmax: false

diff --git a/dgs/models/dgs/__init__.py b/dgs/models/dgs/__init__.py
@@ -9,7 +9,8 @@
 from .dgs import DGSModule
 
 DGS_MODULES: dict[str, Type[DGSModule]] = {
-    "DGS": DGSModule,
+    "DGSModule": DGSModule,
+    "DGS": DGSModule,  # shorthand
 }
 
 

diff --git a/dgs/models/dgs/dgs.py b/dgs/models/dgs/dgs.py
@@ -49,6 +49,12 @@ class DGSModule(BaseModule, nn.Module):
         Default ``DEF_VAL.dgs.similarity_softmax``.
     """
 
+    sim_mods: nn.ModuleList
+    similarity_softmax: nn.Sequential
+    combine: CombineSimilaritiesModule
+    combined_softmax: nn.Sequential
+    new_track_weight: t.Tensor
+
     def __init__(self, config: Config, path: NodePath):
         BaseModule.__init__(self, config=config, path=path)
         nn.Module.__init__(self)
@@ -85,6 +91,7 @@ def __init__(self, config: Config, path: NodePath):
         self.configure_torch_module(self.combine)
 
         # if wanted, compute the softmax after the similarities have been summed up / combined
+        # but before the new tracks are added
         combined_softmax = nn.Sequential()
         if self.params.get("combined_softmax", DEF_VAL["dgs"]["combined_softmax"]):
             combined_softmax.append(nn.Softmax(dim=-1))
@@ -101,7 +108,7 @@ def __init__(self, config: Config, path: NodePath):
     def __call__(self, *args, **kwargs) -> any:  # pragma: no cover
         return self.forward(*args, **kwargs)
 
-    def forward(self, ds: State, target: State) -> t.Tensor:
+    def forward(self, ds: State, target: State, **_kwargs) -> t.Tensor:
         """Given a State containing the current detections and a target, compute the similarity between every pair.
 
         Returns:
@@ -113,7 +120,7 @@ def forward(self, ds: State, target: State) -> t.Tensor:
         results = [self.similarity_softmax(m(ds, target)) for m in self.sim_mods]
 
         # combine and possibly compute softmax
-        combined: t.Tensor = self.combined_softmax(self.combine(*results))
+        combined: t.Tensor = self.combined_softmax(self.combine(*results, **_kwargs))
         del results
 
         # add a number of columns for the empty / new tracks equal to the length of the input

diff --git a/dgs/models/engine/similarity_engine.py b/dgs/models/engine/similarity_engine.py
@@ -29,7 +29,7 @@
 
 
 class SimilarityEngine(EngineModule):
-    """An engine for training and testing similarity models independently.
+    """An engine for training and testing similarity models independently and in combinations.
 
     For this model:
 
@@ -102,19 +102,23 @@ def __init__(
 
         # Params - Train
 
-    def get_data(self, ds: State) -> t.Tensor:
-        """Use the similarity model to obtain the similarity data of the current detections."""
-        return self.model.get_data(ds)
+    def get_data(self, ds: State) -> list[t.Tensor]:
+        """Use the similarity models of the DGS module to obtain the similarity data of the current detections.
 
-    def get_target(self, ds: State) -> tuple[any, t.Tensor]:
-        """Get the target data.
+        For the similarity engine, the data consists of a list of all the input data for the similarities.
+        This means that for the visual similarity, the embedding is returned,
+        and for the IoU or OKS similarities, the bbox and key point data are returned.
+        The :func:`get_data` function will be called twice, once for the current time-step and once for the previous.
+        """
+        return [sm.get_data(ds) for sm in self.model.sim_mods]
 
-        For the similarity engine, the target data consists of two parts:
+    def get_target(self, ds: State) -> t.Tensor:
+        """Get the target data.
 
-        - the target IDs
-        - the similarity data of the previous step
+        For the similarity engine, the target data consists of the target- / class-id.
+        The :func:`get_target` function will be called twice, once for the current time-step and once for the previous.
         """
-        return ds.class_id, ...
+        return ds.class_id
 
     def test(self) -> Results:
         r"""Test whether the predicted alpha probability (:math:`\alpha_{\mathrm{pred}}`)
@@ -169,17 +173,29 @@ def predict(self) -> any:
     def _get_train_loss(self, data: list[State], _curr_iter: int) -> t.Tensor:
         """Calculate the loss for the current frame."""
         assert isinstance(data, list) and len(data) == 2, "Data must be a list of length 2."
-        # data_old, data_new = data
-        # old_ids = data_old.class_id
-        # new_ids = data_new.class_id
-        # target_ids = ...
-        #
-        # curr_sim_data = self.get_data(data_new)
-        # prev_sim_data = self.get_data(data_old)
-
-        loss = self.loss(..., ...)
+        data_old: State
+        data_new: State
+
+        data_old, data_new = data
+
+        old_ids = self.get_target(data_old)  # [T]
+        new_ids = self.get_target(data_new)  # [D]
+        # concat all IDs from new_ids, which are not present in old_ids, to the old_ids
+        combined_ids = t.cat((old_ids, new_ids[~(new_ids.reshape((-1, 1)) == old_ids.reshape((1, -1))).max(dim=1)[0]]))
+        # (row, column) indices of the correct match in combined_ids; unmatched new IDs map to their appended entries
+        indices = t.where(new_ids.reshape(-1, 1) == combined_ids.reshape(1, -1))
+
+        # get the input data of the similarity modules for the current step
+        curr_sim_data = self.get_data(data_new)  # [D]
+        alpha = self.model.combine.alpha_model(curr_sim_data)
+
+        # get the similarity matrices as [D x (T + D)]
+        similarity = self.model.forward(ds=data_new, target=data_old, alpha_inputs=curr_sim_data)
+
+        loss = self.loss(alpha, similarity[indices])
         return loss
 
     def terminate(self) -> None:
         self.model.terminate()
+        del self.model
         super().terminate()
diff --git a/dgs/models/similarity/pose_similarity.py b/dgs/models/similarity/pose_similarity.py
@@ -18,6 +18,7 @@
     "format": [str, ("in", list(OKS_SIGMAS.keys()))],
     # optional
     "softmax": ["optional", bool],
+    "keypoint_dim": ["optional", int, ("within", (1, 3))],
 }
 
 iou_validations: Validations = {
@@ -35,10 +36,16 @@ class ObjectKeypointSimilarity(SimilarityModule):
     format (str):
         The key point format, e.g., 'coco', 'coco-whole', ... has to be in OKS_SIGMAS.keys().
 
+    Optional Params
+    ---------------
+
     softmax (bool, optional):
         Whether to compute the softmax of the result.
         All values will lie in the range :math:`[0, 1]`, with softmax, they additionally sum to one.
         Default ``DEF_VAL.similarity.oks.softmax``.
+    keypoint_dim (int, optional):
+        The dimensionality of the key points, i.e., whether 2D or 3D key points are expected.
+        Default ``DEF_VAL.similarity.oks.kp_dim``.
     """
 
     def __init__(self, config: Config, path: NodePath):
@@ -62,16 +69,16 @@ def __init__(self, config: Config, path: NodePath):
         if self.params.get("softmax", DEF_VAL["similarity"]["oks"]["softmax"]):
             self.softmax.append(nn.Softmax(dim=-1))
 
-    def get_data(self, ds: State) -> tuple[t.Tensor, t.Tensor]:
-        """Given a :class:`State`, compute the detected / predicted key points with shape ``[B1 x J x 2]``
-        and the areas of the respective ground-truth bounding-boxes with shape ``[B1]``.
+        self.kp_dim: int = self.params.get("keypoint_dim", DEF_VAL["similarity"]["oks"]["kp_dim"])
 
-        Notes:
-            To compute the bbox area, it is possible to use the :class:`~torchvision.ops.box_area` function.
-            The box_area function expects that the bounding boxes are given in 'XYXY' format.
+    def get_data(self, ds: State) -> t.Tensor:
+        """Given a :class:`State`, obtain the detected / predicted key points as a FloatTensor
+        of shape ``[B1 x J x 2|3]``; the bounding-box areas are computed separately in :func:`get_area`.
         """
-        kps = ds.keypoints.float().view(ds.B, -1, 2)
+        return ds.keypoints.float().view(ds.B, -1, self.kp_dim)
 
+    def get_area(self, ds: State) -> t.Tensor:
+        """Given a :class:`State`, compute the area of the bounding box."""
         bboxes = ds.bbox
 
         if bboxes.format == BoundingBoxFormat.XYXY:
@@ -82,14 +89,14 @@ def get_data(self, ds: State) -> tuple[t.Tensor, t.Tensor]:
             bboxes = self.transf_bbox_to_xyxy(bboxes)
             area = box_area(bboxes).float()
 
-        return kps, area
+        return area
 
     def get_target(self, ds: State) -> tuple[t.Tensor, t.Tensor]:
         """Given a :class:`State` obtain the ground truth key points and the key-point-visibility.
-        Both are tensors, the key points are a FloatTensor of shape ``[B2 x J x 2]``
+        Both are tensors, the key points are a FloatTensor of shape ``[B2 x J x 2|3]``
         and the visibility is a BoolTensor of shape ``[B2 x J]``.
         """
-        kps = ds.keypoints.float().view(ds.B, -1, 2)
+        kps = ds.keypoints.float().view(ds.B, -1, self.kp_dim)
         vis = ds.cast_joint_weight(dtype=t.bool).squeeze(-1).view(ds.B, -1)
         return kps, vis
 
@@ -129,7 +136,8 @@ def forward(self, data: State, target: State) -> t.Tensor:
             resulting in probability distributions for each value of the input data.
         """
         # get predicted key-points as [N x J x 2] and bbox area as [N]
-        pred_kps, bbox_area = self.get_data(ds=data)
+        pred_kps = self.get_data(ds=data)
+        bbox_area = self.get_area(ds=data)
         # get ground-truth key-points as [T x J x 2] and visibility as [T x J]
         gt_kps, gt_vis = self.get_target(ds=target)
         assert pred_kps.size(-1) == gt_kps.size(-1), "Key-points should have the same number of dimensions"
@@ -191,7 +199,7 @@ def get_data(self, ds: State) -> BoundingBoxes:
 
     def get_target(self, ds: State) -> BoundingBoxes:
         """Given a :class:`State` obtain the ground-truth bounding-boxes as
-        :class:`torchvision.tv_etnsors.BoundingBoxes` object of size ``[T x 4]``.
+        :class:`torchvision.tv_tensors.BoundingBoxes` object of size ``[T x 4]``.
 
         Notes:
             The function :func:`box_iou` expects that the bounding boxes are in the 'XYXY' format.
-Original file line number
+Diff line change
@@ Expand Up / @@ -126,6 +126,7 @@ similarity: @@
         oks:
             softmax: false
+            kp_dim: 2
         iou:
             softmax: false
@@ Expand Down @@