Memory management and Submission files
Replaced the PT21 submission file structure with a dynamic module.
- created a PT21 submission class
- used the general submission class in the DGS engine

- smaller fixes in keypoint_rcnn.py and module.py
- no more updates for tracks of size 0

- added the possibility to not append the current date to log_dir
- removed indents and spaces from written JSON files
- added a tool for general memory analysis (see the sketch below)
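For context on the memory-analysis bullet: the tool itself is not shown in the rendered diff, so the following is only a minimal sketch of what such a helper typically looks like in PyTorch (`print_memory_usage` and its output format are hypothetical, not the committed code):

```python
import gc

import torch


def print_memory_usage(device: str = "cuda:0", top_k: int = 10) -> None:
    """Print allocated/reserved CUDA memory and the largest live CUDA tensors."""
    dev = torch.device(device)
    alloc_mib = torch.cuda.memory_allocated(dev) / 2**20
    reserved_mib = torch.cuda.memory_reserved(dev) / 2**20
    print(f"{device}: allocated {alloc_mib:.1f} MiB, reserved {reserved_mib:.1f} MiB")
    # collect all live CUDA tensors the garbage collector knows about, biggest first
    tensors = [o for o in gc.get_objects() if isinstance(o, torch.Tensor) and o.is_cuda]
    for t in sorted(tensors, key=lambda t: -t.numel())[:top_k]:
        size_mib = t.element_size() * t.numel() / 2**20
        print(f"  {tuple(t.shape)} {t.dtype} ~{size_mib:.2f} MiB")
```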

Signed-off-by: Martin <[email protected]>
bmmtstb committed May 18, 2024
1 parent 2c5ad16 commit d07472b
Showing 23 changed files with 602 additions and 295 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -1,4 +1,4 @@
-![Coverage](https://img.shields.io/badge/Coverage-91%25-lime)
+![Coverage](https://img.shields.io/badge/Coverage-92%25-lime)
[![Run CI](https://github.com/bmmtstb/dynamically-gated-similarities/actions/workflows/ci.yaml/badge.svg?branch=master)](https://github.com/bmmtstb/dynamically-gated-similarities/actions/workflows/ci.yaml)
[![Deploy Docs](https://github.com/bmmtstb/dynamically-gated-similarities/actions/workflows/wiki.yaml/badge.svg?branch=master)](https://github.com/bmmtstb/dynamically-gated-similarities/actions/workflows/wiki.yaml)

30 changes: 28 additions & 2 deletions configs/DGS/eval_sim_indep.yaml
@@ -1,16 +1,18 @@
-name: "Evaluate-IoU"
+name: "Evaluate-Single"
description: "Use the DGS tracker with a constant alpha module.
  With IoU as its only Similarity with an alpha value of 1."

-device: "cuda"
+device: "cuda:0"
print_prio: "INFO"
is_training: off
log_dir: "./results/own/eval/"
+log_dir_add_date: false

test:
  inactivity_threshold: 15
  max_track_length: 1
  save_images: off
+  submission: ["submission"]

# #### #
# DATA #
@@ -23,6 +25,8 @@ dl_rcnn:
  batch_size: 16
  threshold: 0.75
  return_lists: true
+  image_size: !!python/tuple [ 1024, 1024 ]
+  image_mode: "zero-pad"
  crop_size: !!python/tuple [256, 192]
  crop_mode: "zero-pad"
  # data_path: # will be set in python script
@@ -38,6 +42,10 @@ dl_gt:
  # data_path: # will be set in python script
  # out_path: # will be set in python script

+submission:
+  module_name: "PoseTrack21"
+  # file: # will be set in python script

# ####### #
# MODULES #
# ####### #
@@ -147,3 +155,21 @@ v4_embed_gen:
  weights: "pretrained"
  nof_classes: 1000

+# Visual - vXXX -
+
+dgs_vis_XXX:
+  module_name: "DGS"
+  combine: "combine_sim"
+  names: [ "vXXX_sim" ]
+  combined_softmax: true
+
+vXXX_sim:
+  module_name: "torchreid"
+  metric: "NegSoftmaxEuclideanDist"
+  embedding_generator_path: ["vXXX_embed_gen"]
+
+vXXX_embed_gen:
+  module_name: "torchreid"
+  model_name: ""
+  weights: "./weights/"
+  nof_classes: 1000
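For orientation, the `test.submission` list above references the top-level `submission` block by key. A minimal sketch of reading that wiring with PyYAML (assuming the file is loaded with a loader that accepts the `!!python/tuple` tags used in these configs):

```python
import yaml

with open("configs/DGS/eval_sim_indep.yaml", encoding="utf-8") as f:
    cfg = yaml.unsafe_load(f)  # the !!python/tuple tags need more than safe_load

print(cfg["test"]["submission"])         # ['submission'] -> names of submission blocks
print(cfg["submission"]["module_name"])  # 'PoseTrack21' -> module handling PT21 files
```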
4 changes: 4 additions & 0 deletions dgs/default_values.yaml
@@ -44,6 +44,9 @@ images:
  aspect_mode: "zero-pad"
  aspect_round_decimals: 2
+
+submission:
+  file: "./submission.json"

# ############## #
# LOSS FUNCTIONS #
# ############## #
@@ -59,6 +62,7 @@ base:
  print_prio: "INFO"
  description: ""
  log_dir: "./results/"
+  log_dir_add_date: true
  precision: "float32"
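A short sketch of how these defaults act as fallbacks — the nested-key lookup is an assumption for illustration; the repo accesses them through its own ``DEF_VAL`` helper:

```python
import yaml

with open("dgs/default_values.yaml", encoding="utf-8") as f:
    DEF_VAL = yaml.unsafe_load(f)  # tolerant of !!python tags used in the config files

run_cfg = {"log_dir": "./results/own/eval/"}  # a run config that sets neither new key
submission_file = run_cfg.get("submission_file", DEF_VAL["submission"]["file"])   # './submission.json'
add_date = run_cfg.get("log_dir_add_date", DEF_VAL["base"]["log_dir_add_date"])   # True
```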


4 changes: 4 additions & 0 deletions dgs/models/dataset/dataset.py
@@ -98,6 +98,10 @@ class BaseDataset(BaseModule, TorchDataset):
    dataset_path (FilePath):
        Path to the directory of the dataset.
        The value has to either be a local project path, or a valid absolute path.
+
+    Optional Params
+    ---------------
+
    force_img_reshape (bool, optional):
        Whether to accept that images in one folder might have different shapes.
        Default ``DEF_VAL.dataset.force_img_reshape``.
78 changes: 38 additions & 40 deletions dgs/models/dataset/keypoint_rcnn.py
@@ -7,7 +7,6 @@

import os
from abc import ABC
-from typing import Union

import torch
from torch import nn
@@ -86,9 +85,20 @@ def images_to_states(self, images: Images) -> list[State]:
# get the indices where the score ('certainty') is bigger than the given threshold
indices = output["scores"] > self.threshold

+B: int = int(torch.count_nonzero(indices).item())
+
+data = {
+    "validate": False,
+    "image_id": torch.ones(max(B, 1), device=self.device, dtype=torch.long) * self.img_id,
+    "frame_id": torch.ones(max(B, 1), device=self.device, dtype=torch.long) * self.img_id,
+}
+self.img_id += torch.tensor(1, dtype=torch.long, device=self.device)
+
# skip if there aren't any detections
-if not torch.any(indices):
-    states.append(EMPTY_STATE.copy())
+if B == 0:
+    es = EMPTY_STATE.copy()
+    es.update(data)
+    states.append(es)
    continue

# bbox given in XYXY format
@@ -112,26 +122,21 @@ def images_to_states(self, images: Images) -> list[State]:

assert loc_kps is not None

-# make sure the crops are 4-dimensional [B x C x H x W]
-if crops.ndim == 3:  # pragma: no cover
-    crops = tvte.wrap(crops.unsqueeze(0), like=crops)

-B = len(bbox)
+data = dict(
+    data,
+    **{
+        "skeleton_name": tuple("coco" for _ in range(B)),
+        "scores": output["scores"][indices].unsqueeze(-1).repeat(1, 17),  # B x 17
+        "score": output["scores"][indices],
+        "bbox": bbox,
+        "image_crop": crops,
+        "keypoints": kps,
+        "keypoints_local": loc_kps,
+        "joint_weight": vis,
+        "person_id": torch.ones(B, device=self.device, dtype=torch.long) * -1,  # set as -1
+    },
+)

-data = {
-    "validate": False,
-    "bbox": bbox,
-    "image_crop": crops,
-    "keypoints": kps,
-    "keypoints_local": loc_kps,
-    "joint_weight": vis,
-    "scores": output["scores"],
-    "skeleton_name": tuple("coco" for _ in range(B)),
-    "image_id": torch.ones(B, device=self.device, dtype=torch.long) * self.img_id,
-    "frame_id": torch.ones(B, device=self.device, dtype=torch.long) * self.img_id,
-    "person_id": torch.ones(B, device=self.device, dtype=torch.long) * -1,
-}
-self.img_id += torch.tensor(1, dtype=torch.long, device=self.device)
states.append(State(**data))

return states
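The reworked loop keys everything off ``B``, the number of detections that survive the score threshold, so empty frames now get a properly stamped empty state instead of being dropped. A stripped-down sketch of that pattern against torchvision's Keypoint R-CNN output (standalone, without the ``State`` bookkeeping):

```python
import torch
from torchvision.models.detection import (
    KeypointRCNN_ResNet50_FPN_Weights,
    keypointrcnn_resnet50_fpn,
)

model = keypointrcnn_resnet50_fpn(weights=KeypointRCNN_ResNet50_FPN_Weights.DEFAULT).eval()
threshold = 0.75

with torch.no_grad():
    outputs = model([torch.rand(3, 256, 192)])  # list of 3D float images in [0, 1]

for output in outputs:
    indices = output["scores"] > threshold        # bool mask over all detections
    B = int(torch.count_nonzero(indices).item())  # detections kept for this image
    if B == 0:
        continue  # analogue of the empty-state branch above
    boxes = output["boxes"][indices]          # [B, 4] bboxes in XYXY format
    keypoints = output["keypoints"][indices]  # [B, 17, 3] as (x, y, visibility)
    scores = output["scores"][indices]        # [B] per-detection confidence
```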
@@ -180,10 +185,6 @@ def __init__(self, config: Config, path: NodePath) -> None:
self.data = []
data_path = self.params["data_path"]
if isinstance(data_path, list):
-    assert all(isinstance(p, str) for p in data_path), "Path is a list but not all values are string"
-    assert all(
-        any(p.lower().endswith(end) for end in IMAGE_FORMATS) for p in data_path
-    ), "Not all values are images"
self.data = sorted(data_path)
elif isinstance(data_path, str):
data_path = self.get_path_in_dataset(data_path)
@@ -210,23 +211,22 @@ def __init__(self, config: Config, path: NodePath) -> None:
f"Unknown path object, expected filepath, dirpath, or list of filepaths. Got {type(data_path)}"
)

-def arbitrary_to_ds(self, a: Union[FilePath, FilePaths], idx: int) -> list[State]:
+def arbitrary_to_ds(self, a: FilePath, idx: int) -> list[State]:
"""Given a filepath, predict the bounding boxes and key-points of the respective image.
Return a State containing all the available information.
+Because the state is known, the image is not saved in the State, to reduce the space-overhead on the GPU.
+Args:
+    a: A single path to an image file.
+    idx: The index of the file path within ``self.data``.
"""
-if isinstance(a, str):
-    a = (a,)
# the torch model expects a list of 3D images
-images = [
-    convert_image_dtype(tvte.Image(load_image(fp).squeeze(0), device=self.device), dtype=torch.float32)
-    for fp in a
-]
+images = [convert_image_dtype(tvte.Image(load_image(a).squeeze(0), device=self.device), dtype=torch.float32)]

states = self.images_to_states(images=images)

-for fp, state in zip(a, states):
-    state.filepath = tuple(fp for _ in range(max(state.B, 1)))
+for state in states:
+    state.filepath = tuple(a for _ in range(max(state.B, 1)))

return states

@@ -289,14 +289,12 @@ def arbitrary_to_ds(self, a: Image, idx: int) -> list[State]:
"""Given a frame of a video, return the resulting state after running the RCNN model."""
if not isinstance(a, torch.Tensor):
raise NotImplementedError
-if a.ndim == 3:
-    a = a.unsqueeze(0)
# the torch RCNN model expects a list of 3D images
-images = [convert_image_dtype(img, torch.float32) for img in a]
+images = [convert_image_dtype(a, torch.float32)]

states = self.images_to_states(images=images)

-for img, state in zip(a, states):
-    state.image = [img.unsqueeze(0) for _ in range(state.B)]
+for img, state in zip(images, states):
+    state.image = [tvte.wrap(img.unsqueeze(0), like=img) for _ in range(state.B)]

return states
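``tvte.wrap`` re-attaches the ``tv_tensors`` subclass that most tensor ops strip off, which is what the fixed loop above relies on; a tiny demonstration (generic torchvision v2, independent of this dataset):

```python
import torch
from torchvision import tv_tensors as tvte

img = tvte.Image(torch.rand(3, 64, 64))   # 3D image tv-tensor
batched = img.unsqueeze(0)                # most ops fall back to a plain torch.Tensor
print(type(batched).__name__)             # 'Tensor'
rewrapped = tvte.wrap(batched, like=img)  # wrap restores the Image subclass
print(type(rewrapped).__name__)           # 'Image'
```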
118 changes: 1 addition & 117 deletions dgs/models/dataset/posetrack21.py
@@ -23,14 +23,13 @@
import torch as t
from torch.utils.data import ConcatDataset, Dataset as TorchDataset
from torchvision import tv_tensors as tvte
-from torchvision.transforms.v2.functional import convert_bounding_box_format
from tqdm import tqdm

from dgs.models.dataset.dataset import BaseDataset, BBoxDataset, dataloader_validations, ImageDataset
from dgs.models.dataset.torchreid_pose_dataset import TorchreidPoseDataset
from dgs.utils.config import DEF_VAL
from dgs.utils.constants import PROJECT_ROOT
-from dgs.utils.files import mkdir_if_missing, read_json, to_abspath, write_json
+from dgs.utils.files import mkdir_if_missing, read_json, to_abspath
from dgs.utils.state import collate_bboxes, collate_tensors, State
from dgs.utils.types import Config, Device, FilePath, ImgShape, NodePath, Validations
from dgs.utils.utils import extract_crops_and_save
@@ -53,8 +52,6 @@
"PoseTrack21_BBox",
"PoseTrack21_Image",
"PoseTrack21Torchreid",
"generate_pt21_submission_file",
"submission_data_from_state",
]

pt21_json_validations: Validations = {
@@ -330,119 +327,6 @@ def extract_pt21_image_crops(dataset_dir: FilePath = "./data/PoseTrack21", indiv
)


def submission_data_from_state(s: State) -> tuple[dict[str, any], list[dict[str, any]]]:
"""Given a :class:`.State`, extract data for the 'images' and 'annotations' list used in the pt21 submission.
See :func:`.generate_pt21_submission_file` for more details on the submission format.
Returns:
The image and annotation data as dictionaries.
The annotation data is a list of dicts, because every image can have multiple detections / annotations.
"""
# pylint: disable=too-many-branches
# validate the image data
for key in ["filepath", "image_id", "frame_id"]:
if key not in s:
raise KeyError(f"Expected key '{key}' to be in State.")
if isinstance(s[key], str):
# str -> tuple of str, this will always be correct, add at least one value for later usage
s[key] = (s[key] for _ in range(max(1, s.B)))
elif s.B > 1:
if (l := len(s[key])) != s.B:
raise ValueError(f"Expected '{key}' ({l}) to have the same length as the State ({s.B}).")
if any(s[key][i] != s[key][0] for i in range(1, s.B)):
raise ValueError(f"State has different {key}s, expected all {key}s to match. got: '{s[key]}'.")
elif (l := len(s[key])) != 1:
raise ValueError(f"Expected '{key}' ({l}) to have a length of exactly 1.")
# add frame id if missing as duplicate of image id
if "frame_id" not in s:
s["frame_id"] = s["image_id"]

# get the image data
image_data = {
"file_name": s.filepath[0],
"id": int(s["image_id"][0].item() if isinstance(s["image_id"], t.Tensor) else s["image_id"][0]),
"frame_id": int(s["frame_id"][0].item() if isinstance(s["frame_id"], t.Tensor) else s["frame_id"][0]),
}
if s.B == 0:
return image_data, []

# validate the annotation data
for key in ["person_id", "pred_tid", "bbox", "keypoints", "joint_weight"]:
if key not in s:
raise KeyError(f"Expected key '{key}' to be in State.")
if (l := len(s[key])) != s.B:
raise ValueError(f"Expected '{key}' ({l}) to have the same length as the State ({s.B}).")

# get the annotation data
anno_data = []
bboxes = convert_bounding_box_format(s.bbox, new_format=tvte.BoundingBoxFormat.XYWH)
for i in range(s.B):
kps = t.cat([s.keypoints[i], s.joint_weight[i]], dim=-1)
anno_data.append(
{
"bboxes": bboxes[i].flatten().tolist(),
"kps": kps.flatten().tolist(),
"scores": s["scores"][i].flatten().tolist() if "scores" in s else [0.0 for _ in range(17)],
"image_id": int(s["image_id"][0].item() if isinstance(s["image_id"], t.Tensor) else s["image_id"][0]),
"person_id": int(s.person_id[i].item()),
"track_id": int(s["pred_tid"][i].item()),
}
)

return image_data, anno_data


def generate_pt21_submission_file(
outfile: FilePath, images: list[dict[str, any]], annotations: list[dict[str, any]]
) -> None: # pragma: no cover
"""Given data, generate a |PT21| submission file.
References:
https://github.com/anDoer/PoseTrack21/blob/main/doc/dataset_structure.md
https://github.com/leonid-pishchulin/poseval
Args:
outfile: The path to the target file
images: A list containing the IDs and file names of the images
annotations: A list containing the per-bbox predicted annotations.
Notes:
The structure of the PT21 submission file is similar to the structure of the inputs::
{
"images": [
{
"file_name": "images/train/000001_bonn_train/000000.jpg",
"id": 10000010000,
"frame_id": 10000010000
},
],
"annotations": [
{
"bbox": [x1, y1, w, h],
"image_id": 10000010000,
"keypoints": [x1, y1, vis1, ..., x17, y17, vis17],
"scores": [s1, ..., s17],
"person_id": 1024,
"track_id": 0
},
]
}
Additionally, note that the visibilities are ignored during evaluation.
"""
data = {"images": images, "annotations": annotations}
try:
write_json(obj=data, filepath=outfile)
except TypeError as e:
print(f"images: {images}")
print(f"annotations: {annotations}")
raise TypeError from e
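The commit message's "removed indents and spaces from written JSON files" maps onto the compact separators of the standard ``json`` module. A minimal sketch of writing the submission structure documented above without any whitespace (plain ``json`` here; the repo routes this through its own ``write_json``):

```python
import json

# 'images' and 'annotations' filled as in the structure documented above
data = {"images": [], "annotations": []}

with open("./submission.json", "w", encoding="utf-8") as f:
    # indent=None plus separators without trailing spaces yields the smallest file
    json.dump(data, f, indent=None, separators=(",", ":"))
```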


def get_pose_track_21(config: Config, path: NodePath, ds_name: str = "bbox") -> Union[BaseDataset, TorchDataset]:
"""Load PoseTrack JSON files.
3 changes: 2 additions & 1 deletion dgs/models/embedding_generator/pose_based.py
@@ -287,7 +287,8 @@ def __init__(self, config: Config, path: NodePath):
nn.Softmax(dim=-1),
)

-self.model = self._init_flattened_model()
+model = self._init_flattened_model()
+self.register_module(name="model", module=model)

self.bbox_converter = ConvertBoundingBoxFormat(
format=self.params.get("bbox_format", DEF_VAL["embed_gen"]["pose"]["LPBEG"]["bbox_format"])
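The switch from plain attribute assignment to ``register_module`` makes the submodule registration explicit, so the flattened model reliably shows up in ``named_modules()``, ``parameters()``, and the ``state_dict``. A small self-contained check of that behavior (generic PyTorch, not this repo's classes):

```python
from torch import nn


class Wrapper(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        # explicit registration; equivalent to `self.model = nn.Linear(4, 2)`
        # for nn.Module attributes, but also usable with dynamic names
        self.register_module("model", nn.Linear(4, 2))


w = Wrapper()
assert "model" in dict(w.named_modules())  # the submodule is tracked
assert len(list(w.parameters())) == 2      # Linear weight and bias are visible
```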
