diff --git a/CHANGELOG.md b/CHANGELOG.md index de59809eb13..abe93f4e2d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -73,6 +73,8 @@ All notable changes to this project will be documented in this file. () - Bump onnx to 1.17.0 to omit CVE-2024-5187 () +- Decouple DinoV2 for semantic segmentation task + () ### Bug fixes @@ -126,6 +128,10 @@ All notable changes to this project will be documented in this file. () - Fix empty annotation in tiling () +- Fix patching early stopping in tools/converter.py, update headers in templates, change training schedule for classification + () +- Fix tensor type compatibility in dynamic soft label assigner and RTMDet head + () - Fix DETR target class indices are of type long in loss calculations () diff --git a/src/otx/algo/callbacks/adaptive_early_stopping.py b/src/otx/algo/callbacks/adaptive_early_stopping.py index 0f95e3c277d..754c6cf7c36 100644 --- a/src/otx/algo/callbacks/adaptive_early_stopping.py +++ b/src/otx/algo/callbacks/adaptive_early_stopping.py @@ -20,7 +20,7 @@ def __init__( self, monitor: str, min_delta: float = 0.0, - patience: int = 3, + patience: int = 10, verbose: bool = False, mode: str = "min", strict: bool = True, diff --git a/src/otx/algo/classification/backbones/vision_transformer.py b/src/otx/algo/classification/backbones/vision_transformer.py index c60f2ded49e..1255abff0d1 100644 --- a/src/otx/algo/classification/backbones/vision_transformer.py +++ b/src/otx/algo/classification/backbones/vision_transformer.py @@ -5,6 +5,7 @@ """Copy from mmpretrain/models/backbones/vision_transformer.py.""" from __future__ import annotations +import math from functools import partial from typing import TYPE_CHECKING, Any, Callable, Literal @@ -46,6 +47,7 @@ "vit-huge", "dinov2-s", "dinov2-small", + "dinov2-small-seg", "dinov2-b", "dinov2-base", "dinov2-l", @@ -87,6 +89,7 @@ class VisionTransformer(BaseModule): norm_layer: Normalization layer. act_layer: MLP activation layer. block_fn: Transformer block layer. + interpolate_offset: Workaround offset to apply when interpolating positional embeddings. lora: Enable LoRA training.
""" @@ -147,6 +150,17 @@ class VisionTransformer(BaseModule): "num_heads": 6, "reg_tokens": 4, "no_embed_class": True, + }, + ), + **dict.fromkeys( + ["dinov2-small-seg"], # segmentation + { + "patch_size": 14, + "embed_dim": 384, + "depth": 12, + "num_heads": 6, + "reg_tokens": 0, + "no_embed_class": False, "init_values": 1e-5, }, ), @@ -193,9 +207,9 @@ class VisionTransformer(BaseModule): def __init__( # noqa: PLR0913 self, - arch: VIT_ARCH_TYPE = "vit-base", + arch: VIT_ARCH_TYPE | str = "vit-base", img_size: int | tuple[int, int] = 224, - patch_size: int | tuple[int, int] | None = None, + patch_size: int | None = None, in_chans: int = 3, num_classes: int = 1000, embed_dim: int | None = None, @@ -221,6 +235,7 @@ def __init__( # noqa: PLR0913 mlp_layer: nn.Module | None = None, act_layer: LayerType | None = None, norm_layer: LayerType | None = None, + interpolate_offset: float = 0.1, lora: bool = False, ) -> None: super().__init__() @@ -231,7 +246,7 @@ def __init__( # noqa: PLR0913 arch_settings: dict[str, Any] = self.arch_zoo[arch] self.img_size: int | tuple[int, int] = img_size - self.patch_size: int | tuple[int, int] = patch_size or arch_settings.get("patch_size", 16) + self.patch_size: int = patch_size or arch_settings.get("patch_size", 16) self.embed_dim = embed_dim or arch_settings.get("embed_dim", 768) depth = depth or arch_settings.get("depth", 12) num_heads = num_heads or arch_settings.get("num_heads", 12) @@ -251,6 +266,7 @@ def __init__( # noqa: PLR0913 self.no_embed_class = no_embed_class # don't embed prefix positions (includes reg) self.dynamic_img_size = dynamic_img_size self.grad_checkpointing = False + self.interpolate_offset = interpolate_offset embed_args = {} if dynamic_img_size: @@ -353,15 +369,17 @@ def resize_positional_embeddings(pos_embed: torch.Tensor, new_shape: tuple[int, # convert dinov2 pretrained weights state_dict = torch.load(checkpoint_path) state_dict.pop("mask_token", None) - state_dict["reg_token"] = state_dict.pop("register_tokens") + if "reg_token" in state_dict: + state_dict["reg_token"] = state_dict.pop("register_tokens") state_dict["cls_token"] = state_dict.pop("cls_token") + state_dict["pos_embed"][:, 0] img_size = (self.img_size, self.img_size) if isinstance(self.img_size, int) else self.img_size - patch_size = (self.patch_size, self.patch_size) if isinstance(self.patch_size, int) else self.patch_size - state_dict["pos_embed"] = resize_positional_embeddings( - state_dict.pop("pos_embed")[:, 1:], - (img_size[0] // patch_size[0], img_size[1] // patch_size[1]), - ) + patch_size = (self.patch_size, self.patch_size) + if state_dict["pos_embed"].shape != self.pos_embed.shape: + state_dict["pos_embed"] = resize_positional_embeddings( + state_dict.pop("pos_embed")[:, 1:], + (img_size[0] // patch_size[0], img_size[1] // patch_size[1]), + ) self.load_state_dict(state_dict, strict=False) else: msg = f"Unsupported `checkpoint_extension` {checkpoint_ext}, please choose from 'npz' or 'pth'." @@ -401,6 +419,137 @@ def _pos_embed(self, x: torch.Tensor) -> torch.Tensor: return self.pos_drop(x) + def interpolate_pos_encoding(self, x: torch.Tensor, w: int, h: int) -> torch.Tensor: + """Interpolates the positional encoding to match the input dimensions. + + Args: + x (torch.Tensor): Input tensor. + w (int): Width of the input image. + h (int): Height of the input image. + + Returns: + torch.Tensor: Tensor with interpolated positional encoding. 
+ """ + previous_dtype = x.dtype + npatch = x.shape[1] + n = self.pos_embed.shape[1] + if npatch == n and w == h: + return self.pos_embed + pos_embed = self.pos_embed.float() + class_pos_embed = pos_embed[:, 0] + patch_pos_embed = pos_embed[:, 1:] + dim = x.shape[-1] + w0 = w // self.patch_size + h0 = h // self.patch_size + m = int(math.sqrt(n)) # Recover the number of patches in each dimension + if m * m != n: + msg = f"Expected m * m to equal n, but got m={m}, n={n}" + raise ValueError(msg) + kwargs = {} + if self.interpolate_offset: + # fix float error by introducing small offset + sx = float(w0 + self.interpolate_offset) / m + sy = float(h0 + self.interpolate_offset) / m + kwargs["scale_factor"] = (sx, sy) + else: + # Simply specify an output size instead of a scale factor + kwargs["size"] = (w0, h0) + patch_pos_embed = nn.functional.interpolate( + patch_pos_embed.reshape(1, m, m, dim).permute(0, 3, 1, 2), + mode="bicubic", + **kwargs, + ) + patch_pos_embed = patch_pos_embed.permute(0, 2, 3, 1).view(1, -1, dim) + return torch.cat((class_pos_embed.unsqueeze(0), patch_pos_embed), dim=1).to(previous_dtype) + + def prepare_tokens_with_masks(self, x: torch.Tensor, masks: torch.Tensor | None = None) -> torch.Tensor: + """Prepare tokens with optional masks. + + Args: + x (torch.Tensor): Input tensor. + masks (torch.Tensor | None): Optional masks tensor. + + Returns: + torch.Tensor: Tensor with prepared tokens. + """ + _, _, w, h = x.shape + x = self.patch_embed(x) + if masks is not None: + x = torch.where(masks.unsqueeze(-1), self.mask_token.to(x.dtype).unsqueeze(0), x) + + x = torch.cat((self.cls_token.expand(x.shape[0], -1, -1), x), dim=1) + x = x + self.interpolate_pos_encoding(x, w, h) + + if self.reg_token is not None: + x = torch.cat( + ( + x[:, :1], + self.reg_token.expand(x.shape[0], -1, -1), + x[:, 1:], + ), + dim=1, + ) + + return x + + def _get_intermediate_layers_not_chunked(self, x: torch.Tensor, n: int = 1) -> list[torch.Tensor]: + """Get intermediate layers without chunking. + + Args: + x (torch.Tensor): Input tensor. + n (int): Number of last blocks to take. If it's a list, take the specified blocks. + + Returns: + list[torch.Tensor]: List of intermediate layer outputs. + """ + x = self.prepare_tokens_with_masks(x) + # If n is an int, take the n last blocks. If it's a list, take them + output, total_block_len = [], len(self.blocks) + blocks_to_take = range(total_block_len - n, total_block_len) if isinstance(n, int) else n + for i, blk in enumerate(self.blocks): + x = blk(x) + if i in blocks_to_take: + output.append(x) + if len(output) != len(blocks_to_take): + msg = f"only {len(output)} / {len(blocks_to_take)} blocks found" + raise RuntimeError(msg) + return output + + def get_intermediate_layers( + self, + x: torch.Tensor, + n: int = 1, # Layers or n last layers to take + reshape: bool = False, + return_class_token: bool = False, + norm: bool = True, + ) -> tuple: + """Get intermediate layers of the VisionTransformer. + + Args: + x (torch.Tensor): Input tensor. + n (int): Number of last blocks to take. If it's a list, take the specified blocks. + reshape (bool): Whether to reshape the output feature maps. + return_class_token (bool): Whether to return the class token. + norm (bool): Whether to apply normalization to the outputs. + + Returns: + tuple: A tuple containing the intermediate layer outputs. 
+ """ + outputs = self._get_intermediate_layers_not_chunked(x, n) + if norm: + outputs = [self.norm(out) for out in outputs] + class_tokens = [out[:, 0] for out in outputs] + outputs = [out[:, 1 + self.num_reg_tokens :] for out in outputs] + if reshape: + b, _, w, h = x.shape + outputs = [ + out.reshape(b, w // self.patch_size, h // self.patch_size, -1).permute(0, 3, 1, 2).contiguous() + for out in outputs + ] + if return_class_token: + return tuple(zip(outputs, class_tokens)) + return tuple(outputs) + def forward( self, x: torch.Tensor, diff --git a/src/otx/algo/classification/efficientnet.py b/src/otx/algo/classification/efficientnet.py index 2f5c00d544e..cad57e84744 100644 --- a/src/otx/algo/classification/efficientnet.py +++ b/src/otx/algo/classification/efficientnet.py @@ -14,7 +14,7 @@ from otx.algo.classification.backbones.efficientnet import EFFICIENTNET_VERSION, OTXEfficientNet from otx.algo.classification.classifier import HLabelClassifier, ImageClassifier, SemiSLClassifier from otx.algo.classification.heads import ( - HierarchicalCBAMClsHead, + HierarchicalLinearClsHead, LinearClsHead, MultiLabelLinearClsHead, SemiSLLinearClsHead, @@ -272,11 +272,8 @@ def _build_model(self, head_config: dict) -> nn.Module: return HLabelClassifier( backbone=backbone, - neck=nn.Identity(), - head=HierarchicalCBAMClsHead( - in_channels=backbone.num_features, - **copied_head_config, - ), + neck=GlobalAveragePooling(dim=2), + head=HierarchicalLinearClsHead(**copied_head_config, in_channels=backbone.num_features), multiclass_loss=nn.CrossEntropyLoss(), multilabel_loss=AsymmetricAngularLossWithIgnore(gamma_pos=0.0, gamma_neg=1.0, reduction="sum"), ) diff --git a/src/otx/algo/classification/mobilenet_v3.py b/src/otx/algo/classification/mobilenet_v3.py index 18963d53b5c..8d21b3a0562 100644 --- a/src/otx/algo/classification/mobilenet_v3.py +++ b/src/otx/algo/classification/mobilenet_v3.py @@ -15,7 +15,7 @@ from otx.algo.classification.backbones import OTXMobileNetV3 from otx.algo.classification.classifier import HLabelClassifier, ImageClassifier, SemiSLClassifier from otx.algo.classification.heads import ( - HierarchicalCBAMClsHead, + HierarchicalLinearClsHead, LinearClsHead, MultiLabelNonLinearClsHead, SemiSLLinearClsHead, @@ -313,14 +313,12 @@ def _build_model(self, head_config: dict) -> nn.Module: copied_head_config = copy(head_config) copied_head_config["step_size"] = (ceil(self.input_size[0] / 32), ceil(self.input_size[1] / 32)) + in_channels = 960 if self.mode == "large" else 576 return HLabelClassifier( backbone=OTXMobileNetV3(mode=self.mode, input_size=self.input_size), - neck=nn.Identity(), - head=HierarchicalCBAMClsHead( - in_channels=960, - **copied_head_config, - ), + neck=GlobalAveragePooling(dim=2), + head=HierarchicalLinearClsHead(**copied_head_config, in_channels=in_channels), multiclass_loss=nn.CrossEntropyLoss(), multilabel_loss=AsymmetricAngularLossWithIgnore(gamma_pos=0.0, gamma_neg=1.0, reduction="sum"), ) diff --git a/src/otx/algo/classification/timm_model.py b/src/otx/algo/classification/timm_model.py index d7e171565a7..712b7c8ed70 100644 --- a/src/otx/algo/classification/timm_model.py +++ b/src/otx/algo/classification/timm_model.py @@ -15,12 +15,12 @@ from otx.algo.classification.backbones.timm import TimmBackbone, TimmModelType from otx.algo.classification.classifier import HLabelClassifier, ImageClassifier, SemiSLClassifier from otx.algo.classification.heads import ( - HierarchicalCBAMClsHead, LinearClsHead, MultiLabelLinearClsHead, SemiSLLinearClsHead, ) from 
otx.algo.classification.losses.asymmetric_angular_loss_with_ignore import AsymmetricAngularLossWithIgnore +from otx.algo.classification.mobilenet_v3 import HierarchicalLinearClsHead from otx.algo.classification.necks.gap import GlobalAveragePooling from otx.algo.classification.utils import get_classification_layers from otx.algo.utils.support_otx_v1 import OTXv1Helper @@ -272,11 +272,8 @@ def _build_model(self, head_config: dict) -> nn.Module: copied_head_config["step_size"] = (ceil(self.input_size[0] / 32), ceil(self.input_size[1] / 32)) return HLabelClassifier( backbone=backbone, - neck=nn.Identity(), - head=HierarchicalCBAMClsHead( - in_channels=backbone.num_features, - **copied_head_config, - ), + neck=GlobalAveragePooling(dim=2), + head=HierarchicalLinearClsHead(**copied_head_config, in_channels=backbone.num_features), multiclass_loss=nn.CrossEntropyLoss(), multilabel_loss=AsymmetricAngularLossWithIgnore(gamma_pos=0.0, gamma_neg=1.0, reduction="sum"), ) diff --git a/src/otx/algo/classification/torchvision_model.py b/src/otx/algo/classification/torchvision_model.py index e5931d9ce7f..effed540a54 100644 --- a/src/otx/algo/classification/torchvision_model.py +++ b/src/otx/algo/classification/torchvision_model.py @@ -14,12 +14,12 @@ from otx.algo.classification.backbones.torchvision import TorchvisionBackbone, TVModelType from otx.algo.classification.classifier import HLabelClassifier, ImageClassifier, SemiSLClassifier from otx.algo.classification.heads import ( - HierarchicalCBAMClsHead, LinearClsHead, MultiLabelLinearClsHead, SemiSLLinearClsHead, ) from otx.algo.classification.losses import AsymmetricAngularLossWithIgnore +from otx.algo.classification.mobilenet_v3 import HierarchicalLinearClsHead from otx.algo.classification.necks.gap import GlobalAveragePooling from otx.algo.classification.utils import get_classification_layers from otx.core.data.entity.classification import ( @@ -315,11 +315,8 @@ def _build_model(self, head_config: dict) -> nn.Module: backbone = TorchvisionBackbone(backbone=self.backbone, pretrained=self.pretrained) return HLabelClassifier( backbone=backbone, - neck=nn.Identity(), - head=HierarchicalCBAMClsHead( - in_channels=backbone.in_features, - **head_config, - ), + neck=GlobalAveragePooling(dim=2), + head=HierarchicalLinearClsHead(**head_config, in_channels=backbone.in_features), multiclass_loss=nn.CrossEntropyLoss(), multilabel_loss=AsymmetricAngularLossWithIgnore(gamma_pos=0.0, gamma_neg=1.0, reduction="sum"), ) diff --git a/src/otx/algo/classification/vit.py b/src/otx/algo/classification/vit.py index 55beb76deff..fd0ccc3c835 100644 --- a/src/otx/algo/classification/vit.py +++ b/src/otx/algo/classification/vit.py @@ -19,12 +19,12 @@ from otx.algo.classification.backbones.vision_transformer import VIT_ARCH_TYPE, VisionTransformer from otx.algo.classification.classifier import HLabelClassifier, ImageClassifier, SemiSLClassifier from otx.algo.classification.heads import ( - HierarchicalCBAMClsHead, MultiLabelLinearClsHead, SemiSLVisionTransformerClsHead, VisionTransformerClsHead, ) from otx.algo.classification.losses import AsymmetricAngularLossWithIgnore +from otx.algo.classification.mobilenet_v3 import HierarchicalLinearClsHead from otx.algo.classification.utils import get_classification_layers from otx.algo.explain.explain_algo import ViTReciproCAM, feature_vector_fn from otx.algo.utils.support_otx_v1 import OTXv1Helper @@ -466,11 +466,7 @@ def _build_model(self, head_config: dict) -> nn.Module: return HLabelClassifier( backbone=vit_backbone, neck=None, - 
head=HierarchicalCBAMClsHead( - in_channels=vit_backbone.embed_dim, - step_size=1, - **head_config, - ), + head=HierarchicalLinearClsHead(**head_config, in_channels=vit_backbone.embed_dim), multiclass_loss=nn.CrossEntropyLoss(), multilabel_loss=AsymmetricAngularLossWithIgnore(gamma_pos=0.0, gamma_neg=1.0, reduction="sum"), init_cfg=init_cfg, diff --git a/src/otx/algo/common/utils/assigners/dynamic_soft_label_assigner.py b/src/otx/algo/common/utils/assigners/dynamic_soft_label_assigner.py index 4807e5b4a36..e12b1d1b678 100644 --- a/src/otx/algo/common/utils/assigners/dynamic_soft_label_assigner.py +++ b/src/otx/algo/common/utils/assigners/dynamic_soft_label_assigner.py @@ -196,7 +196,7 @@ def assign( assigned_labels = assigned_gt_inds.new_full((num_bboxes,), -1) assigned_labels[valid_mask] = gt_labels[matched_gt_inds].long() max_overlaps = assigned_gt_inds.new_full((num_bboxes,), -INF, dtype=torch.float32) - max_overlaps[valid_mask] = matched_pred_ious + max_overlaps[valid_mask] = matched_pred_ious.to(max_overlaps) return AssignResult(num_gt, assigned_gt_inds, max_overlaps, labels=assigned_labels) def dynamic_k_matching( diff --git a/src/otx/algo/detection/heads/rtmdet_head.py b/src/otx/algo/detection/heads/rtmdet_head.py index 7f1fb39756e..6daa8637177 100644 --- a/src/otx/algo/detection/heads/rtmdet_head.py +++ b/src/otx/algo/detection/heads/rtmdet_head.py @@ -574,7 +574,7 @@ def _get_targets_single( # type: ignore[override] if len(pos_inds) > 0: # point-based pos_bbox_targets = sampling_result.pos_gt_bboxes - bbox_targets[pos_inds, :] = pos_bbox_targets + bbox_targets[pos_inds, :] = pos_bbox_targets.to(bbox_targets) labels[pos_inds] = sampling_result.pos_gt_labels if self.train_cfg["pos_weight"] <= 0: diff --git a/src/otx/algo/segmentation/backbones/__init__.py b/src/otx/algo/segmentation/backbones/__init__.py index 4c7a44cee9b..8b633cc21f8 100644 --- a/src/otx/algo/segmentation/backbones/__init__.py +++ b/src/otx/algo/segmentation/backbones/__init__.py @@ -3,8 +3,7 @@ # """Backbone modules for OTX segmentation model.""" -from .dinov2 import DinoVisionTransformer from .litehrnet import LiteHRNetBackbone from .mscan import MSCAN -__all__ = ["LiteHRNetBackbone", "DinoVisionTransformer", "MSCAN"] +__all__ = ["LiteHRNetBackbone", "MSCAN"] diff --git a/src/otx/algo/segmentation/backbones/dinov2.py b/src/otx/algo/segmentation/backbones/dinov2.py deleted file mode 100644 index 5468870ffef..00000000000 --- a/src/otx/algo/segmentation/backbones/dinov2.py +++ /dev/null @@ -1,98 +0,0 @@ -# Copyright (C) 2023 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -# -"""DINO-V2 model for the OTX classification.""" - -from __future__ import annotations - -import logging -import os -from functools import partial -from pathlib import Path - -import torch -from torch import nn - -from otx.algo.utils.mmengine_utils import load_checkpoint_to_model, load_from_http -from otx.utils.utils import get_class_initial_arguments - -logger = logging.getLogger() - - -class DinoVisionTransformer(nn.Module): - """DINO-v2 Model.""" - - def __init__( - self, - name: str, - freeze_backbone: bool, - out_index: list[int], - pretrained_weights: str | None = None, - ): - super().__init__() - self._init_args = get_class_initial_arguments() - - ci_data_root = os.environ.get("CI_DATA_ROOT") - pretrained: bool = True - if ci_data_root is not None and Path(ci_data_root).exists(): - pretrained = False - - self.backbone = torch.hub.load(repo_or_dir="facebookresearch/dinov2", model=name, pretrained=pretrained) - - if ci_data_root is 
not None and Path(ci_data_root).exists(): - ckpt_filename = f"{name}4_pretrain.pth" - ckpt_path = Path(ci_data_root) / "torch" / "hub" / "checkpoints" / ckpt_filename - if not ckpt_path.exists(): - msg = ( - f"Internal cache was specified but cannot find weights file: {ckpt_filename}. load from torch hub." - ) - logger.warning(msg) - self.backbone = torch.hub.load(repo_or_dir="facebookresearch/dinov2", model=name, pretrained=True) - else: - self.backbone.load_state_dict(torch.load(ckpt_path)) - - if freeze_backbone: - self._freeze_backbone(self.backbone) - - # take intermediate layers to preserve spatial dimension - self.backbone.forward = partial( - self.backbone.get_intermediate_layers, - n=out_index, - reshape=True, - ) - - if pretrained_weights is not None: - self.load_pretrained_weights(pretrained_weights) - - def _freeze_backbone(self, backbone: nn.Module) -> None: - """Freeze the backbone.""" - for _, v in backbone.named_parameters(): - v.requires_grad = False - - def init_weights(self) -> None: - """Initialize the weights.""" - # restrict rewriting backbone pretrained weights from torch.hub - # unless weights passed explicitly in config - if self.init_cfg: - return super().init_weights() - return None - - def forward(self, imgs: torch.Tensor) -> torch.Tensor: - """Forward function.""" - return self.backbone(imgs) - - def load_pretrained_weights(self, pretrained: str | None = None, prefix: str = "") -> None: - """Initialize weights.""" - checkpoint = None - if isinstance(pretrained, str) and Path(pretrained).exists(): - checkpoint = torch.load(pretrained, "cpu") - print(f"init weight - {pretrained}") - elif pretrained is not None: - cache_dir = Path.home() / ".cache" / "torch" / "hub" / "checkpoints" - checkpoint = load_from_http(filename=pretrained, map_location="cpu", model_dir=cache_dir) - print(f"init weight - {pretrained}") - if checkpoint is not None: - load_checkpoint_to_model(self, checkpoint, prefix=prefix) - - def __reduce__(self): - return (DinoVisionTransformer, self._init_args) diff --git a/src/otx/algo/segmentation/dino_v2_seg.py b/src/otx/algo/segmentation/dino_v2_seg.py index e8e5b810721..681094ff551 100644 --- a/src/otx/algo/segmentation/dino_v2_seg.py +++ b/src/otx/algo/segmentation/dino_v2_seg.py @@ -5,9 +5,14 @@ from __future__ import annotations +from functools import partial +from pathlib import Path from typing import TYPE_CHECKING, Any, ClassVar +from urllib.parse import urlparse -from otx.algo.segmentation.backbones import DinoVisionTransformer +from torch.hub import download_url_to_file + +from otx.algo.classification.backbones.vision_transformer import VisionTransformer from otx.algo.segmentation.heads import FCNHead from otx.algo.segmentation.losses import CrossEntropyLossWithIgnore from otx.algo.segmentation.segmentors import BaseSegmModel @@ -22,18 +27,41 @@ class DinoV2Seg(OTXSegmentationModel): """DinoV2Seg Model.""" AVAILABLE_MODEL_VERSIONS: ClassVar[list[str]] = [ - "dinov2_vits14", + "dinov2-small-seg", ] + PRETRAINED_WEIGHTS: ClassVar[dict[str, str]] = { + "dinov2-small-seg": "https://dl.fbaipublicfiles.com/dinov2/dinov2_vits14/dinov2_vits14_pretrain.pth", + } def _build_model(self) -> nn.Module: if self.model_version not in self.AVAILABLE_MODEL_VERSIONS: msg = f"Model version {self.model_version} is not supported." 
raise ValueError(msg) - - backbone = DinoVisionTransformer(name=self.model_version, freeze_backbone=True, out_index=[8, 9, 10, 11]) + backbone = VisionTransformer(arch=self.model_version, img_size=self.input_size) + backbone.forward = partial( # type: ignore[method-assign] + backbone.get_intermediate_layers, + n=[8, 9, 10, 11], + reshape=True, + ) decode_head = FCNHead(self.model_version, num_classes=self.num_classes) criterion = CrossEntropyLossWithIgnore(ignore_index=self.label_info.ignore_index) # type: ignore[attr-defined] + backbone.init_weights() + if self.model_version in self.PRETRAINED_WEIGHTS: + print(f"init weight - {self.PRETRAINED_WEIGHTS[self.model_version]}") + parts = urlparse(self.PRETRAINED_WEIGHTS[self.model_version]) + filename = Path(parts.path).name + + cache_dir = Path.home() / ".cache" / "torch" / "hub" / "checkpoints" + cache_file = cache_dir / filename + if not Path.exists(cache_file): + download_url_to_file(self.PRETRAINED_WEIGHTS[self.model_version], cache_file, "", progress=True) + backbone.load_pretrained(checkpoint_path=cache_file) + + # freeze backbone + for _, v in backbone.named_parameters(): + v.requires_grad = False + return BaseSegmModel( backbone=backbone, decode_head=decode_head, diff --git a/src/otx/algo/segmentation/heads/fcn_head.py b/src/otx/algo/segmentation/heads/fcn_head.py index 7f7801aa09e..0d4cff492bb 100644 --- a/src/otx/algo/segmentation/heads/fcn_head.py +++ b/src/otx/algo/segmentation/heads/fcn_head.py @@ -217,7 +217,7 @@ class FCNHead: "aggregator_merge_norm": "None", "aggregator_use_concat": False, }, - "dinov2_vits14": { + "dinov2-small-seg": { "normalization": partial(build_norm_layer, nn.SyncBatchNorm, requires_grad=True), "in_channels": [384, 384, 384, 384], "in_index": [0, 1, 2, 3], diff --git a/src/otx/core/data/dataset/action_classification.py b/src/otx/core/data/dataset/action_classification.py index 23391984423..4cfa4808487 100644 --- a/src/otx/core/data/dataset/action_classification.py +++ b/src/otx/core/data/dataset/action_classification.py @@ -37,6 +37,7 @@ def __init__( image_color_channel: ImageColorChannel = ImageColorChannel.BGR, stack_images: bool = True, to_tv_image: bool = True, + data_format: str = "", ) -> None: super().__init__( dm_subset, diff --git a/src/otx/core/data/dataset/anomaly.py b/src/otx/core/data/dataset/anomaly.py index 0f855f5b3d6..0ce571af51f 100644 --- a/src/otx/core/data/dataset/anomaly.py +++ b/src/otx/core/data/dataset/anomaly.py @@ -57,6 +57,7 @@ def __init__( image_color_channel: ImageColorChannel = ImageColorChannel.RGB, stack_images: bool = True, to_tv_image: bool = True, + data_format: str = "", ) -> None: self.task_type = task_type super().__init__( diff --git a/src/otx/core/data/dataset/base.py b/src/otx/core/data/dataset/base.py index 21e8d349a9c..a8d0fc59b73 100644 --- a/src/otx/core/data/dataset/base.py +++ b/src/otx/core/data/dataset/base.py @@ -70,6 +70,7 @@ class OTXDataset(Dataset, Generic[T_OTXDataEntity]): max_refetch: Maximum number of images to fetch in cache image_color_channel: Color channel of images stack_images: Whether or not to stack images in collate function in OTXBatchData entity. + data_format: Source data format, which was originally passed to datumaro (could be arrow for instance). 
""" @@ -83,6 +84,7 @@ def __init__( image_color_channel: ImageColorChannel = ImageColorChannel.RGB, stack_images: bool = True, to_tv_image: bool = True, + data_format: str = "", ) -> None: self.dm_subset = dm_subset self.transforms = transforms @@ -92,8 +94,11 @@ def __init__( self.image_color_channel = image_color_channel self.stack_images = stack_images self.to_tv_image = to_tv_image + self.data_format = data_format - if self.dm_subset.categories(): + if self.dm_subset.categories() and data_format == "arrow": + self.label_info = LabelInfo.from_dm_label_groups_arrow(self.dm_subset.categories()[AnnotationType.label]) + elif self.dm_subset.categories(): self.label_info = LabelInfo.from_dm_label_groups(self.dm_subset.categories()[AnnotationType.label]) else: self.label_info = NullLabelInfo() diff --git a/src/otx/core/data/dataset/classification.py b/src/otx/core/data/dataset/classification.py index 8f4f5ffc241..f955c5bb8fa 100644 --- a/src/otx/core/data/dataset/classification.py +++ b/src/otx/core/data/dataset/classification.py @@ -80,17 +80,21 @@ def _get_item_impl(self, index: int) -> MultilabelClsDataEntity | None: ignored_labels: list[int] = [] # This should be assigned form item img_data, img_shape, _ = self._get_img_data_and_shape(img) - label_anns = [] + label_ids = set() for ann in item.annotations: + # multilabel information stored in 'multi_label_ids' attribute when the source format is arrow + if "multi_label_ids" in ann.attributes: + for lbl_idx in ann.attributes["multi_label_ids"]: + label_ids.add(lbl_idx) + if isinstance(ann, Label): - label_anns.append(ann) + label_ids.add(ann.label) else: # If the annotation is not Label, it should be converted to Label. # For Chained Task: Detection (Bbox) -> Classification (Label) label = Label(label=ann.label) - if label not in label_anns: - label_anns.append(label) - labels = torch.as_tensor([ann.label for ann in label_anns]) + label_ids.add(label.label) + labels = torch.as_tensor(list(label_ids)) entity = MultilabelClsDataEntity( image=img_data, @@ -128,13 +132,22 @@ def __init__(self, **kwargs) -> None: self.dm_categories = self.dm_subset.categories()[AnnotationType.label] # Hlabel classification used HLabelInfo to insert the HLabelData. - self.label_info = HLabelInfo.from_dm_label_groups(self.dm_categories) + if self.data_format == "arrow": + # arrow format stores label IDs as names, have to deal with that here + self.label_info = HLabelInfo.from_dm_label_groups_arrow(self.dm_categories) + else: + self.label_info = HLabelInfo.from_dm_label_groups(self.dm_categories) + + self.id_to_name_mapping = dict(zip(self.label_info.label_ids, self.label_info.label_names)) + self.id_to_name_mapping[""] = "" + if self.label_info.num_multiclass_heads == 0: msg = "The number of multiclass heads should be larger than 0." raise ValueError(msg) - for dm_item in self.dm_subset: - self._add_ancestors(dm_item.annotations) + if self.data_format != "arrow": + for dm_item in self.dm_subset: + self._add_ancestors(dm_item.annotations) def _add_ancestors(self, label_anns: list[Label]) -> None: """Add ancestors recursively if some label miss the ancestor information. 
@@ -149,7 +162,7 @@ def _add_ancestors(self, label_anns: list[Label]) -> None: """ def _label_idx_to_name(idx: int) -> str: - return self.label_info.label_names[idx] + return self.dm_categories[idx].name def _label_name_to_idx(name: str) -> int: indices = [idx for idx, val in enumerate(self.label_info.label_names) if val == name] @@ -157,6 +170,8 @@ def _label_name_to_idx(name: str) -> int: def _get_label_group_idx(label_name: str) -> int: if isinstance(self.label_info, HLabelInfo): + if self.data_format == "arrow": + return self.label_info.class_to_group_idx[self.id_to_name_mapping[label_name]][0] return self.label_info.class_to_group_idx[label_name][0] msg = f"self.label_info should have HLabelInfo type, got {type(self.label_info)}" raise ValueError(msg) @@ -197,17 +212,22 @@ def _get_item_impl(self, index: int) -> HlabelClsDataEntity | None: ignored_labels: list[int] = [] # This should be assigned from item img_data, img_shape, _ = self._get_img_data_and_shape(img) - label_anns = [] + label_ids = set() for ann in item.annotations: + # in the h-cls scenario, multi-label information is stored in the 'multi_label_ids' attribute + if "multi_label_ids" in ann.attributes: + for lbl_idx in ann.attributes["multi_label_ids"]: + label_ids.add(lbl_idx) + if isinstance(ann, Label): - label_anns.append(ann) + label_ids.add(ann.label) else: # If the annotation is not Label, it should be converted to Label. # For Chained Task: Detection (Bbox) -> Classification (Label) label = Label(label=ann.label) - if label not in label_anns: - label_anns.append(label) - hlabel_labels = self._convert_label_to_hlabel_format(label_anns, ignored_labels) + label_ids.add(label.label) + + hlabel_labels = self._convert_label_to_hlabel_format([Label(label=idx) for idx in label_ids], ignored_labels) entity = HlabelClsDataEntity( image=img_data, @@ -256,18 +276,18 @@ def _convert_label_to_hlabel_format(self, label_anns: list[Label], ignored_label class_indices[i] = -1 for ann in label_anns: - ann_name = self.dm_categories.items[ann.label].name - ann_parent = self.dm_categories.items[ann.label].parent + if self.data_format == "arrow": + # skip unknown labels (for instance, the empty one) + if self.dm_categories.items[ann.label].name not in self.id_to_name_mapping: + continue + ann_name = self.id_to_name_mapping[self.dm_categories.items[ann.label].name] + else: + ann_name = self.dm_categories.items[ann.label].name group_idx, in_group_idx = self.label_info.class_to_group_idx[ann_name] - (parent_group_idx, parent_in_group_idx) = ( - self.label_info.class_to_group_idx[ann_parent] if ann_parent else (None, None) - ) if group_idx < num_multiclass_heads: class_indices[group_idx] = in_group_idx - if parent_group_idx is not None and parent_in_group_idx is not None: - class_indices[parent_group_idx] = parent_in_group_idx - elif not ignored_labels or ann.label not in ignored_labels: + elif ann.label not in ignored_labels: class_indices[num_multiclass_heads + in_group_idx] = 1 else: class_indices[num_multiclass_heads + in_group_idx] = -1 diff --git a/src/otx/core/data/dataset/keypoint_detection.py b/src/otx/core/data/dataset/keypoint_detection.py index c74b77c9319..bacb84bf643 100644 --- a/src/otx/core/data/dataset/keypoint_detection.py +++ b/src/otx/core/data/dataset/keypoint_detection.py @@ -54,9 +54,11 @@ def __init__( self.dm_subset = self._get_single_bbox_dataset(dm_subset) if self.dm_subset.categories(): + kp_labels = self.dm_subset.categories()[AnnotationType.points][0].labels self.label_info = LabelInfo( -
label_names=self.dm_subset.categories()[AnnotationType.points][0].labels, + label_names=kp_labels, label_groups=[], + label_ids=[str(i) for i in range(len(kp_labels))], ) else: self.label_info = NullLabelInfo() diff --git a/src/otx/core/data/dataset/segmentation.py b/src/otx/core/data/dataset/segmentation.py index a690dde42ad..61d9ec36a09 100644 --- a/src/otx/core/data/dataset/segmentation.py +++ b/src/otx/core/data/dataset/segmentation.py @@ -167,6 +167,7 @@ def __init__( stack_images: bool = True, to_tv_image: bool = True, ignore_index: int = 255, + data_format: str = "", ) -> None: super().__init__( dm_subset, @@ -187,6 +188,7 @@ def __init__( label_names=self.label_info.label_names, label_groups=self.label_info.label_groups, ignore_index=ignore_index, + label_ids=self.label_info.label_ids, ) self.ignore_index = ignore_index diff --git a/src/otx/core/data/factory.py b/src/otx/core/data/factory.py index 4ca229aacb1..949f61c988e 100644 --- a/src/otx/core/data/factory.py +++ b/src/otx/core/data/factory.py @@ -73,6 +73,7 @@ def create( # noqa: PLR0911 dm_subset: DmDataset, cfg_subset: SubsetConfig, mem_cache_handler: MemCacheHandlerBase, + data_format: str, mem_cache_img_max_size: tuple[int, int] | None = None, image_color_channel: ImageColorChannel = ImageColorChannel.RGB, stack_images: bool = True, @@ -85,6 +86,7 @@ def create( # noqa: PLR0911 common_kwargs = { "dm_subset": dm_subset, "transforms": transforms, + "data_format": data_format, "mem_cache_handler": mem_cache_handler, "mem_cache_img_max_size": mem_cache_img_max_size, "image_color_channel": image_color_channel, diff --git a/src/otx/core/data/module.py b/src/otx/core/data/module.py index 06f62f1c614..ce461251392 100644 --- a/src/otx/core/data/module.py +++ b/src/otx/core/data/module.py @@ -107,13 +107,6 @@ def __init__( # noqa: PLR0913 self.subsets: dict[str, OTXDataset] = {} self.save_hyperparameters(ignore=["input_size"]) - # TODO (Jaeguk): This is workaround for a bug in Datumaro. - # These lines should be removed after next datumaro release. 
- # https://github.com/openvinotoolkit/datumaro/pull/1223/files - from datumaro.plugins.data_formats.video import VIDEO_EXTENSIONS - - VIDEO_EXTENSIONS.append(".mp4") - dataset = DmDataset.import_from(self.data_root, format=self.data_format) if self.task != "H_LABEL_CLS": dataset = pre_filtering( @@ -193,6 +186,7 @@ def __init__( # noqa: PLR0913 dm_subset=dm_subset.as_dataset(), cfg_subset=config_mapping[name], mem_cache_handler=mem_cache_handler, + data_format=self.data_format, mem_cache_img_max_size=mem_cache_img_max_size, image_color_channel=image_color_channel, stack_images=stack_images, @@ -237,6 +231,7 @@ def __init__( # noqa: PLR0913 include_polygons=include_polygons, ignore_index=ignore_index, vpm_config=vpm_config, + data_format=self.data_format, ) self.subsets[transform_key] = unlabeled_dataset else: @@ -251,6 +246,7 @@ def __init__( # noqa: PLR0913 include_polygons=include_polygons, ignore_index=ignore_index, vpm_config=vpm_config, + data_format=self.data_format, ) self.subsets[name] = unlabeled_dataset diff --git a/src/otx/core/data/pre_filtering.py b/src/otx/core/data/pre_filtering.py index b61835b2750..08bd9fd5293 100644 --- a/src/otx/core/data/pre_filtering.py +++ b/src/otx/core/data/pre_filtering.py @@ -84,7 +84,7 @@ def remove_unused_labels(dataset: DmDataset, data_format: str, ignore_index: int used_labels = [0, *used_labels] if data_format == "common_semantic_segmentation_with_subset_dirs" and len(original_categories) < len(used_labels): msg = ( - "There are labeles mismatch in dataset categories and actuall categories comes from semantic masks." + "There is a mismatch between the dataset categories and the actual categories derived from the semantic masks." " Please check the `dataset_meta.json` file." ) raise ValueError(msg) diff --git a/src/otx/core/model/base.py b/src/otx/core/model/base.py index ac2331ff885..d4176fd0cbb 100644 --- a/src/otx/core/model/base.py +++ b/src/otx/core/model/base.py @@ -744,7 +744,7 @@ def lr_scheduler_step(self, scheduler: LRSchedulerTypeUnion, metric: Tensor) -> return super().lr_scheduler_step(scheduler=scheduler, metric=metric) if len(warmup_schedulers) != 1: - msg = "No more than two warmup schedulers coexist." + msg = "No more than one warmup scheduler can coexist." raise RuntimeError(msg) warmup_scheduler = next(iter(warmup_schedulers)) @@ -809,7 +809,11 @@ def _dispatch_label_info(label_info: LabelInfoTypes) -> LabelInfo: if isinstance(label_info, int): return LabelInfo.from_num_classes(num_classes=label_info) if isinstance(label_info, Sequence) and all(isinstance(name, str) for name in label_info): - return LabelInfo(label_names=label_info, label_groups=[label_info]) + return LabelInfo( + label_names=label_info, + label_groups=[label_info], + label_ids=[str(i) for i in range(len(label_info))], + ) if isinstance(label_info, LabelInfo): return label_info @@ -1113,7 +1117,7 @@ def _create_label_info_from_ov_ir(self) -> LabelInfo: ) logger.warning(msg) - return LabelInfo(label_names=label_names, label_groups=[label_names]) + return LabelInfo(label_names=label_names, label_groups=[label_names], label_ids=[]) msg = "Cannot construct LabelInfo from OpenVINO IR. Please check this model is trained by OTX."
raise ValueError(msg) diff --git a/src/otx/core/model/segmentation.py b/src/otx/core/model/segmentation.py index 0003307e376..1aa1a7e933d 100644 --- a/src/otx/core/model/segmentation.py +++ b/src/otx/core/model/segmentation.py @@ -245,7 +245,11 @@ def _dispatch_label_info(label_info: LabelInfoTypes) -> LabelInfo: if isinstance(label_info, int): return SegLabelInfo.from_num_classes(num_classes=label_info) if isinstance(label_info, Sequence) and all(isinstance(name, str) for name in label_info): - return SegLabelInfo(label_names=label_info, label_groups=[label_info]) + return SegLabelInfo( + label_names=label_info, + label_groups=[label_info], + label_ids=[str(i) for i in range(len(label_info))], + ) if isinstance(label_info, SegLabelInfo): return label_info diff --git a/src/otx/core/schedulers/warmup_schedulers.py b/src/otx/core/schedulers/warmup_schedulers.py index 6de763bb52b..0b1d12a711e 100644 --- a/src/otx/core/schedulers/warmup_schedulers.py +++ b/src/otx/core/schedulers/warmup_schedulers.py @@ -19,8 +19,9 @@ class LinearWarmupScheduler(LambdaLR): """Linear Warmup scheduler. Args: - num_warmup_steps: Learning rate will linearly increased during the period same as this number. - warmup_interval: If "epoch", count the number of steps for the warmup period. + optimizer (Optimizer): Optimizer to apply the scheduler. + num_warmup_steps (int): The learning rate is increased linearly for this number of steps. + interval (Literal["step", "epoch"]): If "epoch", count the number of epochs for the warmup period. Otherwise, the iteration step will be the warmup period. """ @@ -55,7 +56,7 @@ class LinearWarmupSchedulerCallable: main_scheduler_callable: Callable that creates the main LR scheduler. num_warmup_steps: The learning rate is increased linearly for this number of steps. If it is less than or equal to zero, `LinearWarmupScheduler` is not created. - warmup_interval: If "epoch", count the number of steps for the warmup period. + warmup_interval: If "epoch", count the number of epochs for the warmup period. Otherwise, the iteration step will be the warmup period. monitor: If given, override the main scheduler's `monitor` attribute.
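
Example:
A configuration sketch (values mirror the updated recipes below; the argument names follow the attribute list above, and the import is assumed from lightning's CLI helpers):

>>> from lightning.pytorch.cli import ReduceLROnPlateau
>>> scheduler_callable = LinearWarmupSchedulerCallable(
...     main_scheduler_callable=lambda optimizer: ReduceLROnPlateau(
...         optimizer, monitor="val/accuracy", mode="max", factor=0.5, patience=3
...     ),
...     num_warmup_steps=100,
...     warmup_interval="step",
... )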
""" diff --git a/src/otx/core/types/export.py b/src/otx/core/types/export.py index 7f64febe607..a66541d0873 100644 --- a/src/otx/core/types/export.py +++ b/src/otx/core/types/export.py @@ -104,13 +104,15 @@ def to_metadata(self) -> dict[tuple[str, str], str]: all_label_ids = "None " for lbl in self.label_info.label_names: all_labels += lbl.replace(" ", "_") + " " - all_label_ids += lbl.replace(" ", "_") + " " + for lbl_id in self.label_info.label_ids: + all_label_ids += lbl_id + " " else: all_labels = "" all_label_ids = "" for lbl in self.label_info.label_names: all_labels += lbl.replace(" ", "_") + " " - all_label_ids += lbl.replace(" ", "_") + " " + for lbl_id in self.label_info.label_ids: + all_label_ids += lbl_id + " " metadata = { # Common diff --git a/src/otx/core/types/label.py b/src/otx/core/types/label.py index c89f67d7fd6..8e0b3c1d384 100644 --- a/src/otx/core/types/label.py +++ b/src/otx/core/types/label.py @@ -5,10 +5,13 @@ from __future__ import annotations +import copy import json from dataclasses import asdict, dataclass from typing import TYPE_CHECKING, Any +from datumaro.components.annotation import GroupType + if TYPE_CHECKING: from datumaro import Label, LabelCategories @@ -27,6 +30,7 @@ class LabelInfo: """Object to represent label information.""" label_names: list[str] + label_ids: list[str] label_groups: list[list[str]] @property @@ -51,10 +55,12 @@ def from_num_classes(cls, num_classes: int) -> LabelInfo: return NullLabelInfo() label_names = [f"label_{idx}" for idx in range(num_classes)] + label_ids = [str(i) for i in range(num_classes)] return cls( label_names=label_names, label_groups=[label_names], + label_ids=label_ids, ) @classmethod @@ -79,6 +85,38 @@ def from_dm_label_groups(cls, dm_label_categories: LabelCategories) -> LabelInfo return LabelInfo( label_names=label_names, label_groups=label_groups, + label_ids=[str(i) for i in range(len(label_names))], + ) + + @classmethod + def from_dm_label_groups_arrow(cls, dm_label_categories: LabelCategories) -> LabelInfo: + """Overload to support datumaro's arrow format.""" + label_names = [] + for item in dm_label_categories.items: + for attr in item.attributes: + if attr.startswith("__name__"): + label_names.append(attr[len("__name__") :]) + break + + if len(label_names) != len(dm_label_categories.items): + msg = "Wrong arrow format: can not extract label names from attributes" + raise ValueError(msg) + + id_to_name_mapping = {item.name: label_names[i] for i, item in enumerate(dm_label_categories.items)} + + for label_group in dm_label_categories.label_groups: + label_group.labels = [id_to_name_mapping.get(label, label) for label in label_group.labels] + + label_groups = [label_group.labels for label_group in dm_label_categories.label_groups] + if len(label_groups) == 0: # Single-label classification + label_groups = [label_names] + + label_ids = [item.name for item in dm_label_categories.items] + + return LabelInfo( + label_names=label_names, + label_groups=label_groups, + label_ids=label_ids, ) def as_dict(self) -> dict[str, Any]: @@ -265,7 +303,7 @@ def convert_labels_if_needed( single_label_group_info["class_to_idx"], ) - label_to_idx = {lbl: i for i, lbl in enumerate(merged_class_to_idx.keys())} + label_to_idx = {lbl: i for i, lbl in enumerate(label_names)} return HLabelInfo( label_names=label_names, @@ -279,8 +317,60 @@ def convert_labels_if_needed( label_to_idx=label_to_idx, label_tree_edges=get_label_tree_edges(dm_label_categories.items), empty_multiclass_head_indices=[], # consider the label removing case + 
label_ids=[str(i) for i in range(len(label_names))], ) + @classmethod + def from_dm_label_groups_arrow(cls, dm_label_categories: LabelCategories) -> HLabelInfo: + """Generate HLabelData from the Datumaro LabelCategories. Arrow-specific implementation. + + Args: + dm_label_categories (LabelCategories): the label categories of datumaro. + """ + dm_label_categories = copy.deepcopy(dm_label_categories) + + empty_label_name = None + for label_group in dm_label_categories.label_groups: + if label_group.group_type == GroupType.RESTRICTED: + empty_label_name = label_group.labels[0] + + dm_label_categories.label_groups = [ + group for group in dm_label_categories.label_groups if group.group_type != GroupType.RESTRICTED + ] + + empty_label_id = None + label_names = [] + for item in dm_label_categories.items: + for attr in item.attributes: + if attr.startswith("__name__"): + name = attr[len("__name__") :] + if name == empty_label_name: + empty_label_id = item.name + label_names.append(name) + break + + if len(label_names) != len(dm_label_categories.items): + msg = "Wrong arrow file: can not extract label names from attributes" + raise ValueError(msg) + + if empty_label_name is not None: + label_names.remove(empty_label_name) + dm_label_categories.items = [item for item in dm_label_categories.items if item.name != empty_label_id] + label_ids = [item.name for item in dm_label_categories.items] + + id_to_name_mapping = {item.name: label_names[i] for i, item in enumerate(dm_label_categories.items)} + + for i, item in enumerate(dm_label_categories.items): + item.name = label_names[i] + item.parent = id_to_name_mapping.get(item.parent, item.parent) + + for label_group in dm_label_categories.label_groups: + label_group.labels = [id_to_name_mapping.get(label, label) for label in label_group.labels] + + obj = cls.from_dm_label_groups(dm_label_categories) + obj.label_ids = label_ids + return obj + def as_head_config_dict(self) -> dict[str, Any]: """Return a dictionary including params needed to configure the HLabel MMPretrained head network.""" return { @@ -326,7 +416,7 @@ def from_num_classes(cls, num_classes: int) -> LabelInfo: if num_classes == 1: # binary segmentation label_names = ["background", "label_0"] - return SegLabelInfo(label_names=label_names, label_groups=[label_names]) + return SegLabelInfo(label_names=label_names, label_groups=[label_names], label_ids=["0", "1"]) return super().from_num_classes(num_classes) @@ -336,7 +426,7 @@ class NullLabelInfo(LabelInfo): """Represent no label information. It is used for Visual Prompting tasks.""" def __init__(self) -> None: - super().__init__(label_names=[], label_groups=[[]]) + super().__init__(label_names=[], label_groups=[[]], label_ids=[]) @classmethod def from_json(cls, _: str) -> LabelInfo: @@ -349,7 +439,7 @@ class AnomalyLabelInfo(LabelInfo): """Represent no label information. 
It is used for Anomaly tasks.""" def __init__(self) -> None: - super().__init__(label_names=["Normal", "Anomaly"], label_groups=[["Normal", "Anomaly"]]) + super().__init__(label_names=["Normal", "Anomaly"], label_groups=[["Normal", "Anomaly"]], label_ids=["0", "1"]) # Dispatching rules: diff --git a/src/otx/recipe/_base_/train.yaml b/src/otx/recipe/_base_/train.yaml index bb19266a9ee..169e4e8a764 100644 --- a/src/otx/recipe/_base_/train.yaml +++ b/src/otx/recipe/_base_/train.yaml @@ -39,6 +39,8 @@ callbacks: init_args: max_interval: 5 decay: -0.025 + min_earlystop_patience: 5 + min_lrschedule_patience: 3 logger: - class_path: lightning.pytorch.loggers.csv_logs.CSVLogger init_args: diff --git a/src/otx/recipe/anomaly_classification/stfpm.yaml b/src/otx/recipe/anomaly_classification/stfpm.yaml index ec1c6af8ddc..91cf676c201 100644 --- a/src/otx/recipe/anomaly_classification/stfpm.yaml +++ b/src/otx/recipe/anomaly_classification/stfpm.yaml @@ -16,7 +16,7 @@ overrides: precision: 32 max_epochs: 100 callbacks: - - class_path: lightning.pytorch.callbacks.EarlyStopping + - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: patience: 5 mode: max diff --git a/src/otx/recipe/anomaly_detection/stfpm.yaml b/src/otx/recipe/anomaly_detection/stfpm.yaml index b13534505a4..25bb7be88bb 100644 --- a/src/otx/recipe/anomaly_detection/stfpm.yaml +++ b/src/otx/recipe/anomaly_detection/stfpm.yaml @@ -21,7 +21,7 @@ overrides: precision: 32 max_epochs: 100 callbacks: - - class_path: lightning.pytorch.callbacks.EarlyStopping + - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: patience: 5 mode: max diff --git a/src/otx/recipe/anomaly_segmentation/stfpm.yaml b/src/otx/recipe/anomaly_segmentation/stfpm.yaml index 9a3d9c85d6e..604ff9ba029 100644 --- a/src/otx/recipe/anomaly_segmentation/stfpm.yaml +++ b/src/otx/recipe/anomaly_segmentation/stfpm.yaml @@ -16,7 +16,7 @@ overrides: precision: 32 max_epochs: 100 callbacks: - - class_path: lightning.pytorch.callbacks.EarlyStopping + - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: patience: 5 mode: max diff --git a/src/otx/recipe/classification/h_label_cls/deit_tiny.yaml b/src/otx/recipe/classification/h_label_cls/deit_tiny.yaml index b36f48e14c9..1191e0e22d3 100644 --- a/src/otx/recipe/classification/h_label_cls/deit_tiny.yaml +++ b/src/otx/recipe/classification/h_label_cls/deit_tiny.yaml @@ -10,12 +10,16 @@ model: weight_decay: 0.05 scheduler: - class_path: lightning.pytorch.cli.ReduceLROnPlateau + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - mode: max - factor: 0.5 - patience: 1 - monitor: val/accuracy + num_warmup_steps: 0 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 3 + monitor: val/accuracy engine: task: H_LABEL_CLS @@ -26,11 +30,12 @@ callback_monitor: val/accuracy data: ../../_base_/data/classification.yaml overrides: max_epochs: 90 - callbacks: - - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup - init_args: - patience: 3 data: task: H_LABEL_CLS data_format: datumaro + + callbacks: + - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup + init_args: + patience: 5 diff --git a/src/otx/recipe/classification/h_label_cls/efficientnet_b0.yaml b/src/otx/recipe/classification/h_label_cls/efficientnet_b0.yaml index d0ea7daec7b..d2e121fd4a2 100644 --- 
a/src/otx/recipe/classification/h_label_cls/efficientnet_b0.yaml +++ b/src/otx/recipe/classification/h_label_cls/efficientnet_b0.yaml @@ -11,12 +11,16 @@ model: weight_decay: 0.0001 scheduler: - class_path: lightning.pytorch.cli.ReduceLROnPlateau + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - mode: max - factor: 0.5 - patience: 1 - monitor: val/accuracy + num_warmup_steps: 0 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 3 + monitor: val/accuracy engine: task: H_LABEL_CLS @@ -29,11 +33,12 @@ overrides: reset: - data.train_subset.transforms - max_epochs: 90 callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 + + max_epochs: 90 data: task: H_LABEL_CLS diff --git a/src/otx/recipe/classification/h_label_cls/efficientnet_v2.yaml b/src/otx/recipe/classification/h_label_cls/efficientnet_v2.yaml index fc3f6abeab8..a423ee7046f 100644 --- a/src/otx/recipe/classification/h_label_cls/efficientnet_v2.yaml +++ b/src/otx/recipe/classification/h_label_cls/efficientnet_v2.yaml @@ -10,6 +10,18 @@ model: momentum: 0.9 weight_decay: 0.0001 + scheduler: + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable + init_args: + num_warmup_steps: 0 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 3 + monitor: val/accuracy + engine: task: H_LABEL_CLS device: auto @@ -25,8 +37,11 @@ overrides: callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 warmup_iters: 750 + - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup + init_args: + patience: 5 + data: task: H_LABEL_CLS data_format: datumaro diff --git a/src/otx/recipe/classification/h_label_cls/mobilenet_v3_large.yaml b/src/otx/recipe/classification/h_label_cls/mobilenet_v3_large.yaml index 211bc8fa883..c94b7dd16b6 100644 --- a/src/otx/recipe/classification/h_label_cls/mobilenet_v3_large.yaml +++ b/src/otx/recipe/classification/h_label_cls/mobilenet_v3_large.yaml @@ -19,7 +19,7 @@ model: init_args: mode: max factor: 0.5 - patience: 1 + patience: 3 monitor: val/accuracy engine: @@ -31,10 +31,11 @@ callback_monitor: val/accuracy data: ../../_base_/data/classification.yaml overrides: max_epochs: 90 + callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 data: task: H_LABEL_CLS diff --git a/src/otx/recipe/classification/h_label_cls/tv_efficientnet_b3.yaml b/src/otx/recipe/classification/h_label_cls/tv_efficientnet_b3.yaml index 2078c98b43b..d36cdfff5b7 100644 --- a/src/otx/recipe/classification/h_label_cls/tv_efficientnet_b3.yaml +++ b/src/otx/recipe/classification/h_label_cls/tv_efficientnet_b3.yaml @@ -12,10 +12,14 @@ model: weight_decay: 0.0001 scheduler: - class_path: torch.optim.lr_scheduler.CosineAnnealingLR + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - T_max: 100000 - eta_min: 0 + num_warmup_steps: 0 + main_scheduler_callable: + class_path: torch.optim.lr_scheduler.CosineAnnealingLR + init_args: + T_max: 100000 + eta_min: 0 engine: task: H_LABEL_CLS @@ -29,10 +33,11 @@ overrides: - data.train_subset.transforms max_epochs: 90 + callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 data: task: H_LABEL_CLS diff --git 
a/src/otx/recipe/classification/h_label_cls/tv_efficientnet_v2_l.yaml b/src/otx/recipe/classification/h_label_cls/tv_efficientnet_v2_l.yaml index 0f2d7b60a6a..9bec7e924e6 100644 --- a/src/otx/recipe/classification/h_label_cls/tv_efficientnet_v2_l.yaml +++ b/src/otx/recipe/classification/h_label_cls/tv_efficientnet_v2_l.yaml @@ -12,10 +12,14 @@ model: weight_decay: 0.0001 scheduler: - class_path: torch.optim.lr_scheduler.CosineAnnealingLR + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - T_max: 100000 - eta_min: 0 + num_warmup_steps: 0 + main_scheduler_callable: + class_path: torch.optim.lr_scheduler.CosineAnnealingLR + init_args: + T_max: 100000 + eta_min: 0 engine: task: H_LABEL_CLS @@ -29,10 +33,11 @@ overrides: - data.train_subset.transforms max_epochs: 90 + callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 data: task: H_LABEL_CLS diff --git a/src/otx/recipe/classification/h_label_cls/tv_mobilenet_v3_small.yaml b/src/otx/recipe/classification/h_label_cls/tv_mobilenet_v3_small.yaml index faab071ff5d..d00a5109f7d 100644 --- a/src/otx/recipe/classification/h_label_cls/tv_mobilenet_v3_small.yaml +++ b/src/otx/recipe/classification/h_label_cls/tv_mobilenet_v3_small.yaml @@ -12,10 +12,14 @@ model: weight_decay: 0.0001 scheduler: - class_path: torch.optim.lr_scheduler.CosineAnnealingLR + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - T_max: 100000 - eta_min: 0 + num_warmup_steps: 0 + main_scheduler_callable: + class_path: torch.optim.lr_scheduler.CosineAnnealingLR + init_args: + T_max: 100000 + eta_min: 0 engine: task: H_LABEL_CLS @@ -29,10 +33,11 @@ overrides: - data.train_subset.transforms max_epochs: 90 + callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 data: task: H_LABEL_CLS diff --git a/src/otx/recipe/classification/multi_class_cls/deit_tiny.yaml b/src/otx/recipe/classification/multi_class_cls/deit_tiny.yaml index f5446d3cca6..cdc06e19f52 100644 --- a/src/otx/recipe/classification/multi_class_cls/deit_tiny.yaml +++ b/src/otx/recipe/classification/multi_class_cls/deit_tiny.yaml @@ -12,12 +12,16 @@ model: weight_decay: 0.05 scheduler: - class_path: lightning.pytorch.cli.ReduceLROnPlateau + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - mode: max - factor: 0.5 - patience: 1 - monitor: val/accuracy + num_warmup_steps: 0 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 3 + monitor: val/accuracy engine: task: MULTI_CLASS_CLS @@ -28,7 +32,8 @@ callback_monitor: val/accuracy data: ../../_base_/data/classification.yaml overrides: max_epochs: 90 + callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 diff --git a/src/otx/recipe/classification/multi_class_cls/dino_v2.yaml b/src/otx/recipe/classification/multi_class_cls/dino_v2.yaml index 300091fab8c..a11967f1068 100644 --- a/src/otx/recipe/classification/multi_class_cls/dino_v2.yaml +++ b/src/otx/recipe/classification/multi_class_cls/dino_v2.yaml @@ -11,12 +11,16 @@ model: weight_decay: 0.05 scheduler: - class_path: lightning.pytorch.cli.ReduceLROnPlateau + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - mode: max - factor: 0.5 - patience: 1 - monitor: val/accuracy + num_warmup_steps: 0 + main_scheduler_callable: + 
class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 3 + monitor: val/accuracy engine: task: MULTI_CLASS_CLS @@ -27,7 +31,8 @@ callback_monitor: val/accuracy data: ../../_base_/data/classification.yaml overrides: max_epochs: 90 + callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 diff --git a/src/otx/recipe/classification/multi_class_cls/efficientnet_b0.yaml b/src/otx/recipe/classification/multi_class_cls/efficientnet_b0.yaml index 872d28789ef..bab3370201c 100644 --- a/src/otx/recipe/classification/multi_class_cls/efficientnet_b0.yaml +++ b/src/otx/recipe/classification/multi_class_cls/efficientnet_b0.yaml @@ -12,12 +12,16 @@ model: weight_decay: 0.0001 scheduler: - class_path: lightning.pytorch.cli.ReduceLROnPlateau + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - mode: max - factor: 0.5 - patience: 1 - monitor: val/accuracy + num_warmup_steps: 0 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 3 + monitor: val/accuracy engine: task: MULTI_CLASS_CLS @@ -30,11 +34,12 @@ overrides: reset: - data.train_subset.transforms - max_epochs: 90 callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 + + max_epochs: 90 data: train_subset: diff --git a/src/otx/recipe/classification/multi_class_cls/efficientnet_v2.yaml b/src/otx/recipe/classification/multi_class_cls/efficientnet_v2.yaml index 0cb77ef8852..2eed9516f8e 100644 --- a/src/otx/recipe/classification/multi_class_cls/efficientnet_v2.yaml +++ b/src/otx/recipe/classification/multi_class_cls/efficientnet_v2.yaml @@ -12,12 +12,16 @@ model: weight_decay: 0.0001 scheduler: - class_path: lightning.pytorch.cli.ReduceLROnPlateau + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - mode: max - factor: 0.5 - patience: 1 - monitor: val/accuracy + num_warmup_steps: 0 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 3 + monitor: val/accuracy engine: task: MULTI_CLASS_CLS @@ -30,11 +34,12 @@ overrides: reset: - data.train_subset.transforms - max_epochs: 90 callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 + + max_epochs: 90 data: train_subset: diff --git a/src/otx/recipe/classification/multi_class_cls/mobilenet_v3_large.yaml b/src/otx/recipe/classification/multi_class_cls/mobilenet_v3_large.yaml index c4c6946fd6e..fdef97ef9c8 100644 --- a/src/otx/recipe/classification/multi_class_cls/mobilenet_v3_large.yaml +++ b/src/otx/recipe/classification/multi_class_cls/mobilenet_v3_large.yaml @@ -20,7 +20,7 @@ model: init_args: mode: max factor: 0.5 - patience: 1 + patience: 3 monitor: val/accuracy engine: @@ -32,7 +32,8 @@ callback_monitor: val/accuracy data: ../../_base_/data/classification.yaml overrides: max_epochs: 90 + callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 diff --git a/src/otx/recipe/classification/multi_class_cls/tv_efficientnet_b3.yaml b/src/otx/recipe/classification/multi_class_cls/tv_efficientnet_b3.yaml index f06b3b36e32..ccd26a6535e 100644 --- a/src/otx/recipe/classification/multi_class_cls/tv_efficientnet_b3.yaml +++ 
b/src/otx/recipe/classification/multi_class_cls/tv_efficientnet_b3.yaml @@ -12,10 +12,14 @@ model: weight_decay: 0.0001 scheduler: - class_path: torch.optim.lr_scheduler.CosineAnnealingLR + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - T_max: 100000 - eta_min: 0 + num_warmup_steps: 0 + main_scheduler_callable: + class_path: torch.optim.lr_scheduler.CosineAnnealingLR + init_args: + T_max: 100000 + eta_min: 0 engine: task: MULTI_CLASS_CLS @@ -26,7 +30,8 @@ callback_monitor: val/accuracy data: ../../_base_/data/classification.yaml overrides: max_epochs: 90 + callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 diff --git a/src/otx/recipe/classification/multi_class_cls/tv_efficientnet_v2_l.yaml b/src/otx/recipe/classification/multi_class_cls/tv_efficientnet_v2_l.yaml index c72714e9433..06d702e8576 100644 --- a/src/otx/recipe/classification/multi_class_cls/tv_efficientnet_v2_l.yaml +++ b/src/otx/recipe/classification/multi_class_cls/tv_efficientnet_v2_l.yaml @@ -12,10 +12,14 @@ model: weight_decay: 0.0001 scheduler: - class_path: torch.optim.lr_scheduler.CosineAnnealingLR + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - T_max: 100000 - eta_min: 0 + num_warmup_steps: 0 + main_scheduler_callable: + class_path: torch.optim.lr_scheduler.CosineAnnealingLR + init_args: + T_max: 100000 + eta_min: 0 engine: task: MULTI_CLASS_CLS @@ -26,7 +30,8 @@ callback_monitor: val/accuracy data: ../../_base_/data/classification.yaml overrides: max_epochs: 90 + callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 diff --git a/src/otx/recipe/classification/multi_class_cls/tv_mobilenet_v3_small.yaml b/src/otx/recipe/classification/multi_class_cls/tv_mobilenet_v3_small.yaml index 4c6975c241a..9d626812765 100644 --- a/src/otx/recipe/classification/multi_class_cls/tv_mobilenet_v3_small.yaml +++ b/src/otx/recipe/classification/multi_class_cls/tv_mobilenet_v3_small.yaml @@ -12,10 +12,14 @@ model: weight_decay: 0.0001 scheduler: - class_path: torch.optim.lr_scheduler.CosineAnnealingLR + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - T_max: 100000 - eta_min: 0 + num_warmup_steps: 0 + main_scheduler_callable: + class_path: torch.optim.lr_scheduler.CosineAnnealingLR + init_args: + T_max: 100000 + eta_min: 0 engine: task: MULTI_CLASS_CLS @@ -26,7 +30,8 @@ callback_monitor: val/accuracy data: ../../_base_/data/classification.yaml overrides: max_epochs: 90 + callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 diff --git a/src/otx/recipe/classification/multi_label_cls/deit_tiny.yaml b/src/otx/recipe/classification/multi_label_cls/deit_tiny.yaml index afb14dd046f..623bc178f4b 100644 --- a/src/otx/recipe/classification/multi_label_cls/deit_tiny.yaml +++ b/src/otx/recipe/classification/multi_label_cls/deit_tiny.yaml @@ -11,12 +11,16 @@ model: weight_decay: 0.05 scheduler: - class_path: lightning.pytorch.cli.ReduceLROnPlateau + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - mode: max - factor: 0.5 - patience: 1 - monitor: val/accuracy + num_warmup_steps: 0 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 3 + monitor: val/accuracy engine: task: MULTI_LABEL_CLS @@ -27,14 +31,11 @@ callback_monitor: val/accuracy 
data: ../../_base_/data/classification.yaml overrides: max_epochs: 200 + callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 4 - - class_path: otx.algo.callbacks.adaptive_train_scheduling.AdaptiveTrainScheduling - init_args: - min_earlystop_patience: 4 - min_lrschedule_patience: 3 + patience: 5 data: task: MULTI_LABEL_CLS diff --git a/src/otx/recipe/classification/multi_label_cls/efficientnet_b0.yaml b/src/otx/recipe/classification/multi_label_cls/efficientnet_b0.yaml index d2b11411a51..34b741f6343 100644 --- a/src/otx/recipe/classification/multi_label_cls/efficientnet_b0.yaml +++ b/src/otx/recipe/classification/multi_label_cls/efficientnet_b0.yaml @@ -12,12 +12,16 @@ model: weight_decay: 0.0005 scheduler: - class_path: lightning.pytorch.cli.ReduceLROnPlateau + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - mode: max - factor: 0.5 - patience: 1 - monitor: val/accuracy + num_warmup_steps: 0 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 3 + monitor: val/accuracy engine: task: MULTI_LABEL_CLS @@ -30,11 +34,12 @@ overrides: reset: - data.train_subset.transforms - max_epochs: 200 callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 + + max_epochs: 200 data: task: MULTI_LABEL_CLS diff --git a/src/otx/recipe/classification/multi_label_cls/efficientnet_v2.yaml b/src/otx/recipe/classification/multi_label_cls/efficientnet_v2.yaml index 87177eb1e17..9913871a850 100644 --- a/src/otx/recipe/classification/multi_label_cls/efficientnet_v2.yaml +++ b/src/otx/recipe/classification/multi_label_cls/efficientnet_v2.yaml @@ -12,12 +12,16 @@ model: weight_decay: 0.0005 scheduler: - class_path: lightning.pytorch.cli.ReduceLROnPlateau + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - mode: max - factor: 0.5 - patience: 1 - monitor: val/accuracy + num_warmup_steps: 0 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.5 + patience: 3 + monitor: val/accuracy engine: task: MULTI_LABEL_CLS @@ -30,15 +34,12 @@ overrides: reset: - data.train_subset.transforms - max_epochs: 200 callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 4 - - class_path: otx.algo.callbacks.adaptive_train_scheduling.AdaptiveTrainScheduling - init_args: - min_earlystop_patience: 4 - min_lrschedule_patience: 3 + patience: 5 + + max_epochs: 200 data: task: MULTI_LABEL_CLS diff --git a/src/otx/recipe/classification/multi_label_cls/mobilenet_v3_large.yaml b/src/otx/recipe/classification/multi_label_cls/mobilenet_v3_large.yaml index 02021708453..f9322f22f07 100644 --- a/src/otx/recipe/classification/multi_label_cls/mobilenet_v3_large.yaml +++ b/src/otx/recipe/classification/multi_label_cls/mobilenet_v3_large.yaml @@ -20,7 +20,7 @@ model: init_args: mode: max factor: 0.5 - patience: 1 + patience: 3 monitor: val/accuracy engine: @@ -31,11 +31,11 @@ callback_monitor: val/accuracy data: ../../_base_/data/classification.yaml overrides: - callbacks: - - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup - init_args: - patience: 3 - data: task: MULTI_LABEL_CLS data_format: datumaro + + callbacks: + - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup + init_args: + patience: 5 diff --git 
a/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_b3.yaml b/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_b3.yaml index 9579f8e5e57..ebc03324933 100644 --- a/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_b3.yaml +++ b/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_b3.yaml @@ -12,10 +12,14 @@ model: weight_decay: 0.0001 scheduler: - class_path: torch.optim.lr_scheduler.CosineAnnealingLR + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - T_max: 100000 - eta_min: 0 + num_warmup_steps: 0 + main_scheduler_callable: + class_path: torch.optim.lr_scheduler.CosineAnnealingLR + init_args: + T_max: 100000 + eta_min: 0 engine: task: MULTI_LABEL_CLS @@ -31,7 +35,7 @@ overrides: callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 data: task: MULTI_LABEL_CLS diff --git a/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_v2_l.yaml b/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_v2_l.yaml index 3003b26eb48..a1992d2b398 100644 --- a/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_v2_l.yaml +++ b/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_v2_l.yaml @@ -12,10 +12,14 @@ model: weight_decay: 0.0001 scheduler: - class_path: torch.optim.lr_scheduler.CosineAnnealingLR + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - T_max: 100000 - eta_min: 0 + num_warmup_steps: 0 + main_scheduler_callable: + class_path: torch.optim.lr_scheduler.CosineAnnealingLR + init_args: + T_max: 100000 + eta_min: 0 engine: task: MULTI_LABEL_CLS @@ -31,11 +35,7 @@ overrides: callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 4 - - class_path: otx.algo.callbacks.adaptive_train_scheduling.AdaptiveTrainScheduling - init_args: - min_earlystop_patience: 4 - min_lrschedule_patience: 3 + patience: 5 data: task: MULTI_LABEL_CLS diff --git a/src/otx/recipe/classification/multi_label_cls/tv_mobilenet_v3_small.yaml b/src/otx/recipe/classification/multi_label_cls/tv_mobilenet_v3_small.yaml index 492e835ef62..99ef63b59e1 100644 --- a/src/otx/recipe/classification/multi_label_cls/tv_mobilenet_v3_small.yaml +++ b/src/otx/recipe/classification/multi_label_cls/tv_mobilenet_v3_small.yaml @@ -12,10 +12,14 @@ model: weight_decay: 0.0001 scheduler: - class_path: torch.optim.lr_scheduler.CosineAnnealingLR + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - T_max: 100000 - eta_min: 0 + num_warmup_steps: 0 + main_scheduler_callable: + class_path: torch.optim.lr_scheduler.CosineAnnealingLR + init_args: + T_max: 100000 + eta_min: 0 engine: task: MULTI_LABEL_CLS @@ -31,7 +35,7 @@ overrides: callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 data: task: MULTI_LABEL_CLS diff --git a/src/otx/recipe/detection/atss_mobilenetv2.yaml b/src/otx/recipe/detection/atss_mobilenetv2.yaml index ae8bd846e7d..72fdffbd602 100644 --- a/src/otx/recipe/detection/atss_mobilenetv2.yaml +++ b/src/otx/recipe/detection/atss_mobilenetv2.yaml @@ -13,7 +13,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 3 + num_warmup_steps: 0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml 
b/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml index 0d4dfd53727..cf14d7d747e 100644 --- a/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml +++ b/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml @@ -13,7 +13,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 3 + num_warmup_steps: 0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/detection/atss_resnext101.yaml b/src/otx/recipe/detection/atss_resnext101.yaml index 9bfbb05caa5..c1a0cdb279d 100644 --- a/src/otx/recipe/detection/atss_resnext101.yaml +++ b/src/otx/recipe/detection/atss_resnext101.yaml @@ -13,7 +13,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 3 + num_warmup_steps: 0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/detection/atss_resnext101_tile.yaml b/src/otx/recipe/detection/atss_resnext101_tile.yaml index 831d694caad..a1dfbf70b7f 100644 --- a/src/otx/recipe/detection/atss_resnext101_tile.yaml +++ b/src/otx/recipe/detection/atss_resnext101_tile.yaml @@ -13,7 +13,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 3 + num_warmup_steps: 0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/detection/rtdetr_101_tile.yaml b/src/otx/recipe/detection/rtdetr_101_tile.yaml index 0a6cceb8142..ea9522ea8e6 100644 --- a/src/otx/recipe/detection/rtdetr_101_tile.yaml +++ b/src/otx/recipe/detection/rtdetr_101_tile.yaml @@ -13,7 +13,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 5 + num_warmup_steps: 100 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/detection/rtdetr_18_tile.yaml b/src/otx/recipe/detection/rtdetr_18_tile.yaml index 637bdacbaa9..82a7b0df09f 100644 --- a/src/otx/recipe/detection/rtdetr_18_tile.yaml +++ b/src/otx/recipe/detection/rtdetr_18_tile.yaml @@ -13,7 +13,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 5 + num_warmup_steps: 0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/detection/rtdetr_50_tile.yaml b/src/otx/recipe/detection/rtdetr_50_tile.yaml index 53f241f4b17..ca2c1df0b0e 100644 --- a/src/otx/recipe/detection/rtdetr_50_tile.yaml +++ b/src/otx/recipe/detection/rtdetr_50_tile.yaml @@ -13,7 +13,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 5 + num_warmup_steps: 100 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/detection/rtmdet_tiny.yaml b/src/otx/recipe/detection/rtmdet_tiny.yaml index 6a74d780ab7..d34b6d97ec6 100644 --- a/src/otx/recipe/detection/rtmdet_tiny.yaml +++ b/src/otx/recipe/detection/rtmdet_tiny.yaml @@ -12,7 +12,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 3 + num_warmup_steps: 0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/detection/rtmdet_tiny_tile.yaml b/src/otx/recipe/detection/rtmdet_tiny_tile.yaml index 982d7b775d3..0ad11ecadad 100644 --- 
a/src/otx/recipe/detection/rtmdet_tiny_tile.yaml +++ b/src/otx/recipe/detection/rtmdet_tiny_tile.yaml @@ -12,7 +12,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 3 + num_warmup_steps: 0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/detection/ssd_mobilenetv2.yaml b/src/otx/recipe/detection/ssd_mobilenetv2.yaml index 60f1cb02391..8e383e26f09 100644 --- a/src/otx/recipe/detection/ssd_mobilenetv2.yaml +++ b/src/otx/recipe/detection/ssd_mobilenetv2.yaml @@ -13,7 +13,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 3 + num_warmup_steps: 0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml b/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml index f806cee40ea..f98178de1ac 100644 --- a/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml +++ b/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml @@ -13,7 +13,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 3 + num_warmup_steps: 0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/detection/yolox_l.yaml b/src/otx/recipe/detection/yolox_l.yaml index 23a76f0e1d4..b882e596e77 100644 --- a/src/otx/recipe/detection/yolox_l.yaml +++ b/src/otx/recipe/detection/yolox_l.yaml @@ -13,7 +13,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 3 + num_warmup_steps: 0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/detection/yolox_l_tile.yaml b/src/otx/recipe/detection/yolox_l_tile.yaml index 1e07232ff03..6371541312c 100644 --- a/src/otx/recipe/detection/yolox_l_tile.yaml +++ b/src/otx/recipe/detection/yolox_l_tile.yaml @@ -13,7 +13,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 3 + num_warmup_steps: 0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/detection/yolox_s.yaml b/src/otx/recipe/detection/yolox_s.yaml index 12600d5e536..3cb69298124 100644 --- a/src/otx/recipe/detection/yolox_s.yaml +++ b/src/otx/recipe/detection/yolox_s.yaml @@ -13,7 +13,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 3 + num_warmup_steps: 0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/detection/yolox_s_tile.yaml b/src/otx/recipe/detection/yolox_s_tile.yaml index 5d224c19f16..94ba2f0aca8 100644 --- a/src/otx/recipe/detection/yolox_s_tile.yaml +++ b/src/otx/recipe/detection/yolox_s_tile.yaml @@ -13,7 +13,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 3 + num_warmup_steps: 0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/detection/yolox_tiny.yaml b/src/otx/recipe/detection/yolox_tiny.yaml index 744dc3e72a7..a7ce904d0b6 100644 --- a/src/otx/recipe/detection/yolox_tiny.yaml +++ b/src/otx/recipe/detection/yolox_tiny.yaml @@ -13,7 +13,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - 
num_warmup_steps: 3 + num_warmup_steps: 0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/detection/yolox_tiny_tile.yaml b/src/otx/recipe/detection/yolox_tiny_tile.yaml index c7843ac50f5..2dda37ff5e9 100644 --- a/src/otx/recipe/detection/yolox_tiny_tile.yaml +++ b/src/otx/recipe/detection/yolox_tiny_tile.yaml @@ -13,7 +13,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 3 + num_warmup_steps: 0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/detection/yolox_x.yaml b/src/otx/recipe/detection/yolox_x.yaml index a99f0ce9122..9de8698351f 100644 --- a/src/otx/recipe/detection/yolox_x.yaml +++ b/src/otx/recipe/detection/yolox_x.yaml @@ -13,7 +13,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 3 + num_warmup_steps: 0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/detection/yolox_x_tile.yaml b/src/otx/recipe/detection/yolox_x_tile.yaml index 1739ffe3f6a..8b34fc90072 100644 --- a/src/otx/recipe/detection/yolox_x_tile.yaml +++ b/src/otx/recipe/detection/yolox_x_tile.yaml @@ -13,7 +13,7 @@ model: scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - num_warmup_steps: 3 + num_warmup_steps: 0 main_scheduler_callable: class_path: lightning.pytorch.cli.ReduceLROnPlateau init_args: diff --git a/src/otx/recipe/semantic_segmentation/dino_v2.yaml b/src/otx/recipe/semantic_segmentation/dino_v2.yaml index 33c4e98d578..fda76b20f4a 100644 --- a/src/otx/recipe/semantic_segmentation/dino_v2.yaml +++ b/src/otx/recipe/semantic_segmentation/dino_v2.yaml @@ -2,10 +2,10 @@ model: class_path: otx.algo.segmentation.dino_v2_seg.DinoV2Seg init_args: label_info: 2 - model_version: dinov2_vits14 + model_version: dinov2-small-seg input_size: - - 560 - - 560 + - 518 + - 518 optimizer: class_path: torch.optim.AdamW @@ -17,11 +17,15 @@ model: weight_decay: 0.0001 scheduler: - class_path: torch.optim.lr_scheduler.PolynomialLR + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - total_iters: 150 - power: 0.9 - last_epoch: -1 + num_warmup_steps: 0 + main_scheduler_callable: + class_path: torch.optim.lr_scheduler.PolynomialLR + init_args: + total_iters: 150 + power: 0.9 + last_epoch: -1 engine: task: SEMANTIC_SEGMENTATION @@ -33,8 +37,8 @@ data: ../_base_/data/semantic_segmentation.yaml overrides: data: input_size: - - 560 - - 560 + - 518 + - 518 train_subset: transforms: - class_path: otx.core.data.transform_libs.torchvision.RandomResizedCrop diff --git a/src/otx/recipe/semantic_segmentation/semisl/dino_v2_semisl.yaml b/src/otx/recipe/semantic_segmentation/semisl/dino_v2_semisl.yaml index 7dc5ece097c..da9a62fa4be 100644 --- a/src/otx/recipe/semantic_segmentation/semisl/dino_v2_semisl.yaml +++ b/src/otx/recipe/semantic_segmentation/semisl/dino_v2_semisl.yaml @@ -2,11 +2,11 @@ model: class_path: otx.algo.segmentation.dino_v2_seg.DinoV2Seg init_args: label_info: 2 - model_version: dinov2_vits14 + model_version: dinov2-small-seg train_type: SEMI_SUPERVISED input_size: - - 560 - - 560 + - 518 + - 518 optimizer: class_path: torch.optim.AdamW @@ -34,8 +34,8 @@ data: ../../_base_/data/semisl/semantic_segmentation_semisl.yaml overrides: data: input_size: - - 560 - - 560 + - 518 + - 518 train_subset: transforms: - class_path: 
otx.core.data.transform_libs.torchvision.RandomResizedCrop diff --git a/src/otx/recipe/visual_prompting/sam_tiny_vit.yaml b/src/otx/recipe/visual_prompting/sam_tiny_vit.yaml index 377d80b3722..4c8646bab0a 100644 --- a/src/otx/recipe/visual_prompting/sam_tiny_vit.yaml +++ b/src/otx/recipe/visual_prompting/sam_tiny_vit.yaml @@ -18,10 +18,14 @@ model: lr: 0.00001 scheduler: - class_path: torch.optim.lr_scheduler.ConstantLR + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - factor: 1 - total_iters: -1 + num_warmup_steps: 0 + main_scheduler_callable: + class_path: torch.optim.lr_scheduler.ConstantLR + init_args: + factor: 1 + total_iters: -1 engine: task: VISUAL_PROMPTING @@ -35,4 +39,4 @@ overrides: callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 diff --git a/src/otx/recipe/visual_prompting/sam_vit_b.yaml b/src/otx/recipe/visual_prompting/sam_vit_b.yaml index bc3bf89351a..4493af39562 100644 --- a/src/otx/recipe/visual_prompting/sam_vit_b.yaml +++ b/src/otx/recipe/visual_prompting/sam_vit_b.yaml @@ -18,10 +18,14 @@ model: lr: 0.00001 scheduler: - class_path: torch.optim.lr_scheduler.ConstantLR + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: - factor: 1 - total_iters: -1 + num_warmup_steps: 0 + main_scheduler_callable: + class_path: torch.optim.lr_scheduler.ConstantLR + init_args: + factor: 1 + total_iters: -1 engine: task: VISUAL_PROMPTING @@ -35,4 +39,4 @@ overrides: callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: - patience: 3 + patience: 5 diff --git a/src/otx/tools/converter.py b/src/otx/tools/converter.py index 8797910f74e..13edc6f1b55 100644 --- a/src/otx/tools/converter.py +++ b/src/otx/tools/converter.py @@ -239,6 +239,9 @@ def convert(config_path: str, task: OTXTaskType | None = None) -> dict: task_info = TEMPLATE_ID_DICT[template_config["model_template_id"]] if param_dict.get("enable_tiling", None) and not task_info["model_name"].endswith("_tile"): task_info["model_name"] += "_tile" + # classification task type can't be deduced from the template name, try to extract it from the config + if "sub_task_type" in template_config and "CLS" in template_config["sub_task_type"]: + task_info["task"] = template_config["sub_task_type"] if task is not None: task_info["task"] = task default_config = ConfigConverter._get_default_config(task_info) @@ -308,13 +311,16 @@ def update_num_workers(param_value: int) -> None: config["data"]["test_subset"]["num_workers"] = param_value def update_enable_early_stopping(param_value: bool) -> None: - idx = ConfigConverter._get_callback_idx(config["callbacks"], "lightning.pytorch.callbacks.EarlyStopping") + idx = ConfigConverter._get_callback_idx( + config["callbacks"], + "otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup", + ) if not param_value and idx > -1: config["callbacks"].pop(idx) def update_early_stop_patience(param_value: int) -> None: for callback in config["callbacks"]: - if callback["class_path"] == "lightning.pytorch.callbacks.EarlyStopping": + if callback["class_path"] == "otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup": callback["init_args"]["patience"] = param_value break diff --git a/src/otx/tools/templates/classification/configuration.yaml b/src/otx/tools/templates/classification/configuration.yaml index ed91ea1cfa3..93f972b7a87 100644 --- a/src/otx/tools/templates/classification/configuration.yaml +++
b/src/otx/tools/templates/classification/configuration.yaml @@ -87,11 +87,11 @@ learning_parameters: num_iters: affects_outcome_of: TRAINING default_value: 200 - description: - Increasing this value causes the results to be more robust but training - time will be longer. + description: Maximum number of epochs to train a model. + Increasing this value may result in longer training, but potentially in a more robust model. + Note that if early stopping is enabled, the actual number of epochs may be less than this value. editable: true - header: Number of training iterations + header: Number of training epochs max_value: 1000 min_value: 1 type: INTEGER @@ -174,7 +174,7 @@ learning_parameters: visible_in_ui: false early_stop_patience: affects_outcome_of: TRAINING - default_value: 3 + default_value: 5 description: Training will stop if the model does not improve within the number of epochs of patience. editable: true header: Patience for early stopping @@ -186,26 +186,7 @@ learning_parameters: operator: AND rules: [] type: UI_RULES - value: 8 - visible_in_ui: true - warning: This is applied exclusively when early stopping is enabled. - early_stop_iteration_patience: - affects_outcome_of: TRAINING - default_value: 0 - description: - Training will stop if the model does not improve within the number of iterations of patience. - This ensures the model is trained enough with the number of iterations of patience before early stopping. - editable: true - header: Iteration patience for early stopping - max_value: 1000 - min_value: 0 - type: INTEGER - ui_rules: - action: DISABLE_EDITING - operator: AND - rules: [] - type: UI_RULES - value: 0 + value: 5 visible_in_ui: true warning: This is applied exclusively when early stopping is enabled. use_adaptive_interval: @@ -264,7 +245,7 @@ learning_parameters: auto_num_workers: affects_outcome_of: TRAINING default_value: false - description: Adapt num_workers according to current hardware status automatically. + description: Adapt number of workers according to current hardware status automatically. editable: true header: Enable auto adaptive num_workers type: BOOLEAN diff --git a/src/otx/tools/templates/classification/efficientnet_v2_l/template.yaml b/src/otx/tools/templates/classification/efficientnet_v2_l/template.yaml index 4db892a3131..001e1e3d995 100644 --- a/src/otx/tools/templates/classification/efficientnet_v2_l/template.yaml +++ b/src/otx/tools/templates/classification/efficientnet_v2_l/template.yaml @@ -20,7 +20,7 @@ hyper_parameters: parameter_overrides: learning_parameters: batch_size: - default_value: 64 + default_value: 48 auto_hpo_state: POSSIBLE learning_rate: default_value: 0.01 diff --git a/src/otx/tools/templates/detection/detection/configuration.yaml b/src/otx/tools/templates/detection/detection/configuration.yaml index 5cb11d83c9f..9fe02a3d28a 100644 --- a/src/otx/tools/templates/detection/detection/configuration.yaml +++ b/src/otx/tools/templates/detection/detection/configuration.yaml @@ -91,11 +91,11 @@ learning_parameters: num_iters: affects_outcome_of: TRAINING default_value: 200 - description: - Increasing this value causes the results to be more robust but training - time will be longer. + description: Maximum number of epochs to train a model. + Increasing this value may result in longer training, but potentially in a more robust model. + Note that if early stopping is enabled, the actual number of epochs may be less than this value.
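For context on the src/otx/tools/converter.py hunk just above: both updaters now key on the class path of the warmup-aware callback instead of lightning.pytorch.callbacks.EarlyStopping. Below is a condensed, self-contained sketch of the patched patience updater; it is simplified from the diff (in OTX the function is a closure over the converted config inside ConfigConverter):

EARLY_STOP_CLS = "otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup"

def update_early_stop_patience(callbacks: list, patience: int) -> None:
    # Match the new class path and patch patience in place, as in the diff above.
    for callback in callbacks:
        if callback["class_path"] == EARLY_STOP_CLS:
            callback["init_args"]["patience"] = patience
            break

callbacks = [{"class_path": EARLY_STOP_CLS, "init_args": {"patience": 5}}]
update_early_stop_patience(callbacks, patience=10)
assert callbacks[0]["init_args"]["patience"] == 10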
editable: true - header: Number of training iterations + header: Number of training epochs max_value: 1000 min_value: 1 type: INTEGER @@ -124,7 +124,7 @@ learning_parameters: operator: AND rules: [] type: UI_RULES - value: 0 + value: 2 visible_in_ui: true warning: null enable_early_stopping: @@ -173,25 +173,6 @@ learning_parameters: value: 10 visible_in_ui: true warning: This is applied exclusively when early stopping is enabled. - early_stop_iteration_patience: - affects_outcome_of: TRAINING - default_value: 0 - description: - Training will stop if the model does not improve within the number of iterations of patience. - This ensures the model is trained enough with the number of iterations of patience before early stopping. - editable: true - header: Iteration patience for early stopping - max_value: 1000 - min_value: 0 - type: INTEGER - ui_rules: - action: DISABLE_EDITING - operator: AND - rules: [] - type: UI_RULES - value: 0 - visible_in_ui: true - warning: This is applied exclusively when early stopping is enabled. use_adaptive_interval: affects_outcome_of: TRAINING default_value: true @@ -232,7 +213,7 @@ learning_parameters: auto_num_workers: affects_outcome_of: TRAINING default_value: false - description: Adapt num_workers according to current hardware status automatically. + description: Adapt number of workers according to current hardware status automatically. editable: true header: Enable auto adaptive num_workers type: BOOLEAN diff --git a/src/otx/tools/templates/detection/detection/cspdarknet_yolox_l/template.yaml b/src/otx/tools/templates/detection/detection/cspdarknet_yolox_l/template.yaml index c161471d452..19c6f4c7502 100644 --- a/src/otx/tools/templates/detection/detection/cspdarknet_yolox_l/template.yaml +++ b/src/otx/tools/templates/detection/detection/cspdarknet_yolox_l/template.yaml @@ -28,7 +28,7 @@ hyper_parameters: default_value: 0.001 auto_hpo_state: POSSIBLE learning_rate_warmup_iters: - default_value: 3 + default_value: 0 num_iters: default_value: 200 diff --git a/src/otx/tools/templates/detection/detection/cspdarknet_yolox_s/template.yaml b/src/otx/tools/templates/detection/detection/cspdarknet_yolox_s/template.yaml index f8ef1d4acd3..ad248a10bf6 100644 --- a/src/otx/tools/templates/detection/detection/cspdarknet_yolox_s/template.yaml +++ b/src/otx/tools/templates/detection/detection/cspdarknet_yolox_s/template.yaml @@ -28,7 +28,7 @@ hyper_parameters: default_value: 0.001 auto_hpo_state: POSSIBLE learning_rate_warmup_iters: - default_value: 3 + default_value: 0 num_iters: default_value: 200 diff --git a/src/otx/tools/templates/detection/detection/cspdarknet_yolox_tiny/template.yaml b/src/otx/tools/templates/detection/detection/cspdarknet_yolox_tiny/template.yaml index e2e426840ed..f3310cb8138 100644 --- a/src/otx/tools/templates/detection/detection/cspdarknet_yolox_tiny/template.yaml +++ b/src/otx/tools/templates/detection/detection/cspdarknet_yolox_tiny/template.yaml @@ -28,7 +28,7 @@ hyper_parameters: default_value: 0.0002 auto_hpo_state: POSSIBLE learning_rate_warmup_iters: - default_value: 3 + default_value: 0 num_iters: default_value: 200 diff --git a/src/otx/tools/templates/detection/detection/cspdarknet_yolox_x/template.yaml b/src/otx/tools/templates/detection/detection/cspdarknet_yolox_x/template.yaml index 97f85fed008..30bdfecff16 100644 --- a/src/otx/tools/templates/detection/detection/cspdarknet_yolox_x/template.yaml +++ b/src/otx/tools/templates/detection/detection/cspdarknet_yolox_x/template.yaml @@ -28,7 +28,7 @@ hyper_parameters: default_value: 
0.001 auto_hpo_state: POSSIBLE learning_rate_warmup_iters: - default_value: 3 + default_value: 0 num_iters: default_value: 200 diff --git a/src/otx/tools/templates/detection/detection/mobilenetv2_atss/template.yaml b/src/otx/tools/templates/detection/detection/mobilenetv2_atss/template.yaml index 94dd429e1f1..aeed26d6c19 100644 --- a/src/otx/tools/templates/detection/detection/mobilenetv2_atss/template.yaml +++ b/src/otx/tools/templates/detection/detection/mobilenetv2_atss/template.yaml @@ -28,7 +28,7 @@ hyper_parameters: default_value: 0.004 auto_hpo_state: POSSIBLE learning_rate_warmup_iters: - default_value: 3 + default_value: 0 num_iters: default_value: 200 diff --git a/src/otx/tools/templates/detection/detection/mobilenetv2_ssd/template.yaml b/src/otx/tools/templates/detection/detection/mobilenetv2_ssd/template.yaml index 3cdde945a08..62943444683 100644 --- a/src/otx/tools/templates/detection/detection/mobilenetv2_ssd/template.yaml +++ b/src/otx/tools/templates/detection/detection/mobilenetv2_ssd/template.yaml @@ -28,7 +28,7 @@ hyper_parameters: default_value: 0.01 auto_hpo_state: POSSIBLE learning_rate_warmup_iters: - default_value: 3 + default_value: 0 num_iters: default_value: 200 diff --git a/src/otx/tools/templates/detection/detection/resnext101_atss/template.yaml b/src/otx/tools/templates/detection/detection/resnext101_atss/template.yaml index cf12454e78d..c6770cc7827 100644 --- a/src/otx/tools/templates/detection/detection/resnext101_atss/template.yaml +++ b/src/otx/tools/templates/detection/detection/resnext101_atss/template.yaml @@ -28,7 +28,7 @@ hyper_parameters: default_value: 0.004 auto_hpo_state: POSSIBLE learning_rate_warmup_iters: - default_value: 3 + default_value: 0 num_iters: default_value: 200 diff --git a/src/otx/tools/templates/detection/detection/rtdetr_101/template.yaml b/src/otx/tools/templates/detection/detection/rtdetr_101/template.yaml index 1394cf44159..7b4bcae96a8 100644 --- a/src/otx/tools/templates/detection/detection/rtdetr_101/template.yaml +++ b/src/otx/tools/templates/detection/detection/rtdetr_101/template.yaml @@ -27,8 +27,6 @@ hyper_parameters: learning_rate: default_value: 0.0001 auto_hpo_state: POSSIBLE - learning_rate_warmup_iters: - default_value: 100 num_iters: default_value: 200 diff --git a/src/otx/tools/templates/detection/detection/rtdetr_18/template.yaml b/src/otx/tools/templates/detection/detection/rtdetr_18/template.yaml index 7738c65f1b7..88999e071d6 100644 --- a/src/otx/tools/templates/detection/detection/rtdetr_18/template.yaml +++ b/src/otx/tools/templates/detection/detection/rtdetr_18/template.yaml @@ -27,8 +27,6 @@ hyper_parameters: learning_rate: default_value: 0.0001 auto_hpo_state: POSSIBLE - learning_rate_warmup_iters: - default_value: 100 num_iters: default_value: 200 diff --git a/src/otx/tools/templates/detection/detection/rtdetr_50/template.yaml b/src/otx/tools/templates/detection/detection/rtdetr_50/template.yaml index 408e48cd8fb..79497ab1c79 100644 --- a/src/otx/tools/templates/detection/detection/rtdetr_50/template.yaml +++ b/src/otx/tools/templates/detection/detection/rtdetr_50/template.yaml @@ -27,8 +27,6 @@ hyper_parameters: learning_rate: default_value: 0.0001 auto_hpo_state: POSSIBLE - learning_rate_warmup_iters: - default_value: 100 num_iters: default_value: 200 diff --git a/src/otx/tools/templates/detection/detection/rtmdet_tiny/template.yaml b/src/otx/tools/templates/detection/detection/rtmdet_tiny/template.yaml index 8b110503b62..a067b186861 100644 --- 
a/src/otx/tools/templates/detection/detection/rtmdet_tiny/template.yaml +++ b/src/otx/tools/templates/detection/detection/rtmdet_tiny/template.yaml @@ -28,7 +28,7 @@ hyper_parameters: default_value: 0.0007 auto_hpo_state: POSSIBLE learning_rate_warmup_iters: - default_value: 3 + default_value: 0 num_iters: default_value: 200 diff --git a/src/otx/tools/templates/detection/instance_segmentation/configuration.yaml b/src/otx/tools/templates/detection/instance_segmentation/configuration.yaml index a1c2078ed62..20421f3fd16 100644 --- a/src/otx/tools/templates/detection/instance_segmentation/configuration.yaml +++ b/src/otx/tools/templates/detection/instance_segmentation/configuration.yaml @@ -91,11 +91,11 @@ learning_parameters: num_iters: affects_outcome_of: TRAINING default_value: 200 - description: - Increasing this value causes the results to be more robust but training - time will be longer. + description: Maximum number of epochs to train a model. + Increasing this value may result in longer training, but potentially in a more robust model. + Note that if early stopping is enabled, the actual number of epochs may be less than this value. editable: true - header: Number of training iterations + header: Number of training epochs max_value: 1000 min_value: 1 type: INTEGER @@ -173,25 +173,6 @@ learning_parameters: value: 10 visible_in_ui: true warning: This is applied exclusively when early stopping is enabled. - early_stop_iteration_patience: - affects_outcome_of: TRAINING - default_value: 0 - description: - Training will stop if the model does not improve within the number of iterations of patience. - This ensures the model is trained enough with the number of iterations of patience before early stopping. - editable: true - header: Iteration patience for early stopping - max_value: 1000 - min_value: 0 - type: INTEGER - ui_rules: - action: DISABLE_EDITING - operator: AND - rules: [] - type: UI_RULES - value: 0 - visible_in_ui: true - warning: This is applied exclusively when early stopping is enabled. use_adaptive_interval: affects_outcome_of: TRAINING default_value: true @@ -232,7 +213,7 @@ learning_parameters: auto_num_workers: affects_outcome_of: TRAINING default_value: false - description: Adapt num_workers according to current hardware status automatically. + description: Adapt number of workers according to current hardware status automatically.
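These template edits point the same direction as the recipe changes at the top of this section: learning_rate_warmup_iters and num_warmup_steps now default to 0, so warmup is strictly opt-in while the main scheduler is otherwise unchanged. A minimal PyTorch sketch of what the wrapper contributes when warmup is enabled; build_warmup_scheduler is a hypothetical stand-in for otx.core.schedulers.LinearWarmupSchedulerCallable, which differs in detail:

import torch
from torch.optim.lr_scheduler import CosineAnnealingLR, LinearLR, SequentialLR

def build_warmup_scheduler(optimizer: torch.optim.Optimizer, num_warmup_steps: int):
    # Stands in for main_scheduler_callable; the tv_* recipes above use CosineAnnealingLR.
    main = CosineAnnealingLR(optimizer, T_max=100_000, eta_min=0)
    if num_warmup_steps == 0:
        # num_warmup_steps: 0 -> identical to the old, unwrapped scheduler.
        return main
    # Ramp the LR linearly over the first num_warmup_steps steps, then hand over.
    warmup = LinearLR(optimizer, start_factor=1e-3, total_iters=num_warmup_steps)
    return SequentialLR(optimizer, schedulers=[warmup, main], milestones=[num_warmup_steps])

optimizer = torch.optim.SGD([torch.nn.Parameter(torch.zeros(1))], lr=0.01)
scheduler = build_warmup_scheduler(optimizer, num_warmup_steps=0)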
editable: true header: Enable auto adaptive num_workers type: BOOLEAN diff --git a/src/otx/tools/templates/detection/instance_segmentation/efficientnetb2b_maskrcnn/template.yaml b/src/otx/tools/templates/detection/instance_segmentation/efficientnetb2b_maskrcnn/template.yaml index 4b5e21a4f83..f30d4c6f792 100644 --- a/src/otx/tools/templates/detection/instance_segmentation/efficientnetb2b_maskrcnn/template.yaml +++ b/src/otx/tools/templates/detection/instance_segmentation/efficientnetb2b_maskrcnn/template.yaml @@ -27,8 +27,6 @@ hyper_parameters: learning_rate: default_value: 0.015 auto_hpo_state: POSSIBLE - learning_rate_warmup_iters: - default_value: 100 num_iters: default_value: 100 diff --git a/src/otx/tools/templates/detection/instance_segmentation/maskrcnn_swin_t/template.yaml b/src/otx/tools/templates/detection/instance_segmentation/maskrcnn_swin_t/template.yaml index e9e289c6bf6..cf609e3d1c8 100644 --- a/src/otx/tools/templates/detection/instance_segmentation/maskrcnn_swin_t/template.yaml +++ b/src/otx/tools/templates/detection/instance_segmentation/maskrcnn_swin_t/template.yaml @@ -27,8 +27,6 @@ hyper_parameters: learning_rate: default_value: 0.0001 auto_hpo_state: POSSIBLE - learning_rate_warmup_iters: - default_value: 100 num_iters: default_value: 100 diff --git a/src/otx/tools/templates/detection/instance_segmentation/resnet50_maskrcnn/template.yaml b/src/otx/tools/templates/detection/instance_segmentation/resnet50_maskrcnn/template.yaml index bd2248adbcd..31f1a310cab 100644 --- a/src/otx/tools/templates/detection/instance_segmentation/resnet50_maskrcnn/template.yaml +++ b/src/otx/tools/templates/detection/instance_segmentation/resnet50_maskrcnn/template.yaml @@ -27,8 +27,6 @@ hyper_parameters: learning_rate: default_value: 0.007 auto_hpo_state: POSSIBLE - learning_rate_warmup_iters: - default_value: 100 num_iters: default_value: 100 diff --git a/src/otx/tools/templates/detection/rotated_detection/configuration.yaml b/src/otx/tools/templates/detection/rotated_detection/configuration.yaml index b41ea7dda25..524376b9d0a 100644 --- a/src/otx/tools/templates/detection/rotated_detection/configuration.yaml +++ b/src/otx/tools/templates/detection/rotated_detection/configuration.yaml @@ -91,11 +91,11 @@ learning_parameters: num_iters: affects_outcome_of: TRAINING default_value: 200 - description: - Increasing this value causes the results to be more robust but training - time will be longer. + description: Maximum number of epochs to train a model. + Increasing this value may result in longer training, but potentially in a more robust model. + Note that if early stopping is enabled, the actual number of epochs may be less than this value. editable: true - header: Number of training iterations + header: Number of training epochs max_value: 1000 min_value: 1 type: INTEGER @@ -173,25 +173,6 @@ learning_parameters: value: 10 visible_in_ui: true warning: This is applied exclusively when early stopping is enabled. - early_stop_iteration_patience: - affects_outcome_of: TRAINING - default_value: 0 - description: - Training will stop if the model does not improve within the number of iterations of patience. - This ensures the model is trained enough with the number of iterations of patience before early stopping.
- editable: true - header: Iteration patience for early stopping - max_value: 1000 - min_value: 0 - type: INTEGER - ui_rules: - action: DISABLE_EDITING - operator: AND - rules: [] - type: UI_RULES - value: 0 - visible_in_ui: true - warning: This is applied exclusively when early stopping is enabled. use_adaptive_interval: affects_outcome_of: TRAINING default_value: true @@ -232,7 +213,7 @@ learning_parameters: auto_num_workers: affects_outcome_of: TRAINING default_value: false - description: Adapt num_workers according to current hardware status automatically. + description: Adapt number of workers according to current hardware status automatically. editable: true header: Enable auto adaptive num_workers type: BOOLEAN diff --git a/src/otx/tools/templates/detection/rotated_detection/efficientnetb2b_maskrcnn/template.yaml b/src/otx/tools/templates/detection/rotated_detection/efficientnetb2b_maskrcnn/template.yaml index 21e079c489a..2c5ebee3fc7 100644 --- a/src/otx/tools/templates/detection/rotated_detection/efficientnetb2b_maskrcnn/template.yaml +++ b/src/otx/tools/templates/detection/rotated_detection/efficientnetb2b_maskrcnn/template.yaml @@ -27,8 +27,6 @@ hyper_parameters: learning_rate: default_value: 0.007 auto_hpo_state: POSSIBLE - learning_rate_warmup_iters: - default_value: 100 num_iters: default_value: 100 diff --git a/src/otx/tools/templates/detection/rotated_detection/resnet50_maskrcnn/template.yaml b/src/otx/tools/templates/detection/rotated_detection/resnet50_maskrcnn/template.yaml index 4cb51f466eb..8d1bad4640c 100644 --- a/src/otx/tools/templates/detection/rotated_detection/resnet50_maskrcnn/template.yaml +++ b/src/otx/tools/templates/detection/rotated_detection/resnet50_maskrcnn/template.yaml @@ -27,8 +27,6 @@ hyper_parameters: learning_rate: default_value: 0.007 auto_hpo_state: POSSIBLE - learning_rate_warmup_iters: - default_value: 100 num_iters: default_value: 100 diff --git a/src/otx/tools/templates/keypoint_detection/configuration.yaml b/src/otx/tools/templates/keypoint_detection/configuration.yaml index e745d787c80..1ef84c01919 100644 --- a/src/otx/tools/templates/keypoint_detection/configuration.yaml +++ b/src/otx/tools/templates/keypoint_detection/configuration.yaml @@ -87,11 +87,11 @@ learning_parameters: num_iters: affects_outcome_of: TRAINING default_value: 200 - description: - Increasing this value causes the results to be more robust but training - time will be longer. + description: Maximum number of epochs to train a model. + Increasing this value may result in longer training, but potentially in a more robust model. + Note that if early stopping is enabled, the actual number of epochs may be less than this value. editable: true - header: Number of training iterations + header: Number of training epochs max_value: 1000 min_value: 1 type: INTEGER @@ -120,7 +120,7 @@ learning_parameters: operator: AND rules: [] type: UI_RULES - value: 0 + value: 2 visible_in_ui: true warning: null learning_rate_warmup_iters: @@ -189,25 +189,6 @@ learning_parameters: value: 10 visible_in_ui: true warning: This is applied exclusively when early stopping is enabled. - early_stop_iteration_patience: - affects_outcome_of: TRAINING - default_value: 0 - description: - Training will stop if the model does not improve within the number of iterations of patience. - This ensures the model is trained enough with the number of iterations of patience before early stopping.
- editable: true - header: Iteration patience for early stopping - max_value: 1000 - min_value: 0 - type: INTEGER - ui_rules: - action: DISABLE_EDITING - operator: AND - rules: [] - type: UI_RULES - value: 0 - visible_in_ui: true - warning: This is applied exclusively when early stopping is enabled. use_adaptive_interval: affects_outcome_of: TRAINING default_value: true @@ -248,7 +229,7 @@ learning_parameters: auto_num_workers: affects_outcome_of: TRAINING default_value: false - description: Adapt num_workers according to current hardware status automatically. + description: Adapt number of workers according to current hardware status automatically. editable: true header: Enable auto adaptive num_workers type: BOOLEAN diff --git a/src/otx/tools/templates/segmentation/configuration.yaml b/src/otx/tools/templates/segmentation/configuration.yaml index 87a07515e02..4c365e6df55 100644 --- a/src/otx/tools/templates/segmentation/configuration.yaml +++ b/src/otx/tools/templates/segmentation/configuration.yaml @@ -54,8 +54,8 @@ learning_parameters: default_value: 100 description: In this periods of initial training iterations, the model will be trained in low learning rate, - which will be increased incrementally up to the expected learning rate setting. - This warm-up phase is known to be helpful to stabilize training, thus result in better performance. + which will be increased linearly up to the expected learning rate setting. + This warm-up phase is known to help stabilize training and can therefore lead to increased performance. editable: true header: Number of iterations for learning rate warmup max_value: 10000 @@ -74,11 +74,11 @@ learning_parameters: auto_hpo_state: not_possible auto_hpo_value: null default_value: 200 - description: - Increasing this value causes the results to be more robust but training - time will be longer. + description: Maximum number of epochs to train a model. + Increasing this value may result in longer training, but potentially in a more robust model. + Note that if early stopping is enabled, the actual number of epochs may be less than this value. editable: true - header: Number of training iterations + header: Number of training epochs max_value: 1000 min_value: 1 type: INTEGER @@ -109,7 +109,7 @@ learning_parameters: operator: AND rules: [] type: UI_RULES - value: 0 + value: 2 visible_in_ui: true warning: null enable_early_stopping: @@ -143,7 +143,7 @@ learning_parameters: visible_in_ui: false early_stop_patience: affects_outcome_of: TRAINING - default_value: 7 + default_value: 10 description: Training will stop if the model does not improve within the number of epochs of patience. editable: true header: Patience for early stopping @@ -158,25 +158,6 @@ learning_parameters: value: 5 visible_in_ui: true warning: This is applied exclusively when early stopping is enabled. - early_stop_iteration_patience: - affects_outcome_of: TRAINING - default_value: 0 - description: - Training will stop if the model does not improve within the number of iterations of patience. - This ensures the model is trained enough with the number of iterations of patience before early stopping. - editable: true - header: Iteration patience for early stopping - max_value: 1000 - min_value: 0 - type: INTEGER - ui_rules: - action: DISABLE_EDITING - operator: AND - rules: [] - type: UI_RULES - value: 0 - visible_in_ui: true - warning: This is applied exclusively when early stopping is enabled.
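That is the last of the early_stop_iteration_patience removals; the "train long enough before stopping" guarantee now lives in the warmup window of EarlyStoppingWithWarmup (see warmup_iters: 750 in the efficientnet_v2 recipe earlier in this section). A hypothetical sketch of how such a callback can gate the check; the real class in otx.algo.callbacks.adaptive_early_stopping may differ:

from lightning.pytorch.callbacks import EarlyStopping

class EarlyStoppingWithWarmupSketch(EarlyStopping):
    # Hypothetical: suppress early-stopping checks until warmup_iters global steps
    # have elapsed, replacing the removed early_stop_iteration_patience knob.
    def __init__(self, *args, warmup_iters: int = 0, **kwargs):
        super().__init__(*args, **kwargs)
        self.warmup_iters = warmup_iters

    def _should_skip_check(self, trainer) -> bool:
        if trainer.global_step < self.warmup_iters:
            return True
        return super()._should_skip_check(trainer)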
enable_supcon: affects_outcome_of: TRAINING default_value: false @@ -219,7 +200,7 @@ learning_parameters: auto_num_workers: affects_outcome_of: TRAINING default_value: false - description: Adapt num_workers according to current hardware status automatically. + description: Adapt number of workers according to current hardware status automatically. editable: true header: Enable auto adaptive num_workers type: BOOLEAN diff --git a/src/otx/tools/templates/segmentation/dinov2_small/template.yaml b/src/otx/tools/templates/segmentation/dinov2_small/template.yaml index ac837fa5007..630af3c6b7c 100644 --- a/src/otx/tools/templates/segmentation/dinov2_small/template.yaml +++ b/src/otx/tools/templates/segmentation/dinov2_small/template.yaml @@ -25,10 +25,6 @@ hyper_parameters: learning_rate: default_value: 0.001 auto_hpo_state: POSSIBLE - learning_rate_warmup_iters: - default_value: 100 - num_iters: - default_value: 200 # Training resources. max_nodes: 1 diff --git a/src/otx/tools/templates/segmentation/ham_segnext_b/template.yaml b/src/otx/tools/templates/segmentation/ham_segnext_b/template.yaml index ef390639238..88f3d5d41a3 100644 --- a/src/otx/tools/templates/segmentation/ham_segnext_b/template.yaml +++ b/src/otx/tools/templates/segmentation/ham_segnext_b/template.yaml @@ -25,14 +25,8 @@ hyper_parameters: learning_rate: default_value: 0.00006 auto_hpo_state: POSSIBLE - learning_rate_warmup_iters: - default_value: 100 - num_iters: - default_value: 200 early_stop_start: default_value: 100 - early_stop_patience: - default_value: 10 # Training resources. max_nodes: 1 diff --git a/src/otx/tools/templates/segmentation/ham_segnext_s/template.yaml b/src/otx/tools/templates/segmentation/ham_segnext_s/template.yaml index 9afd2660cf2..0c13b203b84 100644 --- a/src/otx/tools/templates/segmentation/ham_segnext_s/template.yaml +++ b/src/otx/tools/templates/segmentation/ham_segnext_s/template.yaml @@ -25,14 +25,8 @@ hyper_parameters: learning_rate: default_value: 0.00006 auto_hpo_state: POSSIBLE - learning_rate_warmup_iters: - default_value: 100 - num_iters: - default_value: 200 early_stop_start: default_value: 100 - early_stop_patience: - default_value: 10 # Training resources. max_nodes: 1 diff --git a/src/otx/tools/templates/segmentation/ham_segnext_t/template.yaml b/src/otx/tools/templates/segmentation/ham_segnext_t/template.yaml index c5879535caa..e23c8c91104 100644 --- a/src/otx/tools/templates/segmentation/ham_segnext_t/template.yaml +++ b/src/otx/tools/templates/segmentation/ham_segnext_t/template.yaml @@ -25,14 +25,8 @@ hyper_parameters: learning_rate: default_value: 0.00006 auto_hpo_state: POSSIBLE - learning_rate_warmup_iters: - default_value: 100 - num_iters: - default_value: 200 early_stop_start: default_value: 100 - early_stop_patience: - default_value: 10 # Training resources. max_nodes: 1 diff --git a/src/otx/tools/templates/segmentation/ocr_lite_hrnet_18_mod2/template.yaml b/src/otx/tools/templates/segmentation/ocr_lite_hrnet_18_mod2/template.yaml index 56af9f4b2b2..ce4e4da393f 100644 --- a/src/otx/tools/templates/segmentation/ocr_lite_hrnet_18_mod2/template.yaml +++ b/src/otx/tools/templates/segmentation/ocr_lite_hrnet_18_mod2/template.yaml @@ -24,10 +24,6 @@ hyper_parameters: learning_rate: default_value: 0.001 auto_hpo_state: POSSIBLE - learning_rate_warmup_iters: - default_value: 100 - num_iters: - default_value: 200 # Training resources. 
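A side note on the DinoV2Seg recipes earlier in this section, whose input_size moved from 560 to 518 alongside the dinov2-small-seg rename: both sizes are multiples of DINOv2's 14-pixel patch, so the motivation is presumably not divisibility but matching the 37x37 token grid of the published DINOv2 checkpoints, which were pretrained at 518. A quick check of the arithmetic:

PATCH = 14  # DINOv2 ViT patch size
for side in (560, 518):
    tokens, rem = divmod(side, PATCH)
    print(f"{side} px -> {tokens}x{tokens} patch tokens, remainder {rem}")
# 560 px -> 40x40 patch tokens, remainder 0
# 518 px -> 37x37 patch tokens, remainder 0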
 max_nodes: 1
diff --git a/src/otx/tools/templates/segmentation/ocr_lite_hrnet_s_mod2/template.yaml b/src/otx/tools/templates/segmentation/ocr_lite_hrnet_s_mod2/template.yaml
index 6637673c64e..6c8be01af2a 100644
--- a/src/otx/tools/templates/segmentation/ocr_lite_hrnet_s_mod2/template.yaml
+++ b/src/otx/tools/templates/segmentation/ocr_lite_hrnet_s_mod2/template.yaml
@@ -25,10 +25,6 @@ hyper_parameters:
       learning_rate:
         default_value: 0.001
         auto_hpo_state: POSSIBLE
-      learning_rate_warmup_iters:
-        default_value: 100
-      num_iters:
-        default_value: 200

 # Training resources.
 max_nodes: 1
diff --git a/src/otx/tools/templates/segmentation/ocr_lite_hrnet_x_mod3/template.yaml b/src/otx/tools/templates/segmentation/ocr_lite_hrnet_x_mod3/template.yaml
index f4bc011554f..d4f98812586 100644
--- a/src/otx/tools/templates/segmentation/ocr_lite_hrnet_x_mod3/template.yaml
+++ b/src/otx/tools/templates/segmentation/ocr_lite_hrnet_x_mod3/template.yaml
@@ -25,10 +25,6 @@ hyper_parameters:
       learning_rate:
         default_value: 0.001
         auto_hpo_state: POSSIBLE
-      learning_rate_warmup_iters:
-        default_value: 100
-      num_iters:
-        default_value: 200

 # Training resources.
 max_nodes: 1
diff --git a/tests/conftest.py b/tests/conftest.py
index 12cc2ae0884..81ff3254262 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -370,6 +370,7 @@ def fxt_seg_label_info() -> SegLabelInfo:
             label_names,
             ["class2", "class3"],
         ],
+        label_ids=["0", "1", "2"],
     )


@@ -382,6 +383,7 @@ def fxt_multiclass_labelinfo() -> LabelInfo:
             label_names,
             ["class2", "class3"],
         ],
+        label_ids=["0", "1", "2"],
     )


@@ -395,6 +397,7 @@ def fxt_multilabel_labelinfo() -> LabelInfo:
             [label_names[1]],
             [label_names[2]],
         ],
+        label_ids=["0", "1", "2"],
     )


@@ -461,6 +464,7 @@ def fxt_hlabel_multilabel_info() -> HLabelInfo:
             ["Spade_A", "Spade"],
             ["Spade_King", "Spade"],
         ],
+        label_ids=[str(i) for i in range(9)],
     )


diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py
index ead1117c6dd..3fb09304202 100644
--- a/tests/integration/conftest.py
+++ b/tests/integration/conftest.py
@@ -131,6 +131,7 @@ def fxt_target_dataset_per_task() -> dict:
         "anomaly_classification": "tests/assets/anomaly_hazelnut",
         "anomaly_detection": "tests/assets/anomaly_hazelnut",
         "anomaly_segmentation": "tests/assets/anomaly_hazelnut",
+        "keypoint_detection": "tests/assets/car_tree_bug_keypoint",
     }


diff --git a/tests/unit/algo/classification/conftest.py b/tests/unit/algo/classification/conftest.py
index 945c3d0bc4c..a283eff41b1 100644
--- a/tests/unit/algo/classification/conftest.py
+++ b/tests/unit/algo/classification/conftest.py
@@ -31,6 +31,7 @@ def fxt_hlabel_data() -> HLabelInfo:
             ["Heart_Queen", "Heart_King"],
             ["Spade_A", "Spade_King"],
         ],
+        label_ids=[str(i) for i in range(6)],
         num_multiclass_heads=3,
         num_multilabel_classes=0,
         head_idx_to_logits_range={"0": (0, 2), "1": (2, 4), "2": (4, 6)},
@@ -80,6 +81,7 @@ def fxt_hlabel_multilabel_info() -> HLabelInfo:
             "Red_Joker",
             "Extra_Joker",
         ],
+        label_ids=[str(i) for i in range(9)],
         label_groups=[
             ["Heart", "Spade"],
             ["Heart_Queen", "Heart_King"],
@@ -149,6 +151,7 @@ def fxt_hlabel_cifar() -> HLabelInfo:
             "aquatic_mammals",
             "fish",
         ],
+        label_ids=[str(i) for i in range(12)],
         label_groups=[
             ["beaver", "dolphin", "otter", "seal", "whale"],
             ["aquarium_fish", "flatfish", "ray", "shark", "trout"],
diff --git a/tests/unit/algo/detection/base_models/test_single_stage_detector.py b/tests/unit/algo/detection/base_models/test_single_stage_detector.py
index eb6a25037bc..1f28bd0876c 100644
--- a/tests/unit/algo/detection/base_models/test_single_stage_detector.py
+++ b/tests/unit/algo/detection/base_models/test_single_stage_detector.py
@@ -48,7 +48,7 @@ def batch(self):
         inputs = torch.randn(1, 3, 32, 32)
         return DetBatchDataEntity(
             batch_size=1,
-            imgs_info=[LabelInfo(["a"], [["a"]])],
+            imgs_info=[LabelInfo(["a"], ["0"], [["a"]])],
             images=inputs,
             bboxes=[torch.tensor([[0.5, 0.5, 0.5, 0.5]])],
             labels=[torch.tensor([0])],
diff --git a/tests/unit/algo/detection/test_rtdetr.py b/tests/unit/algo/detection/test_rtdetr.py
index 22dc258029f..a8ba46f0b91 100644
--- a/tests/unit/algo/detection/test_rtdetr.py
+++ b/tests/unit/algo/detection/test_rtdetr.py
@@ -13,7 +13,7 @@ class TestRTDETR:
     def test_customize_outputs(self, mocker):
-        label_info = LabelInfo(["a", "b", "c"], [["a", "b", "c"]])
+        label_info = LabelInfo(["a", "b", "c"], ["0", "1", "2"], [["a", "b", "c"]])
         mocker.patch("otx.algo.detection.rtdetr.RTDETR._build_model", return_value=mocker.MagicMock())
         model = RTDETR(label_info)
         model.model.load_from = None
diff --git a/tests/unit/algo/segmentation/backbones/test_dinov2.py b/tests/unit/algo/segmentation/backbones/test_dinov2.py
deleted file mode 100644
index 0e5f920d67e..00000000000
--- a/tests/unit/algo/segmentation/backbones/test_dinov2.py
+++ /dev/null
@@ -1,82 +0,0 @@
-from __future__ import annotations
-
-from pathlib import Path
-from unittest.mock import MagicMock
-
-import pytest
-import torch
-from otx.algo.segmentation.backbones import dinov2 as target_file
-from otx.algo.segmentation.backbones.dinov2 import DinoVisionTransformer
-
-
-class TestDinoVisionTransformer:
-    @pytest.fixture()
-    def mock_backbone_named_parameters(self) -> dict[str, MagicMock]:
-        named_parameter = {}
-        for i in range(3):
-            parameter = MagicMock()
-            parameter.requires_grad = True
-            named_parameter[f"layer_{i}"] = parameter
-        return named_parameter
-
-    @pytest.fixture()
-    def mock_backbone(self, mock_backbone_named_parameters) -> MagicMock:
-        backbone = MagicMock()
-        backbone.named_parameters.return_value = list(mock_backbone_named_parameters.items())
-        return backbone
-
-    @pytest.fixture(autouse=True)
-    def mock_torch_hub_load(self, mocker, mock_backbone):
-        return mocker.patch("otx.algo.segmentation.backbones.dinov2.torch.hub.load", return_value=mock_backbone)
-
-    def test_init(self, mock_backbone, mock_backbone_named_parameters):
-        dino = DinoVisionTransformer(name="dinov2_vits14", freeze_backbone=True, out_index=[8, 9, 10, 11])
-
-        assert dino.backbone == mock_backbone
-        for parameter in mock_backbone_named_parameters.values():
-            assert parameter.requires_grad is False
-
-    @pytest.fixture()
-    def dino_vit(self) -> DinoVisionTransformer:
-        return DinoVisionTransformer(
-            name="dinov2_vits14",
-            freeze_backbone=True,
-            out_index=[8, 9, 10, 11],
-        )
-
-    def test_forward(self, dino_vit, mock_backbone):
-        tensor = torch.rand(10, 3, 3, 3)
-        dino_vit.forward(tensor)
-
-        mock_backbone.assert_called_once_with(tensor)
-
-    @pytest.fixture()
-    def mock_load_from_http(self, mocker) -> MagicMock:
-        return mocker.patch.object(target_file, "load_from_http")
-
-    @pytest.fixture()
-    def mock_load_checkpoint_to_model(self, mocker) -> MagicMock:
-        return mocker.patch.object(target_file, "load_checkpoint_to_model")
-
-    @pytest.fixture()
-    def pretrained_weight(self, tmp_path) -> str:
-        weight = tmp_path / "pretrained.pth"
-        weight.touch()
-        return str(weight)
-
-    @pytest.fixture()
-    def mock_torch_load(self, mocker) -> MagicMock:
-        return mocker.patch("otx.algo.segmentation.backbones.mscan.torch.load")
-
-    def test_load_pretrained_weights(self, dino_vit, pretrained_weight, mock_torch_load, mock_load_checkpoint_to_model):
-        dino_vit.load_pretrained_weights(pretrained=pretrained_weight)
-        mock_torch_load.assert_called_once_with(pretrained_weight, "cpu")
-        mock_load_checkpoint_to_model.assert_called_once()
-
-    def test_load_pretrained_weights_from_url(self, dino_vit, mock_load_from_http, mock_load_checkpoint_to_model):
-        pretrained_weight = "www.fake.com/fake.pth"
-        dino_vit.load_pretrained_weights(pretrained=pretrained_weight)
-
-        cache_dir = Path.home() / ".cache" / "torch" / "hub" / "checkpoints"
-        mock_load_from_http.assert_called_once_with(filename=pretrained_weight, map_location="cpu", model_dir=cache_dir)
-        mock_load_checkpoint_to_model.assert_called_once()
diff --git a/tests/unit/algo/segmentation/test_dino_v2_seg.py b/tests/unit/algo/segmentation/test_dino_v2_seg.py
deleted file mode 100644
index 5353a43616a..00000000000
--- a/tests/unit/algo/segmentation/test_dino_v2_seg.py
+++ /dev/null
@@ -1,28 +0,0 @@
-# Copyright (C) 2024 Intel Corporation
-# SPDX-License-Identifier: Apache-2.0
-#
-
-import pytest
-from otx.algo.segmentation.dino_v2_seg import DinoV2Seg
-from otx.core.exporter.base import OTXModelExporter
-
-
-class TestDinoV2Seg:
-    @pytest.fixture(scope="class")
-    def fxt_dino_v2_seg(self) -> DinoV2Seg:
-        return DinoV2Seg(label_info=10, model_version="dinov2_vits14", input_size=(560, 560))
-
-    def test_dino_v2_seg_init(self, fxt_dino_v2_seg):
-        assert isinstance(fxt_dino_v2_seg, DinoV2Seg)
-        assert fxt_dino_v2_seg.num_classes == 10
-
-    def test_exporter(self, fxt_dino_v2_seg):
-        exporter = fxt_dino_v2_seg._exporter
-        assert isinstance(exporter, OTXModelExporter)
-        assert exporter.input_size == (1, 3, 560, 560)
-
-    def test_optimization_config(self, fxt_dino_v2_seg):
-        config = fxt_dino_v2_seg._optimization_config
-        assert isinstance(config, dict)
-        assert "model_type" in config
-        assert config["model_type"] == "transformer"
diff --git a/tests/unit/cli/test_cli.py b/tests/unit/cli/test_cli.py
index 3b2501066ce..07aa5d083e2 100644
--- a/tests/unit/cli/test_cli.py
+++ b/tests/unit/cli/test_cli.py
@@ -188,7 +188,7 @@ def test_print_config_scheduler_override_command(self, fxt_print_config_schedule
           scheduler:
             class_path: otx.core.schedulers.LinearWarmupSchedulerCallable
             init_args:
-              num_warmup_steps: 3
+              num_warmup_steps: 0
               monitor: val/test_f1
               warmup_interval: step
               main_scheduler_callable:
diff --git a/tests/unit/core/data/conftest.py b/tests/unit/core/data/conftest.py
index e79498a6155..2ed017cc4f0 100644
--- a/tests/unit/core/data/conftest.py
+++ b/tests/unit/core/data/conftest.py
@@ -191,6 +191,7 @@ def fxt_mock_hlabelinfo():
     return HLabelInfo(
         label_names=_LABEL_NAMES,
         label_groups=[["Non-Rigid", "Rigid"], ["Rectangle", "Triangle"], ["Circle"], ["Lion"], ["Panda"]],
+        label_ids=_LABEL_NAMES,
         num_multiclass_heads=2,
         num_multilabel_classes=3,
         head_idx_to_logits_range={"0": (0, 2), "1": (2, 4)},
diff --git a/tests/unit/core/data/test_factory.py b/tests/unit/core/data/test_factory.py
index 9a5b2a38f61..a3f2b6becaf 100644
--- a/tests/unit/core/data/test_factory.py
+++ b/tests/unit/core/data/test_factory.py
@@ -101,6 +101,7 @@ def test_create(
                 cfg_subset=cfg_subset,
                 vpm_config=vpm_config,
                 image_color_channel=image_color_channel,
+                data_format="",
             ),
             dataset_cls,
         )
diff --git a/tests/unit/core/metrics/test_accuracy.py b/tests/unit/core/metrics/test_accuracy.py
index d3c43a8a087..73486330a3c 100644
--- a/tests/unit/core/metrics/test_accuracy.py
+++ b/tests/unit/core/metrics/test_accuracy.py
@@ -52,7 +52,7 @@ def test_default_multi_class_cls_metric_callable(self, fxt_multiclass_labelinfo:
         metric = MultiClassClsMetricCallable(fxt_multiclass_labelinfo)
         assert isinstance(metric.accuracy, MulticlassAccuracy)

-        one_class_label_info = LabelInfo(label_names=["class1"], label_groups=[["class1"]])
+        one_class_label_info = LabelInfo(label_names=["class1"], label_groups=[["class1"]], label_ids=["0"])
         assert one_class_label_info.num_classes == 1
         binary_metric = MultiClassClsMetricCallable(one_class_label_info)
         assert isinstance(binary_metric.accuracy, BinaryAccuracy)
diff --git a/tests/unit/core/model/test_segmentation.py b/tests/unit/core/model/test_segmentation.py
index d364c9ab273..bae470e6f3a 100644
--- a/tests/unit/core/model/test_segmentation.py
+++ b/tests/unit/core/model/test_segmentation.py
@@ -34,6 +34,7 @@ def label_info(self):
         return SegLabelInfo(
             label_names=["Background", "label_0", "label_1"],
             label_groups=[["Background", "label_0", "label_1"]],
+            label_ids=["0", "1", "2"],
         )

     @pytest.fixture()
@@ -64,8 +65,16 @@ def test_export_parameters(self, model):
         ("label_info", "expected_label_info"),
         [
             (
-                SegLabelInfo(label_names=["label1", "label2", "label3"], label_groups=[["label1", "label2", "label3"]]),
-                SegLabelInfo(label_names=["label1", "label2", "label3"], label_groups=[["label1", "label2", "label3"]]),
+                SegLabelInfo(
+                    label_names=["label1", "label2", "label3"],
+                    label_groups=[["label1", "label2", "label3"]],
+                    label_ids=["0", "1", "2"],
+                ),
+                SegLabelInfo(
+                    label_names=["label1", "label2", "label3"],
+                    label_groups=[["label1", "label2", "label3"]],
+                    label_ids=["0", "1", "2"],
+                ),
             ),
             (SegLabelInfo.from_num_classes(num_classes=5), SegLabelInfo.from_num_classes(num_classes=5)),
         ],
diff --git a/tests/unit/core/types/test_label.py b/tests/unit/core/types/test_label.py
index 3ae1ae1f463..7c6d2359b7a 100644
--- a/tests/unit/core/types/test_label.py
+++ b/tests/unit/core/types/test_label.py
@@ -4,7 +4,7 @@

 from datumaro import LabelCategories
 from datumaro.components.annotation import GroupType
-from otx.core.types.label import HLabelInfo, NullLabelInfo, SegLabelInfo
+from otx.core.types.label import HLabelInfo, LabelInfo, NullLabelInfo, SegLabelInfo


 def test_as_json(fxt_label_info):
@@ -13,17 +13,42 @@ def test_as_json(fxt_label_info):
     assert fxt_label_info == deserialized


+def test_label_info_from_arrow():
+    labels = [
+        LabelCategories.Category(name="car", attributes={"__name__car"}),
+        LabelCategories.Category(name="truck", attributes={"__name__truck"}),
+    ]
+    label_groups = [
+        LabelCategories.LabelGroup(
+            name="Detection labels___vehicle",
+            labels=["car", "truck"],
+            group_type=GroupType.EXCLUSIVE,
+        ),
+    ]
+    dm_label_categories = LabelCategories(items=labels, label_groups=label_groups)
+
+    label_info = LabelInfo.from_dm_label_groups_arrow(dm_label_categories)
+    assert len(label_info.label_names) == 2
+    assert len(label_info.label_groups[0]) == 2
+    assert "car" in label_info.label_names
+    assert "truck" in label_info.label_names
+
+
 def test_seg_label_info():
     # Automatically insert background label at zero index
     assert SegLabelInfo.from_num_classes(3) == SegLabelInfo(
         ["label_0", "label_1", "label_2"],
+        ["0", "1", "2"],
         [["label_0", "label_1", "label_2"]],
     )
-    assert SegLabelInfo.from_num_classes(1) == SegLabelInfo(["background", "label_0"], [["background", "label_0"]])
+    assert SegLabelInfo.from_num_classes(1) == SegLabelInfo(
+        ["background", "label_0"],
+        ["0", "1"],
+        [["background", "label_0"]],
+    )
     assert SegLabelInfo.from_num_classes(0) == NullLabelInfo()


-# Unit test
 def test_hlabel_info():
     labels = [
         LabelCategories.Category(name="car", parent="vehicle"),
@@ -52,3 +77,38 @@ def test_hlabel_info():
     assert list(hlabel_info.class_to_group_idx.keys()) == list(
         hlabel_info.label_to_idx.keys(),
     ), "class_to_group_idx and label_to_idx keys do not match"
+
+
+def test_hlabel_info_arrow():
+    labels = [
+        LabelCategories.Category(name="car", parent="vehicle", attributes={"__name__car"}),
+        LabelCategories.Category(name="truck", parent="vehicle", attributes={"__name__truck"}),
+        LabelCategories.Category(name="plush_toy", parent="plush toy", attributes={"__name__plush toy"}),
+        LabelCategories.Category(name="No class", attributes={"__name__No class"}),
+    ]
+    label_groups = [
+        LabelCategories.LabelGroup(
+            name="Detection labels___vehicle",
+            labels=["car", "truck"],
+            group_type=GroupType.EXCLUSIVE,
+        ),
+        LabelCategories.LabelGroup(
+            name="Detection labels___plush toy",
+            labels=["plush toy"],
+            group_type=GroupType.EXCLUSIVE,
+        ),
+        LabelCategories.LabelGroup(name="No class", labels=["No class"], group_type=GroupType.RESTRICTED),
+    ]
+    dm_label_categories = LabelCategories(items=labels, label_groups=label_groups)
+
+    hlabel_info = HLabelInfo.from_dm_label_groups_arrow(dm_label_categories)
+
+    # Check if class_to_group_idx and label_to_idx have the same keys
+    assert list(hlabel_info.class_to_group_idx.keys()) == list(
+        hlabel_info.label_to_idx.keys(),
+    ), "class_to_group_idx and label_to_idx keys do not match"
+
+    assert len(hlabel_info.label_names) == 3
+    assert "No class" not in hlabel_info.label_names
+    for label in ["car", "truck", "plush toy"]:
+        assert label in hlabel_info.label_names
diff --git a/tests/unit/engine/utils/test_auto_configurator.py b/tests/unit/engine/utils/test_auto_configurator.py
index 9e1c273ea15..c6afcce6777 100644
--- a/tests/unit/engine/utils/test_auto_configurator.py
+++ b/tests/unit/engine/utils/test_auto_configurator.py
@@ -131,9 +131,9 @@ def test_get_model(self, fxt_task: OTXTaskType) -> None:
         # With label_info
         label_names = ["class1", "class2", "class3"]
         label_info = (
-            LabelInfo(label_names=label_names, label_groups=[label_names])
+            LabelInfo(label_names=label_names, label_groups=[label_names], label_ids=label_names)
             if fxt_task != OTXTaskType.SEMANTIC_SEGMENTATION
-            else SegLabelInfo(label_names=label_names, label_groups=[label_names])
+            else SegLabelInfo(label_names=label_names, label_groups=[label_names], label_ids=label_names)
         )
         model = auto_configurator.get_model(label_info=label_info)
         assert isinstance(model, OTXModel)
@@ -147,7 +147,7 @@ def test_get_model_set_input_size(self) -> None:
         auto_configurator = AutoConfigurator(task=OTXTaskType.MULTI_CLASS_CLS)
         label_names = ["class1", "class2", "class3"]
-        label_info = LabelInfo(label_names=label_names, label_groups=[label_names])
+        label_info = LabelInfo(label_names=label_names, label_groups=[label_names], label_ids=label_names)
         input_size = 300

         model = auto_configurator.get_model(label_info=label_info, input_size=input_size)
diff --git a/tests/unit/tools/test_converter.py b/tests/unit/tools/test_converter.py
index 711b92b6bd4..eb35d890a4a 100644
--- a/tests/unit/tools/test_converter.py
+++ b/tests/unit/tools/test_converter.py
@@ -19,7 +19,7 @@ def test_convert(self):
         assert config["data"]["train_subset"]["num_workers"] == 8
         assert config["data"]["val_subset"]["num_workers"] == 8
         assert config["data"]["test_subset"]["num_workers"] == 8
-        assert config["callbacks"][0]["init_args"]["patience"] == 10
+        assert config["callbacks"][0]["init_args"]["patience"] == 4
         assert config["data"]["tile_config"]["enable_tiler"] is True
         assert config["data"]["tile_config"]["overlap"] == 0.5

@@ -57,6 +57,6 @@ def test_instantiate(self, tmp_path):
         assert engine.datamodule.tile_config.enable_tiler

         assert len(train_kwargs["callbacks"]) == len(config["callbacks"])
-        assert train_kwargs["callbacks"][0].patience == 10
+        assert train_kwargs["callbacks"][0].patience == 4
         assert len(train_kwargs["logger"]) == len(config["logger"])
         assert train_kwargs["max_epochs"] == 50