From 9ab7286837c22b65ddff2fda5158ae9dc266cbaa Mon Sep 17 00:00:00 2001 From: luochunhua Date: Wed, 23 Feb 2022 07:47:59 +0000 Subject: [PATCH 1/8] update docs for maskformer --- mmdet/models/dense_heads/maskformer_head.py | 84 +++++++++++---------- mmdet/models/detectors/maskformer.py | 2 +- mmdet/models/plugins/pixel_decoder.py | 8 +- 3 files changed, 48 insertions(+), 46 deletions(-) diff --git a/mmdet/models/dense_heads/maskformer_head.py b/mmdet/models/dense_heads/maskformer_head.py index 3cd060e53b6..2234b25a1a0 100644 --- a/mmdet/models/dense_heads/maskformer_head.py +++ b/mmdet/models/dense_heads/maskformer_head.py @@ -28,24 +28,24 @@ class MaskFormerHead(AnchorFreeHead): num_things_classes (int): Number of things. num_stuff_classes (int): Number of stuff. num_queries (int): Number of query in Transformer. - pixel_decoder (obj:`mmcv.ConfigDict`|dict): Config for pixel decoder. + pixel_decoder (obj:`mmcv.ConfigDict` | dict): Config for pixel decoder. Defaults to None. enforce_decoder_input_project (bool, optional): Whether to add a layer to change the embed_dim of tranformer encoder in pixel decoder to the embed_dim of transformer decoder. Defaults to False. - transformer_decoder (obj:`mmcv.ConfigDict`|dict): Config for + transformer_decoder (obj:`mmcv.ConfigDict` | dict): Config for transformer decoder. Defaults to None. - positional_encoding (obj:`mmcv.ConfigDict`|dict): Config for + positional_encoding (obj:`mmcv.ConfigDict` | dict): Config for transformer decoder position encoding. Defaults to None. - loss_cls (obj:`mmcv.ConfigDict`|dict): Config of the classification + loss_cls (obj:`mmcv.ConfigDict` | dict): Config of the classification loss. Defaults to `CrossEntropyLoss`. - loss_mask (obj:`mmcv.ConfigDict`|dict): Config of the mask loss. + loss_mask (obj:`mmcv.ConfigDict` | dict): Config of the mask loss. Defaults to `FocalLoss`. - loss_dice (obj:`mmcv.ConfigDict`|dict): Config of the dice loss. + loss_dice (obj:`mmcv.ConfigDict` | dict): Config of the dice loss. Defaults to `DiceLoss`. - train_cfg (obj:`mmcv.ConfigDict`|dict): Training config of Maskformer + train_cfg (obj:`mmcv.ConfigDict` | dict): Training config of Maskformer head. - test_cfg (obj:`mmcv.ConfigDict`|dict): Testing config of Maskformer + test_cfg (obj:`mmcv.ConfigDict` | dict): Testing config of Maskformer head. init_cfg (dict or list[dict], optional): Initialization config dict. Defaults to None. @@ -178,11 +178,11 @@ def preprocess_gt(self, gt_labels_list, gt_masks_list, gt_semantic_segs): Returns: tuple: a tuple containing the following targets. - - labels (list[Tensor]): Ground truth class indices for all\ - images. Each with shape (n, ), n is the sum of number\ - of stuff type and number of instance in a image. - - masks (list[Tensor]): Ground truth mask for each image, each\ - with shape (n, h, w). + - | ``labels`` (list[Tensor]): Ground truth class indices\ + for all images. Each with shape (n, ), n is the sum of\ + number of stuff type and number of instance in a image. + - | ``masks`` (list[Tensor]): Ground truth mask for each\ + image, each with shape (n, h, w). """ num_things_list = [self.num_things_classes] * len(gt_labels_list) num_stuff_list = [self.num_stuff_classes] * len(gt_labels_list) @@ -214,18 +214,18 @@ def get_targets(self, cls_scores_list, mask_preds_list, gt_labels_list, Returns: tuple[list[Tensor]]: a tuple containing the following targets. - - labels_list (list[Tensor]): Labels of all images.\ - Each with shape (num_queries, ). 
- - label_weights_list (list[Tensor]): Label weights of all\ - images. Each with shape (num_queries, ). - - mask_targets_list (list[Tensor]): Mask targets of all\ - images. Each with shape (num_queries, h, w). - - mask_weights_list (list[Tensor]): Mask weights of all\ - images. Each with shape (num_queries, ). - - num_total_pos (int): Number of positive samples in all\ - images. - - num_total_neg (int): Number of negative samples in all\ - images. + - | ``labels_list`` (list[Tensor]): Labels of all images.\ + Each with shape (num_queries, ). + - | ``label_weights_list`` (list[Tensor]): Label weights\ + of all images. Each with shape (num_queries, ). + - | ``mask_targets_list`` (list[Tensor]): Mask targets of\ + all images. Each with shape (num_queries, h, w). + - | ``mask_weights_list`` (list[Tensor]): Mask weights of\ + all images. Each with shape (num_queries, ). + - | ``num_total_pos`` (int): Number of positive samples in\ + all images. + - | ``num_total_neg`` (int): Number of negative samples in\ + all images. """ (labels_list, label_weights_list, mask_targets_list, mask_weights_list, pos_inds_list, @@ -257,16 +257,16 @@ def _get_target_single(self, cls_score, mask_pred, gt_labels, gt_masks, Returns: tuple[Tensor]: a tuple containing the following for one image. - - labels (Tensor): Labels of each image. - shape (num_queries, ). - - label_weights (Tensor): Label weights of each image. - shape (num_queries, ). - - mask_targets (Tensor): Mask targets of each image. - shape (num_queries, h, w). - - mask_weights (Tensor): Mask weights of each image. - shape (num_queries, ). - - pos_inds (Tensor): Sampled positive indices for each image. - - neg_inds (Tensor): Sampled negative indices for each image. + - | ``labels`` (Tensor): Labels of each image. + shape (num_queries, ). + - | ``label_weights`` (Tensor): Label weights of each image. + shape (num_queries, ). + - | ``mask_targets`` (Tensor): Mask targets of each image. + shape (num_queries, h, w). + - | ``mask_weights`` (Tensor): Mask weights of each image. + shape (num_queries, ). + - | ``pos_inds`` (Tensor): Sampled positive indices for each image. + - | ``neg_inds`` (Tensor): Sampled negative indices for each image. """ target_shape = mask_pred.shape[-2:] if gt_masks.shape[0] > 0: @@ -444,11 +444,13 @@ def forward(self, feats, img_metas): img_metas (list[dict]): List of image information. Returns: - all_cls_scores (Tensor): Classification scores for each\ + tuple: a tuple contains two elements. + + - | ``all_cls_scores`` (Tensor): Classification scores for each\ scale level. Each is a 4D-tensor with shape\ (num_decoder, batch_size, num_queries, cls_out_channels).\ Note `cls_out_channels` should includes background. - all_mask_preds (Tensor): Mask scores for each decoder\ + - | ``all_mask_preds`` (Tensor): Mask scores for each decoder\ layer. Each with shape (num_decoder, batch_size,\ num_queries, h, w). """ @@ -528,7 +530,7 @@ def forward_train(self, ignored. Defaults to None. Returns: - losses (dict[str, Tensor]): a dictionary of loss components + dict[str, Tensor]: a dictionary of loss components """ # not consider ignoring bboxes assert gt_bboxes_ignore is None @@ -607,8 +609,8 @@ def simple_test(self, feats, img_metas, rescale=False): def post_process(self, mask_cls, mask_pred): """Panoptic segmengation inference. - This implementation is modified from\ - https://github.com/facebookresearch/MaskFormer + This implementation is modified from `MaskFormer + `_. Args: mask_cls (Tensor): Classfication outputs for a image. 
@@ -617,7 +619,7 @@ def post_process(self, mask_cls, mask_pred): shape = (num_queries, h, w). Returns: - panoptic_seg (Tensor): panoptic segment result of shape (h, w),\ + Tensor: panoptic segment result of shape (h, w),\ each element in Tensor means: segment_id = _cls + instance_id * INSTANCE_OFFSET. """ diff --git a/mmdet/models/detectors/maskformer.py b/mmdet/models/detectors/maskformer.py index 17c5d6c895c..73676bcdf50 100644 --- a/mmdet/models/detectors/maskformer.py +++ b/mmdet/models/detectors/maskformer.py @@ -7,7 +7,7 @@ class MaskFormer(SingleStageDetector): r"""Implementation of `Per-Pixel Classification is NOT All You Need for Semantic Segmentation - `_""" + `_.""" def __init__(self, backbone, diff --git a/mmdet/models/plugins/pixel_decoder.py b/mmdet/models/plugins/pixel_decoder.py index f69daf46f9a..76a08db0da7 100644 --- a/mmdet/models/plugins/pixel_decoder.py +++ b/mmdet/models/plugins/pixel_decoder.py @@ -96,8 +96,8 @@ def forward(self, feats, img_metas): Returns: tuple: a tuple containing the following: - - mask_feature (Tensor): Shape (batch_size, c, h, w). - - memory (Tensor): Output of last stage of backbone.\ + - | ``mask_feature`` (Tensor): Shape (batch_size, c, h, w). + - | ``memory`` (Tensor): Output of last stage of backbone.\ Shape (batch_size, c, h, w). """ y = self.last_feat_conv(feats[-1]) @@ -201,8 +201,8 @@ def forward(self, feats, img_metas): Returns: tuple: a tuple containing the following: - - mask_feature (Tensor): shape (batch_size, c, h, w). - - memory (Tensor): shape (batch_size, c, h, w). + - | ``mask_feature`` (Tensor): shape (batch_size, c, h, w). + - | ``memory`` (Tensor): shape (batch_size, c, h, w). """ feat_last = feats[-1] bs, c, h, w = feat_last.shape From fd5ff48c899a631892c9384388606aaca0df0efd Mon Sep 17 00:00:00 2001 From: luochunhua Date: Thu, 24 Feb 2022 04:20:35 +0000 Subject: [PATCH 2/8] update readme --- configs/maskformer/README.md | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/configs/maskformer/README.md b/configs/maskformer/README.md index ce1384ae77e..2cd09f9585a 100644 --- a/configs/maskformer/README.md +++ b/configs/maskformer/README.md @@ -2,19 +2,7 @@ ## Abstract -Modern approaches typically formulate semantic segmentation as a per-pixel classification -task, while instance-level segmentation is handled with an alternative mask -classification. Our key insight: mask classification is sufficiently general to solve -both semantic- and instance-level segmentation tasks in a unified manner using -the exact same model, loss, and training procedure. Following this observation, -we propose MaskFormer, a simple mask classification model which predicts a -set of binary masks, each associated with a single global class label prediction. -Overall, the proposed mask classification-based method simplifies the landscape -of effective approaches to semantic and panoptic segmentation tasks and shows -excellent empirical results. In particular, we observe that MaskFormer outperforms -per-pixel classification baselines when the number of classes is large. Our mask -classification-based method outperforms both current state-of-the-art semantic -(55.6 mIoU on ADE20K) and panoptic segmentation (52.7 PQ on COCO) models. +Modern approaches typically formulate semantic segmentation as a per-pixel classification task, while instance-level segmentation is handled with an alternative mask classification. 
Our key insight: mask classification is sufficiently general to solve both semantic- and instance-level segmentation tasks in a unified manner using the exact same model, loss, and training procedure. Following this observation, we propose MaskFormer, a simple mask classification model which predicts a set of binary masks, each associated with a single global class label prediction. Overall, the proposed mask classification-based method simplifies the landscape of effective approaches to semantic and panoptic segmentation tasks and shows excellent empirical results. In particular, we observe that MaskFormer outperforms per-pixel classification baselines when the number of classes is large. Our mask classification-based method outperforms both current state-of-the-art semantic (55.6 mIoU on ADE20K) and panoptic segmentation (52.7 PQ on COCO) models.
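The "set of binary masks, each associated with a single global class label prediction" described in this abstract reduces, for semantic segmentation, to a single tensor contraction at inference time. Below is a minimal sketch of that readout; the tensor names and shapes are illustrative assumptions, not variables from this repository:

```python
import torch
import torch.nn.functional as F

# Hypothetical outputs of a mask-classification head:
# 100 queries, 133 classes (things + stuff) plus one "no object" channel.
num_queries, num_classes, h, w = 100, 133, 64, 64
cls_logits = torch.randn(num_queries, num_classes + 1)
mask_logits = torch.randn(num_queries, h, w)

# Drop the "no object" channel, then combine per-query class probabilities
# with per-query binary-mask probabilities into per-pixel class scores.
cls_probs = F.softmax(cls_logits, dim=-1)[:, :-1]   # (num_queries, num_classes)
mask_probs = mask_logits.sigmoid()                  # (num_queries, h, w)
semseg = torch.einsum('qc,qhw->chw', cls_probs, mask_probs)
pred = semseg.argmax(dim=0)                         # (h, w) per-pixel label map
```

Panoptic inference (the `post_process` method whose docstring is updated in PATCH 1/8) instead assigns each pixel to its highest-scoring query and encodes the result as `segment_id = _cls + instance_id * INSTANCE_OFFSET`.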
@@ -55,6 +43,6 @@ mmdetection ## Results and Models -| Backbone | style | Lr schd | Mem (GB) | Inf time (fps) | PQ | SQ | RQ | PQ_th | SQ_th | RQ_th | PQ_st | SQ_st | RQ_st | Config | Download | detail | -| :------: | :-----: | :-----: | :------: | :------------: | :-: | :-: | :-: | :---: | :---: | :---: | :---: | :---: | :---: | :---------------------------------------------------------------------------------------------------------------------: | :----------------------: | :---: | -| R-50 | pytorch | 75e | | | | | | | | | | | | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/maskformer/maskformer_r50_mstrain_16x1_75e_coco.py) | | This version was mentioned in Table XI, in paper [Masked-attention Mask Transformer for Universal Image Segmentation](https://arxiv.org/abs/2112.01527) | +| Backbone | style | Lr schd | Mem (GB) | Inf time (fps) | PQ | SQ | RQ | PQ_th | SQ_th | RQ_th | PQ_st | SQ_st | RQ_st | Config | Download | detail | +| :------: | :-----: | :-----: | :------: | :------------: | :----: | :----: | :----: | :----: | :----: | :----: | :----: | :----: | :----: | :---------------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------: | +| R-50 | pytorch | 75e | 16.6 | - | 46.854 | 80.617 | 57.085 | 51.089 | 81.511 | 61.853 | 40.463 | 79.269 | 49.888 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/maskformer/maskformer_r50_mstrain_16x1_75e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/maskformer/maskformer_r50_mstrain_16x1_75e_coco/maskformer_r50_mstrain_16x1_75e_coco_20220221_141956-bc2699cb.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/maskformer/maskformer_r50_mstrain_16x1_75e_coco/maskformer_r50_mstrain_16x1_75e_coco_20220221.log.json) | This version was mentioned in Table XI, in paper [Masked-attention Mask Transformer for Universal Image Segmentation](https://arxiv.org/abs/2112.01527) | From 1d48e8e716442d27606695cf8d8bd5a15f8077d7 Mon Sep 17 00:00:00 2001 From: luochunhua Date: Thu, 24 Feb 2022 06:22:25 +0000 Subject: [PATCH 3/8] update readme format --- configs/maskformer/README.md | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/configs/maskformer/README.md b/configs/maskformer/README.md index 2cd09f9585a..46bb484ed9d 100644 --- a/configs/maskformer/README.md +++ b/configs/maskformer/README.md @@ -1,4 +1,8 @@ -# Per-Pixel Classification is Not All You Need for Semantic Segmentation +# MaskFormer + +> [Per-Pixel Classification is Not All You Need for Semantic Segmentation](https://arxiv.org/abs/2107.06278) + + ## Abstract @@ -8,18 +12,7 @@ Modern approaches typically formulate semantic segmentation as a per-pixel class
-## Citation - -``` -@inproceedings{cheng2021maskformer, - title={Per-Pixel Classification is Not All You Need for Semantic Segmentation}, - author={Bowen Cheng and Alexander G. Schwing and Alexander Kirillov}, - journal={NeurIPS}, - year={2021} -} -``` - -## Dataset +## Introduction MaskFormer requires COCO and [COCO-panoptic](http://images.cocodataset.org/annotations/panoptic_annotations_trainval2017.zip) dataset for training and evaluation. You need to download and extract it in the COCO dataset path. The directory should be like this. @@ -43,6 +36,17 @@ mmdetection ## Results and Models -| Backbone | style | Lr schd | Mem (GB) | Inf time (fps) | PQ | SQ | RQ | PQ_th | SQ_th | RQ_th | PQ_st | SQ_st | RQ_st | Config | Download | detail | -| :------: | :-----: | :-----: | :------: | :------------: | :----: | :----: | :----: | :----: | :----: | :----: | :----: | :----: | :----: | :---------------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | :-------------------------------------------------------------------------------------------------------------------------------------------------: | -| R-50 | pytorch | 75e | 16.6 | - | 46.854 | 80.617 | 57.085 | 51.089 | 81.511 | 61.853 | 40.463 | 79.269 | 49.888 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/maskformer/maskformer_r50_mstrain_16x1_75e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/maskformer/maskformer_r50_mstrain_16x1_75e_coco/maskformer_r50_mstrain_16x1_75e_coco_20220221_141956-bc2699cb.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/maskformer/maskformer_r50_mstrain_16x1_75e_coco/maskformer_r50_mstrain_16x1_75e_coco_20220221.log.json) | This version was mentioned in Table XI, in paper [Masked-attention Mask Transformer for Universal Image Segmentation](https://arxiv.org/abs/2112.01527) | +| Backbone | style | Lr schd | Mem (GB) | Inf time (fps) | PQ | SQ | RQ | PQ_th | SQ_th | RQ_th | PQ_st | SQ_st | RQ_st | Config | Download | detail | +|:--------:|:-------:|:-------:|:--------:|:--------------:|:------:|:------:|:------:|:------:|:------:|:------:|:------:|:------:|:------:|:--------------------------------------------------------------------------------------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------:| +| R-50 | pytorch | 75e | 16.6 | - | 46.854 | 80.617 | 57.085 | 51.089 | 81.511 | 61.853 | 40.463 | 79.269 | 49.888 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/maskformer/maskformer_r50_mstrain_16x1_75e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/maskformer/maskformer_r50_mstrain_16x1_75e_coco/maskformer_r50_mstrain_16x1_75e_coco_20220221_141956-bc2699cb.pth) | 
[log](https://download.openmmlab.com/mmdetection/v2.0/maskformer/maskformer_r50_mstrain_16x1_75e_coco/maskformer_r50_mstrain_16x1_75e_coco_20220221.log.json) | This version was mentioned in Table XI, in paper [Masked-attention Mask Transformer for Universal Image Segmentation](https://arxiv.org/abs/2112.01527) | + +## Citation + +```latex +@inproceedings{cheng2021maskformer, + title={Per-Pixel Classification is Not All You Need for Semantic Segmentation}, + author={Bowen Cheng and Alexander G. Schwing and Alexander Kirillov}, + journal={NeurIPS}, + year={2021} +} +``` From 907ef22ecdf3e149b8c5102391453c7ee64a1014 Mon Sep 17 00:00:00 2001 From: luochunhua Date: Thu, 24 Feb 2022 06:48:26 +0000 Subject: [PATCH 4/8] update link --- configs/maskformer/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/maskformer/README.md b/configs/maskformer/README.md index 46bb484ed9d..9ba8dca5648 100644 --- a/configs/maskformer/README.md +++ b/configs/maskformer/README.md @@ -38,7 +38,7 @@ mmdetection | Backbone | style | Lr schd | Mem (GB) | Inf time (fps) | PQ | SQ | RQ | PQ_th | SQ_th | RQ_th | PQ_st | SQ_st | RQ_st | Config | Download | detail | |:--------:|:-------:|:-------:|:--------:|:--------------:|:------:|:------:|:------:|:------:|:------:|:------:|:------:|:------:|:------:|:--------------------------------------------------------------------------------------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------:| -| R-50 | pytorch | 75e | 16.6 | - | 46.854 | 80.617 | 57.085 | 51.089 | 81.511 | 61.853 | 40.463 | 79.269 | 49.888 | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/maskformer/maskformer_r50_mstrain_16x1_75e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/maskformer/maskformer_r50_mstrain_16x1_75e_coco/maskformer_r50_mstrain_16x1_75e_coco_20220221_141956-bc2699cb.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/maskformer/maskformer_r50_mstrain_16x1_75e_coco/maskformer_r50_mstrain_16x1_75e_coco_20220221.log.json) | This version was mentioned in Table XI, in paper [Masked-attention Mask Transformer for Universal Image Segmentation](https://arxiv.org/abs/2112.01527) | +| R-50 | pytorch | 75e | 16.6 | - | 46.854 | 80.617 | 57.085 | 51.089 | 81.511 | 61.853 | 40.463 | 79.269 | 49.888 | [config](https://download.openmmlab.com/mmdetection/v2.0/maskformer/maskformer_r50_mstrain_16x1_75e_coco/maskformer_r50_mstrain_16x1_75e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/maskformer/maskformer_r50_mstrain_16x1_75e_coco/maskformer_r50_mstrain_16x1_75e_coco_20220221_141956-bc2699cb.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/maskformer/maskformer_r50_mstrain_16x1_75e_coco/maskformer_r50_mstrain_16x1_75e_coco_20220221.log.json) | This version was mentioned in Table XI, in paper [Masked-attention Mask Transformer for Universal Image Segmentation](https://arxiv.org/abs/2112.01527) | ## Citation From 02a1033b409261d3b47cd9d8d986c62382cb31be Mon Sep 17 00:00:00 2001 From: luochunhua Date: Thu, 24 Feb 2022 
07:00:31 +0000 Subject: [PATCH 5/8] update json link --- configs/maskformer/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/maskformer/README.md b/configs/maskformer/README.md index 9ba8dca5648..54110004c94 100644 --- a/configs/maskformer/README.md +++ b/configs/maskformer/README.md @@ -38,7 +38,7 @@ mmdetection | Backbone | style | Lr schd | Mem (GB) | Inf time (fps) | PQ | SQ | RQ | PQ_th | SQ_th | RQ_th | PQ_st | SQ_st | RQ_st | Config | Download | detail | |:--------:|:-------:|:-------:|:--------:|:--------------:|:------:|:------:|:------:|:------:|:------:|:------:|:------:|:------:|:------:|:--------------------------------------------------------------------------------------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------:| -| R-50 | pytorch | 75e | 16.6 | - | 46.854 | 80.617 | 57.085 | 51.089 | 81.511 | 61.853 | 40.463 | 79.269 | 49.888 | [config](https://download.openmmlab.com/mmdetection/v2.0/maskformer/maskformer_r50_mstrain_16x1_75e_coco/maskformer_r50_mstrain_16x1_75e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/maskformer/maskformer_r50_mstrain_16x1_75e_coco/maskformer_r50_mstrain_16x1_75e_coco_20220221_141956-bc2699cb.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/maskformer/maskformer_r50_mstrain_16x1_75e_coco/maskformer_r50_mstrain_16x1_75e_coco_20220221.log.json) | This version was mentioned in Table XI, in paper [Masked-attention Mask Transformer for Universal Image Segmentation](https://arxiv.org/abs/2112.01527) | +| R-50 | pytorch | 75e | 16.6 | - | 46.854 | 80.617 | 57.085 | 51.089 | 81.511 | 61.853 | 40.463 | 79.269 | 49.888 | [config](https://download.openmmlab.com/mmdetection/v2.0/maskformer/maskformer_r50_mstrain_16x1_75e_coco/maskformer_r50_mstrain_16x1_75e_coco.py) | [model](https://download.openmmlab.com/mmdetection/v2.0/maskformer/maskformer_r50_mstrain_16x1_75e_coco/maskformer_r50_mstrain_16x1_75e_coco_20220221_141956-bc2699cb.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/maskformer/maskformer_r50_mstrain_16x1_75e_coco/maskformer_r50_mstrain_16x1_75e_coco_20220221_141956.log.json) | This version was mentioned in Table XI, in paper [Masked-attention Mask Transformer for Universal Image Segmentation](https://arxiv.org/abs/2112.01527) | ## Citation From ba1edb4d3e1803c4b267edd9561dc2466eab1480 Mon Sep 17 00:00:00 2001 From: luochunhua Date: Thu, 24 Feb 2022 07:18:42 +0000 Subject: [PATCH 6/8] update format of ConfigDict --- .../bbox/assigners/mask_hungarian_assigner.py | 6 +++--- mmdet/models/dense_heads/maskformer_head.py | 20 +++++++++---------- mmdet/models/plugins/pixel_decoder.py | 20 +++++++++---------- 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/mmdet/core/bbox/assigners/mask_hungarian_assigner.py b/mmdet/core/bbox/assigners/mask_hungarian_assigner.py index ef0f35831d6..d10e62edb84 100644 --- a/mmdet/core/bbox/assigners/mask_hungarian_assigner.py +++ b/mmdet/core/bbox/assigners/mask_hungarian_assigner.py @@ -29,9 +29,9 @@ class MaskHungarianAssigner(BaseAssigner): - positive 
integer: positive sample, index (1-based) of assigned gt Args: - cls_cost (obj:`mmcv.ConfigDict` | dict): Classification cost config. - mask_cost (obj:`mmcv.ConfigDict` | dict): Mask cost config. - dice_cost (obj:`mmcv.ConfigDict` | dict): Dice cost config. + cls_cost (:obj:`mmcv.ConfigDict` | dict): Classification cost config. + mask_cost (:obj:`mmcv.ConfigDict` | dict): Mask cost config. + dice_cost (:obj:`mmcv.ConfigDict` | dict): Dice cost config. """ def __init__(self, diff --git a/mmdet/models/dense_heads/maskformer_head.py b/mmdet/models/dense_heads/maskformer_head.py index 2234b25a1a0..347a555774c 100644 --- a/mmdet/models/dense_heads/maskformer_head.py +++ b/mmdet/models/dense_heads/maskformer_head.py @@ -28,24 +28,24 @@ class MaskFormerHead(AnchorFreeHead): num_things_classes (int): Number of things. num_stuff_classes (int): Number of stuff. num_queries (int): Number of query in Transformer. - pixel_decoder (obj:`mmcv.ConfigDict` | dict): Config for pixel decoder. - Defaults to None. + pixel_decoder (:obj:`mmcv.ConfigDict` | dict): Config for pixel + decoder. Defaults to None. enforce_decoder_input_project (bool, optional): Whether to add a layer to change the embed_dim of tranformer encoder in pixel decoder to the embed_dim of transformer decoder. Defaults to False. - transformer_decoder (obj:`mmcv.ConfigDict` | dict): Config for + transformer_decoder (:obj:`mmcv.ConfigDict` | dict): Config for transformer decoder. Defaults to None. - positional_encoding (obj:`mmcv.ConfigDict` | dict): Config for + positional_encoding (:obj:`mmcv.ConfigDict` | dict): Config for transformer decoder position encoding. Defaults to None. - loss_cls (obj:`mmcv.ConfigDict` | dict): Config of the classification + loss_cls (:obj:`mmcv.ConfigDict` | dict): Config of the classification loss. Defaults to `CrossEntropyLoss`. - loss_mask (obj:`mmcv.ConfigDict` | dict): Config of the mask loss. + loss_mask (:obj:`mmcv.ConfigDict` | dict): Config of the mask loss. Defaults to `FocalLoss`. - loss_dice (obj:`mmcv.ConfigDict` | dict): Config of the dice loss. + loss_dice (:obj:`mmcv.ConfigDict` | dict): Config of the dice loss. Defaults to `DiceLoss`. - train_cfg (obj:`mmcv.ConfigDict` | dict): Training config of Maskformer - head. - test_cfg (obj:`mmcv.ConfigDict` | dict): Testing config of Maskformer + train_cfg (:obj:`mmcv.ConfigDict` | dict): Training config of + Maskformer head. + test_cfg (:obj:`mmcv.ConfigDict` | dict): Testing config of Maskformer head. init_cfg (dict or list[dict], optional): Initialization config dict. Defaults to None. diff --git a/mmdet/models/plugins/pixel_decoder.py b/mmdet/models/plugins/pixel_decoder.py index 76a08db0da7..4b1ff29fcc1 100644 --- a/mmdet/models/plugins/pixel_decoder.py +++ b/mmdet/models/plugins/pixel_decoder.py @@ -17,17 +17,17 @@ class PixelDecoder(BaseModule): input feature maps. feat_channels (int): Number channels for feature. out_channels (int): Number channels for output. - norm_cfg (obj:`mmcv.ConfigDict`|dict): Config for normalization. + norm_cfg (:obj:`mmcv.ConfigDict` | dict): Config for normalization. Defaults to dict(type='GN', num_groups=32). - act_cfg (obj:`mmcv.ConfigDict`|dict): Config for activation. + act_cfg (:obj:`mmcv.ConfigDict` | dict): Config for activation. Defaults to dict(type='ReLU'). - encoder (obj:`mmcv.ConfigDict`|dict): Config for transorformer + encoder (:obj:`mmcv.ConfigDict` | dict): Config for transorformer encoder.Defaults to None. 
- positional_encoding (obj:`mmcv.ConfigDict`|dict): Config for + positional_encoding (:obj:`mmcv.ConfigDict` | dict): Config for transformer encoder position encoding. Defaults to dict(type='SinePositionalEncoding', num_feats=128, normalize=True). - init_cfg (obj:`mmcv.ConfigDict`|dict): Initialization config dict. + init_cfg (:obj:`mmcv.ConfigDict` | dict): Initialization config dict. Default: None """ @@ -122,17 +122,17 @@ class TransformerEncoderPixelDecoder(PixelDecoder): input feature maps. feat_channels (int): Number channels for feature. out_channels (int): Number channels for output. - norm_cfg (obj:`mmcv.ConfigDict`|dict): Config for normalization. + norm_cfg (:obj:`mmcv.ConfigDict` | dict): Config for normalization. Defaults to dict(type='GN', num_groups=32). - act_cfg (obj:`mmcv.ConfigDict`|dict): Config for activation. + act_cfg (:obj:`mmcv.ConfigDict` | dict): Config for activation. Defaults to dict(type='ReLU'). - encoder (obj:`mmcv.ConfigDict`|dict): Config for transorformer + encoder (:obj:`mmcv.ConfigDict` | dict): Config for transorformer encoder.Defaults to None. - positional_encoding (obj:`mmcv.ConfigDict`|dict): Config for + positional_encoding (:obj:`mmcv.ConfigDict` | dict): Config for transformer encoder position encoding. Defaults to dict(type='SinePositionalEncoding', num_feats=128, normalize=True). - init_cfg (obj:`mmcv.ConfigDict`|dict): Initialization config dict. + init_cfg (:obj:`mmcv.ConfigDict` | dict): Initialization config dict. Default: None """ From 28cbca4e38a261b5194ed5b2f294d633871a91bb Mon Sep 17 00:00:00 2001 From: luochunhua Date: Thu, 24 Feb 2022 07:34:06 +0000 Subject: [PATCH 7/8] update format of function returns --- mmdet/models/dense_heads/maskformer_head.py | 72 ++++++++++----------- mmdet/models/plugins/pixel_decoder.py | 12 ++-- 2 files changed, 39 insertions(+), 45 deletions(-) diff --git a/mmdet/models/dense_heads/maskformer_head.py b/mmdet/models/dense_heads/maskformer_head.py index 347a555774c..7d7a644c7e1 100644 --- a/mmdet/models/dense_heads/maskformer_head.py +++ b/mmdet/models/dense_heads/maskformer_head.py @@ -177,12 +177,11 @@ def preprocess_gt(self, gt_labels_list, gt_masks_list, gt_semantic_segs): Returns: tuple: a tuple containing the following targets. - - - | ``labels`` (list[Tensor]): Ground truth class indices\ - for all images. Each with shape (n, ), n is the sum of\ - number of stuff type and number of instance in a image. - - | ``masks`` (list[Tensor]): Ground truth mask for each\ - image, each with shape (n, h, w). + - labels (list[Tensor]): Ground truth class indices\ + for all images. Each with shape (n, ), n is the sum of\ + number of stuff type and number of instance in a image. + - masks (list[Tensor]): Ground truth mask for each\ + image, each with shape (n, h, w). """ num_things_list = [self.num_things_classes] * len(gt_labels_list) num_stuff_list = [self.num_stuff_classes] * len(gt_labels_list) @@ -213,19 +212,18 @@ def get_targets(self, cls_scores_list, mask_preds_list, gt_labels_list, Returns: tuple[list[Tensor]]: a tuple containing the following targets. - - - | ``labels_list`` (list[Tensor]): Labels of all images.\ - Each with shape (num_queries, ). - - | ``label_weights_list`` (list[Tensor]): Label weights\ - of all images. Each with shape (num_queries, ). - - | ``mask_targets_list`` (list[Tensor]): Mask targets of\ - all images. Each with shape (num_queries, h, w). - - | ``mask_weights_list`` (list[Tensor]): Mask weights of\ - all images. Each with shape (num_queries, ). 
- - | ``num_total_pos`` (int): Number of positive samples in\ - all images. - - | ``num_total_neg`` (int): Number of negative samples in\ - all images. + - labels_list (list[Tensor]): Labels of all images.\ + Each with shape (num_queries, ). + - label_weights_list (list[Tensor]): Label weights\ + of all images. Each with shape (num_queries, ). + - mask_targets_list (list[Tensor]): Mask targets of\ + all images. Each with shape (num_queries, h, w). + - mask_weights_list (list[Tensor]): Mask weights of\ + all images. Each with shape (num_queries, ). + - num_total_pos (int): Number of positive samples in\ + all images. + - num_total_neg (int): Number of negative samples in\ + all images. """ (labels_list, label_weights_list, mask_targets_list, mask_weights_list, pos_inds_list, @@ -256,17 +254,16 @@ def _get_target_single(self, cls_score, mask_pred, gt_labels, gt_masks, Returns: tuple[Tensor]: a tuple containing the following for one image. - - - | ``labels`` (Tensor): Labels of each image. - shape (num_queries, ). - - | ``label_weights`` (Tensor): Label weights of each image. - shape (num_queries, ). - - | ``mask_targets`` (Tensor): Mask targets of each image. - shape (num_queries, h, w). - - | ``mask_weights`` (Tensor): Mask weights of each image. - shape (num_queries, ). - - | ``pos_inds`` (Tensor): Sampled positive indices for each image. - - | ``neg_inds`` (Tensor): Sampled negative indices for each image. + - labels (Tensor): Labels of each image. + shape (num_queries, ). + - label_weights (Tensor): Label weights of each image. + shape (num_queries, ). + - mask_targets (Tensor): Mask targets of each image. + shape (num_queries, h, w). + - mask_weights (Tensor): Mask weights of each image. + shape (num_queries, ). + - pos_inds (Tensor): Sampled positive indices for each image. + - neg_inds (Tensor): Sampled negative indices for each image. """ target_shape = mask_pred.shape[-2:] if gt_masks.shape[0] > 0: @@ -445,14 +442,13 @@ def forward(self, feats, img_metas): Returns: tuple: a tuple contains two elements. - - - | ``all_cls_scores`` (Tensor): Classification scores for each\ - scale level. Each is a 4D-tensor with shape\ - (num_decoder, batch_size, num_queries, cls_out_channels).\ - Note `cls_out_channels` should includes background. - - | ``all_mask_preds`` (Tensor): Mask scores for each decoder\ - layer. Each with shape (num_decoder, batch_size,\ - num_queries, h, w). + - all_cls_scores (Tensor): Classification scores for each\ + scale level. Each is a 4D-tensor with shape\ + (num_decoder, batch_size, num_queries, cls_out_channels).\ + Note `cls_out_channels` should includes background. + - all_mask_preds (Tensor): Mask scores for each decoder\ + layer. Each with shape (num_decoder, batch_size,\ + num_queries, h, w). """ batch_size = len(img_metas) input_img_h, input_img_w = img_metas[0]['batch_input_shape'] diff --git a/mmdet/models/plugins/pixel_decoder.py b/mmdet/models/plugins/pixel_decoder.py index 4b1ff29fcc1..d1193551ddd 100644 --- a/mmdet/models/plugins/pixel_decoder.py +++ b/mmdet/models/plugins/pixel_decoder.py @@ -95,10 +95,9 @@ def forward(self, feats, img_metas): Returns: tuple: a tuple containing the following: - - - | ``mask_feature`` (Tensor): Shape (batch_size, c, h, w). - - | ``memory`` (Tensor): Output of last stage of backbone.\ - Shape (batch_size, c, h, w). + - mask_feature (Tensor): Shape (batch_size, c, h, w). + - memory (Tensor): Output of last stage of backbone.\ + Shape (batch_size, c, h, w). 
""" y = self.last_feat_conv(feats[-1]) for i in range(self.num_inputs - 2, -1, -1): @@ -200,9 +199,8 @@ def forward(self, feats, img_metas): Returns: tuple: a tuple containing the following: - - - | ``mask_feature`` (Tensor): shape (batch_size, c, h, w). - - | ``memory`` (Tensor): shape (batch_size, c, h, w). + - mask_feature (Tensor): shape (batch_size, c, h, w). + - memory (Tensor): shape (batch_size, c, h, w). """ feat_last = feats[-1] bs, c, h, w = feat_last.shape From f0a6eaad4ced75afd609f12b120eacbff7adf0e3 Mon Sep 17 00:00:00 2001 From: luochunhua Date: Thu, 24 Feb 2022 07:40:29 +0000 Subject: [PATCH 8/8] uncomment main in deployment/test.py --- tools/deployment/onnx2tensorrt.py | 12 ++++++++++++ tools/deployment/pytorch2onnx.py | 12 ++++++++++++ tools/deployment/test.py | 13 +++++++++++++ 3 files changed, 37 insertions(+) diff --git a/tools/deployment/onnx2tensorrt.py b/tools/deployment/onnx2tensorrt.py index e3e9b57d2b4..b59e52ae199 100644 --- a/tools/deployment/onnx2tensorrt.py +++ b/tools/deployment/onnx2tensorrt.py @@ -252,3 +252,15 @@ def parse_shape(shape): show=args.show, workspace_size=args.workspace_size, verbose=args.verbose) + + # Following strings of text style are from colorama package + bright_style, reset_style = '\x1b[1m', '\x1b[0m' + red_text, blue_text = '\x1b[31m', '\x1b[34m' + white_background = '\x1b[107m' + + msg = white_background + bright_style + red_text + msg += 'DeprecationWarning: This tool will be deprecated in future. ' + msg += blue_text + 'Welcome to use the unified model deployment toolbox ' + msg += 'MMDeploy: https://github.com/open-mmlab/mmdeploy' + msg += reset_style + warnings.warn(msg) diff --git a/tools/deployment/pytorch2onnx.py b/tools/deployment/pytorch2onnx.py index c1789b442a7..5c786f8540e 100644 --- a/tools/deployment/pytorch2onnx.py +++ b/tools/deployment/pytorch2onnx.py @@ -343,3 +343,15 @@ def parse_args(): do_simplify=args.simplify, dynamic_export=args.dynamic_export, skip_postprocess=args.skip_postprocess) + + # Following strings of text style are from colorama package + bright_style, reset_style = '\x1b[1m', '\x1b[0m' + red_text, blue_text = '\x1b[31m', '\x1b[34m' + white_background = '\x1b[107m' + + msg = white_background + bright_style + red_text + msg += 'DeprecationWarning: This tool will be deprecated in future. ' + msg += blue_text + 'Welcome to use the unified model deployment toolbox ' + msg += 'MMDeploy: https://github.com/open-mmlab/mmdeploy' + msg += reset_style + warnings.warn(msg) diff --git a/tools/deployment/test.py b/tools/deployment/test.py index b32b77332e5..2daf8866e58 100644 --- a/tools/deployment/test.py +++ b/tools/deployment/test.py @@ -1,5 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. import argparse +import warnings import mmcv from mmcv import Config, DictAction @@ -141,3 +142,15 @@ def main(): if __name__ == '__main__': main() + + # Following strings of text style are from colorama package + bright_style, reset_style = '\x1b[1m', '\x1b[0m' + red_text, blue_text = '\x1b[31m', '\x1b[34m' + white_background = '\x1b[107m' + + msg = white_background + bright_style + red_text + msg += 'DeprecationWarning: This tool will be deprecated in future. ' + msg += blue_text + 'Welcome to use the unified model deployment toolbox ' + msg += 'MMDeploy: https://github.com/open-mmlab/mmdeploy' + msg += reset_style + warnings.warn(msg)