PaddlePaddle · nemonameless · Feb 3, 2023 · Jan 31, 2023 · Feb 2, 2023 · Feb 2, 2023
diff --git a/configs/ppyoloe/distill/README.md b/configs/ppyoloe/distill/README.md
@@ -0,0 +1,40 @@
+# PPYOLOE+ Distillation(PPYOLOE+ 蒸馏)
+
+PaddleDetection提供了对PPYOLOE+ 进行模型蒸馏的方案，结合了logits蒸馏和feature蒸馏。
+
+
+## 模型库
+
+
+
+## 快速开始
+
+### 训练
+```shell
+# 单卡
+python tools/train.py -c configs/ppyoloe/distill/ppyoloe_plus_crn_l_80e_coco_distill.yml --slim_config configs/slim/distill/ppyoloe_plus_distill_x_to_l.yml
+# 多卡
+python3.7 -m paddle.distributed.launch --log_dir=ppyoloe_plus_distill_x_to_l/ --gpus 0,1,2,3,4,5,6,7 tools/train.py -c configs/ppyoloe/distill/ppyoloe_plus_crn_l_80e_coco_distill.yml --slim_config configs/slim/distill/ppyoloe_plus_distill_x_to_l.yml
+```
+
+- `-c`: 指定模型配置文件，也是student配置文件。
+- `--slim_config`: 指定压缩策略配置文件，也是teacher配置文件。
+
+### 评估
+```shell
+python tools/eval.py -c configs/ppyoloe/distill/ppyoloe_plus_crn_l_80e_coco_distill.yml -o weights=output/ppyoloe_plus_crn_l_80e_coco_distill/model_final.pdparams
+```
+
+- `-c`: 指定模型配置文件，也是student配置文件。
+- `--slim_config`: 指定压缩策略配置文件，也是teacher配置文件（上面的评估命令未使用该参数）。
+- `-o weights`: 指定压缩算法训好的模型路径。
+
+### 测试
+```shell
+python tools/infer.py -c configs/ppyoloe/distill/ppyoloe_plus_crn_l_80e_coco_distill.yml -o weights=output/ppyoloe_plus_crn_l_80e_coco_distill/model_final.pdparams --infer_img=demo/000000014439_640x640.jpg
+```
+
+- `-c`: 指定模型配置文件。
+- `--slim_config`: 指定压缩策略配置文件（上面的测试命令未使用该参数）。
+- `-o weights`: 指定压缩算法训好的模型路径。
+- `--infer_img`: 指定测试图像路径。
diff --git a/configs/ppyoloe/distill/ppyoloe_plus_crn_l_80e_coco_distill.yml b/configs/ppyoloe/distill/ppyoloe_plus_crn_l_80e_coco_distill.yml
@@ -0,0 +1,19 @@
+_BASE_: [
+  '../ppyoloe_plus_crn_l_80e_coco.yml',
+]
+for_distill: True
+architecture: PPYOLOE
+PPYOLOE:
+  backbone: CSPResNet
+  neck: CustomCSPPAN
+  yolo_head: PPYOLOEHead
+  post_process: ~
+
+
+log_iter: 100
+snapshot_epoch: 5
+weights: output/ppyoloe_plus_crn_l_80e_coco/model_final
+
+pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/ppyoloe_crn_l_obj365_pretrained.pdparams
+depth_mult: 1.0
+width_mult: 1.0
diff --git a/configs/ppyoloe/distill/ppyoloe_plus_crn_m_80e_coco_distill.yml b/configs/ppyoloe/distill/ppyoloe_plus_crn_m_80e_coco_distill.yml
@@ -0,0 +1,19 @@
+_BASE_: [
+  '../ppyoloe_plus_crn_m_80e_coco.yml',
+]
+for_distill: True
+architecture: PPYOLOE
+PPYOLOE:
+  backbone: CSPResNet
+  neck: CustomCSPPAN
+  yolo_head: PPYOLOEHead
+  post_process: ~
+
+
+log_iter: 100
+snapshot_epoch: 5
+weights: output/ppyoloe_plus_crn_m_80e_coco/model_final
+
+pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/ppyoloe_crn_m_obj365_pretrained.pdparams
+depth_mult: 0.67
+width_mult: 0.75
diff --git a/configs/ppyoloe/distill/ppyoloe_plus_crn_s_80e_coco_distill.yml b/configs/ppyoloe/distill/ppyoloe_plus_crn_s_80e_coco_distill.yml
@@ -0,0 +1,19 @@
+_BASE_: [
+  '../ppyoloe_plus_crn_s_80e_coco.yml',
+]
+for_distill: True
+architecture: PPYOLOE
+PPYOLOE:
+  backbone: CSPResNet
+  neck: CustomCSPPAN
+  yolo_head: PPYOLOEHead
+  post_process: ~
+
+
+log_iter: 100
+snapshot_epoch: 5
+weights: output/ppyoloe_plus_crn_s_80e_coco/model_final
+
+pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/ppyoloe_crn_s_obj365_pretrained.pdparams
+depth_mult: 0.33
+width_mult: 0.50
diff --git a/configs/slim/distill/README.md b/configs/slim/distill/README.md
@@ -38,6 +38,42 @@ CWD全称为[Channel-wise Knowledge Distillation for Dense Prediction*](https://
 |gfl_r50_fpn_1x| student | 41.0 |[download](https://paddledet.bj.bcebos.com/models/gfl_r50_fpn_1x_coco.pdparams) |
 |gfl_r50_fpn_2x + CWD| student | 44.0 |[download](https://paddledet.bj.bcebos.com/models/gfl_r50_fpn_2x_coco_cwd.pdparams) |
 
+## PPYOLOE+模型蒸馏
+
+
+
+## 快速开始
+
+### 训练
+```shell
+# 单卡
+python tools/train.py -c configs/ppyoloe/distill/ppyoloe_plus_crn_l_80e_coco_distill.yml --slim_config configs/slim/distill/ppyoloe_plus_distill_x_to_l.yml
+# 多卡
+python3.7 -m paddle.distributed.launch --log_dir=ppyoloe_plus_distill_x_to_l/ --gpus 0,1,2,3,4,5,6,7 tools/train.py -c configs/ppyoloe/distill/ppyoloe_plus_crn_l_80e_coco_distill.yml --slim_config configs/slim/distill/ppyoloe_plus_distill_x_to_l.yml
+```
+
+- `-c`: 指定模型配置文件，也是student配置文件。
+- `--slim_config`: 指定压缩策略配置文件，也是teacher配置文件。
+
+### 评估
+```shell
+python tools/eval.py -c configs/ppyoloe/distill/ppyoloe_plus_crn_l_80e_coco_distill.yml -o weights=output/ppyoloe_plus_crn_l_80e_coco_distill/model_final.pdparams
+```
+
+- `-c`: 指定模型配置文件，也是student配置文件。
+- `--slim_config`: 指定压缩策略配置文件，也是teacher配置文件（上面的评估命令未使用该参数）。
+- `-o weights`: 指定压缩算法训好的模型路径。
+
+### 测试
+```shell
+python tools/infer.py -c configs/ppyoloe/distill/ppyoloe_plus_crn_l_80e_coco_distill.yml -o weights=output/ppyoloe_plus_crn_l_80e_coco_distill/model_final.pdparams --infer_img=demo/000000014439_640x640.jpg
+```
+
+- `-c`: 指定模型配置文件。
+- `--slim_config`: 指定压缩策略配置文件（上面的测试命令未使用该参数）。
+- `-o weights`: 指定压缩算法训好的模型路径。
+- `--infer_img`: 指定测试图像路径。
+
 
 ## Citations
 ```

diff --git a/configs/slim/distill/gfl_r101vd_fpn_coco_distill_cwd.yml b/configs/slim/distill/gfl_r101vd_fpn_coco_distill_cwd.yml
@@ -6,10 +6,10 @@ pretrain_weights: https://paddledet.bj.bcebos.com/models/gfl_r101vd_fpn_mstrain_
 
 slim: Distill
 slim_method: CWD
-distill_loss: ChannelWiseDivergence
+distill_loss: CWDFeatureLoss
 distill_loss_name: ['cls_f_4', 'cls_f_3', 'cls_f_2', 'cls_f_1', 'cls_f_0']
 
-ChannelWiseDivergence:
+CWDFeatureLoss:
   student_channels: 80
   teacher_channels: 80
   tau: 1.0

diff --git a/configs/slim/distill/ppyoloe_plus_distill_l_to_m.yml b/configs/slim/distill/ppyoloe_plus_distill_l_to_m.yml
@@ -0,0 +1,34 @@
+# teacher and slim config
+_BASE_: [
+  '../../ppyoloe/ppyoloe_plus_crn_l_80e_coco.yml',
+]
+depth_mult: 1.0
+width_mult: 1.0
+
+architecture: PPYOLOE
+PPYOLOE:
+  backbone: CSPResNet
+  neck: CustomCSPPAN
+  yolo_head: PPYOLOEHead
+  post_process: ~
+  for_distill: True
+
+pretrain_weights: https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_l_80e_coco.pdparams
+find_unused_parameters: True
+for_distill: True
+
+
+slim: Distill
+slim_method: PPYOLOEDistill
+distill_loss: DistillPPYOLOELoss
+
+DistillPPYOLOELoss: # L -> M
+  loss_weight: {'logits': 4.0, 'feat': 1.0}
+  logits_distill: True
+  logits_loss_weight: {'class': 1.0, 'iou': 2.5, 'dfl': 0.5}
+  feat_distill: True
+  feat_distiller: 'fgd' # ['cwd', 'fgd', 'pkd', 'mgd', 'mimic']
+  feat_distill_place: 'neck_feats'
+  teacher_width_mult: 1.0  # L
+  student_width_mult: 0.75  # M
+  feat_out_channels: [768, 384, 192]  # The actual channel counts are these values multiplied by width_mult
diff --git a/configs/slim/distill/ppyoloe_plus_distill_m_to_s.yml b/configs/slim/distill/ppyoloe_plus_distill_m_to_s.yml
@@ -0,0 +1,34 @@
+# teacher and slim config
+_BASE_: [
+  '../../ppyoloe/ppyoloe_plus_crn_m_80e_coco.yml',
+]
+depth_mult: 0.67
+width_mult: 0.75
+
+architecture: PPYOLOE
+PPYOLOE:
+  backbone: CSPResNet
+  neck: CustomCSPPAN
+  yolo_head: PPYOLOEHead
+  post_process: ~
+  for_distill: True
+
+pretrain_weights: https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_m_80e_coco.pdparams
+find_unused_parameters: True
+for_distill: True
+
+
+slim: Distill
+slim_method: PPYOLOEDistill
+distill_loss: DistillPPYOLOELoss
+
+DistillPPYOLOELoss: # M -> S
+  loss_weight: {'logits': 4.0, 'feat': 1.0}
+  logits_distill: True
+  logits_loss_weight: {'class': 1.0, 'iou': 2.5, 'dfl': 0.5}
+  feat_distill: True
+  feat_distiller: 'fgd' # ['cwd', 'fgd', 'pkd', 'mgd', 'mimic']
+  feat_distill_place: 'neck_feats'
+  teacher_width_mult: 0.75  # M
+  student_width_mult: 0.5  # S
+  feat_out_channels: [768, 384, 192]  # The actual channel counts are these values multiplied by width_mult
diff --git a/configs/slim/distill/ppyoloe_plus_distill_x_to_l.yml b/configs/slim/distill/ppyoloe_plus_distill_x_to_l.yml
@@ -0,0 +1,34 @@
+# teacher and slim config
+_BASE_: [
+  '../../ppyoloe/ppyoloe_plus_crn_x_80e_coco.yml',
+]
+depth_mult: 1.33
+width_mult: 1.25
+
+architecture: PPYOLOE
+PPYOLOE:
+  backbone: CSPResNet
+  neck: CustomCSPPAN
+  yolo_head: PPYOLOEHead
+  post_process: ~
+  for_distill: True
+
+pretrain_weights: https://paddledet.bj.bcebos.com/models/ppyoloe_plus_crn_x_80e_coco.pdparams
+find_unused_parameters: True
+for_distill: True
+
+
+slim: Distill
+slim_method: PPYOLOEDistill
+distill_loss: DistillPPYOLOELoss
+
+DistillPPYOLOELoss: # X -> L
+  loss_weight: {'logits': 4.0, 'feat': 1.0}
+  logits_distill: True
+  logits_loss_weight: {'class': 1.0, 'iou': 2.5, 'dfl': 0.5}
+  feat_distill: True
+  feat_distiller: 'fgd' # ['cwd', 'fgd', 'pkd', 'mgd', 'mimic']
+  feat_distill_place: 'neck_feats'
+  teacher_width_mult: 1.25 # X
+  student_width_mult: 1.0 # L
+  feat_out_channels: [768, 384, 192]  # The actual channel counts are these values multiplied by width_mult
diff --git a/configs/slim/distill/retinanet_resnet101_coco_distill_cwd.yml b/configs/slim/distill/retinanet_resnet101_coco_distill_cwd.yml
@@ -7,12 +7,11 @@ pretrain_weights: https://paddledet.bj.bcebos.com/models/retinanet_r101_fpn_2x_c
 
 slim: Distill
 slim_method: CWD
-distill_loss: ChannelWiseDivergence
+distill_loss: CWDFeatureLoss
 distill_loss_name: ['cls_f_4', 'cls_f_3', 'cls_f_2', 'cls_f_1', 'cls_f_0']
 
-ChannelWiseDivergence:
+CWDFeatureLoss:
   student_channels: 80
   teacher_channels: 80
-  name: cwdloss
   tau: 1.0
   weight: 5.0
diff --git a/ppdet/modeling/architectures/ppyoloe.py b/ppdet/modeling/architectures/ppyoloe.py
@@ -22,20 +22,23 @@
 from .meta_arch import BaseArch
 
 __all__ = ['PPYOLOE', 'PPYOLOEWithAuxHead']
-# PP-YOLOE and PP-YOLOE+ are recommended to use this architecture
-# PP-YOLOE and PP-YOLOE+ can also use the same architecture of YOLOv3 in yolo.py
+# PP-YOLOE and PP-YOLOE+ are recommended to use this architecture, especially when using distillation or an aux head.
+# PP-YOLOE and PP-YOLOE+ can also use the YOLOv3 architecture in yolo.py when neither distillation nor an aux head is used.
 
 
 @register
 class PPYOLOE(BaseArch):
     __category__ = 'architecture'
+    __shared__ = ['for_distill']
     __inject__ = ['post_process']
 
     def __init__(self,
                  backbone='CSPResNet',
                  neck='CustomCSPPAN',
                  yolo_head='PPYOLOEHead',
                  post_process='BBoxPostProcess',
+                 for_distill=False,
+                 feat_distill_place='neck_feats',
                  for_mot=False):
         """
         PPYOLOE network, see https://arxiv.org/abs/2203.16250
@@ -54,6 +57,10 @@ def __init__(self,
         self.yolo_head = yolo_head
         self.post_process = post_process
         self.for_mot = for_mot
+        self.for_distill = for_distill
+        self.feat_distill_place = feat_distill_place
+        if for_distill:
+            assert feat_distill_place in ['backbone_feats', 'neck_feats']
 
     @classmethod
     def from_config(cls, cfg, *args, **kwargs):
@@ -80,17 +87,31 @@ def _forward(self):
 
         if self.training:
             yolo_losses = self.yolo_head(neck_feats, self.inputs)
+
+            if self.for_distill:
+                if self.feat_distill_place == 'backbone_feats':
+                    self.yolo_head.distill_pairs['backbone_feats'] = body_feats
+                elif self.feat_distill_place == 'neck_feats':
+                    self.yolo_head.distill_pairs['neck_feats'] = neck_feats
+                else:
+                    raise ValueError
             return yolo_losses
         else:
+            cam_data = {}  # record bbox scores and index before nms
             yolo_head_outs = self.yolo_head(neck_feats)
+            cam_data['scores'] = yolo_head_outs[0]
+
             if self.post_process is not None:
-                bbox, bbox_num = self.post_process(
+                bbox, bbox_num, before_nms_indexes = self.post_process(
                     yolo_head_outs, self.yolo_head.mask_anchors,
                     self.inputs['im_shape'], self.inputs['scale_factor'])
+                cam_data['before_nms_indexes'] = before_nms_indexes
             else:
-                bbox, bbox_num = self.yolo_head.post_process(
+                bbox, bbox_num, before_nms_indexes = self.yolo_head.post_process(
                     yolo_head_outs, self.inputs['scale_factor'])
-            output = {'bbox': bbox, 'bbox_num': bbox_num}
+                # data for cam
+                cam_data['before_nms_indexes'] = before_nms_indexes
+            output = {'bbox': bbox, 'bbox_num': bbox_num, 'cam_data': cam_data}
 
             return output
 
@@ -180,15 +201,21 @@ def _forward(self):
                 aux_pred=[aux_cls_scores, aux_bbox_preds])
             return loss
         else:
+            cam_data = {}  # record bbox scores and index before nms
             yolo_head_outs = self.yolo_head(neck_feats)
+            cam_data['scores'] = yolo_head_outs[0]
+
             if self.post_process is not None:
-                bbox, bbox_num = self.post_process(
+                bbox, bbox_num, before_nms_indexes = self.post_process(
                     yolo_head_outs, self.yolo_head.mask_anchors,
                     self.inputs['im_shape'], self.inputs['scale_factor'])
+                cam_data['before_nms_indexes'] = before_nms_indexes
             else:
-                bbox, bbox_num = self.yolo_head.post_process(
+                bbox, bbox_num, before_nms_indexes = self.yolo_head.post_process(
                     yolo_head_outs, self.inputs['scale_factor'])
-            output = {'bbox': bbox, 'bbox_num': bbox_num}
+                # data for cam
+                cam_data['before_nms_indexes'] = before_nms_indexes
+            output = {'bbox': bbox, 'bbox_num': bbox_num, 'cam_data': cam_data}
 
             return output
 

diff --git a/ppdet/modeling/architectures/yolo.py b/ppdet/modeling/architectures/yolo.py
@@ -22,7 +22,7 @@
 
 __all__ = ['YOLOv3']
 # YOLOv3,PP-YOLO,PP-YOLOv2,PP-YOLOE,PP-YOLOE+ use the same architecture as YOLOv3
-# PP-YOLOE and PP-YOLOE+ are recommended to use PPYOLOE architecture in ppyoloe.py
+# PP-YOLOE and PP-YOLOE+ are recommended to use the PPYOLOE architecture in ppyoloe.py, especially when using distillation or an aux head
 
 
 @register

diff --git a/ppdet/modeling/assigners/atss_assigner.py b/ppdet/modeling/assigners/atss_assigner.py
@@ -221,4 +221,4 @@ def forward(self,
                                          paddle.zeros_like(gather_scores))
             assigned_scores *= gather_scores.unsqueeze(-1)
 
-        return assigned_labels, assigned_bboxes, assigned_scores
+        return assigned_labels, assigned_bboxes, assigned_scores, mask_positive
diff --git a/ppdet/modeling/assigners/task_aligned_assigner.py b/ppdet/modeling/assigners/task_aligned_assigner.py
@@ -190,4 +190,4 @@ def forward(self,
         alignment_metrics = alignment_metrics.max(-2).unsqueeze(-1)
         assigned_scores = assigned_scores * alignment_metrics
 
-        return assigned_labels, assigned_bboxes, assigned_scores
+        return assigned_labels, assigned_bboxes, assigned_scores, mask_positive