diff --git a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/higherhrnet_w32_aic_512x512.py b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/higherhrnet_w32_aic_512x512.py index 18232da1a6..d5cd34f8ee 100644 --- a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/higherhrnet_w32_aic_512x512.py +++ b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/higherhrnet_w32_aic_512x512.py @@ -102,9 +102,7 @@ pull_loss_factor=[0.001, 0.001], with_heatmaps_loss=[True, True], heatmaps_loss_factor=[1.0, 1.0])), - train_cfg=dict( - num_joints=channel_cfg['dataset_joints'], - img_size=data_cfg['image_size']), + train_cfg=dict(), test_cfg=dict( num_joints=channel_cfg['dataset_joints'], max_num_people=30, @@ -112,6 +110,7 @@ with_heatmaps=[True, True], with_ae=[True, False], project2image=True, + align_corners=False, nms_kernel=5, nms_padding=2, tag_per_joint=True, diff --git a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/higherhrnet_w32_aic_512x512_udp.py b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/higherhrnet_w32_aic_512x512_udp.py index b4ab95a9f7..cbe431a9a5 100644 --- a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/higherhrnet_w32_aic_512x512_udp.py +++ b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/higherhrnet_w32_aic_512x512_udp.py @@ -102,9 +102,7 @@ pull_loss_factor=[0.001, 0.001], with_heatmaps_loss=[True, True], heatmaps_loss_factor=[1.0, 1.0])), - train_cfg=dict( - num_joints=channel_cfg['dataset_joints'], - img_size=data_cfg['image_size']), + train_cfg=dict(), test_cfg=dict( num_joints=channel_cfg['dataset_joints'], max_num_people=30, diff --git a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/hrnet_w32_aic_512x512.py b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/hrnet_w32_aic_512x512.py index b47d60d2d5..5178f2e634 100644 --- a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/hrnet_w32_aic_512x512.py +++ b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/aic/hrnet_w32_aic_512x512.py @@ -98,9 +98,7 @@ pull_loss_factor=[0.001], with_heatmaps_loss=[True], heatmaps_loss_factor=[1.0])), - train_cfg=dict( - num_joints=channel_cfg['dataset_joints'], - img_size=data_cfg['image_size']), + train_cfg=dict(), test_cfg=dict( num_joints=channel_cfg['dataset_joints'], max_num_people=30, @@ -108,6 +106,7 @@ with_heatmaps=[True], with_ae=[True], project2image=True, + align_corners=False, nms_kernel=5, nms_padding=2, tag_per_joint=True, diff --git a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_512x512.py b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_512x512.py index c90d963323..96428ceb45 100644 --- a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_512x512.py +++ b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_512x512.py @@ -103,9 +103,7 @@ pull_loss_factor=[0.001, 0.001], with_heatmaps_loss=[True, True], heatmaps_loss_factor=[1.0, 1.0])), - train_cfg=dict( - num_joints=channel_cfg['dataset_joints'], - img_size=data_cfg['image_size']), + train_cfg=dict(), test_cfg=dict( num_joints=channel_cfg['dataset_joints'], max_num_people=30, @@ -113,6 +111,7 @@ with_heatmaps=[True, True], with_ae=[True, False], project2image=True, + align_corners=False, nms_kernel=5, nms_padding=2, tag_per_joint=True, diff --git a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_512x512_udp.py 
b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_512x512_udp.py index fce8a984e4..eb4a405552 100644 --- a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_512x512_udp.py +++ b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_512x512_udp.py @@ -103,9 +103,7 @@ pull_loss_factor=[0.001, 0.001], with_heatmaps_loss=[True, True], heatmaps_loss_factor=[1.0, 1.0])), - train_cfg=dict( - num_joints=channel_cfg['dataset_joints'], - img_size=data_cfg['image_size']), + train_cfg=dict(), test_cfg=dict( num_joints=channel_cfg['dataset_joints'], max_num_people=30, @@ -113,6 +111,7 @@ with_heatmaps=[True, True], with_ae=[True, False], project2image=False, + align_corners=True, nms_kernel=5, nms_padding=2, tag_per_joint=True, diff --git a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_640x640.py b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_640x640.py index be70c08c8f..9b1d291bdd 100644 --- a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_640x640.py +++ b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_640x640.py @@ -103,9 +103,7 @@ pull_loss_factor=[0.001, 0.001], with_heatmaps_loss=[True, True], heatmaps_loss_factor=[1.0, 1.0])), - train_cfg=dict( - num_joints=channel_cfg['dataset_joints'], - img_size=data_cfg['image_size']), + train_cfg=dict(), test_cfg=dict( num_joints=channel_cfg['dataset_joints'], max_num_people=30, @@ -113,6 +111,7 @@ with_heatmaps=[True, True], with_ae=[True, False], project2image=True, + align_corners=False, nms_kernel=5, nms_padding=2, tag_per_joint=True, diff --git a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_640x640_udp.py b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_640x640_udp.py index a188c49658..126bde9550 100644 --- a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_640x640_udp.py +++ b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w32_coco_640x640_udp.py @@ -103,9 +103,7 @@ pull_loss_factor=[0.001, 0.001], with_heatmaps_loss=[True, True], heatmaps_loss_factor=[1.0, 1.0])), - train_cfg=dict( - num_joints=channel_cfg['dataset_joints'], - img_size=data_cfg['image_size']), + train_cfg=dict(), test_cfg=dict( num_joints=channel_cfg['dataset_joints'], max_num_people=30, @@ -113,6 +111,7 @@ with_heatmaps=[True, True], with_ae=[True, False], project2image=False, + align_corners=True, nms_kernel=5, nms_padding=2, tag_per_joint=True, diff --git a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w48_coco_512x512.py b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w48_coco_512x512.py index 9c2556dcea..6f75ab50a4 100644 --- a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w48_coco_512x512.py +++ b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w48_coco_512x512.py @@ -103,9 +103,7 @@ pull_loss_factor=[0.001, 0.001], with_heatmaps_loss=[True, True], heatmaps_loss_factor=[1.0, 1.0])), - train_cfg=dict( - num_joints=channel_cfg['dataset_joints'], - img_size=data_cfg['image_size']), + train_cfg=dict(), test_cfg=dict( num_joints=channel_cfg['dataset_joints'], max_num_people=30, @@ -113,6 +111,7 @@ with_heatmaps=[True, True], with_ae=[True, False], project2image=True, + align_corners=False, nms_kernel=5, nms_padding=2, 
tag_per_joint=True, diff --git a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w48_coco_512x512_udp.py b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w48_coco_512x512_udp.py index 21e4e9e676..a0a6ffb3df 100644 --- a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w48_coco_512x512_udp.py +++ b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/higherhrnet_w48_coco_512x512_udp.py @@ -103,9 +103,7 @@ pull_loss_factor=[0.001, 0.001], with_heatmaps_loss=[True, True], heatmaps_loss_factor=[1.0, 1.0])), - train_cfg=dict( - num_joints=channel_cfg['dataset_joints'], - img_size=data_cfg['image_size']), + train_cfg=dict(), test_cfg=dict( num_joints=channel_cfg['dataset_joints'], max_num_people=30, @@ -113,6 +111,7 @@ with_heatmaps=[True, True], with_ae=[True, False], project2image=False, + align_corners=True, nms_kernel=5, nms_padding=2, tag_per_joint=True, diff --git a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w32_coco_512x512.py b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w32_coco_512x512.py index af264c5d80..e6e090d391 100644 --- a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w32_coco_512x512.py +++ b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w32_coco_512x512.py @@ -99,9 +99,7 @@ pull_loss_factor=[0.001], with_heatmaps_loss=[True], heatmaps_loss_factor=[1.0])), - train_cfg=dict( - num_joints=channel_cfg['dataset_joints'], - img_size=data_cfg['image_size']), + train_cfg=dict(), test_cfg=dict( num_joints=channel_cfg['dataset_joints'], max_num_people=30, @@ -109,6 +107,7 @@ with_heatmaps=[True], with_ae=[True], project2image=True, + align_corners=False, nms_kernel=5, nms_padding=2, tag_per_joint=True, diff --git a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w32_coco_512x512_udp.py b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w32_coco_512x512_udp.py index fcce049a87..a5f70806c7 100644 --- a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w32_coco_512x512_udp.py +++ b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w32_coco_512x512_udp.py @@ -99,9 +99,7 @@ pull_loss_factor=[0.001], with_heatmaps_loss=[True], heatmaps_loss_factor=[1.0])), - train_cfg=dict( - num_joints=channel_cfg['dataset_joints'], - img_size=data_cfg['image_size']), + train_cfg=dict(), test_cfg=dict( num_joints=channel_cfg['dataset_joints'], max_num_people=30, @@ -109,6 +107,7 @@ with_heatmaps=[True], with_ae=[True], project2image=False, + align_corners=True, nms_kernel=5, nms_padding=2, tag_per_joint=True, diff --git a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w32_coco_640x640.py b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w32_coco_640x640.py index 3932853586..f3daa0411d 100644 --- a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w32_coco_640x640.py +++ b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w32_coco_640x640.py @@ -99,9 +99,7 @@ pull_loss_factor=[0.001], with_heatmaps_loss=[True], heatmaps_loss_factor=[1.0])), - train_cfg=dict( - num_joints=channel_cfg['dataset_joints'], - img_size=data_cfg['image_size']), + train_cfg=dict(), test_cfg=dict( num_joints=channel_cfg['dataset_joints'], max_num_people=30, @@ -109,6 +107,7 @@ with_heatmaps=[True], with_ae=[True], project2image=True, + align_corners=False, nms_kernel=5, nms_padding=2, tag_per_joint=True, diff --git 
a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w32_coco_640x640_udp.py b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w32_coco_640x640_udp.py index d7bd592ddf..5a320c2f34 100644 --- a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w32_coco_640x640_udp.py +++ b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w32_coco_640x640_udp.py @@ -99,9 +99,7 @@ pull_loss_factor=[0.001], with_heatmaps_loss=[True], heatmaps_loss_factor=[1.0])), - train_cfg=dict( - num_joints=channel_cfg['dataset_joints'], - img_size=data_cfg['image_size']), + train_cfg=dict(), test_cfg=dict( num_joints=channel_cfg['dataset_joints'], max_num_people=30, @@ -109,6 +107,7 @@ with_heatmaps=[True], with_ae=[True], project2image=False, + align_corners=True, nms_kernel=5, nms_padding=2, tag_per_joint=True, diff --git a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w48_coco_512x512.py b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w48_coco_512x512.py index 2f1d9c2b4c..1ccdf89e78 100644 --- a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w48_coco_512x512.py +++ b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w48_coco_512x512.py @@ -99,9 +99,7 @@ pull_loss_factor=[0.001], with_heatmaps_loss=[True], heatmaps_loss_factor=[1.0])), - train_cfg=dict( - num_joints=channel_cfg['dataset_joints'], - img_size=data_cfg['image_size']), + train_cfg=dict(), test_cfg=dict( num_joints=channel_cfg['dataset_joints'], max_num_people=30, @@ -109,6 +107,7 @@ with_heatmaps=[True], with_ae=[True], project2image=True, + align_corners=False, nms_kernel=5, nms_padding=2, tag_per_joint=True, diff --git a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w48_coco_512x512_udp.py b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w48_coco_512x512_udp.py index 8be8badf1a..f13132c114 100644 --- a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w48_coco_512x512_udp.py +++ b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w48_coco_512x512_udp.py @@ -99,9 +99,7 @@ pull_loss_factor=[0.001], with_heatmaps_loss=[True], heatmaps_loss_factor=[1.0])), - train_cfg=dict( - num_joints=channel_cfg['dataset_joints'], - img_size=data_cfg['image_size']), + train_cfg=dict(), test_cfg=dict( num_joints=channel_cfg['dataset_joints'], max_num_people=30, @@ -109,6 +107,7 @@ with_heatmaps=[True], with_ae=[True], project2image=False, + align_corners=True, nms_kernel=5, nms_padding=2, tag_per_joint=True, diff --git a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w48_coco_640x640.py b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w48_coco_640x640.py index 3696316448..4fe7b44f05 100644 --- a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w48_coco_640x640.py +++ b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w48_coco_640x640.py @@ -99,9 +99,7 @@ pull_loss_factor=[0.001], with_heatmaps_loss=[True], heatmaps_loss_factor=[1.0])), - train_cfg=dict( - num_joints=channel_cfg['dataset_joints'], - img_size=data_cfg['image_size']), + train_cfg=dict(), test_cfg=dict( num_joints=channel_cfg['dataset_joints'], max_num_people=30, @@ -109,6 +107,7 @@ with_heatmaps=[True], with_ae=[True], project2image=True, + align_corners=False, nms_kernel=5, nms_padding=2, tag_per_joint=True, diff --git a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w48_coco_640x640_udp.py 
b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w48_coco_640x640_udp.py index c1dc7849f9..5d24f6f969 100644 --- a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w48_coco_640x640_udp.py +++ b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/hrnet_w48_coco_640x640_udp.py @@ -99,9 +99,7 @@ pull_loss_factor=[0.001], with_heatmaps_loss=[True], heatmaps_loss_factor=[1.0])), - train_cfg=dict( - num_joints=channel_cfg['dataset_joints'], - img_size=data_cfg['image_size']), + train_cfg=dict(), test_cfg=dict( num_joints=channel_cfg['dataset_joints'], max_num_people=30, @@ -109,6 +107,7 @@ with_heatmaps=[True], with_ae=[True], project2image=False, + align_corners=True, nms_kernel=5, nms_padding=2, tag_per_joint=True, diff --git a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/mobilenetv2_coco_512x512.py b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/mobilenetv2_coco_512x512.py index 3f5dc99686..0c14892d25 100644 --- a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/mobilenetv2_coco_512x512.py +++ b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/mobilenetv2_coco_512x512.py @@ -68,9 +68,7 @@ pull_loss_factor=[0.001], with_heatmaps_loss=[True], heatmaps_loss_factor=[1.0])), - train_cfg=dict( - num_joints=channel_cfg['dataset_joints'], - img_size=data_cfg['image_size']), + train_cfg=dict(), test_cfg=dict( num_joints=channel_cfg['dataset_joints'], max_num_people=30, @@ -78,6 +76,7 @@ with_heatmaps=[True], with_ae=[True], project2image=True, + align_corners=False, nms_kernel=5, nms_padding=2, tag_per_joint=True, diff --git a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res101_coco_512x512.py b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res101_coco_512x512.py index 025daa8266..4d4a34007e 100644 --- a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res101_coco_512x512.py +++ b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res101_coco_512x512.py @@ -68,9 +68,7 @@ pull_loss_factor=[0.001], with_heatmaps_loss=[True], heatmaps_loss_factor=[1.0])), - train_cfg=dict( - num_joints=channel_cfg['dataset_joints'], - img_size=data_cfg['image_size']), + train_cfg=dict(), test_cfg=dict( num_joints=channel_cfg['dataset_joints'], max_num_people=30, @@ -78,6 +76,7 @@ with_heatmaps=[True], with_ae=[True], project2image=True, + align_corners=False, nms_kernel=5, nms_padding=2, tag_per_joint=True, diff --git a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res101_coco_640x640.py b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res101_coco_640x640.py index 8d0ae90a12..3512603970 100644 --- a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res101_coco_640x640.py +++ b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res101_coco_640x640.py @@ -68,9 +68,7 @@ pull_loss_factor=[0.001], with_heatmaps_loss=[True], heatmaps_loss_factor=[1.0])), - train_cfg=dict( - num_joints=channel_cfg['dataset_joints'], - img_size=data_cfg['image_size']), + train_cfg=dict(), test_cfg=dict( num_joints=channel_cfg['dataset_joints'], max_num_people=30, @@ -78,6 +76,7 @@ with_heatmaps=[True], with_ae=[True], project2image=True, + align_corners=False, nms_kernel=5, nms_padding=2, tag_per_joint=True, diff --git a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res152_coco_512x512.py b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res152_coco_512x512.py index 4cb0aeb580..e2f8ab7f2d 100644 --- 
a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res152_coco_512x512.py +++ b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res152_coco_512x512.py @@ -68,9 +68,7 @@ pull_loss_factor=[0.001], with_heatmaps_loss=[True], heatmaps_loss_factor=[1.0])), - train_cfg=dict( - num_joints=channel_cfg['dataset_joints'], - img_size=data_cfg['image_size']), + train_cfg=dict(), test_cfg=dict( num_joints=channel_cfg['dataset_joints'], max_num_people=30, @@ -78,6 +76,7 @@ with_heatmaps=[True], with_ae=[True], project2image=True, + align_corners=False, nms_kernel=5, nms_padding=2, tag_per_joint=True, diff --git a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res152_coco_640x640.py b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res152_coco_640x640.py index d75f69e511..b2d43d4e9e 100644 --- a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res152_coco_640x640.py +++ b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res152_coco_640x640.py @@ -68,9 +68,7 @@ pull_loss_factor=[0.001], with_heatmaps_loss=[True], heatmaps_loss_factor=[1.0])), - train_cfg=dict( - num_joints=channel_cfg['dataset_joints'], - img_size=data_cfg['image_size']), + train_cfg=dict(), test_cfg=dict( num_joints=channel_cfg['dataset_joints'], max_num_people=30, @@ -78,6 +76,7 @@ with_heatmaps=[True], with_ae=[True], project2image=True, + align_corners=False, nms_kernel=5, nms_padding=2, tag_per_joint=True, diff --git a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res50_coco_512x512.py b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res50_coco_512x512.py index 058cac49ef..a510c5aaa4 100644 --- a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res50_coco_512x512.py +++ b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res50_coco_512x512.py @@ -69,9 +69,7 @@ with_heatmaps_loss=[True], heatmaps_loss_factor=[1.0], )), - train_cfg=dict( - num_joints=channel_cfg['dataset_joints'], - img_size=data_cfg['image_size']), + train_cfg=dict(), test_cfg=dict( num_joints=channel_cfg['dataset_joints'], max_num_people=30, @@ -79,6 +77,7 @@ with_heatmaps=[True], with_ae=[True], project2image=True, + align_corners=False, nms_kernel=5, nms_padding=2, tag_per_joint=True, diff --git a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res50_coco_640x640.py b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res50_coco_640x640.py index 68e1716705..100635bd07 100644 --- a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res50_coco_640x640.py +++ b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/coco/res50_coco_640x640.py @@ -68,9 +68,7 @@ pull_loss_factor=[0.001], with_heatmaps_loss=[True], heatmaps_loss_factor=[1.0])), - train_cfg=dict( - num_joints=channel_cfg['dataset_joints'], - img_size=data_cfg['image_size']), + train_cfg=dict(), test_cfg=dict( num_joints=channel_cfg['dataset_joints'], max_num_people=30, @@ -78,6 +76,7 @@ with_heatmaps=[True], with_ae=[True], project2image=True, + align_corners=False, nms_kernel=5, nms_padding=2, tag_per_joint=True, diff --git a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_w32_crowdpose_512x512.py b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_w32_crowdpose_512x512.py index 4746ae594f..5bffcbc8fd 100644 --- a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_w32_crowdpose_512x512.py +++ 
b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_w32_crowdpose_512x512.py @@ -102,9 +102,7 @@ pull_loss_factor=[0.001, 0.001], with_heatmaps_loss=[True, True], heatmaps_loss_factor=[1.0, 1.0])), - train_cfg=dict( - num_joints=channel_cfg['dataset_joints'], - img_size=data_cfg['image_size']), + train_cfg=dict(), test_cfg=dict( num_joints=channel_cfg['dataset_joints'], max_num_people=30, @@ -112,6 +110,7 @@ with_heatmaps=[True, True], with_ae=[True, False], project2image=True, + align_corners=False, nms_kernel=5, nms_padding=2, tag_per_joint=True, diff --git a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_w32_crowdpose_512x512_udp.py b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_w32_crowdpose_512x512_udp.py index 7538300d2c..9c54b293a1 100644 --- a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_w32_crowdpose_512x512_udp.py +++ b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_w32_crowdpose_512x512_udp.py @@ -102,9 +102,7 @@ pull_loss_factor=[0.001, 0.001], with_heatmaps_loss=[True, True], heatmaps_loss_factor=[1.0, 1.0])), - train_cfg=dict( - num_joints=channel_cfg['dataset_joints'], - img_size=data_cfg['image_size']), + train_cfg=dict(), test_cfg=dict( num_joints=channel_cfg['dataset_joints'], max_num_people=30, @@ -112,6 +110,7 @@ with_heatmaps=[True, True], with_ae=[True, False], project2image=False, + align_corners=True, nms_kernel=5, nms_padding=2, tag_per_joint=True, diff --git a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_w32_crowdpose_640x640.py b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_w32_crowdpose_640x640.py index ece8ccbb9b..478d739270 100644 --- a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_w32_crowdpose_640x640.py +++ b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_w32_crowdpose_640x640.py @@ -102,9 +102,7 @@ pull_loss_factor=[0.001, 0.001], with_heatmaps_loss=[True, True], heatmaps_loss_factor=[1.0, 1.0])), - train_cfg=dict( - num_joints=channel_cfg['dataset_joints'], - img_size=data_cfg['image_size']), + train_cfg=dict(), test_cfg=dict( num_joints=channel_cfg['dataset_joints'], max_num_people=30, @@ -112,6 +110,7 @@ with_heatmaps=[True, True], with_ae=[True, False], project2image=True, + align_corners=False, nms_kernel=5, nms_padding=2, tag_per_joint=True, diff --git a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_w32_crowdpose_640x640_udp.py b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_w32_crowdpose_640x640_udp.py index 24c33b3d60..eef885cb39 100644 --- a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_w32_crowdpose_640x640_udp.py +++ b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_w32_crowdpose_640x640_udp.py @@ -102,9 +102,7 @@ pull_loss_factor=[0.001, 0.001], with_heatmaps_loss=[True, True], heatmaps_loss_factor=[1.0, 1.0])), - train_cfg=dict( - num_joints=channel_cfg['dataset_joints'], - img_size=data_cfg['image_size']), + train_cfg=dict(), test_cfg=dict( num_joints=channel_cfg['dataset_joints'], max_num_people=30, @@ -112,6 +110,7 @@ with_heatmaps=[True, True], with_ae=[True, False], project2image=False, + align_corners=True, nms_kernel=5, nms_padding=2, tag_per_joint=True, diff --git 
a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_w48_crowdpose_512x512.py b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_w48_crowdpose_512x512.py index 8addf87d6c..a45f73129e 100644 --- a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_w48_crowdpose_512x512.py +++ b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_w48_crowdpose_512x512.py @@ -102,9 +102,7 @@ pull_loss_factor=[0.001, 0.001], with_heatmaps_loss=[True, True], heatmaps_loss_factor=[1.0, 1.0])), - train_cfg=dict( - num_joints=channel_cfg['dataset_joints'], - img_size=data_cfg['image_size']), + train_cfg=dict(), test_cfg=dict( num_joints=channel_cfg['dataset_joints'], max_num_people=30, @@ -112,6 +110,7 @@ with_heatmaps=[True, True], with_ae=[True, False], project2image=True, + align_corners=False, nms_kernel=5, nms_padding=2, tag_per_joint=True, diff --git a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_w48_crowdpose_512x512_udp.py b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_w48_crowdpose_512x512_udp.py index 0ee83bba23..d31c636d60 100644 --- a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_w48_crowdpose_512x512_udp.py +++ b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/higherhrnet_w48_crowdpose_512x512_udp.py @@ -102,9 +102,7 @@ pull_loss_factor=[0.001, 0.001], with_heatmaps_loss=[True, True], heatmaps_loss_factor=[1.0, 1.0])), - train_cfg=dict( - num_joints=channel_cfg['dataset_joints'], - img_size=data_cfg['image_size']), + train_cfg=dict(), test_cfg=dict( num_joints=channel_cfg['dataset_joints'], max_num_people=30, @@ -112,6 +110,7 @@ with_heatmaps=[True, True], with_ae=[True, False], project2image=False, + align_corners=True, nms_kernel=5, nms_padding=2, tag_per_joint=True, diff --git a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/mobilenetv2_crowdpose_512x512.py b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/mobilenetv2_crowdpose_512x512.py index 2f035e99e6..b4037a6871 100644 --- a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/mobilenetv2_crowdpose_512x512.py +++ b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/mobilenetv2_crowdpose_512x512.py @@ -67,9 +67,7 @@ pull_loss_factor=[0.001], with_heatmaps_loss=[True], heatmaps_loss_factor=[1.0])), - train_cfg=dict( - num_joints=channel_cfg['dataset_joints'], - img_size=data_cfg['image_size']), + train_cfg=dict(), test_cfg=dict( num_joints=channel_cfg['dataset_joints'], max_num_people=30, @@ -77,6 +75,7 @@ with_heatmaps=[True], with_ae=[True], project2image=True, + align_corners=False, nms_kernel=5, nms_padding=2, tag_per_joint=True, diff --git a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/res101_crowdpose_512x512.py b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/res101_crowdpose_512x512.py index 8fb3959670..65c32641fa 100644 --- a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/res101_crowdpose_512x512.py +++ b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/res101_crowdpose_512x512.py @@ -67,9 +67,7 @@ pull_loss_factor=[0.001], with_heatmaps_loss=[True], heatmaps_loss_factor=[1.0])), - train_cfg=dict( - num_joints=channel_cfg['dataset_joints'], - img_size=data_cfg['image_size']), + train_cfg=dict(), test_cfg=dict( num_joints=channel_cfg['dataset_joints'], max_num_people=30, @@ 
-77,6 +75,7 @@ with_heatmaps=[True], with_ae=[True], project2image=True, + align_corners=False, nms_kernel=5, nms_padding=2, tag_per_joint=True, diff --git a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/res152_crowdpose_512x512.py b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/res152_crowdpose_512x512.py index 45d740e699..0680370f20 100644 --- a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/res152_crowdpose_512x512.py +++ b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/res152_crowdpose_512x512.py @@ -67,9 +67,7 @@ pull_loss_factor=[0.001], with_heatmaps_loss=[True], heatmaps_loss_factor=[1.0])), - train_cfg=dict( - num_joints=channel_cfg['dataset_joints'], - img_size=data_cfg['image_size']), + train_cfg=dict(), test_cfg=dict( num_joints=channel_cfg['dataset_joints'], max_num_people=30, @@ -77,6 +75,7 @@ with_heatmaps=[True], with_ae=[True], project2image=True, + align_corners=False, nms_kernel=5, nms_padding=2, tag_per_joint=True, diff --git a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/res50_crowdpose_512x512.py b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/res50_crowdpose_512x512.py index aa161e3fe2..bece913c60 100644 --- a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/res50_crowdpose_512x512.py +++ b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/crowdpose/res50_crowdpose_512x512.py @@ -68,9 +68,7 @@ with_heatmaps_loss=[True], heatmaps_loss_factor=[1.0], )), - train_cfg=dict( - num_joints=channel_cfg['dataset_joints'], - img_size=data_cfg['image_size']), + train_cfg=dict(), test_cfg=dict( num_joints=channel_cfg['dataset_joints'], max_num_people=30, @@ -78,6 +76,7 @@ with_heatmaps=[True], with_ae=[True], project2image=True, + align_corners=False, nms_kernel=5, nms_padding=2, tag_per_joint=True, diff --git a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/mhp/hrnet_w48_mhp_512x512.py b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/mhp/hrnet_w48_mhp_512x512.py index 27ce967f17..5296a3136f 100644 --- a/configs/body/2d_kpt_sview_rgb_img/associative_embedding/mhp/hrnet_w48_mhp_512x512.py +++ b/configs/body/2d_kpt_sview_rgb_img/associative_embedding/mhp/hrnet_w48_mhp_512x512.py @@ -97,9 +97,7 @@ pull_loss_factor=[0.01], with_heatmaps_loss=[True], heatmaps_loss_factor=[1.0])), - train_cfg=dict( - num_joints=channel_cfg['dataset_joints'], - img_size=data_cfg['image_size']), + train_cfg=dict(), test_cfg=dict( num_joints=channel_cfg['dataset_joints'], max_num_people=30, @@ -107,6 +105,7 @@ with_heatmaps=[True], with_ae=[True], project2image=True, + align_corners=False, nms_kernel=5, nms_padding=2, tag_per_joint=True, diff --git a/configs/body/2d_kpt_sview_rgb_img/part_affinity_field/coco/hrnet_w32_coco_512x512.py b/configs/body/2d_kpt_sview_rgb_img/part_affinity_field/coco/hrnet_w32_coco_512x512.py new file mode 100644 index 0000000000..24d6b7d1b8 --- /dev/null +++ b/configs/body/2d_kpt_sview_rgb_img/part_affinity_field/coco/hrnet_w32_coco_512x512.py @@ -0,0 +1,213 @@ +log_level = 'INFO' +load_from = None +resume_from = None +dist_params = dict(backend='nccl') +workflow = [('train', 1)] +checkpoint_config = dict(interval=50) +evaluation = dict(interval=50, metric='mAP', key_indicator='AP') + +optimizer = dict( + type='Adam', + lr=0.0015, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[200, 260]) +total_epochs 
= 300 +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook') + ]) + +channel_cfg = dict( + dataset_joints=17, + dataset_channel=[ + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + ], + inference_channel=[ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + ]) + +data_cfg = dict( + image_size=512, + base_size=256, + base_sigma=2, + heatmap_size=[128], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + num_scales=1, + with_bg=True, + scale_aware_sigma=False, +) + +# model settings +model = dict( + type='PartAffinityField', + pretrained='https://download.openmmlab.com/mmpose/' + 'pretrain_models/hrnet_w32-36af842e.pth', + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256))), + ), + keypoint_head=dict( + type='PAFHead', + heatmap_heads_cfg=[ + dict( + type='DeconvHead', + in_channels=32, + out_channels=17, + num_deconv_layers=0, + extra=dict( + final_conv_kernel=1, + num_conv_layers=1, + num_conv_kernels=(1, )), + loss_keypoint=dict(type='MaskedMSELoss', )), + ], + paf_heads_cfg=[ + dict( + type='DeconvHead', + in_channels=32, + out_channels=38, + num_deconv_layers=0, + extra=dict( + final_conv_kernel=1, + num_conv_layers=1, + num_conv_kernels=(1, )), + loss_keypoint=dict(type='MaskedMSELoss', )), + ], + heatmap_index=[0], + paf_index=[0], + ), + train_cfg=dict(), + test_cfg=dict( + num_joints=channel_cfg['dataset_joints'], + max_num_people=30, + scale_factor=[1], + with_heatmaps=[True], + with_pafs=[True], + project2image=True, + align_corners=False, + nms_kernel=5, + nms_padding=2, + tag_per_joint=True, + detection_threshold=0.1, + tag_threshold=1, + use_detection_val=True, + ignore_too_much=False, + adjust=True, + refine=True, + flip_test=True, + with_bg=data_cfg['with_bg'])) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='BottomUpRandomAffine', + rot_factor=30, + scale_factor=[0.75, 1.5], + scale_type='short', + trans_factor=40), + dict(type='BottomUpRandomFlip', flip_prob=0.5), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict( + type='MultitaskGatherTarget', + pipeline_list=[ + [ + dict( + type='BottomUpGenerateHeatmapTarget', + sigma=2, + with_bg=data_cfg['with_bg']) + ], + [dict( + type='BottomUpGeneratePAFTarget', + limb_width=1, + )], + ], + pipeline_indices=[0, 1], + keys=['targets', 'masks']), + dict(type='Collect', keys=['img', 'targets', 'masks'], meta_keys=[]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='BottomUpGetImgSize', test_scale_factor=[1]), + dict( + type='BottomUpResizeAlign', + transforms=[ + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]), + dict( + type='Collect', + keys=['img'], + meta_keys=[ + 'image_file', 'aug_data', 'test_scale_factor', 'base_size', + 'center', 'scale', 'flip_index', 'skeleton' + ]), +] + 
+test_pipeline = val_pipeline + +data_root = 'data/coco' +data = dict( + samples_per_gpu=24, + workers_per_gpu=2, + train=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', + img_prefix=f'{data_root}/train2017/', + data_cfg=data_cfg, + pipeline=train_pipeline), + val=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), + test=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), +) diff --git a/configs/body/2d_kpt_sview_rgb_img/part_affinity_field/coco/lightweight_openpose_coco_368x368.py b/configs/body/2d_kpt_sview_rgb_img/part_affinity_field/coco/lightweight_openpose_coco_368x368.py new file mode 100644 index 0000000000..c391cb7322 --- /dev/null +++ b/configs/body/2d_kpt_sview_rgb_img/part_affinity_field/coco/lightweight_openpose_coco_368x368.py @@ -0,0 +1,179 @@ +log_level = 'INFO' +load_from = None +resume_from = None +dist_params = dict(backend='nccl') +workflow = [('train', 1)] +checkpoint_config = dict(interval=50) +evaluation = dict(interval=50, metric='mAP', key_indicator='AP') + +optimizer = dict( + type='Adam', + lr=0.0015, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[200, 260]) +total_epochs = 300 +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook') + ]) + +channel_cfg = dict( + dataset_joints=17, + dataset_channel=[ + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + ], + inference_channel=[ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + ]) + +data_cfg = dict( + image_size=368, + base_size=256, + base_sigma=2, + heatmap_size=[46], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + num_scales=1, + scale_aware_sigma=False, + with_bg=False) + +# model settings +model = dict( + type='PartAffinityField', + pretrained='models/mobilenetv1_sgd_68.848.pth', + backbone=dict( + type='LightweightOpenPoseNetwork', + in_channels=3, + out_channels_cm=17, + out_channels_paf=38, + stem_feat_channels=128, + num_stages=2), + keypoint_head=dict( + type='PAFHead', + heatmap_heads_cfg=[ + dict( + type='DeconvHead', + num_deconv_layers=0, + extra=dict(final_conv_kernel=0), + loss_keypoint=dict(type='MaskedMSELoss', )), + ] * 2, + paf_heads_cfg=[ + dict( + type='DeconvHead', + num_deconv_layers=0, + extra=dict(final_conv_kernel=0), + loss_keypoint=dict(type='MaskedMSELoss', )), + ] * 2, + heatmap_index=[0, 1], + paf_index=[2, 3], + ), + train_cfg=dict(), + test_cfg=dict( + num_joints=channel_cfg['dataset_joints'], + max_num_people=30, + scale_factor=[1], + with_heatmaps=[True], + with_pafs=[True], + project2image=True, + align_corners=False, + nms_kernel=5, + nms_padding=2, + tag_per_joint=True, + detection_threshold=0.1, + tag_threshold=1, + use_detection_val=True, + ignore_too_much=False, + adjust=True, + refine=True, + flip_test=True, + with_bg=data_cfg['with_bg'])) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='BottomUpRandomAffine', + rot_factor=30, + scale_factor=[0.75, 1.5], + scale_type='short', + trans_factor=40), + 
dict(type='BottomUpRandomFlip', flip_prob=0.5), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict( + type='MultitaskGatherTarget', + pipeline_list=[ + [ + dict( + type='BottomUpGenerateHeatmapTarget', + sigma=2, + with_bg=data_cfg['with_bg']) + ], + [dict( + type='BottomUpGeneratePAFTarget', + limb_width=1, + )], + ], + pipeline_indices=[0] * 2 + [1] * 2, + keys=['targets', 'masks']), + dict(type='Collect', keys=['img', 'targets', 'masks'], meta_keys=[]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='BottomUpGetImgSize', test_scale_factor=[1]), + dict( + type='BottomUpResizeAlign', + transforms=[ + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]), + dict( + type='Collect', + keys=['img'], + meta_keys=[ + 'image_file', 'aug_data', 'test_scale_factor', 'base_size', + 'center', 'scale', 'flip_index', 'skeleton' + ]), +] + +test_pipeline = val_pipeline + +data_root = 'data/coco' +data = dict( + samples_per_gpu=24, + workers_per_gpu=2, + train=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', + img_prefix=f'{data_root}/train2017/', + data_cfg=data_cfg, + pipeline=train_pipeline), + val=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), + test=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), +) diff --git a/configs/body/2d_kpt_sview_rgb_img/part_affinity_field/coco/lightweight_openpose_coco_368x368_withbg_addneck.py b/configs/body/2d_kpt_sview_rgb_img/part_affinity_field/coco/lightweight_openpose_coco_368x368_withbg_addneck.py new file mode 100644 index 0000000000..af29f3d682 --- /dev/null +++ b/configs/body/2d_kpt_sview_rgb_img/part_affinity_field/coco/lightweight_openpose_coco_368x368_withbg_addneck.py @@ -0,0 +1,183 @@ +log_level = 'INFO' +load_from = None +resume_from = None +dist_params = dict(backend='nccl') +workflow = [('train', 1)] +checkpoint_config = dict(interval=50) +evaluation = dict(interval=50, metric='mAP', key_indicator='AP') + +optimizer = dict( + type='Adam', + lr=0.0015, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[200, 260]) +total_epochs = 300 +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + dict(type='TensorboardLoggerHook') + ]) + +channel_cfg = dict( + dataset_joints=17, + dataset_channel=[ + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + ], + inference_channel=[ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + ]) + +data_cfg = dict( + image_size=368, + base_size=256, + base_sigma=2, + heatmap_size=[46], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + num_scales=1, + scale_aware_sigma=False, + with_bg=True, + add_neck=True, +) + +# model settings +model = dict( + type='PartAffinityField', + pretrained='models/mobilenetv1_sgd_68.848.pth', + backbone=dict( + type='LightweightOpenPoseNetwork', + in_channels=3, + out_channels_cm=19, + out_channels_paf=38, + stem_feat_channels=128, + num_stages=2), 
+ keypoint_head=dict( + type='PAFHead', + heatmap_heads_cfg=[ + dict( + type='DeconvHead', + num_deconv_layers=0, + extra=dict(final_conv_kernel=0), + loss_keypoint=dict(type='MaskedMSELoss', )), + ] * 2, + paf_heads_cfg=[ + dict( + type='DeconvHead', + num_deconv_layers=0, + extra=dict(final_conv_kernel=0), + loss_keypoint=dict(type='MaskedMSELoss', )), + ] * 2, + heatmap_index=[0, 1], + paf_index=[2, 3], + ), + train_cfg=dict(), + test_cfg=dict( + num_joints=channel_cfg['dataset_joints'], + max_num_people=30, + scale_factor=[1], + with_heatmaps=[True], + with_pafs=[True], + project2image=True, + align_corners=False, + nms_kernel=5, + nms_padding=2, + tag_per_joint=True, + detection_threshold=0.1, + tag_threshold=1, + use_detection_val=True, + ignore_too_much=False, + adjust=True, + refine=True, + flip_test=True, + with_bg=data_cfg['with_bg'], + add_neck=data_cfg['add_neck'])) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='BottomUpRandomAffine', + rot_factor=30, + scale_factor=[0.75, 1.5], + scale_type='short', + trans_factor=40), + dict(type='BottomUpRandomFlip', flip_prob=0.5), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict( + type='MultitaskGatherTarget', + pipeline_list=[ + [ + dict( + type='BottomUpGenerateHeatmapTarget', + sigma=2, + add_neck=data_cfg['add_neck'], + with_bg=data_cfg['with_bg']) + ], + [dict( + type='BottomUpGeneratePAFTarget', + limb_width=1, + )], + ], + pipeline_indices=[0] * 2 + [1] * 2, + keys=['targets', 'masks']), + dict(type='Collect', keys=['img', 'targets', 'masks'], meta_keys=[]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='BottomUpGetImgSize', test_scale_factor=[1]), + dict( + type='BottomUpResizeAlign', + transforms=[ + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]), + dict( + type='Collect', + keys=['img'], + meta_keys=[ + 'image_file', 'aug_data', 'test_scale_factor', 'base_size', + 'center', 'scale', 'flip_index', 'skeleton' + ]), +] + +test_pipeline = val_pipeline + +data_root = 'data/coco' +data = dict( + samples_per_gpu=24, + workers_per_gpu=2, + train=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', + img_prefix=f'{data_root}/train2017/', + data_cfg=data_cfg, + pipeline=train_pipeline), + val=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), + test=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), +) diff --git a/configs/body/2d_kpt_sview_rgb_img/part_affinity_field/coco/lightweight_openpose_coco_368x368_withbg_addneck_bgr_meanstd.py b/configs/body/2d_kpt_sview_rgb_img/part_affinity_field/coco/lightweight_openpose_coco_368x368_withbg_addneck_bgr_meanstd.py new file mode 100644 index 0000000000..32c2800b07 --- /dev/null +++ b/configs/body/2d_kpt_sview_rgb_img/part_affinity_field/coco/lightweight_openpose_coco_368x368_withbg_addneck_bgr_meanstd.py @@ -0,0 +1,177 @@ +log_level = 'INFO' +load_from = None +resume_from = None +dist_params = dict(backend='nccl') +workflow = [('train', 1)] +checkpoint_config = dict(interval=50) +evaluation = dict(interval=50, metric='mAP', key_indicator='AP') + +optimizer = dict( + 
type='Adam', + lr=0.0015, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[200, 260]) +total_epochs = 300 +log_config = dict( + interval=50, + hooks=[dict(type='TextLoggerHook'), + dict(type='TensorboardLoggerHook')]) + +channel_cfg = dict( + dataset_joints=17, + dataset_channel=[ + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + ], + inference_channel=[ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + ]) + +data_cfg = dict( + image_size=368, + base_size=256, + base_sigma=2, + heatmap_size=[46], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + num_scales=1, + scale_aware_sigma=False, + with_bg=True, + add_neck=True, +) + +# model settings +model = dict( + type='PartAffinityField', + pretrained='models/mobilenetv1_sgd_68.848.pth', + backbone=dict( + type='LightweightOpenPoseNetwork', + in_channels=3, + out_channels_cm=19, + out_channels_paf=38, + stem_feat_channels=128, + num_stages=2), + keypoint_head=dict( + type='PAFHead', + heatmap_heads_cfg=[ + dict( + type='DeconvHead', + num_deconv_layers=0, + extra=dict(final_conv_kernel=0), + loss_keypoint=dict(type='MaskedMSELoss', )), + ] * 2, + paf_heads_cfg=[ + dict( + type='DeconvHead', + num_deconv_layers=0, + extra=dict(final_conv_kernel=0), + loss_keypoint=dict(type='MaskedMSELoss', )), + ] * 2, + heatmap_index=[0, 1], + paf_index=[2, 3], + ), + train_cfg=dict(), + test_cfg=dict( + num_joints=channel_cfg['dataset_joints'], + max_num_people=30, + scale_factor=[1], + with_heatmaps=[True], + with_pafs=[True], + project2image=True, + align_corners=False, + nms_kernel=5, + nms_padding=2, + tag_per_joint=True, + detection_threshold=0.1, + tag_threshold=1, + use_detection_val=True, + ignore_too_much=False, + adjust=True, + refine=True, + flip_test=True, + with_bg=data_cfg['with_bg'], + add_neck=data_cfg['add_neck'])) + +train_pipeline = [ + dict(type='LoadImageFromFile', channel_order='bgr'), + dict( + type='BottomUpRandomAffine', + rot_factor=30, + scale_factor=[0.75, 1.5], + scale_type='short', + trans_factor=40), + dict(type='BottomUpRandomFlip', flip_prob=0.5), + dict(type='ToTensor'), + dict(type='NormalizeTensor', mean=[0.5, 0.5, 0.5], std=[1., 1., 1.]), + dict( + type='MultitaskGatherTarget', + pipeline_list=[ + [ + dict( + type='BottomUpGenerateHeatmapTarget', + sigma=2, + add_neck=data_cfg['add_neck'], + with_bg=data_cfg['with_bg']) + ], + [dict( + type='BottomUpGeneratePAFTarget', + limb_width=1, + )], + ], + pipeline_indices=[0] * 2 + [1] * 2, + keys=['targets', 'masks']), + dict(type='Collect', keys=['img', 'targets', 'masks'], meta_keys=[]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile', channel_order='bgr'), + dict(type='BottomUpGetImgSize', test_scale_factor=[1]), + dict( + type='BottomUpResizeAlign', + transforms=[ + dict(type='ToTensor'), + dict( + type='NormalizeTensor', mean=[0.5, 0.5, 0.5], std=[1., 1., + 1.]), + ]), + dict( + type='Collect', + keys=['img'], + meta_keys=[ + 'image_file', 'aug_data', 'test_scale_factor', 'base_size', + 'center', 'scale', 'flip_index', 'skeleton' + ]), +] + +test_pipeline = val_pipeline + +data_root = 'data/coco' +data = dict( + samples_per_gpu=24, + workers_per_gpu=2, + train=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', + img_prefix=f'{data_root}/train2017/', + data_cfg=data_cfg, + 
pipeline=train_pipeline), + val=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), + test=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), +) diff --git a/configs/body/2d_kpt_sview_rgb_img/part_affinity_field/coco/lightweight_openpose_coco_368x368_withbg_addneck_bgr_meanstd_lr015.py b/configs/body/2d_kpt_sview_rgb_img/part_affinity_field/coco/lightweight_openpose_coco_368x368_withbg_addneck_bgr_meanstd_lr015.py new file mode 100644 index 0000000000..eb4c160478 --- /dev/null +++ b/configs/body/2d_kpt_sview_rgb_img/part_affinity_field/coco/lightweight_openpose_coco_368x368_withbg_addneck_bgr_meanstd_lr015.py @@ -0,0 +1,177 @@ +log_level = 'INFO' +load_from = None +resume_from = None +dist_params = dict(backend='nccl') +workflow = [('train', 1)] +checkpoint_config = dict(interval=50) +evaluation = dict(interval=50, metric='mAP', key_indicator='AP') + +optimizer = dict( + type='Adam', + lr=0.015, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[200, 260]) +total_epochs = 300 +log_config = dict( + interval=50, + hooks=[dict(type='TextLoggerHook'), + dict(type='TensorboardLoggerHook')]) + +channel_cfg = dict( + dataset_joints=17, + dataset_channel=[ + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + ], + inference_channel=[ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + ]) + +data_cfg = dict( + image_size=368, + base_size=256, + base_sigma=2, + heatmap_size=[46], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + num_scales=1, + scale_aware_sigma=False, + with_bg=True, + add_neck=True, +) + +# model settings +model = dict( + type='PartAffinityField', + pretrained='models/mobilenetv1_sgd_68.848.pth', + backbone=dict( + type='LightweightOpenPoseNetwork', + in_channels=3, + out_channels_cm=19, + out_channels_paf=38, + stem_feat_channels=128, + num_stages=2), + keypoint_head=dict( + type='PAFHead', + heatmap_heads_cfg=[ + dict( + type='DeconvHead', + num_deconv_layers=0, + extra=dict(final_conv_kernel=0), + loss_keypoint=dict(type='MaskedMSELoss', )), + ] * 2, + paf_heads_cfg=[ + dict( + type='DeconvHead', + num_deconv_layers=0, + extra=dict(final_conv_kernel=0), + loss_keypoint=dict(type='MaskedMSELoss', )), + ] * 2, + heatmap_index=[0, 1], + paf_index=[2, 3], + ), + train_cfg=dict(), + test_cfg=dict( + num_joints=channel_cfg['dataset_joints'], + max_num_people=30, + scale_factor=[1], + with_heatmaps=[True], + with_pafs=[True], + project2image=True, + align_corners=False, + nms_kernel=5, + nms_padding=2, + tag_per_joint=True, + detection_threshold=0.1, + tag_threshold=1, + use_detection_val=True, + ignore_too_much=False, + adjust=True, + refine=True, + flip_test=True, + with_bg=data_cfg['with_bg'], + add_neck=data_cfg['add_neck'])) + +train_pipeline = [ + dict(type='LoadImageFromFile', channel_order='bgr'), + dict( + type='BottomUpRandomAffine', + rot_factor=30, + scale_factor=[0.75, 1.5], + scale_type='short', + trans_factor=40), + dict(type='BottomUpRandomFlip', flip_prob=0.5), + dict(type='ToTensor'), + dict(type='NormalizeTensor', mean=[0.5, 0.5, 0.5], std=[1., 1., 1.]), + dict( 
+ type='MultitaskGatherTarget', + pipeline_list=[ + [ + dict( + type='BottomUpGenerateHeatmapTarget', + sigma=2, + add_neck=data_cfg['add_neck'], + with_bg=data_cfg['with_bg']) + ], + [dict( + type='BottomUpGeneratePAFTarget', + limb_width=1, + )], + ], + pipeline_indices=[0] * 2 + [1] * 2, + keys=['targets', 'masks']), + dict(type='Collect', keys=['img', 'targets', 'masks'], meta_keys=[]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile', channel_order='bgr'), + dict(type='BottomUpGetImgSize', test_scale_factor=[1]), + dict( + type='BottomUpResizeAlign', + transforms=[ + dict(type='ToTensor'), + dict( + type='NormalizeTensor', mean=[0.5, 0.5, 0.5], std=[1., 1., + 1.]), + ]), + dict( + type='Collect', + keys=['img'], + meta_keys=[ + 'image_file', 'aug_data', 'test_scale_factor', 'base_size', + 'center', 'scale', 'flip_index', 'skeleton' + ]), +] + +test_pipeline = val_pipeline + +data_root = 'data/coco' +data = dict( + samples_per_gpu=24, + workers_per_gpu=2, + train=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', + img_prefix=f'{data_root}/train2017/', + data_cfg=data_cfg, + pipeline=train_pipeline), + val=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), + test=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), +) diff --git a/configs/body/2d_kpt_sview_rgb_img/part_affinity_field/coco/lightweight_openpose_coco_368x368_withbg_addneck_lr00015.py b/configs/body/2d_kpt_sview_rgb_img/part_affinity_field/coco/lightweight_openpose_coco_368x368_withbg_addneck_lr00015.py new file mode 100644 index 0000000000..7e91e6f004 --- /dev/null +++ b/configs/body/2d_kpt_sview_rgb_img/part_affinity_field/coco/lightweight_openpose_coco_368x368_withbg_addneck_lr00015.py @@ -0,0 +1,182 @@ +log_level = 'INFO' +load_from = None +resume_from = None +dist_params = dict(backend='nccl') +workflow = [('train', 1)] +checkpoint_config = dict(interval=50) +evaluation = dict(interval=50, metric='mAP', key_indicator='AP') + +optimizer = dict( + type='Adam', + lr=0.00015, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + gamma=0.333, + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[200, 260]) +total_epochs = 300 +log_config = dict( + interval=50, + hooks=[dict(type='TextLoggerHook'), + dict(type='TensorboardLoggerHook')]) + +channel_cfg = dict( + dataset_joints=17, + dataset_channel=[ + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + ], + inference_channel=[ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + ]) + +data_cfg = dict( + image_size=368, + base_size=256, + base_sigma=2, + heatmap_size=[46], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + num_scales=1, + scale_aware_sigma=False, + with_bg=True, + add_neck=True, +) + +# model settings +model = dict( + type='PartAffinityField', + pretrained='models/mobilenetv1_sgd_68.848.pth', + backbone=dict( + type='LightweightOpenPoseNetwork', + in_channels=3, + out_channels_cm=19, + out_channels_paf=38, + stem_feat_channels=128, + num_stages=2), + keypoint_head=dict( + type='PAFHead', + heatmap_heads_cfg=[ + dict( + type='DeconvHead', + 
num_deconv_layers=0, + extra=dict(final_conv_kernel=0), + loss_keypoint=dict(type='MaskedMSELoss', )), + ] * 2, + paf_heads_cfg=[ + dict( + type='DeconvHead', + num_deconv_layers=0, + extra=dict(final_conv_kernel=0), + loss_keypoint=dict(type='MaskedMSELoss', )), + ] * 2, + heatmap_index=[0, 1], + paf_index=[2, 3], + ), + train_cfg=dict(), + test_cfg=dict( + num_joints=channel_cfg['dataset_joints'], + max_num_people=30, + scale_factor=[1], + with_heatmaps=[True], + with_pafs=[True], + project2image=True, + align_corners=False, + nms_kernel=5, + nms_padding=2, + tag_per_joint=True, + detection_threshold=0.1, + tag_threshold=1, + use_detection_val=True, + ignore_too_much=False, + adjust=True, + refine=True, + flip_test=True, + with_bg=data_cfg['with_bg'], + add_neck=data_cfg['add_neck'])) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='BottomUpRandomAffine', + rot_factor=30, + scale_factor=[0.75, 1.5], + scale_type='short', + trans_factor=40), + dict(type='BottomUpRandomFlip', flip_prob=0.5), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict( + type='MultitaskGatherTarget', + pipeline_list=[ + [ + dict( + type='BottomUpGenerateHeatmapTarget', + sigma=2, + add_neck=data_cfg['add_neck'], + with_bg=data_cfg['with_bg']) + ], + [dict( + type='BottomUpGeneratePAFTarget', + limb_width=1, + )], + ], + pipeline_indices=[0] * 2 + [1] * 2, + keys=['targets', 'masks']), + dict(type='Collect', keys=['img', 'targets', 'masks'], meta_keys=[]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='BottomUpGetImgSize', test_scale_factor=[1]), + dict( + type='BottomUpResizeAlign', + transforms=[ + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]), + dict( + type='Collect', + keys=['img'], + meta_keys=[ + 'image_file', 'aug_data', 'test_scale_factor', 'base_size', + 'center', 'scale', 'flip_index', 'skeleton' + ]), +] + +test_pipeline = val_pipeline + +data_root = 'data/coco' +data = dict( + samples_per_gpu=24, + workers_per_gpu=2, + train=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', + img_prefix=f'{data_root}/train2017/', + data_cfg=data_cfg, + pipeline=train_pipeline), + val=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), + test=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), +) diff --git a/configs/body/2d_kpt_sview_rgb_img/part_affinity_field/coco/lightweight_openpose_coco_368x368_withbg_addneck_lr015.py b/configs/body/2d_kpt_sview_rgb_img/part_affinity_field/coco/lightweight_openpose_coco_368x368_withbg_addneck_lr015.py new file mode 100644 index 0000000000..00f11dd14e --- /dev/null +++ b/configs/body/2d_kpt_sview_rgb_img/part_affinity_field/coco/lightweight_openpose_coco_368x368_withbg_addneck_lr015.py @@ -0,0 +1,181 @@ +log_level = 'INFO' +load_from = None +resume_from = None +dist_params = dict(backend='nccl') +workflow = [('train', 1)] +checkpoint_config = dict(interval=50) +evaluation = dict(interval=50, metric='mAP', key_indicator='AP') + +optimizer = dict( + type='Adam', + lr=0.015, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + 
policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[200, 260]) +total_epochs = 300 +log_config = dict( + interval=50, + hooks=[dict(type='TextLoggerHook'), + dict(type='TensorboardLoggerHook')]) + +channel_cfg = dict( + dataset_joints=17, + dataset_channel=[ + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + ], + inference_channel=[ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + ]) + +data_cfg = dict( + image_size=368, + base_size=256, + base_sigma=2, + heatmap_size=[46], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + num_scales=1, + scale_aware_sigma=False, + with_bg=True, + add_neck=True, +) + +# model settings +model = dict( + type='PartAffinityField', + pretrained='models/mobilenetv1_sgd_68.848.pth', + backbone=dict( + type='LightweightOpenPoseNetwork', + in_channels=3, + out_channels_cm=19, + out_channels_paf=38, + stem_feat_channels=128, + num_stages=2), + keypoint_head=dict( + type='PAFHead', + heatmap_heads_cfg=[ + dict( + type='DeconvHead', + num_deconv_layers=0, + extra=dict(final_conv_kernel=0), + loss_keypoint=dict(type='MaskedMSELoss', )), + ] * 2, + paf_heads_cfg=[ + dict( + type='DeconvHead', + num_deconv_layers=0, + extra=dict(final_conv_kernel=0), + loss_keypoint=dict(type='MaskedMSELoss', )), + ] * 2, + heatmap_index=[0, 1], + paf_index=[2, 3], + ), + train_cfg=dict(), + test_cfg=dict( + num_joints=channel_cfg['dataset_joints'], + max_num_people=30, + scale_factor=[1], + with_heatmaps=[True], + with_pafs=[True], + project2image=True, + align_corners=False, + nms_kernel=5, + nms_padding=2, + tag_per_joint=True, + detection_threshold=0.1, + tag_threshold=1, + use_detection_val=True, + ignore_too_much=False, + adjust=True, + refine=True, + flip_test=True, + with_bg=data_cfg['with_bg'], + add_neck=data_cfg['add_neck'])) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='BottomUpRandomAffine', + rot_factor=30, + scale_factor=[0.75, 1.5], + scale_type='short', + trans_factor=40), + dict(type='BottomUpRandomFlip', flip_prob=0.5), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict( + type='MultitaskGatherTarget', + pipeline_list=[ + [ + dict( + type='BottomUpGenerateHeatmapTarget', + sigma=2, + add_neck=data_cfg['add_neck'], + with_bg=data_cfg['with_bg']) + ], + [dict( + type='BottomUpGeneratePAFTarget', + limb_width=1, + )], + ], + pipeline_indices=[0] * 2 + [1] * 2, + keys=['targets', 'masks']), + dict(type='Collect', keys=['img', 'targets', 'masks'], meta_keys=[]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='BottomUpGetImgSize', test_scale_factor=[1]), + dict( + type='BottomUpResizeAlign', + transforms=[ + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]), + dict( + type='Collect', + keys=['img'], + meta_keys=[ + 'image_file', 'aug_data', 'test_scale_factor', 'base_size', + 'center', 'scale', 'flip_index', 'skeleton' + ]), +] + +test_pipeline = val_pipeline + +data_root = 'data/coco' +data = dict( + samples_per_gpu=24, + workers_per_gpu=2, + train=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', + img_prefix=f'{data_root}/train2017/', + data_cfg=data_cfg, + pipeline=train_pipeline), + val=dict( + type='BottomUpCocoDataset', + 
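+        # validation and test both read the COCO val2017 keypoint annotations.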
ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), + test=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), +) diff --git a/configs/body/2d_kpt_sview_rgb_img/part_affinity_field/coco/openpose_sn_coco_368x368.py b/configs/body/2d_kpt_sview_rgb_img/part_affinity_field/coco/openpose_sn_coco_368x368.py new file mode 100644 index 0000000000..3bf0c84b69 --- /dev/null +++ b/configs/body/2d_kpt_sview_rgb_img/part_affinity_field/coco/openpose_sn_coco_368x368.py @@ -0,0 +1,182 @@ +log_level = 'INFO' +load_from = None +resume_from = None +dist_params = dict(backend='nccl') +workflow = [('train', 1)] +checkpoint_config = dict(interval=50) +evaluation = dict(interval=50, metric='mAP', key_indicator='AP') + +optimizer = dict( + type='Adam', + lr=0.0015, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[200, 260]) +total_epochs = 300 +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook') + ]) + +channel_cfg = dict( + dataset_joints=17, + dataset_channel=[ + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + ], + inference_channel=[ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + ]) + +data_cfg = dict( + image_size=368, + base_size=256, + base_sigma=2, + heatmap_size=[46], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + num_scales=1, + scale_aware_sigma=False, + with_bg=False) + +# model settings +model = dict( + type='PartAffinityField', + pretrained='mmcls://vgg19_bn', + backbone=dict( + type='OpenPoseNetworkV2', + in_channels=3, + out_channels_cm=17, + out_channels_paf=38, + stem_feat_channels=128, + num_stages=5, + stage_types=('PAF', 'PAF', 'PAF', 'PAF', 'CM'), + num_blocks=(8, 8, 8, 8, 5), + block_channels=(128, 128, 128, 256, 256)), + keypoint_head=dict( + type='PAFHead', + heatmap_heads_cfg=[ + dict( + type='DeconvHead', + num_deconv_layers=0, + extra=dict(final_conv_kernel=0), + loss_keypoint=dict(type='MaskedMSELoss', )), + ] * 1, + paf_heads_cfg=[ + dict( + type='DeconvHead', + num_deconv_layers=0, + extra=dict(final_conv_kernel=0), + loss_keypoint=dict(type='MaskedMSELoss', )), + ] * 4, + heatmap_index=[4], + paf_index=[0, 1, 2, 3], + ), + train_cfg=dict(), + test_cfg=dict( + num_joints=channel_cfg['dataset_joints'], + max_num_people=30, + scale_factor=[1], + with_heatmaps=[True], + with_pafs=[True], + project2image=True, + align_corners=False, + nms_kernel=5, + nms_padding=2, + tag_per_joint=True, + detection_threshold=0.1, + tag_threshold=1, + use_detection_val=True, + ignore_too_much=False, + adjust=True, + refine=True, + flip_test=True, + with_bg=data_cfg['with_bg'])) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='BottomUpRandomAffine', + rot_factor=30, + scale_factor=[0.75, 1.5], + scale_type='short', + trans_factor=40), + dict(type='BottomUpRandomFlip', flip_prob=0.5), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict( + type='MultitaskGatherTarget', + pipeline_list=[ + [ + dict( + type='BottomUpGenerateHeatmapTarget', + sigma=2, + 
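+                    # Gaussian sigma of the keypoint targets, defined on the
+                    # 46x46 heatmap grid (data_cfg['heatmap_size']).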
with_bg=data_cfg['with_bg']) + ], + [dict( + type='BottomUpGeneratePAFTarget', + limb_width=1, + )], + ], + pipeline_indices=[0] + [1] * 4, + keys=['targets', 'masks']), + dict(type='Collect', keys=['img', 'targets', 'masks'], meta_keys=[]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='BottomUpGetImgSize', test_scale_factor=[1]), + dict( + type='BottomUpResizeAlign', + transforms=[ + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]), + dict( + type='Collect', + keys=['img'], + meta_keys=[ + 'image_file', 'aug_data', 'test_scale_factor', 'base_size', + 'center', 'scale', 'flip_index', 'skeleton' + ]), +] + +test_pipeline = val_pipeline + +data_root = 'data/coco' +data = dict( + samples_per_gpu=24, + workers_per_gpu=2, + train=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', + img_prefix=f'{data_root}/train2017/', + data_cfg=data_cfg, + pipeline=train_pipeline), + val=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), + test=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), +) diff --git a/configs/body/2d_kpt_sview_rgb_img/part_affinity_field/coco/openpose_v1_coco_368x368.py b/configs/body/2d_kpt_sview_rgb_img/part_affinity_field/coco/openpose_v1_coco_368x368.py new file mode 100644 index 0000000000..7e7156e8ca --- /dev/null +++ b/configs/body/2d_kpt_sview_rgb_img/part_affinity_field/coco/openpose_v1_coco_368x368.py @@ -0,0 +1,179 @@ +log_level = 'INFO' +load_from = None +resume_from = None +dist_params = dict(backend='nccl') +workflow = [('train', 1)] +checkpoint_config = dict(interval=50) +evaluation = dict(interval=50, metric='mAP', key_indicator='AP') + +optimizer = dict( + type='Adam', + lr=0.0015, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[200, 260]) +total_epochs = 300 +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook') + ]) + +channel_cfg = dict( + dataset_joints=17, + dataset_channel=[ + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + ], + inference_channel=[ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + ]) + +data_cfg = dict( + image_size=368, + base_size=256, + base_sigma=2, + heatmap_size=[46], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + num_scales=1, + scale_aware_sigma=False, + with_bg=False) + +# model settings +model = dict( + type='PartAffinityField', + pretrained='mmcls://vgg19_bn', + backbone=dict( + type='OpenPoseNetworkV1', + in_channels=3, + out_channels_cm=17, + out_channels_paf=38, + stem_feat_channels=128, + num_stages=6), + keypoint_head=dict( + type='PAFHead', + heatmap_heads_cfg=[ + dict( + type='DeconvHead', + num_deconv_layers=0, + extra=dict(final_conv_kernel=0), + loss_keypoint=dict(type='MaskedMSELoss', )), + ] * 6, + paf_heads_cfg=[ + dict( + type='DeconvHead', + num_deconv_layers=0, + extra=dict(final_conv_kernel=0), + loss_keypoint=dict(type='MaskedMSELoss', )), + ] * 6, + heatmap_index=[0, 1, 2, 3, 4, 5], + 
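+        # assumed output layout of OpenPoseNetworkV1: 12 stage outputs, the first
+        # six being confidence maps and the last six PAFs, routed to the heads above.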
paf_index=[6, 7, 8, 9, 10, 11], + ), + train_cfg=dict(), + test_cfg=dict( + num_joints=channel_cfg['dataset_joints'], + max_num_people=30, + scale_factor=[1], + with_heatmaps=[True], + with_pafs=[True], + project2image=True, + align_corners=False, + nms_kernel=5, + nms_padding=2, + tag_per_joint=True, + detection_threshold=0.1, + tag_threshold=1, + use_detection_val=True, + ignore_too_much=False, + adjust=True, + refine=True, + flip_test=True, + with_bg=data_cfg['with_bg'])) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='BottomUpRandomAffine', + rot_factor=30, + scale_factor=[0.75, 1.5], + scale_type='short', + trans_factor=40), + dict(type='BottomUpRandomFlip', flip_prob=0.5), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict( + type='MultitaskGatherTarget', + pipeline_list=[ + [ + dict( + type='BottomUpGenerateHeatmapTarget', + sigma=2, + with_bg=data_cfg['with_bg']) + ], + [dict( + type='BottomUpGeneratePAFTarget', + limb_width=1, + )], + ], + pipeline_indices=[0] * 6 + [1] * 6, + keys=['targets', 'masks']), + dict(type='Collect', keys=['img', 'targets', 'masks'], meta_keys=[]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='BottomUpGetImgSize', test_scale_factor=[1]), + dict( + type='BottomUpResizeAlign', + transforms=[ + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]), + dict( + type='Collect', + keys=['img'], + meta_keys=[ + 'image_file', 'aug_data', 'test_scale_factor', 'base_size', + 'center', 'scale', 'flip_index', 'skeleton' + ]), +] + +test_pipeline = val_pipeline + +data_root = 'data/coco' +data = dict( + samples_per_gpu=24, + workers_per_gpu=2, + train=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', + img_prefix=f'{data_root}/train2017/', + data_cfg=data_cfg, + pipeline=train_pipeline), + val=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), + test=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), +) diff --git a/configs/body/2d_kpt_sview_rgb_img/part_affinity_field/coco/openpose_v1_coco_368x368_withbg.py b/configs/body/2d_kpt_sview_rgb_img/part_affinity_field/coco/openpose_v1_coco_368x368_withbg.py new file mode 100644 index 0000000000..5d10a5b72b --- /dev/null +++ b/configs/body/2d_kpt_sview_rgb_img/part_affinity_field/coco/openpose_v1_coco_368x368_withbg.py @@ -0,0 +1,180 @@ +log_level = 'INFO' +load_from = None +resume_from = None +dist_params = dict(backend='nccl') +workflow = [('train', 1)] +checkpoint_config = dict(interval=50) +evaluation = dict(interval=50, metric='mAP', key_indicator='AP') + +optimizer = dict( + type='Adam', + lr=0.0015, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[200, 260]) +total_epochs = 300 +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook') + ]) + +channel_cfg = dict( + dataset_joints=17, + dataset_channel=[ + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + ], + inference_channel=[ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 
12, 13, 14, 15, 16 + ]) + +data_cfg = dict( + image_size=368, + base_size=256, + base_sigma=2, + heatmap_size=[46], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + num_scales=1, + scale_aware_sigma=False, + with_bg=True) + +# model settings +model = dict( + type='PartAffinityField', + pretrained='mmcls://vgg19_bn', + backbone=dict( + type='OpenPoseNetworkV1', + in_channels=3, + # additional channel for bg + out_channels_cm=17 + 1, + out_channels_paf=38, + stem_feat_channels=128, + num_stages=6), + keypoint_head=dict( + type='PAFHead', + heatmap_heads_cfg=[ + dict( + type='DeconvHead', + num_deconv_layers=0, + extra=dict(final_conv_kernel=0), + loss_keypoint=dict(type='MaskedMSELoss', )), + ] * 6, + paf_heads_cfg=[ + dict( + type='DeconvHead', + num_deconv_layers=0, + extra=dict(final_conv_kernel=0), + loss_keypoint=dict(type='MaskedMSELoss', )), + ] * 6, + heatmap_index=[0, 1, 2, 3, 4, 5], + paf_index=[6, 7, 8, 9, 10, 11], + ), + train_cfg=dict(), + test_cfg=dict( + num_joints=channel_cfg['dataset_joints'], + max_num_people=30, + scale_factor=[1], + with_heatmaps=[True], + with_pafs=[True], + project2image=True, + align_corners=False, + nms_kernel=5, + nms_padding=2, + tag_per_joint=True, + detection_threshold=0.1, + tag_threshold=1, + use_detection_val=True, + ignore_too_much=False, + adjust=True, + refine=True, + flip_test=True, + with_bg=data_cfg['with_bg'])) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='BottomUpRandomAffine', + rot_factor=30, + scale_factor=[0.75, 1.5], + scale_type='short', + trans_factor=40), + dict(type='BottomUpRandomFlip', flip_prob=0.5), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict( + type='MultitaskGatherTarget', + pipeline_list=[ + [ + dict( + type='BottomUpGenerateHeatmapTarget', + sigma=2, + with_bg=data_cfg['with_bg']) + ], + [dict( + type='BottomUpGeneratePAFTarget', + limb_width=1, + )], + ], + pipeline_indices=[0] * 6 + [1] * 6, + keys=['targets', 'masks']), + dict(type='Collect', keys=['img', 'targets', 'masks'], meta_keys=[]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='BottomUpGetImgSize', test_scale_factor=[1]), + dict( + type='BottomUpResizeAlign', + transforms=[ + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]), + dict( + type='Collect', + keys=['img'], + meta_keys=[ + 'image_file', 'aug_data', 'test_scale_factor', 'base_size', + 'center', 'scale', 'flip_index', 'skeleton' + ]), +] + +test_pipeline = val_pipeline + +data_root = 'data/coco' +data = dict( + samples_per_gpu=24, + workers_per_gpu=2, + train=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', + img_prefix=f'{data_root}/train2017/', + data_cfg=data_cfg, + pipeline=train_pipeline), + val=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), + test=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), +) diff --git a/configs/body/2d_kpt_sview_rgb_img/part_affinity_field/coco/openpose_v2_coco_368x368.py 
b/configs/body/2d_kpt_sview_rgb_img/part_affinity_field/coco/openpose_v2_coco_368x368.py new file mode 100644 index 0000000000..fcda6a04f5 --- /dev/null +++ b/configs/body/2d_kpt_sview_rgb_img/part_affinity_field/coco/openpose_v2_coco_368x368.py @@ -0,0 +1,182 @@ +log_level = 'INFO' +load_from = None +resume_from = None +dist_params = dict(backend='nccl') +workflow = [('train', 1)] +checkpoint_config = dict(interval=50) +evaluation = dict(interval=50, metric='mAP', key_indicator='AP') + +optimizer = dict( + type='Adam', + lr=0.0015, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[200, 260]) +total_epochs = 300 +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook') + ]) + +channel_cfg = dict( + dataset_joints=17, + dataset_channel=[ + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + ], + inference_channel=[ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + ]) + +data_cfg = dict( + image_size=368, + base_size=256, + base_sigma=2, + heatmap_size=[46], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + num_scales=1, + scale_aware_sigma=False, + with_bg=False) + +# model settings +model = dict( + type='PartAffinityField', + pretrained='mmcls://vgg19_bn', + backbone=dict( + type='OpenPoseNetworkV2', + in_channels=3, + out_channels_cm=17, + out_channels_paf=38, + stem_feat_channels=128, + num_stages=6, + stage_types=('PAF', 'PAF', 'PAF', 'PAF', 'PAF', 'CM'), + num_blocks=5, + block_channels=96), + keypoint_head=dict( + type='PAFHead', + heatmap_heads_cfg=[ + dict( + type='DeconvHead', + num_deconv_layers=0, + extra=dict(final_conv_kernel=0), + loss_keypoint=dict(type='MaskedMSELoss', )), + ] * 1, + paf_heads_cfg=[ + dict( + type='DeconvHead', + num_deconv_layers=0, + extra=dict(final_conv_kernel=0), + loss_keypoint=dict(type='MaskedMSELoss', )), + ] * 5, + heatmap_index=[5], + paf_index=[0, 1, 2, 3, 4], + ), + train_cfg=dict(), + test_cfg=dict( + num_joints=channel_cfg['dataset_joints'], + max_num_people=30, + scale_factor=[1], + with_heatmaps=[True], + with_pafs=[True], + project2image=True, + align_corners=False, + nms_kernel=5, + nms_padding=2, + tag_per_joint=True, + detection_threshold=0.1, + tag_threshold=1, + use_detection_val=True, + ignore_too_much=False, + adjust=True, + refine=True, + flip_test=True, + with_bg=data_cfg['with_bg'])) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='BottomUpRandomAffine', + rot_factor=30, + scale_factor=[0.75, 1.5], + scale_type='short', + trans_factor=40), + dict(type='BottomUpRandomFlip', flip_prob=0.5), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict( + type='MultitaskGatherTarget', + pipeline_list=[ + [ + dict( + type='BottomUpGenerateHeatmapTarget', + sigma=2, + with_bg=data_cfg['with_bg']) + ], + [dict( + type='BottomUpGeneratePAFTarget', + limb_width=1, + )], + ], + pipeline_indices=[0] + [1] * 5, + keys=['targets', 'masks']), + dict(type='Collect', keys=['img', 'targets', 'masks'], meta_keys=[]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='BottomUpGetImgSize', test_scale_factor=[1]), + dict( + type='BottomUpResizeAlign', + transforms=[ + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + 
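+                    # standard ImageNet statistics, matching the
+                    # ImageNet-pretrained VGG backbone.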
std=[0.229, 0.224, 0.225]), + ]), + dict( + type='Collect', + keys=['img'], + meta_keys=[ + 'image_file', 'aug_data', 'test_scale_factor', 'base_size', + 'center', 'scale', 'flip_index', 'skeleton' + ]), +] + +test_pipeline = val_pipeline + +data_root = 'data/coco' +data = dict( + samples_per_gpu=24, + workers_per_gpu=2, + train=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', + img_prefix=f'{data_root}/train2017/', + data_cfg=data_cfg, + pipeline=train_pipeline), + val=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), + test=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), +) diff --git a/configs/body/2d_kpt_sview_rgb_img/part_affinity_field/coco/openpose_v2_coco_368x368_withbg.py b/configs/body/2d_kpt_sview_rgb_img/part_affinity_field/coco/openpose_v2_coco_368x368_withbg.py new file mode 100644 index 0000000000..0b1020d6bd --- /dev/null +++ b/configs/body/2d_kpt_sview_rgb_img/part_affinity_field/coco/openpose_v2_coco_368x368_withbg.py @@ -0,0 +1,183 @@ +log_level = 'INFO' +load_from = None +resume_from = None +dist_params = dict(backend='nccl') +workflow = [('train', 1)] +checkpoint_config = dict(interval=50) +evaluation = dict(interval=50, metric='mAP', key_indicator='AP') + +optimizer = dict( + type='Adam', + lr=0.0015, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[200, 260]) +total_epochs = 300 +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + dict(type='TensorboardLoggerHook') + ]) + +channel_cfg = dict( + dataset_joints=17, + dataset_channel=[ + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + ], + inference_channel=[ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + ]) + +data_cfg = dict( + image_size=368, + base_size=256, + base_sigma=2, + heatmap_size=[46], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + num_scales=1, + scale_aware_sigma=False, + with_bg=True) + +# model settings +model = dict( + type='PartAffinityField', + pretrained='mmcls://vgg19_bn', + backbone=dict( + type='OpenPoseNetworkV2', + in_channels=3, + # additional channel for bg + out_channels_cm=17 + 1, + out_channels_paf=38, + stem_feat_channels=128, + num_stages=6, + stage_types=('PAF', 'PAF', 'PAF', 'PAF', 'PAF', 'CM'), + num_blocks=5, + block_channels=96), + keypoint_head=dict( + type='PAFHead', + heatmap_heads_cfg=[ + dict( + type='DeconvHead', + num_deconv_layers=0, + extra=dict(final_conv_kernel=0), + loss_keypoint=dict(type='MaskedMSELoss', )), + ] * 1, + paf_heads_cfg=[ + dict( + type='DeconvHead', + num_deconv_layers=0, + extra=dict(final_conv_kernel=0), + loss_keypoint=dict(type='MaskedMSELoss', )), + ] * 5, + heatmap_index=[5], + paf_index=[0, 1, 2, 3, 4], + ), + train_cfg=dict(), + test_cfg=dict( + num_joints=channel_cfg['dataset_joints'], + max_num_people=30, + scale_factor=[1], + with_heatmaps=[True], + with_pafs=[True], + project2image=True, + align_corners=False, + nms_kernel=5, + nms_padding=2, + tag_per_joint=True, + detection_threshold=0.1, + tag_threshold=1, + use_detection_val=True, + 
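+        # decoding options shared with the associative-embedding configs;
+        # detection_threshold is assumed to filter low-confidence heatmap peaks
+        # before grouping.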
ignore_too_much=False, + adjust=True, + refine=True, + flip_test=True, + with_bg=data_cfg['with_bg'])) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='BottomUpRandomAffine', + rot_factor=30, + scale_factor=[0.75, 1.5], + scale_type='short', + trans_factor=40), + dict(type='BottomUpRandomFlip', flip_prob=0.5), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict( + type='MultitaskGatherTarget', + pipeline_list=[ + [ + dict( + type='BottomUpGenerateHeatmapTarget', + sigma=2, + with_bg=data_cfg['with_bg']) + ], + [dict( + type='BottomUpGeneratePAFTarget', + limb_width=1, + )], + ], + pipeline_indices=[0] + [1] * 5, + keys=['targets', 'masks']), + dict(type='Collect', keys=['img', 'targets', 'masks'], meta_keys=[]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='BottomUpGetImgSize', test_scale_factor=[1]), + dict( + type='BottomUpResizeAlign', + transforms=[ + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]), + dict( + type='Collect', + keys=['img'], + meta_keys=[ + 'image_file', 'aug_data', 'test_scale_factor', 'base_size', + 'center', 'scale', 'flip_index', 'skeleton' + ]), +] + +test_pipeline = val_pipeline + +data_root = 'data/coco' +data = dict( + samples_per_gpu=24, + workers_per_gpu=2, + train=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', + img_prefix=f'{data_root}/train2017/', + data_cfg=data_cfg, + pipeline=train_pipeline), + val=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), + test=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), +) diff --git a/configs/body/2d_kpt_sview_rgb_img/part_affinity_field/coco/openpose_v2_coco_368x368_withbg_addneck.py b/configs/body/2d_kpt_sview_rgb_img/part_affinity_field/coco/openpose_v2_coco_368x368_withbg_addneck.py new file mode 100644 index 0000000000..aa3f470da0 --- /dev/null +++ b/configs/body/2d_kpt_sview_rgb_img/part_affinity_field/coco/openpose_v2_coco_368x368_withbg_addneck.py @@ -0,0 +1,187 @@ +log_level = 'INFO' +load_from = None +resume_from = None +dist_params = dict(backend='nccl') +workflow = [('train', 1)] +checkpoint_config = dict(interval=50) +evaluation = dict(interval=50, metric='mAP', key_indicator='AP') + +optimizer = dict( + type='Adam', + lr=0.0015, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[200, 260]) +total_epochs = 300 +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + dict(type='TensorboardLoggerHook') + ]) + +channel_cfg = dict( + dataset_joints=17, + dataset_channel=[ + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + ], + inference_channel=[ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + ]) + +data_cfg = dict( + image_size=368, + base_size=256, + base_sigma=2, + heatmap_size=[46], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + num_scales=1, + scale_aware_sigma=False, + with_bg=True, + add_neck=True +) + +# model 
settings +model = dict( + type='PartAffinityField', + pretrained='mmcls://vgg19_bn', + backbone=dict( + type='OpenPoseNetworkV2', + in_channels=3, + # additional channel for bg + out_channels_cm=19, + out_channels_paf=38, + stem_feat_channels=128, + num_stages=6, + stage_types=('PAF', 'PAF', 'PAF', 'PAF', 'PAF', 'CM'), + num_blocks=5, + block_channels=96), + keypoint_head=dict( + type='PAFHead', + heatmap_heads_cfg=[ + dict( + type='DeconvHead', + num_deconv_layers=0, + extra=dict(final_conv_kernel=0), + loss_keypoint=dict(type='MaskedMSELoss', )), + ] * 1, + paf_heads_cfg=[ + dict( + type='DeconvHead', + num_deconv_layers=0, + extra=dict(final_conv_kernel=0), + loss_keypoint=dict(type='MaskedMSELoss', )), + ] * 5, + heatmap_index=[5], + paf_index=[0, 1, 2, 3, 4], + ), + train_cfg=dict(), + test_cfg=dict( + num_joints=channel_cfg['dataset_joints'], + max_num_people=30, + scale_factor=[1], + with_heatmaps=[True], + with_pafs=[True], + project2image=True, + align_corners=False, + nms_kernel=5, + nms_padding=2, + tag_per_joint=True, + detection_threshold=0.1, + tag_threshold=1, + use_detection_val=True, + ignore_too_much=False, + adjust=True, + refine=True, + flip_test=True, + with_bg=data_cfg['with_bg'], + add_neck=data_cfg['add_neck'])) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='BottomUpRandomAffine', + rot_factor=30, + scale_factor=[0.75, 1.5], + scale_type='short', + trans_factor=40), + dict(type='BottomUpRandomFlip', flip_prob=0.5), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict( + type='MultitaskGatherTarget', + pipeline_list=[ + [ + dict( + type='BottomUpGenerateHeatmapTarget', + sigma=2, + add_neck=data_cfg['add_neck'], + with_bg=data_cfg['with_bg']) + ], + [dict( + type='BottomUpGeneratePAFTarget', + limb_width=1, + )], + ], + pipeline_indices=[0] + [1] * 5, + keys=['targets', 'masks']), + dict(type='Collect', keys=['img', 'targets', 'masks'], meta_keys=[]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='BottomUpGetImgSize', test_scale_factor=[1]), + dict( + type='BottomUpResizeAlign', + transforms=[ + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]), + dict( + type='Collect', + keys=['img'], + meta_keys=[ + 'image_file', 'aug_data', 'test_scale_factor', 'base_size', + 'center', 'scale', 'flip_index', 'skeleton' + ]), +] + +test_pipeline = val_pipeline + +data_root = 'data/coco' +data = dict( + samples_per_gpu=24, + workers_per_gpu=2, + train=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', + img_prefix=f'{data_root}/train2017/', + data_cfg=data_cfg, + pipeline=train_pipeline), + val=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), + test=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), +) diff --git a/configs/body/2d_kpt_sview_rgb_img/part_affinity_field/coco/openpose_v2_coco_368x368_withbg_sgd.py b/configs/body/2d_kpt_sview_rgb_img/part_affinity_field/coco/openpose_v2_coco_368x368_withbg_sgd.py new file mode 100644 index 0000000000..a33e52098d --- /dev/null +++ 
b/configs/body/2d_kpt_sview_rgb_img/part_affinity_field/coco/openpose_v2_coco_368x368_withbg_sgd.py @@ -0,0 +1,183 @@ +log_level = 'INFO' +load_from = None +resume_from = None +dist_params = dict(backend='nccl') +workflow = [('train', 1)] +checkpoint_config = dict(interval=50) +evaluation = dict(interval=50, metric='mAP', key_indicator='AP') + +optimizer = dict( + type='SGD', + lr=0.01, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[200, 260]) +total_epochs = 300 +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + dict(type='TensorboardLoggerHook') + ]) + +channel_cfg = dict( + dataset_joints=17, + dataset_channel=[ + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + ], + inference_channel=[ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + ]) + +data_cfg = dict( + image_size=368, + base_size=256, + base_sigma=2, + heatmap_size=[46], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + num_scales=1, + scale_aware_sigma=False, + with_bg=True) + +# model settings +model = dict( + type='PartAffinityField', + pretrained='mmcls://vgg19_bn', + backbone=dict( + type='OpenPoseNetworkV2', + in_channels=3, + # additional channel for bg + out_channels_cm=17 + 1, + out_channels_paf=38, + stem_feat_channels=128, + num_stages=6, + stage_types=('PAF', 'PAF', 'PAF', 'PAF', 'PAF', 'CM'), + num_blocks=5, + block_channels=96), + keypoint_head=dict( + type='PAFHead', + heatmap_heads_cfg=[ + dict( + type='DeconvHead', + num_deconv_layers=0, + extra=dict(final_conv_kernel=0), + loss_keypoint=dict(type='MaskedMSELoss', )), + ] * 1, + paf_heads_cfg=[ + dict( + type='DeconvHead', + num_deconv_layers=0, + extra=dict(final_conv_kernel=0), + loss_keypoint=dict(type='MaskedMSELoss', )), + ] * 5, + heatmap_index=[5], + paf_index=[0, 1, 2, 3, 4], + ), + train_cfg=dict(), + test_cfg=dict( + num_joints=channel_cfg['dataset_joints'], + max_num_people=30, + scale_factor=[1], + with_heatmaps=[True], + with_pafs=[True], + project2image=True, + align_corners=False, + nms_kernel=5, + nms_padding=2, + tag_per_joint=True, + detection_threshold=0.1, + tag_threshold=1, + use_detection_val=True, + ignore_too_much=False, + adjust=True, + refine=True, + flip_test=True, + with_bg=data_cfg['with_bg'])) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='BottomUpRandomAffine', + rot_factor=30, + scale_factor=[0.75, 1.5], + scale_type='short', + trans_factor=40), + dict(type='BottomUpRandomFlip', flip_prob=0.5), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict( + type='MultitaskGatherTarget', + pipeline_list=[ + [ + dict( + type='BottomUpGenerateHeatmapTarget', + sigma=2, + with_bg=data_cfg['with_bg']) + ], + [dict( + type='BottomUpGeneratePAFTarget', + limb_width=1, + )], + ], + pipeline_indices=[0] + [1] * 5, + keys=['targets', 'masks']), + dict(type='Collect', keys=['img', 'targets', 'masks'], meta_keys=[]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='BottomUpGetImgSize', test_scale_factor=[1]), + dict( + type='BottomUpResizeAlign', + transforms=[ + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]), + dict( + type='Collect', + keys=['img'], + meta_keys=[ + 'image_file', 'aug_data', 
'test_scale_factor', 'base_size', + 'center', 'scale', 'flip_index', 'skeleton' + ]), +] + +test_pipeline = val_pipeline + +data_root = 'data/coco' +data = dict( + samples_per_gpu=24, + workers_per_gpu=2, + train=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', + img_prefix=f'{data_root}/train2017/', + data_cfg=data_cfg, + pipeline=train_pipeline), + val=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), + test=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), +) diff --git a/configs/body/2d_kpt_sview_rgb_img/part_affinity_field/coco/res50_coco_512x512.py b/configs/body/2d_kpt_sview_rgb_img/part_affinity_field/coco/res50_coco_512x512.py new file mode 100644 index 0000000000..239901be51 --- /dev/null +++ b/configs/body/2d_kpt_sview_rgb_img/part_affinity_field/coco/res50_coco_512x512.py @@ -0,0 +1,181 @@ +log_level = 'INFO' +load_from = None +resume_from = None +dist_params = dict(backend='nccl') +workflow = [('train', 1)] +checkpoint_config = dict(interval=50) +evaluation = dict(interval=50, metric='mAP', key_indicator='AP') + +optimizer = dict( + type='Adam', + lr=0.0015, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[200, 260]) +total_epochs = 300 +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook') + ]) + +channel_cfg = dict( + dataset_joints=17, + dataset_channel=[ + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], + ], + inference_channel=[ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 + ]) + +data_cfg = dict( + image_size=512, + base_size=256, + base_sigma=2, + heatmap_size=[128], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + num_scales=1, + scale_aware_sigma=False, + with_bg=True) + +# model settings +model = dict( + type='PartAffinityField', + pretrained='torchvision://resnet50', + backbone=dict(type='ResNet', depth=50), + keypoint_head=dict( + type='PAFHead', + heatmap_heads_cfg=[ + dict( + type='DeconvHead', + in_channels=2048, + out_channels=17, + extra=dict( + final_conv_kernel=1, + num_conv_layers=1, + num_conv_kernels=(1, )), + loss_keypoint=dict(type='MaskedMSELoss', )), + ], + paf_heads_cfg=[ + dict( + type='DeconvHead', + in_channels=2048, + out_channels=38, + extra=dict( + final_conv_kernel=1, + num_conv_layers=1, + num_conv_kernels=(1, )), + loss_keypoint=dict(type='MaskedMSELoss', )), + ], + heatmap_index=[0], + paf_index=[0], + ), + train_cfg=dict(), + test_cfg=dict( + num_joints=channel_cfg['dataset_joints'], + max_num_people=30, + scale_factor=[1], + with_heatmaps=[True], + with_pafs=[True], + project2image=True, + align_corners=False, + nms_kernel=5, + nms_padding=2, + tag_per_joint=True, + detection_threshold=0.1, + tag_threshold=1, + use_detection_val=True, + ignore_too_much=False, + adjust=True, + refine=True, + flip_test=True, + with_bg=data_cfg['with_bg'])) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='BottomUpRandomAffine', + rot_factor=30, + scale_factor=[0.75, 1.5], + scale_type='short', + 
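+        # random rotation within +/-30 degrees and rescaling of the short side by
+        # 0.75-1.5x; trans_factor controls translation jitter (assumed to be in
+        # pixels).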
trans_factor=40), + dict(type='BottomUpRandomFlip', flip_prob=0.5), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict( + type='MultitaskGatherTarget', + pipeline_list=[ + [ + dict( + type='BottomUpGenerateHeatmapTarget', + sigma=2, + with_bg=data_cfg['with_bg']) + ], + [dict( + type='BottomUpGeneratePAFTarget', + limb_width=1, + )], + ], + pipeline_indices=[0, 1], + keys=['targets', 'masks']), + dict(type='Collect', keys=['img', 'targets', 'masks'], meta_keys=[]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='BottomUpGetImgSize', test_scale_factor=[1]), + dict( + type='BottomUpResizeAlign', + transforms=[ + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]), + dict( + type='Collect', + keys=['img'], + meta_keys=[ + 'image_file', 'aug_data', 'test_scale_factor', 'base_size', + 'center', 'scale', 'flip_index', 'skeleton' + ]), +] + +test_pipeline = val_pipeline + +data_root = 'data/coco' +data = dict( + samples_per_gpu=24, + workers_per_gpu=2, + train=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', + img_prefix=f'{data_root}/train2017/', + data_cfg=data_cfg, + pipeline=train_pipeline), + val=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), + test=dict( + type='BottomUpCocoDataset', + ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), +) diff --git a/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/higherhrnet_w32_coco_wholebody_512x512.py b/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/higherhrnet_w32_coco_wholebody_512x512.py index db89340076..c4060f7c4d 100644 --- a/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/higherhrnet_w32_coco_wholebody_512x512.py +++ b/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/higherhrnet_w32_coco_wholebody_512x512.py @@ -113,6 +113,7 @@ with_heatmaps=[True, True], with_ae=[True, False], project2image=True, + align_corners=False, nms_kernel=5, nms_padding=2, tag_per_joint=True, diff --git a/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/higherhrnet_w32_coco_wholebody_640x640.py b/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/higherhrnet_w32_coco_wholebody_640x640.py index 4483e6eb81..09c1562b5a 100644 --- a/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/higherhrnet_w32_coco_wholebody_640x640.py +++ b/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/higherhrnet_w32_coco_wholebody_640x640.py @@ -113,6 +113,7 @@ with_heatmaps=[True, True], with_ae=[True, False], project2image=True, + align_corners=False, nms_kernel=5, nms_padding=2, tag_per_joint=True, diff --git a/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/higherhrnet_w48_coco_wholebody_512x512.py b/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/higherhrnet_w48_coco_wholebody_512x512.py index b05b0ad812..0ecbe2f070 100644 --- a/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/higherhrnet_w48_coco_wholebody_512x512.py +++ 
b/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/higherhrnet_w48_coco_wholebody_512x512.py @@ -113,6 +113,7 @@ with_heatmaps=[True, True], with_ae=[True, False], project2image=True, + align_corners=False, nms_kernel=5, nms_padding=2, tag_per_joint=True, diff --git a/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/higherhrnet_w48_coco_wholebody_640x640.py b/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/higherhrnet_w48_coco_wholebody_640x640.py index 38a6938ab3..b406b94b86 100644 --- a/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/higherhrnet_w48_coco_wholebody_640x640.py +++ b/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/higherhrnet_w48_coco_wholebody_640x640.py @@ -113,6 +113,7 @@ with_heatmaps=[True, True], with_ae=[True, False], project2image=True, + align_corners=False, nms_kernel=5, nms_padding=2, tag_per_joint=True, diff --git a/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/hrnet_w32_coco_wholebody_512x512.py b/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/hrnet_w32_coco_wholebody_512x512.py index 06ff2edf81..2cee3f056a 100644 --- a/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/hrnet_w32_coco_wholebody_512x512.py +++ b/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/hrnet_w32_coco_wholebody_512x512.py @@ -109,6 +109,7 @@ with_heatmaps=[True], with_ae=[True], project2image=True, + align_corners=False, nms_kernel=5, nms_padding=2, tag_per_joint=True, diff --git a/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/hrnet_w32_coco_wholebody_640x640.py b/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/hrnet_w32_coco_wholebody_640x640.py index 595ecb0052..40df05bc05 100644 --- a/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/hrnet_w32_coco_wholebody_640x640.py +++ b/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/hrnet_w32_coco_wholebody_640x640.py @@ -109,6 +109,7 @@ with_heatmaps=[True], with_ae=[True], project2image=True, + align_corners=False, nms_kernel=5, nms_padding=2, tag_per_joint=True, diff --git a/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/hrnet_w48_coco_wholebody_512x512.py b/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/hrnet_w48_coco_wholebody_512x512.py index a9a567b48f..d62412a6fe 100644 --- a/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/hrnet_w48_coco_wholebody_512x512.py +++ b/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/hrnet_w48_coco_wholebody_512x512.py @@ -109,6 +109,7 @@ with_heatmaps=[True], with_ae=[True], project2image=True, + align_corners=False, nms_kernel=5, nms_padding=2, tag_per_joint=True, diff --git a/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/hrnet_w48_coco_wholebody_640x640.py b/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/hrnet_w48_coco_wholebody_640x640.py index c320d625b2..2b0f7cfb92 100644 --- a/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/hrnet_w48_coco_wholebody_640x640.py +++ b/configs/wholebody/2d_kpt_sview_rgb_img/associative_embedding/coco-wholebody/hrnet_w48_coco_wholebody_640x640.py @@ -109,6 +109,7 @@ with_heatmaps=[True], with_ae=[True], project2image=True, + 
align_corners=False, nms_kernel=5, nms_padding=2, tag_per_joint=True, diff --git a/configs/wholebody/2d_kpt_sview_rgb_img/part_affinity_field/coco-wholebody/hrnet_w32_coco_wholebody_480x480.py b/configs/wholebody/2d_kpt_sview_rgb_img/part_affinity_field/coco-wholebody/hrnet_w32_coco_wholebody_480x480.py new file mode 100644 index 0000000000..80d3768834 --- /dev/null +++ b/configs/wholebody/2d_kpt_sview_rgb_img/part_affinity_field/coco-wholebody/hrnet_w32_coco_wholebody_480x480.py @@ -0,0 +1,210 @@ +log_level = 'INFO' +load_from = None +resume_from = 'work_dirs/hrnet_w32_coco_wholebody_480x480/epoch_150.pth' +dist_params = dict(backend='nccl') +workflow = [('train', 1)] +checkpoint_config = dict(interval=50) +evaluation = dict(interval=50, metric='mAP', key_indicator='AP') + +optimizer = dict( + type='Adam', + lr=0.0015, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[200, 260]) +total_epochs = 300 +log_config = dict( + interval=50, + hooks=[dict(type='TextLoggerHook'), + dict(type='TensorboardLoggerHook')]) + +channel_cfg = dict( + dataset_joints=133, + dataset_channel=[ + list(range(133)), + ], + inference_channel=list(range(133))) + +data_cfg = dict( + image_size=480, + base_size=256, + base_sigma=2, + heatmap_size=[120], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + num_scales=1, + scale_aware_sigma=False, + with_bg=False) + +# model settings +model = dict( + type='PartAffinityField', + pretrained='https://download.openmmlab.com/mmpose/' + 'pretrain_models/hrnet_w32-36af842e.pth', + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256))), + ), + keypoint_head=dict( + type='PAFHead', + heatmap_heads_cfg=[ + dict( + type='DeconvHead', + in_channels=32, + out_channels=133, + num_deconv_layers=0, + extra=dict( + final_conv_kernel=1, + num_conv_layers=1, + num_conv_kernels=(1, )), + loss_keypoint=dict( + type='MaskedMSELoss', supervise_empty=True)), + ], + paf_heads_cfg=[ + dict( + type='DeconvHead', + in_channels=32, + out_channels=270, + num_deconv_layers=0, + extra=dict( + final_conv_kernel=1, + num_conv_layers=1, + num_conv_kernels=(1, )), + loss_keypoint=dict( + type='MaskedMSELoss', supervise_empty=True)), + ], + heatmap_index=[0], + paf_index=[0], + ), + train_cfg=dict(), + test_cfg=dict( + num_joints=channel_cfg['dataset_joints'], + max_num_people=30, + scale_factor=[0.5, 1, 1.5, 2, 2.5], + with_heatmaps=[True], + with_pafs=[True], + project2image=True, + align_corners=False, + nms_kernel=5, + nms_padding=2, + tag_per_joint=True, + detection_threshold=0.1, + tag_threshold=1, + use_detection_val=True, + ignore_too_much=False, + adjust=True, + refine=True, + flip_test=True, + with_bg=data_cfg['with_bg'])) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='BottomUpRandomAffine', + rot_factor=30, + scale_factor=[0.75, 1.5], + scale_type='short', + trans_factor=40), + 
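+    # same bottom-up augmentation recipe as the COCO body configs: random affine,
+    # horizontal flip, tensor conversion and ImageNet normalization.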
dict(type='BottomUpRandomFlip', flip_prob=0.5), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict( + type='MultitaskGatherTarget', + pipeline_list=[ + [ + dict( + type='BottomUpGenerateHeatmapTarget', + sigma=2, + with_bg=data_cfg['with_bg']) + ], + [dict( + type='BottomUpGeneratePAFTarget', + limb_width=1, + )], + ], + pipeline_indices=[0, 1], + keys=['targets', 'masks']), + dict(type='Collect', keys=['img', 'targets', 'masks'], meta_keys=[]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='BottomUpGetImgSize', test_scale_factor=[0.5, 1, 1.5, 2, 2.5]), + dict( + type='BottomUpResizeAlign', + transforms=[ + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]), + dict( + type='Collect', + keys=['img'], + meta_keys=[ + 'image_file', 'aug_data', 'test_scale_factor', 'base_size', + 'center', 'scale', 'flip_index', 'skeleton' + ]), +] + +test_pipeline = val_pipeline + +data_root = 'data/coco' +data = dict( + samples_per_gpu=24, + workers_per_gpu=2, + train=dict( + type='BottomUpCocoWholeBodyDataset', + ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json', + img_prefix=f'{data_root}/train2017/', + data_cfg=data_cfg, + pipeline=train_pipeline), + val=dict( + type='BottomUpCocoWholeBodyDataset', + ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), + test=dict( + type='BottomUpCocoWholeBodyDataset', + ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), +) diff --git a/configs/wholebody/2d_kpt_sview_rgb_img/part_affinity_field/coco-wholebody/hrnet_w32_coco_wholebody_480x480_withbg.py b/configs/wholebody/2d_kpt_sview_rgb_img/part_affinity_field/coco-wholebody/hrnet_w32_coco_wholebody_480x480_withbg.py new file mode 100644 index 0000000000..2aa4f9ed0e --- /dev/null +++ b/configs/wholebody/2d_kpt_sview_rgb_img/part_affinity_field/coco-wholebody/hrnet_w32_coco_wholebody_480x480_withbg.py @@ -0,0 +1,210 @@ +log_level = 'INFO' +load_from = None +resume_from = None +dist_params = dict(backend='nccl') +workflow = [('train', 1)] +checkpoint_config = dict(interval=50) +evaluation = dict(interval=50, metric='mAP', key_indicator='AP') + +optimizer = dict( + type='Adam', + lr=0.0015, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + gamma=0.333, + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[200, 260]) +total_epochs = 300 +log_config = dict( + interval=50, + hooks=[dict(type='TextLoggerHook'), + dict(type='TensorboardLoggerHook')]) + +channel_cfg = dict( + dataset_joints=133, + dataset_channel=[ + list(range(133)), + ], + inference_channel=list(range(133))) + +data_cfg = dict( + image_size=480, + base_size=256, + base_sigma=2, + heatmap_size=[120], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + num_scales=1, + scale_aware_sigma=False, + with_bg=True) + +# model settings +model = dict( + type='PartAffinityField', + pretrained='models/hrnet_w32_coco_512x512/best.pth', + backbone=dict( + type='HRNet', + in_channels=3, + extra=dict( + stage1=dict( + num_modules=1, + num_branches=1, + block='BOTTLENECK', + num_blocks=(4, ), + num_channels=(64, )), + stage2=dict( + 
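+            # HRNet-W32: the high-resolution branch stays at 32 channels from
+            # stage2 onward, which is why the heads below use in_channels=32.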
num_modules=1, + num_branches=2, + block='BASIC', + num_blocks=(4, 4), + num_channels=(32, 64)), + stage3=dict( + num_modules=4, + num_branches=3, + block='BASIC', + num_blocks=(4, 4, 4), + num_channels=(32, 64, 128)), + stage4=dict( + num_modules=3, + num_branches=4, + block='BASIC', + num_blocks=(4, 4, 4, 4), + num_channels=(32, 64, 128, 256))), + ), + keypoint_head=dict( + type='PAFHead', + heatmap_heads_cfg=[ + dict( + type='DeconvHead', + in_channels=32, + out_channels=134, + num_deconv_layers=0, + extra=dict( + final_conv_kernel=1, + num_conv_layers=1, + num_conv_kernels=(1, )), + loss_keypoint=dict( + type='MaskedMSELoss', supervise_empty=False)), + ], + paf_heads_cfg=[ + dict( + type='DeconvHead', + in_channels=32, + out_channels=270, + num_deconv_layers=0, + extra=dict( + final_conv_kernel=1, + num_conv_layers=1, + num_conv_kernels=(1, )), + loss_keypoint=dict( + type='MaskedMSELoss', supervise_empty=False)), + ], + heatmap_index=[0], + paf_index=[0], + ), + train_cfg=dict(), + test_cfg=dict( + num_joints=channel_cfg['dataset_joints'], + max_num_people=30, + scale_factor=[0.5, 1, 1.5, 2, 2.5], + with_heatmaps=[True], + with_pafs=[True], + project2image=True, + align_corners=False, + nms_kernel=5, + nms_padding=2, + tag_per_joint=True, + detection_threshold=0.1, + tag_threshold=1, + use_detection_val=True, + ignore_too_much=False, + adjust=True, + refine=True, + flip_test=True, + with_bg=data_cfg['with_bg'])) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='BottomUpRandomAffine', + rot_factor=30, + scale_factor=[0.75, 1.5], + scale_type='short', + trans_factor=40), + dict(type='BottomUpRandomFlip', flip_prob=0.5), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict( + type='MultitaskGatherTarget', + pipeline_list=[ + [ + dict( + type='BottomUpGenerateHeatmapTarget', + sigma=2, + with_bg=data_cfg['with_bg']) + ], + [dict( + type='BottomUpGeneratePAFTarget', + limb_width=1, + )], + ], + pipeline_indices=[0, 1], + keys=['targets', 'masks']), + dict(type='Collect', keys=['img', 'targets', 'masks'], meta_keys=[]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='BottomUpGetImgSize', test_scale_factor=[0.5, 1, 1.5, 2, 2.5]), + dict( + type='BottomUpResizeAlign', + transforms=[ + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]), + dict( + type='Collect', + keys=['img'], + meta_keys=[ + 'image_file', 'aug_data', 'test_scale_factor', 'base_size', + 'center', 'scale', 'flip_index', 'skeleton' + ]), +] + +test_pipeline = val_pipeline + +data_root = 'data/coco' +data = dict( + samples_per_gpu=24, + workers_per_gpu=2, + train=dict( + type='BottomUpCocoWholeBodyDataset', + ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json', + img_prefix=f'{data_root}/train2017/', + data_cfg=data_cfg, + pipeline=train_pipeline), + val=dict( + type='BottomUpCocoWholeBodyDataset', + ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), + test=dict( + type='BottomUpCocoWholeBodyDataset', + ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), +) diff --git a/configs/wholebody/2d_kpt_sview_rgb_img/part_affinity_field/coco-wholebody/openpose_v1_coco_wholebody_480x480.py 
b/configs/wholebody/2d_kpt_sview_rgb_img/part_affinity_field/coco-wholebody/openpose_v1_coco_wholebody_480x480.py new file mode 100644 index 0000000000..cd34eb9f3a --- /dev/null +++ b/configs/wholebody/2d_kpt_sview_rgb_img/part_affinity_field/coco-wholebody/openpose_v1_coco_wholebody_480x480.py @@ -0,0 +1,177 @@ +log_level = 'INFO' +load_from = None +resume_from = None +dist_params = dict(backend='nccl') +workflow = [('train', 1)] +checkpoint_config = dict(interval=50) +evaluation = dict(interval=50, metric='mAP', key_indicator='AP') + +optimizer = dict( + type='Adam', + lr=0.0015, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[200, 260]) +total_epochs = 300 +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook') + ]) + +channel_cfg = dict( + dataset_joints=133, + dataset_channel=[ + list(range(133)), + ], + inference_channel=list(range(133))) + +data_cfg = dict( + image_size=480, + base_size=256, + base_sigma=2, + heatmap_size=[60], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + num_scales=1, + scale_aware_sigma=False, + with_bg=False) + +# model settings +model = dict( + type='PartAffinityField', + pretrained='mmcls://vgg16_bn', + backbone=dict( + type='OpenPoseNetworkV1', + in_channels=3, + out_channels_cm=133, + out_channels_paf=270, + stem_feat_channels=128, + num_stages=6), + keypoint_head=dict( + type='PAFHead', + heatmap_heads_cfg=[ + dict( + type='DeconvHead', + num_deconv_layers=0, + extra=dict(final_conv_kernel=0), + loss_keypoint=dict(type='MaskedMSELoss', )), + ] * 6, + paf_heads_cfg=[ + dict( + type='DeconvHead', + num_deconv_layers=0, + extra=dict(final_conv_kernel=0), + loss_keypoint=dict(type='MaskedMSELoss', )), + ] * 6, + heatmap_index=[0, 1, 2, 3, 4, 5], + paf_index=[6, 7, 8, 9, 10, 11], + ), + train_cfg=dict(), + test_cfg=dict( + num_joints=channel_cfg['dataset_joints'], + max_num_people=30, + scale_factor=[0.5, 1, 1.5, 2, 2.5], + with_heatmaps=[True], + with_pafs=[True], + project2image=True, + align_corners=False, + nms_kernel=5, + nms_padding=2, + tag_per_joint=True, + detection_threshold=0.1, + tag_threshold=1, + use_detection_val=True, + ignore_too_much=False, + adjust=True, + refine=True, + flip_test=True, + with_bg=data_cfg['with_bg'])) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='BottomUpRandomAffine', + rot_factor=30, + scale_factor=[0.75, 1.5], + scale_type='short', + trans_factor=40), + dict(type='BottomUpRandomFlip', flip_prob=0.5), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict( + type='MultitaskGatherTarget', + pipeline_list=[ + [ + dict( + type='BottomUpGenerateHeatmapTarget', + sigma=2, + with_bg=data_cfg['with_bg']) + ], + [dict( + type='BottomUpGeneratePAFTarget', + limb_width=1, + )], + ], + pipeline_indices=[0] * 6 + [1] * 6, + keys=['targets', 'masks']), + dict(type='Collect', keys=['img', 'targets', 'masks'], meta_keys=[]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='BottomUpGetImgSize', test_scale_factor=[0.5, 1, 1.5, 2, 2.5]), + dict( + type='BottomUpResizeAlign', + transforms=[ + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]), + dict( + type='Collect', + 
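+        # meta_keys below carry the multi-scale sizes and flip_index that the
+        # bottom-up decoder needs at test time (five test scales, flip_test=True
+        # in test_cfg).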
keys=['img'], + meta_keys=[ + 'image_file', 'aug_data', 'test_scale_factor', 'base_size', + 'center', 'scale', 'flip_index', 'skeleton' + ]), +] + +test_pipeline = val_pipeline + +data_root = 'data/coco' +data = dict( + samples_per_gpu=24, + workers_per_gpu=2, + train=dict( + type='BottomUpCocoWholeBodyDataset', + ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json', + img_prefix=f'{data_root}/train2017/', + data_cfg=data_cfg, + pipeline=train_pipeline), + val=dict( + type='BottomUpCocoWholeBodyDataset', + ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), + test=dict( + type='BottomUpCocoWholeBodyDataset', + ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), +) diff --git a/configs/wholebody/2d_kpt_sview_rgb_img/part_affinity_field/coco-wholebody/openpose_v1_coco_wholebody_480x480_withbg.py b/configs/wholebody/2d_kpt_sview_rgb_img/part_affinity_field/coco-wholebody/openpose_v1_coco_wholebody_480x480_withbg.py new file mode 100644 index 0000000000..2fd5fc4d28 --- /dev/null +++ b/configs/wholebody/2d_kpt_sview_rgb_img/part_affinity_field/coco-wholebody/openpose_v1_coco_wholebody_480x480_withbg.py @@ -0,0 +1,180 @@ +log_level = 'INFO' +load_from = None +resume_from = None +dist_params = dict(backend='nccl') +workflow = [('train', 1)] +checkpoint_config = dict(interval=50) +evaluation = dict(interval=50, metric='mAP', key_indicator='AP') + +optimizer = dict( + type='Adam', + lr=0.00015, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + gamma=0.333, + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[200, 260]) +total_epochs = 300 +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + # dict(type='TensorboardLoggerHook') + ]) + +channel_cfg = dict( + dataset_joints=133, + dataset_channel=[ + list(range(133)), + ], + inference_channel=list(range(133))) + +data_cfg = dict( + image_size=480, + base_size=256, + base_sigma=2, + heatmap_size=[60], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + num_scales=1, + scale_aware_sigma=False, + with_bg=True) + +# model settings +model = dict( + type='PartAffinityField', + pretrained='models/openpose_v1_coco_368x368/best.pth', + backbone=dict( + type='OpenPoseNetworkV1', + in_channels=3, + out_channels_cm=134, + out_channels_paf=270, + stem_feat_channels=128, + num_stages=6), + keypoint_head=dict( + type='PAFHead', + heatmap_heads_cfg=[ + dict( + type='DeconvHead', + num_deconv_layers=0, + extra=dict(final_conv_kernel=0), + loss_keypoint=dict( + type='MaskedMSELoss', supervise_empty=False)), + ] * 6, + paf_heads_cfg=[ + dict( + type='DeconvHead', + num_deconv_layers=0, + extra=dict(final_conv_kernel=0), + loss_keypoint=dict( + type='MaskedMSELoss', supervise_empty=False)), + ] * 6, + heatmap_index=[0, 1, 2, 3, 4, 5], + paf_index=[6, 7, 8, 9, 10, 11], + ), + train_cfg=dict(), + test_cfg=dict( + num_joints=channel_cfg['dataset_joints'], + max_num_people=30, + scale_factor=[0.5, 1, 1.5, 2, 2.5], + with_heatmaps=[True], + with_pafs=[True], + project2image=True, + align_corners=False, + nms_kernel=5, + nms_padding=2, + tag_per_joint=True, + detection_threshold=0.1, + tag_threshold=1, + use_detection_val=True, + ignore_too_much=False, + adjust=True, + refine=True, 
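# Rough sanity check of the `_withbg' variant above: with data_cfg['with_bg']=True the
# heatmap target presumably gains one background map, which is why out_channels_cm is
# 134 here instead of 133 (the v2 withbg config below notes the same extra bg channel).
num_joints = 133
with_bg = True
out_channels_cm = num_joints + (1 if with_bg else 0)
assert out_channels_cm == 134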
+ flip_test=True, + with_bg=data_cfg['with_bg'])) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='BottomUpRandomAffine', + rot_factor=30, + scale_factor=[0.75, 1.5], + scale_type='short', + trans_factor=40), + dict(type='BottomUpRandomFlip', flip_prob=0.5), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict( + type='MultitaskGatherTarget', + pipeline_list=[ + [ + dict( + type='BottomUpGenerateHeatmapTarget', + sigma=2, + with_bg=data_cfg['with_bg']) + ], + [dict( + type='BottomUpGeneratePAFTarget', + limb_width=1, + )], + ], + pipeline_indices=[0] * 6 + [1] * 6, + keys=['targets', 'masks']), + dict(type='Collect', keys=['img', 'targets', 'masks'], meta_keys=[]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='BottomUpGetImgSize', test_scale_factor=[0.5, 1, 1.5, 2, 2.5]), + dict( + type='BottomUpResizeAlign', + transforms=[ + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]), + dict( + type='Collect', + keys=['img'], + meta_keys=[ + 'image_file', 'aug_data', 'test_scale_factor', 'base_size', + 'center', 'scale', 'flip_index', 'skeleton' + ]), +] + +test_pipeline = val_pipeline + +data_root = 'data/coco' +data = dict( + samples_per_gpu=24, + workers_per_gpu=2, + train=dict( + type='BottomUpCocoWholeBodyDataset', + ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json', + img_prefix=f'{data_root}/train2017/', + data_cfg=data_cfg, + pipeline=train_pipeline), + val=dict( + type='BottomUpCocoWholeBodyDataset', + ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), + test=dict( + type='BottomUpCocoWholeBodyDataset', + ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), +) diff --git a/configs/wholebody/2d_kpt_sview_rgb_img/part_affinity_field/coco-wholebody/openpose_v2_coco_wholebody_480x480.py b/configs/wholebody/2d_kpt_sview_rgb_img/part_affinity_field/coco-wholebody/openpose_v2_coco_wholebody_480x480.py new file mode 100644 index 0000000000..f283c2fbfc --- /dev/null +++ b/configs/wholebody/2d_kpt_sview_rgb_img/part_affinity_field/coco-wholebody/openpose_v2_coco_wholebody_480x480.py @@ -0,0 +1,181 @@ +log_level = 'INFO' +load_from = None +resume_from = 'work_dirs/openpose_v2_coco_wholebody_480x480/epoch_150.pth' +dist_params = dict(backend='nccl') +workflow = [('train', 1)] +checkpoint_config = dict(interval=50) +evaluation = dict(interval=50, metric='mAP', key_indicator='AP') + +optimizer = dict( + type='Adam', + lr=0.0015, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[200, 260]) +total_epochs = 300 +log_config = dict( + interval=50, + hooks=[dict(type='TextLoggerHook'), + dict(type='TensorboardLoggerHook')]) + +channel_cfg = dict( + dataset_joints=133, + dataset_channel=[ + list(range(133)), + ], + inference_channel=list(range(133))) + +data_cfg = dict( + image_size=480, + base_size=256, + base_sigma=2, + heatmap_size=[60], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + num_scales=1, + scale_aware_sigma=False, + with_bg=False) + +# model settings +model = dict( + 
type='PartAffinityField', + pretrained='mmcls://vgg16_bn', + backbone=dict( + type='OpenPoseNetworkV2', + in_channels=3, + out_channels_cm=133, + out_channels_paf=270, + stem_feat_channels=128, + num_stages=6, + stage_types=('PAF', 'PAF', 'PAF', 'PAF', 'PAF', 'CM'), + num_blocks=5, + block_channels=96), + keypoint_head=dict( + type='PAFHead', + heatmap_heads_cfg=[ + dict( + type='DeconvHead', + num_deconv_layers=0, + extra=dict(final_conv_kernel=0), + loss_keypoint=dict( + type='MaskedMSELoss', supervise_empty=True)), + ] * 1, + paf_heads_cfg=[ + dict( + type='DeconvHead', + num_deconv_layers=0, + extra=dict(final_conv_kernel=0), + loss_keypoint=dict( + type='MaskedMSELoss', supervise_empty=True)), + ] * 5, + heatmap_index=[5], + paf_index=[0, 1, 2, 3, 4], + ), + train_cfg=dict(), + test_cfg=dict( + num_joints=channel_cfg['dataset_joints'], + max_num_people=30, + scale_factor=[0.5, 1, 1.5, 2, 2.5], + with_heatmaps=[True], + with_pafs=[True], + project2image=True, + align_corners=False, + nms_kernel=5, + nms_padding=2, + tag_per_joint=True, + detection_threshold=0.1, + tag_threshold=1, + use_detection_val=True, + ignore_too_much=False, + adjust=True, + refine=True, + flip_test=True, + with_bg=data_cfg['with_bg'])) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='BottomUpRandomAffine', + rot_factor=30, + scale_factor=[0.75, 1.5], + scale_type='short', + trans_factor=40), + dict(type='BottomUpRandomFlip', flip_prob=0.5), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict( + type='MultitaskGatherTarget', + pipeline_list=[ + [ + dict( + type='BottomUpGenerateHeatmapTarget', + sigma=2, + with_bg=data_cfg['with_bg']) + ], + [dict( + type='BottomUpGeneratePAFTarget', + limb_width=1, + )], + ], + pipeline_indices=[0] + [1] * 5, + keys=['targets', 'masks']), + dict(type='Collect', keys=['img', 'targets', 'masks'], meta_keys=[]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='BottomUpGetImgSize', test_scale_factor=[0.5, 1, 1.5, 2, 2.5], + max_input_size=1800), + dict( + type='BottomUpResizeAlign', + transforms=[ + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]), + dict( + type='Collect', + keys=['img'], + meta_keys=[ + 'image_file', 'aug_data', 'test_scale_factor', 'base_size', + 'center', 'scale', 'flip_index', 'skeleton' + ]), +] + +test_pipeline = val_pipeline + +data_root = 'data/coco' +data = dict( + samples_per_gpu=16, + workers_per_gpu=2, + train=dict( + type='BottomUpCocoWholeBodyDataset', + ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json', + img_prefix=f'{data_root}/train2017/', + data_cfg=data_cfg, + pipeline=train_pipeline), + val=dict( + type='BottomUpCocoWholeBodyDataset', + ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), + test=dict( + type='BottomUpCocoWholeBodyDataset', + ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), +) diff --git a/configs/wholebody/2d_kpt_sview_rgb_img/part_affinity_field/coco-wholebody/openpose_v2_coco_wholebody_480x480_withbg.py b/configs/wholebody/2d_kpt_sview_rgb_img/part_affinity_field/coco-wholebody/openpose_v2_coco_wholebody_480x480_withbg.py new file mode 100644 index 0000000000..1ea565cb54 --- /dev/null +++ 
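# In the OpenPose v2 config above, the backbone runs five PAF refinement stages followed
# by a single confidence-map stage, so backbone outputs 0-4 are PAFs and output 5 is the
# heatmap stage; heatmap_index/paf_index and pipeline_indices simply mirror that order.
stage_types = ('PAF', 'PAF', 'PAF', 'PAF', 'PAF', 'CM')
paf_index = [i for i, t in enumerate(stage_types) if t == 'PAF']
heatmap_index = [i for i, t in enumerate(stage_types) if t == 'CM']
assert paf_index == [0, 1, 2, 3, 4] and heatmap_index == [5]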
b/configs/wholebody/2d_kpt_sview_rgb_img/part_affinity_field/coco-wholebody/openpose_v2_coco_wholebody_480x480_withbg.py @@ -0,0 +1,183 @@ +log_level = 'INFO' +load_from = None +resume_from = None +dist_params = dict(backend='nccl') +workflow = [('train', 1)] +checkpoint_config = dict(interval=50) +evaluation = dict(interval=50, metric='mAP', key_indicator='AP') + +optimizer = dict( + type='Adam', + lr=0.0015, +) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + gamma=0.333, + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[200, 260]) +total_epochs = 300 +log_config = dict( + interval=50, + hooks=[dict(type='TextLoggerHook'), + dict(type='TensorboardLoggerHook')]) + +channel_cfg = dict( + dataset_joints=133, + dataset_channel=[ + list(range(133)), + ], + inference_channel=list(range(133))) + +data_cfg = dict( + image_size=480, + base_size=256, + base_sigma=2, + heatmap_size=[60], + num_joints=channel_cfg['dataset_joints'], + dataset_channel=channel_cfg['dataset_channel'], + inference_channel=channel_cfg['inference_channel'], + num_scales=1, + scale_aware_sigma=False, + with_bg=True) + +# model settings +model = dict( + type='PartAffinityField', + pretrained='models/openpose_v2_coco_368x368/best.pth', + backbone=dict( + type='OpenPoseNetworkV2', + in_channels=3, + # additional channel for bg + out_channels_cm=134, + out_channels_paf=270, + stem_feat_channels=128, + num_stages=6, + stage_types=('PAF', 'PAF', 'PAF', 'PAF', 'PAF', 'CM'), + num_blocks=5, + block_channels=96), + keypoint_head=dict( + type='PAFHead', + heatmap_heads_cfg=[ + dict( + type='DeconvHead', + num_deconv_layers=0, + extra=dict(final_conv_kernel=0), + loss_keypoint=dict( + type='MaskedMSELoss', supervise_empty=False)), + ] * 1, + paf_heads_cfg=[ + dict( + type='DeconvHead', + num_deconv_layers=0, + extra=dict(final_conv_kernel=0), + loss_keypoint=dict( + type='MaskedMSELoss', supervise_empty=False)), + ] * 5, + heatmap_index=[5], + paf_index=[0, 1, 2, 3, 4], + ), + train_cfg=dict(), + test_cfg=dict( + num_joints=channel_cfg['dataset_joints'], + max_num_people=30, + scale_factor=[0.5, 1, 1.5, 2, 2.5], + with_heatmaps=[True], + with_pafs=[True], + project2image=True, + align_corners=False, + nms_kernel=5, + nms_padding=2, + tag_per_joint=True, + detection_threshold=0.1, + tag_threshold=1, + use_detection_val=True, + ignore_too_much=False, + adjust=True, + refine=True, + flip_test=True, + with_bg=data_cfg['with_bg'])) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='BottomUpRandomAffine', + rot_factor=30, + scale_factor=[0.75, 1.5], + scale_type='short', + trans_factor=40), + dict(type='BottomUpRandomFlip', flip_prob=0.5), + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + dict( + type='MultitaskGatherTarget', + pipeline_list=[ + [ + dict( + type='BottomUpGenerateHeatmapTarget', + sigma=2, + with_bg=data_cfg['with_bg']) + ], + [dict( + type='BottomUpGeneratePAFTarget', + limb_width=1, + )], + ], + pipeline_indices=[0] + [1] * 5, + keys=['targets', 'masks']), + dict(type='Collect', keys=['img', 'targets', 'masks'], meta_keys=[]), +] + +val_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='BottomUpGetImgSize', test_scale_factor=[0.5, 1, 1.5, 2, 2.5], + max_input_size=1800), + dict( + type='BottomUpResizeAlign', + transforms=[ + dict(type='ToTensor'), + dict( + type='NormalizeTensor', + mean=[0.485, 0.456, 0.406], + std=[0.229, 0.224, 0.225]), + ]), + dict( + 
type='Collect', + keys=['img'], + meta_keys=[ + 'image_file', 'aug_data', 'test_scale_factor', 'base_size', + 'center', 'scale', 'flip_index', 'skeleton' + ]), +] + +test_pipeline = val_pipeline + +data_root = 'data/coco' +data = dict( + samples_per_gpu=16, + workers_per_gpu=2, + train=dict( + type='BottomUpCocoWholeBodyDataset', + ann_file=f'{data_root}/annotations/coco_wholebody_train_v1.0.json', + img_prefix=f'{data_root}/train2017/', + data_cfg=data_cfg, + pipeline=train_pipeline), + val=dict( + type='BottomUpCocoWholeBodyDataset', + ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), + test=dict( + type='BottomUpCocoWholeBodyDataset', + ann_file=f'{data_root}/annotations/coco_wholebody_val_v1.0.json', + img_prefix=f'{data_root}/val2017/', + data_cfg=data_cfg, + pipeline=val_pipeline), +) diff --git a/mmpose/.mim/configs b/mmpose/.mim/configs new file mode 120000 index 0000000000..5992d109cb --- /dev/null +++ b/mmpose/.mim/configs @@ -0,0 +1 @@ +../../configs \ No newline at end of file diff --git a/mmpose/.mim/demo b/mmpose/.mim/demo new file mode 120000 index 0000000000..bf71256cd3 --- /dev/null +++ b/mmpose/.mim/demo @@ -0,0 +1 @@ +../../demo \ No newline at end of file diff --git a/mmpose/.mim/model-index.yml b/mmpose/.mim/model-index.yml new file mode 120000 index 0000000000..a18c0b389b --- /dev/null +++ b/mmpose/.mim/model-index.yml @@ -0,0 +1 @@ +../../model-index.yml \ No newline at end of file diff --git a/mmpose/.mim/tools b/mmpose/.mim/tools new file mode 120000 index 0000000000..31941e941d --- /dev/null +++ b/mmpose/.mim/tools @@ -0,0 +1 @@ +../../tools \ No newline at end of file diff --git a/mmpose/apis/inference.py b/mmpose/apis/inference.py index bcb6c73a61..0cd989c18e 100644 --- a/mmpose/apis/inference.py +++ b/mmpose/apis/inference.py @@ -460,18 +460,40 @@ def inference_bottom_up_pose_model(model, test_pipeline = Compose(test_pipeline) # prepare data - data = { - 'img_or_path': img_or_path, - 'dataset': 'coco', - 'ann_info': { - 'image_size': - cfg.data_cfg['image_size'], - 'num_joints': - cfg.data_cfg['num_joints'], - 'flip_index': - [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15], + if cfg.data_cfg.get('add_neck', False): + data = { + 'img_or_path': img_or_path, + 'dataset': 'coco', + 'ann_info': { + 'image_size': + cfg.data_cfg['image_size'], + 'num_joints': + cfg.data_cfg['num_joints'], + 'flip_index': + [0, 1, 5, 6, 7, 2, 3, 4, 11, 12, 13, 8, 9, 10, 15, 14, 17, 16], + 'skeleton': [[1, 8], [8, 9], [9, 10], [1, 11], [11, 12], + [12, 13], [1, 2], [2, 3], [3, 4], [2, 16], [1, 5], + [5, 6], [6, 7], [5, 17], [1, 0], [0, 14], [0, 15], + [14, 16], [15, 17]] + } + } + else: + data = { + 'img_or_path': img_or_path, + 'dataset': 'coco', + 'ann_info': { + 'image_size': + cfg.data_cfg['image_size'], + 'num_joints': + cfg.data_cfg['num_joints'], + 'flip_index': + [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15], + 'skeleton': [[15, 13], [13, 11], [16, 14], [14, 12], [11, 12], + [5, 11], [6, 12], [5, 6], [5, 7], [6, 8], [7, 9], + [8, 10], [1, 2], [0, 1], [0, 2], [1, 3], [2, 4], + [3, 5], [4, 6]] + } } - } data = test_pipeline(data) data = collate([data], samples_per_gpu=1) @@ -496,7 +518,10 @@ def inference_bottom_up_pose_model(model, returned_outputs.append(h.layer_outputs) + order_map = [0, 15, 14, 17, 16, 5, 2, 6, 3, 7, 4, 11, 8, 12, 9, 13, 10] for idx, pred in enumerate(result['preds']): + if cfg.data_cfg['add_neck']: + pred = pred[order_map] area = 
(np.max(pred[:, 0]) - np.min(pred[:, 0])) * ( np.max(pred[:, 1]) - np.min(pred[:, 1])) pose_results.append({ diff --git a/mmpose/core/evaluation/__init__.py b/mmpose/core/evaluation/__init__.py index f22953ab84..2c2984f313 100644 --- a/mmpose/core/evaluation/__init__.py +++ b/mmpose/core/evaluation/__init__.py @@ -1,5 +1,6 @@ -from .bottom_up_eval import (aggregate_results, get_group_preds, - get_multi_stage_outputs) +from .bottom_up_eval import (aggregate_scale, aggregate_stage_flip, + flip_feature_maps, flip_part_affinity_fields, + get_group_preds, split_ae_outputs) from .eval_hooks import DistEvalHook, EvalHook from .mesh_eval import compute_similarity_transform from .pose3d_eval import keypoint_mpjpe @@ -12,8 +13,8 @@ __all__ = [ 'EvalHook', 'DistEvalHook', 'pose_pck_accuracy', 'keypoints_from_heatmaps', 'keypoints_from_regression', 'keypoint_pck_accuracy', 'keypoint_auc', - 'keypoint_epe', 'get_group_preds', 'get_multi_stage_outputs', - 'aggregate_results', 'compute_similarity_transform', 'post_dark_udp', - 'keypoint_mpjpe', 'keypoints_from_heatmaps3d', - 'multilabel_classification_accuracy' + 'keypoint_epe', 'get_group_preds', 'split_ae_outputs', 'flip_feature_maps', + 'aggregate_stage_flip', 'aggregate_scale', 'compute_similarity_transform', + 'post_dark_udp', 'keypoint_mpjpe', 'keypoints_from_heatmaps3d', + 'multilabel_classification_accuracy', 'flip_part_affinity_fields' ] diff --git a/mmpose/core/evaluation/bottom_up_eval.py b/mmpose/core/evaluation/bottom_up_eval.py index 5d90027044..98b6daaace 100644 --- a/mmpose/core/evaluation/bottom_up_eval.py +++ b/mmpose/core/evaluation/bottom_up_eval.py @@ -5,130 +5,285 @@ warp_affine_joints) -def get_multi_stage_outputs(outputs, - outputs_flip, - num_joints, - with_heatmaps, - with_ae, - tag_per_joint=True, - flip_index=None, - project2image=True, - size_projected=None, - align_corners=False): - """Inference the model to get multi-stage outputs (heatmaps & tags), and - resize them to base sizes. +def split_ae_outputs(outputs, num_joints, with_heatmaps, with_ae): + """Split multi-stage outputs into heatmaps & tags. Args: outputs (list(torch.Tensor)): Outputs of network - outputs_flip (list(torch.Tensor)): Flip outputs of network num_joints (int): Number of joints with_heatmaps (list[bool]): Option to output heatmaps for different stages. with_ae (list[bool]): Option to output ae tags for different stages. - tag_per_joint (bool): Option to use one tag map per joint. - flip_index (list[int]): Keypoint flip index. - project2image (bool): Option to resize to base scale. - size_projected ([w, h]): Base size of heatmaps. - align_corners (bool): Align corners when performing interpolation. - Returns: tuple: A tuple containing multi-stage outputs. - - - outputs (list(torch.Tensor)): List of simple outputs and - flip outputs. - - heatmaps (torch.Tensor): Multi-stage heatmaps that are resized to - the base size. - - tags (torch.Tensor): Multi-stage tags that are resized to - the base size. + - heatmaps (list(torch.Tensor)): multi-stage heatmaps. + - tags (list(torch.Tensor)): multi-stage tags. 
""" - heatmaps_avg = 0 - num_heatmaps = 0 heatmaps = [] tags = [] - flip_test = outputs_flip is not None - # aggregate heatmaps from different stages for i, output in enumerate(outputs): - if i != len(outputs) - 1: - output = torch.nn.functional.interpolate( - output, - size=(outputs[-1].size(2), outputs[-1].size(3)), - mode='bilinear', - align_corners=align_corners) - # staring index of the associative embeddings offset_feat = num_joints if with_heatmaps[i] else 0 - if with_heatmaps[i]: - heatmaps_avg += output[:, :num_joints] - num_heatmaps += 1 - + heatmaps.append(output[:, :num_joints]) if with_ae[i]: tags.append(output[:, offset_feat:]) + return heatmaps, tags + + +def flip_feature_maps(feature_maps, flip_index=None): + """Flip the feature maps and swap the channels. + + Args: + feature_maps (list(torch.Tensor)): Feature maps. + flip_index (list(int) | None): Channel-flip indexes. If None, + do not flip channels. + Returns: + flipped_feature_maps (list(torch.Tensor)): Flipped feature_maps. + """ + flipped_feature_maps = [] + for feature_map in feature_maps: + feature_map = torch.flip(feature_map, [3]) + if flip_index is not None: + flipped_feature_maps.append(feature_map[:, flip_index, :, :]) + else: + flipped_feature_maps.append(feature_map) - if num_heatmaps > 0: - heatmaps.append(heatmaps_avg / num_heatmaps) + return flipped_feature_maps - if flip_test and flip_index: - # perform flip testing - heatmaps_avg = 0 - num_heatmaps = 0 - for i, output in enumerate(outputs_flip): - if i != len(outputs_flip) - 1: - output = torch.nn.functional.interpolate( - output, - size=(outputs_flip[-1].size(2), outputs_flip[-1].size(3)), - mode='bilinear', - align_corners=align_corners) - output = torch.flip(output, [3]) - outputs.append(output) +def flip_part_affinity_fields(pafs, flip_index, skeleton): + """Flip the part affinity fields and swap the channels. - offset_feat = num_joints if with_heatmaps[i] else 0 + Args: + pafs (list(torch.Tensor)): Part-affinity fields. + flip_index (list(int) | None): Channel-flip indexes. If None, + do not flip channels. + skeleton (list(list(int, int))): Pairs of linked keypoints. + Returns: + flipped_pafs (list(torch.Tensor)): Flipped pafs. + """ + flipped_skeleton = [[flip_index[a], flip_index[b]] for a, b in skeleton] + + flip_index_paf = [] + flip_x_index = [] + for sk_id, sk in enumerate(flipped_skeleton): + try: + # found flip-pairs + ind = skeleton.index(sk) + flip_x_index.append(sk_id * 2) + except ValueError: + try: + # unidirectional edge + ind = skeleton.index([sk[1], sk[0]]) + except ValueError: + raise ValueError('The skeleton should be symmetric.') + + flip_index_paf.append(2 * ind) + flip_index_paf.append(2 * ind + 1) + + flipped_pafs = [] + for paf in pafs: + paf = torch.flip(paf, [3]) + # flip the x-axis direction + paf[:, flip_x_index, :, :] *= -1 + # flip channels + if flip_index is not None: + flipped_pafs.append(paf[:, flip_index_paf, :, :]) + else: + flipped_pafs.append(paf) + + return flipped_pafs + + +def _resize_average(feature_maps, align_corners, index=-1, resize_size=None): + """Resize the feature maps and compute the average. + + Args: + feature_maps (list(torch.Tensor)): Feature maps. + align_corners (bool): Align corners when performing interpolation. + index (int): If `resize_size' is None, the target size is the size + of the indexed feature maps. + resize_size ([w, h]): The target size. + Returns: + feature_maps_avg (list(torch.Tensor)): Averaged feature_maps. 
+ """ - if with_heatmaps[i]: - heatmaps_avg += output[:, :num_joints][:, flip_index, :, :] - num_heatmaps += 1 + if feature_maps is None: + return None + feature_maps_avg = 0 - if with_ae[i]: - tags.append(output[:, offset_feat:]) - if tag_per_joint: - tags[-1] = tags[-1][:, flip_index, :, :] + feature_map_list = _resize_concate( + feature_maps, align_corners, index=index, resize_size=resize_size) + for feature_map in feature_map_list: + feature_maps_avg += feature_map - heatmaps.append(heatmaps_avg / num_heatmaps) + feature_maps_avg /= len(feature_map_list) + return [feature_maps_avg] - if project2image and size_projected: - heatmaps = [ - torch.nn.functional.interpolate( - hms, - size=(size_projected[1], size_projected[0]), - mode='bilinear', - align_corners=align_corners) for hms in heatmaps - ] - tags = [ - torch.nn.functional.interpolate( - tms, - size=(size_projected[1], size_projected[0]), +def _resize_unsqueeze_concat(feature_maps, + align_corners, + index=-1, + resize_size=None): + """Resize, unsqueeze and concatenate the feature_maps. + + Args: + feature_maps (list(torch.Tensor)): Feature maps. + align_corners (bool): Align corners when performing interpolation. + index (int): If `resize_size' is None, the target size is the size + of the indexed feature maps. + resize_size ([w, h]): The target size. + Returns: + output_feature_maps (list(torch.Tensor)): Averaged feature_maps. + """ + if feature_maps is None: + return None + feature_map_list = _resize_concate( + feature_maps, align_corners, index=index, resize_size=resize_size) + + feat_dim = len(feature_map_list[0].shape) - 1 + output_feature_maps = torch.cat( + [torch.unsqueeze(fmap, dim=feat_dim + 1) for fmap in feature_map_list], + dim=feat_dim + 1) + return [output_feature_maps] + + +def _resize_concate(feature_maps, align_corners, index=-1, resize_size=None): + """Resize and concatenate the feature_maps. + + Args: + feature_maps (list(torch.Tensor)): Feature maps. + align_corners (bool): Align corners when performing interpolation. + index (int): If `resize_size' is None, the target size is the size + of the indexed feature maps. + resize_size ([w, h]): The target size. + Returns: + feature_map_list (list(torch.Tensor)): Averaged feature_maps. + """ + if feature_maps is None: + return None + + feature_map_list = [] + + if index < 0: + index += len(feature_maps) + + if resize_size is None: + resize_size = (feature_maps[index].size(2), + feature_maps[index].size(3)) + + for feature_map in feature_maps: + ori_size = (feature_map.size(2), feature_map.size(3)) + if ori_size != resize_size: + feature_map = torch.nn.functional.interpolate( + feature_map, + size=resize_size, mode='bilinear', - align_corners=align_corners) for tms in tags - ] + align_corners=align_corners) - return outputs, heatmaps, tags + feature_map_list.append(feature_map) + return feature_map_list -def aggregate_results(scale, - aggregated_heatmaps, - tags_list, - heatmaps, - tags, - test_scale_factor, - project2image, - flip_test, - align_corners=False): + +def aggregate_stage_flip(feature_maps, + feature_maps_flip, + index=-1, + project2image=True, + size_projected=None, + align_corners=False, + aggregate_stage='concat', + aggregate_flip='average'): + """Inference the model to get multi-stage outputs (heatmaps & tags), and + resize them to base sizes. + + Args: + feature_maps (list(torch.Tensor)): feature_maps can be heatmaps, + tags, and pafs. + feature_maps_flip (list(torch.Tensor) | None): flipped feature_maps. 
+ feature maps can be heatmaps, tags, and pafs. + project2image (bool): Option to resize to base scale. + size_projected ([w, h]): Base size of heatmaps. + align_corners (bool): Align corners when performing interpolation. + aggregate_stage (str): Methods to aggregate multi-stage feature maps. + Options: 'concat', 'average'. + 'concat': Concatenate the feature maps of the different stages. + 'average': Get the average of the feature maps of the different + stages. + Default: 'concat'. + aggregate_flip (str): Methods to aggregate the original and + the flipped feature maps. + Options: 'concat', 'average', 'none'. + 'concat': Concatenate the original and the flipped feature maps. + 'average': Get the average of the original and the flipped + feature maps. + 'none': no flipped feature maps. + Default: 'average'. + + Returns: + - output_feature_maps (List(torch.Tensor[NxKxWxH])): + Aggregated feature maps. + """ + + if feature_maps_flip is None: + aggregate_flip = 'none' + + output_feature_maps = [] + + if aggregate_stage == 'average': + _aggregate_stage_func = _resize_average + elif aggregate_stage == 'concat': + _aggregate_stage_func = _resize_concate + else: + raise NotImplementedError() + + if project2image and size_projected: + _origin = _aggregate_stage_func( + feature_maps, + align_corners, + index=index, + resize_size=(size_projected[1], size_projected[0])) + + _flipped = _aggregate_stage_func( + feature_maps_flip, + align_corners, + index=index, + resize_size=(size_projected[1], size_projected[0])) + else: + _origin = _aggregate_stage_func( + feature_maps, align_corners, index=index, resize_size=None) + _flipped = _aggregate_stage_func( + feature_maps_flip, align_corners, index=index, resize_size=None) + + if aggregate_flip == 'average': + assert feature_maps_flip is not None + for _ori, _fli in zip(_origin, _flipped): + output_feature_maps.append((_ori + _fli) / 2.0) + + elif aggregate_flip == 'concat': + assert feature_maps_flip is not None + output_feature_maps.append(*_origin) + output_feature_maps.append(*_flipped) + + elif aggregate_flip == 'none': + if isinstance(_origin, list): + output_feature_maps.append(*_origin) + else: + output_feature_maps.append(_origin) + else: + raise NotImplementedError() + + return output_feature_maps + + +def aggregate_scale(feature_maps_list, + align_corners=False, + aggregate_scale='average'): """Aggregate multi-scale outputs. Note: @@ -138,50 +293,30 @@ def aggregate_results(scale, heatmap height: H Args: - scale (int): current scale - aggregated_heatmaps (torch.Tensor | None): Aggregated heatmaps. - tags_list (list(torch.Tensor)): Tags list of previous scale. - heatmaps (List(torch.Tensor[NxKxWxH])): A batch of heatmaps. - tags (List(torch.Tensor[NxKxWxH])): A batch of tag maps. - test_scale_factor (List(int)): Multi-scale factor for testing. + feature_maps_list (list(torch.Tensor)): Aggregated feature maps. - project2image (bool): Option to resize to base scale. - flip_test (bool): Option to use flip test. align_corners (bool): Align corners when performing interpolation. + aggregate_scale (str): Methods to aggregate multi-scale feature maps. + Options: 'average', 'unsqueeze_concat'. + 'average': Get the average of the feature maps. + 'unsqueeze_concat': Concatenate the feature maps along new axis. + Default: 'average'. Return: - tuple: a tuple containing aggregated results. - - - aggregated_heatmaps (torch.Tensor): Heatmaps with multi scale. - - tags_list (list(torch.Tensor)): Tag list of multi scale.
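# Hypothetical flip-test usage of aggregate_stage_flip, the way the bottom-up test
# pipeline in this patch would drive it: the original and the channel-flipped heatmaps
# are resized to the projected image size and averaged. The flip_index here is an
# identity placeholder.
import torch
from mmpose.core.evaluation import aggregate_stage_flip, flip_feature_maps

heatmaps = [torch.rand(1, 17, 128, 128)]
heatmaps_flip = flip_feature_maps(heatmaps, flip_index=list(range(17)))
aggregated = aggregate_stage_flip(
    heatmaps,
    heatmaps_flip,
    index=-1,
    project2image=True,
    size_projected=(512, 512),   # (w, h) of the resized input image
    aggregate_stage='average',
    aggregate_flip='average')
assert aggregated[0].shape == (1, 17, 512, 512)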
+ - output_feature_maps (torch.Tensor): Aggregated feature maps. """ - if scale == 1 or len(test_scale_factor) == 1: - if aggregated_heatmaps is not None and not project2image: - tags = [ - torch.nn.functional.interpolate( - tms, - size=(aggregated_heatmaps.size(2), - aggregated_heatmaps.size(3)), - mode='bilinear', - align_corners=align_corners) for tms in tags - ] - for tms in tags: - tags_list.append(torch.unsqueeze(tms, dim=4)) - - heatmaps_avg = (heatmaps[0] + - heatmaps[1]) / 2.0 if flip_test else heatmaps[0] - - if aggregated_heatmaps is None: - aggregated_heatmaps = heatmaps_avg - elif project2image: - aggregated_heatmaps += heatmaps_avg + + if aggregate_scale == 'average': + output_feature_maps = _resize_average( + feature_maps_list, align_corners, index=0, resize_size=None) + + elif aggregate_scale == 'unsqueeze_concat': + output_feature_maps = _resize_unsqueeze_concat( + feature_maps_list, align_corners, index=0, resize_size=None) else: - aggregated_heatmaps += torch.nn.functional.interpolate( - heatmaps_avg, - size=(aggregated_heatmaps.size(2), aggregated_heatmaps.size(3)), - mode='bilinear', - align_corners=align_corners) + raise NotImplementedError() - return aggregated_heatmaps, tags_list + return output_feature_maps[0] def get_group_preds(grouped_joints, @@ -204,6 +339,9 @@ def get_group_preds(grouped_joints, Returns: list: List of the pose result for each person. """ + if len(grouped_joints) == 0: + return [] + if use_udp: if grouped_joints[0].shape[0] > 0: heatmap_size_t = np.array(heatmap_size, dtype=np.float32) - 1.0 diff --git a/mmpose/core/post_processing/group.py b/mmpose/core/post_processing/group.py index 9a870d6ce3..2d443205c8 100644 --- a/mmpose/core/post_processing/group.py +++ b/mmpose/core/post_processing/group.py @@ -1,7 +1,4 @@ -# ------------------------------------------------------------------------------ -# Adapted from https://github.com/princeton-vl/pose-ae-train/ -# Original licence: Copyright (c) 2017, umich-vl, under BSD 3-Clause License. -# ------------------------------------------------------------------------------ +from abc import ABCMeta, abstractmethod import numpy as np import torch @@ -117,19 +114,20 @@ def _match_by_tag(inp, params): class _Params: - """A class of parameter. + """A class of parameters for keypoint grouping. Args: cfg(Config): config. """ def __init__(self, cfg): - self.num_joints = cfg['num_joints'] + self.add_neck = cfg.get('add_neck', False) + if self.add_neck: + self.num_joints = cfg['num_joints'] + 1 + else: + self.num_joints = cfg['num_joints'] self.max_num_people = cfg['max_num_people'] self.detection_threshold = cfg['detection_threshold'] - self.tag_threshold = cfg['tag_threshold'] - self.use_detection_val = cfg['use_detection_val'] self.ignore_too_much = cfg['ignore_too_much'] if self.num_joints == 17: @@ -141,12 +139,11 @@ def __init__(self, cfg): self.joint_order = list(np.arange(self.num_joints)) -class HeatmapParser: - """The heatmap parser for post processing.""" +class BaseBottomUpParser(metaclass=ABCMeta): + """The base bottom-up parser for post processing.""" def __init__(self, cfg): self.params = _Params(cfg) - self.tag_per_joint = cfg['tag_per_joint'] self.pool = torch.nn.MaxPool2d(cfg['nms_kernel'], 1, cfg['nms_padding']) self.use_udp = cfg.get('use_udp', False) @@ -167,28 +164,9 @@ def nms(self, heatmaps): return heatmaps - def match(self, tag_k, loc_k, val_k): - """Group keypoints to human poses in a batch.
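# Sketch of aggregate_scale defined above: 'average' fuses the multi-scale maps into a
# single NxKxHxW tensor, while 'unsqueeze_concat' stacks them along a trailing axis
# (the NxKxHxWxL layout the associative-embedding tag grouping consumes). Shapes below
# are illustrative only.
import torch
from mmpose.core.evaluation import aggregate_scale

maps = [torch.rand(1, 17, 256, 256), torch.rand(1, 17, 256, 256)]
avg = aggregate_scale(maps, aggregate_scale='average')
stacked = aggregate_scale(maps, aggregate_scale='unsqueeze_concat')
assert avg.shape == (1, 17, 256, 256)
assert stacked.shape == (1, 17, 256, 256, 2)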
- - Args: - tag_k (np.ndarray[NxKxMxL]): tag corresponding to the - top k values of feature map per keypoint. - loc_k (np.ndarray[NxKxMx2]): top k locations of the - feature maps for keypoint. - val_k (np.ndarray[NxKxM]): top k value of the - feature maps per keypoint. - - Returns: - list - """ - - def _match(x): - return _match_by_tag(x, self.params) - - return list(map(_match, zip(tag_k, loc_k, val_k))) - - def top_k(self, heatmaps, tags): - """Find top_k values in an image. + @staticmethod + def top_k_value(feature_maps, M): + """Find top_k values in the feature_maps. Note: batch size: N @@ -196,86 +174,21 @@ def top_k(self, heatmaps, tags): heatmap height: H heatmap width: W max number of people: M - dim of tags: L - If use flip testing, L=2; else L=1. Args: - heatmaps (torch.Tensor[NxKxHxW]) - tags (torch.Tensor[NxKxHxWxL]) + feature_maps (torch.Tensor[NxKxHxW]) Return: - dict: A dict containing top_k values. - - - tag_k (np.ndarray[NxKxMxL]): - tag corresponding to the top k values of - feature map per keypoint. - - loc_k (np.ndarray[NxKxMx2]): - top k location of feature map per keypoint. - - val_k (np.ndarray[NxKxM]): + - val_k (torch.Tensor[NxKxM]): top k value of feature map per keypoint. + - ind_k (torch.Tensor[NxKxM]): + index of the selected locations. """ - heatmaps = self.nms(heatmaps) - N, K, H, W = heatmaps.size() - heatmaps = heatmaps.view(N, K, -1) - val_k, ind = heatmaps.topk(self.params.max_num_people, dim=2) - - tags = tags.view(tags.size(0), tags.size(1), W * H, -1) - if not self.tag_per_joint: - tags = tags.expand(-1, self.params.num_joints, -1, -1) - - tag_k = torch.stack( - [torch.gather(tags[..., i], 2, ind) for i in range(tags.size(3))], - dim=3) - - x = ind % W - y = ind // W - - ind_k = torch.stack((x, y), dim=3) - - ans = { - 'tag_k': tag_k.cpu().numpy(), - 'loc_k': ind_k.cpu().numpy(), - 'val_k': val_k.cpu().numpy() - } - - return ans - - @staticmethod - def adjust(ans, heatmaps): - """Adjust the coordinates for better accuracy. + N, K, H, W = feature_maps.size() + feature_maps = feature_maps.view(N, K, -1) + val_k, ind_k = feature_maps.topk(M, dim=2) - Note: - batch size: N - number of keypoints: K - heatmap height: H - heatmap width: W - - Args: - ans (list(np.ndarray)): Keypoint predictions. - heatmaps (torch.Tensor[NxKxHxW]): Heatmaps. - """ - _, _, H, W = heatmaps.shape - for batch_id, people in enumerate(ans): - for people_id, people_i in enumerate(people): - for joint_id, joint in enumerate(people_i): - if joint[2] > 0: - x, y = joint[0:2] - xx, yy = int(x), int(y) - tmp = heatmaps[batch_id][joint_id] - if tmp[min(H - 1, yy + 1), xx] > tmp[max(0, yy - 1), - xx]: - y += 0.25 - else: - y -= 0.25 - - if tmp[yy, min(W - 1, xx + 1)] > tmp[yy, - max(0, xx - 1)]: - x += 0.25 - else: - x -= 0.25 - ans[batch_id][people_id, joint_id, - 0:2] = (x + 0.5, y + 0.5) - return ans + return val_k, ind_k @staticmethod def refine(heatmap, tag, keypoints, use_udp=False): @@ -356,7 +269,170 @@ def refine(heatmap, tag, keypoints, use_udp=False): return keypoints - def parse(self, heatmaps, tags, adjust=True, refine=True): + @staticmethod + def adjust(ans, heatmaps, use_udp=False): + """Adjust the coordinates for better accuracy. + + Note: + batch size: N + number of person: M + number of keypoints: K + heatmap height: H + heatmap width: W + + Args: + ans (list(np.array([M,K,3+]))): Keypoint predictions. + heatmaps (torch.Tensor[NxKxHxW]): Heatmaps. 
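# Small numeric check of the top_k_value helper above plus the flat-index decoding the
# parsers use (x = ind % W, y = ind // W); the 1x1 heatmap batch below is hypothetical.
import torch
from mmpose.core.post_processing.group import BaseBottomUpParser

heatmap = torch.zeros(1, 1, 4, 5)
heatmap[0, 0, 2, 3] = 1.0                       # peak at row y=2, column x=3
val_k, ind_k = BaseBottomUpParser.top_k_value(heatmap, M=1)
x, y = ind_k % 5, ind_k // 5
assert val_k[0, 0, 0] == 1.0 and x[0, 0, 0] == 3 and y[0, 0, 0] == 2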
+ """ + + if use_udp: + for i in range(len(ans)): + if ans[i].shape[0] > 0: + ans[i][..., :2] = post_dark_udp(ans[i][..., :2].copy(), + heatmaps[i:i + 1, :]) + else: + _, _, H, W = heatmaps.shape + for batch_id, people in enumerate(ans): + for people_id, people_i in enumerate(people): + for joint_id, joint in enumerate(people_i): + if joint[2] > 0: + x, y = joint[0:2] + xx, yy = int(x), int(y) + tmp = heatmaps[batch_id][joint_id] + if tmp[min(H - 1, yy + 1), + xx] > tmp[max(0, yy - 1), xx]: + y += 0.25 + else: + y -= 0.25 + + if tmp[yy, min(W - 1, xx + + 1)] > tmp[yy, max(0, xx - 1)]: + x += 0.25 + else: + x -= 0.25 + ans[batch_id][people_id, joint_id, + 0:2] = (x + 0.5, y + 0.5) + return ans + + def filter_pose(self, ans, kpt_num_thr=3, mean_score_thr=0.2): + """Filter out the poses with #keypoints < kpt_num_thr, and those with + keypoint score < mean_score_thr. + + Note: + number of person: M + number of keypoints: K + + Args: + filtered_ans (list(np.array([M,K,3+]))): Keypoint predictions. + """ + filtered_ans = [] + for i in range(len(ans[0])): + score = ans[0][i, :, 2] + if sum(score > 0) < kpt_num_thr or (score[score > 0].mean() < + mean_score_thr): + continue + filtered_ans.append(ans[0][i]) + filtered_ans = np.asarray(filtered_ans) + + return [filtered_ans] + + @abstractmethod + def parse(self, *args, **kwargs): + """Group keypoints into poses.""" + + +class HeatmapParser(BaseBottomUpParser): + """The associative embedding parser. + + Paper ref: Alejandro Newell et al. "Associative Embedding: + End-to-end Learning for Joint Detection and Grouping." (NeurIPS'2017) + + Adapted from https://github.com/princeton-vl/pose-ae-train/ + Original licence: Copyright (c) 2017, umich-vl, under BSD 3-Clause License. + """ + + def __init__(self, cfg): + super().__init__(cfg) + self.tag_per_joint = cfg['tag_per_joint'] + + self.params.tag_threshold = cfg['tag_threshold'] + self.params.use_detection_val = cfg['use_detection_val'] + + def match(self, tag_k, loc_k, val_k): + """Group keypoints to human poses in a batch. + + Args: + tag_k (np.ndarray[NxKxMxL]): tag corresponding to the + top k values of feature map per keypoint. + loc_k (np.ndarray[NxKxMx2]): top k locations of the + feature maps for keypoint. + val_k (np.ndarray[NxKxM]): top k value of the + feature maps per keypoint. + + Returns: + list + """ + + def _match(x): + return _match_by_tag(x, self.params) + + return list(map(_match, zip(tag_k, loc_k, val_k))) + + def top_k(self, heatmaps, tags): + """Find top_k values in the feature maps. + + Note: + batch size: N + number of keypoints: K + heatmap height: H + heatmap width: W + max number of people: M + dim of tags: L + If use flip testing, L=2; else L=1. + + Args: + heatmaps (torch.Tensor[NxKxHxW]) + tags (torch.Tensor[NxKxHxWxL]) + + Return: + dict: A dict containing top_k values. + + - tag_k (np.ndarray[NxKxMxL]): + tag corresponding to the top k values of + feature map per keypoint. + - loc_k (np.ndarray[NxKxMx2]): + top k location of feature map per keypoint. + - val_k (np.ndarray[NxKxM]): + top k value of feature map per keypoint. 
+ """ + + heatmaps = self.nms(heatmaps) + N, K, H, W = heatmaps.size() + val_k, ind_k = self.top_k_value(heatmaps, self.params.max_num_people) + + x = ind_k % W + y = ind_k // W + + loc_k = torch.stack((x, y), dim=3) + + tags = tags.view(tags.size(0), tags.size(1), W * H, -1) + if not self.tag_per_joint: + tags = tags.expand(-1, self.params.num_joints, -1, -1) + + tag_k = torch.stack([ + torch.gather(tags[..., i], 2, ind_k) for i in range(tags.size(3)) + ], + dim=3) + + ans = { + 'tag_k': tag_k.cpu().numpy(), + 'loc_k': loc_k.cpu().numpy(), + 'val_k': val_k.cpu().numpy() + } + + return ans + + def parse(self, heatmaps, tags, adjust=True, refine=True, filter=False): """Group keypoints into poses given heatmap and tag. Note: @@ -379,14 +455,14 @@ def parse(self, heatmaps, tags, adjust=True, refine=True): """ ans = self.match(**self.top_k(heatmaps, tags)) + if len(ans) == 0: + return [], [] + + if filter: + ans = self.filter_pose(ans) + if adjust: - if self.use_udp: - for i in range(len(ans)): - if ans[i].shape[0] > 0: - ans[i][..., :2] = post_dark_udp( - ans[i][..., :2].copy(), heatmaps[i:i + 1, :]) - else: - ans = self.adjust(ans, heatmaps) + ans = self.adjust(ans, heatmaps, self.use_udp) scores = [i[:, 2].mean() for i in ans[0]] @@ -404,3 +480,420 @@ def parse(self, heatmaps, tags, adjust=True, refine=True): ans = [ans] return ans, scores + + +class PAFParser(BaseBottomUpParser): + """The part-affinity field parser. + + Paper ref: Cao, Zhe, et al. "OpenPose: realtime multi-person 2D pose + estimation using Part Affinity Fields." (TPAMI'2019) + + Adapted from 'https://github.com/Daniil-Osokin/ + lightweight-human-pose-estimation.pytorch' + + Original licence: Copyright 2018, under Apache License 2.0. + """ + + def __init__(self, cfg): + super().__init__(cfg) + + self.paf_thr = 0.05 + self.add_neck = cfg.get('add_neck', False) + + def output_format(self, all_keypoints, pose_entries): + """Format transform. + + Note: + batch size: N + number of people: M + number of keypoints: K + number of detected keypoints in the image: P + + Args: + all_keypoints (np.ndarray(P, 4)): Each keypoint contains + (x, y, score, keypoint id) + pose_entries (np.ndarray(M, K + 2)): For each person, + it contains K keypoint id, the human score, and + the number of detected keypoints. + + Returns: + ans (list(np.ndarray)): Pose results. + """ + ans = [] + + if len(pose_entries) > 0: + for person in pose_entries: + ans_person = np.zeros( + (self.params.num_joints, all_keypoints.shape[1]), + np.float32) + for j in range(self.params.num_joints): + joint_id = int(person[j]) + if joint_id < 0: + continue + ans_person[j] = all_keypoints[joint_id] + ans.append(ans_person) + return [np.stack(ans)] + else: + return [] + + def connections_nms(self, a_idx, b_idx, affinity_scores): + """From all retrieved connections that share the same starting/ending + keypoints leave only the top-scoring ones. + + Args: + a_idx (list(int)): index of the starting keypoints. + b_idx (list(int)): index of the ending keypoints. + affinity_scores (list(float)): affinity scores. + + Returns: + a_idx (list(int)): index of the starting keypoints. + b_idx (list(int)): index of the ending keypoints. + affinity_scores (list(float)): affinity scores. 
+ """ + order = affinity_scores.argsort()[::-1] + affinity_scores = affinity_scores[order] + a_idx = a_idx[order] + b_idx = b_idx[order] + idx = [] + has_kpt_a = set() + has_kpt_b = set() + for t, (i, j) in enumerate(zip(a_idx, b_idx)): + if i not in has_kpt_a and j not in has_kpt_b: + idx.append(t) + has_kpt_a.add(i) + has_kpt_b.add(j) + idx = np.asarray(idx, dtype=np.int32) + return a_idx[idx], b_idx[idx], affinity_scores[idx] + + def group_keypoints(self, all_keypoints_by_type, pafs): + """Group keypoints based on part-affinity fields. + + Note: + batch size: N + number of keypoints: K + heatmap height: H + heatmap width: W + + Args: + all_keypoints_by_type (list(tuple)): list of tuples + containing keypoint detection results (x, y, score, id). + pafs (np.ndarray[W, H, C]): part-affinity fields + + Returns: + - ans (list(np.array([M, K, 3+]))): Keypoint predictions. + - scores (list): Score of people. + """ + pose_entries = [] + all_keypoints = np.array( + [item for sublist in all_keypoints_by_type for item in sublist]) + points_per_limb = 10 + grid = np.arange(points_per_limb, dtype=np.float32).reshape(1, -1, 1) + all_keypoints_by_type = [ + np.array(keypoints, np.float32) + for keypoints in all_keypoints_by_type + ] + for part_id in range(len(self.limb2paf)): + part_pafs = pafs[:, :, self.limb2paf[part_id]] + kpts_a = all_keypoints_by_type[self.limb2joint[part_id][0]] + kpts_b = all_keypoints_by_type[self.limb2joint[part_id][1]] + n = len(kpts_a) + m = len(kpts_b) + if n == 0 or m == 0: + continue + + # Get vectors between all pairs of keypoints, + # i.e. candidate limb vectors. + a = kpts_a[:, :2] + a = np.broadcast_to(a[None], (m, n, 2)) + b = kpts_b[:, :2] + vec_raw = (b[:, None, :] - a).reshape(-1, 1, 2) + + # Sample points along every candidate limb vector. + steps = (1 / (points_per_limb - 1) * vec_raw) + points = steps * grid + a.reshape(-1, 1, 2) + points = points.round().astype(dtype=np.int32) + x = points[..., 0].ravel() + y = points[..., 1].ravel() + + # Compute affinity score between candidate + # limb vectors and part affinity field. + field = part_pafs[y, x].reshape(-1, points_per_limb, 2) + vec_norm = np.linalg.norm(vec_raw, ord=2, axis=-1, keepdims=True) + vec = vec_raw / (vec_norm + 1e-6) + affinity_scores = (field * + vec).sum(-1).reshape(-1, points_per_limb) + valid_affinity_scores = affinity_scores > self.paf_thr + valid_num = valid_affinity_scores.sum(1) + affinity_scores = (affinity_scores * + valid_affinity_scores).sum(1) / ( + valid_num + 1e-6) + success_ratio = valid_num / points_per_limb + + # Get a list of limbs according to the obtained affinity score. + valid_limbs = np.where( + np.logical_and(affinity_scores > 0, success_ratio > 0.8))[0] + if len(valid_limbs) == 0: + continue + b_idx, a_idx = np.divmod(valid_limbs, n) + affinity_scores = affinity_scores[valid_limbs] + + # Suppress incompatible connections. 
+ a_idx, b_idx, affinity_scores = self.connections_nms( + a_idx, b_idx, affinity_scores) + connections = list( + zip(kpts_a[a_idx, 3].astype(np.int32), + kpts_b[b_idx, 3].astype(np.int32), affinity_scores)) + if len(connections) == 0: + continue + + if part_id == 0: + pose_entries = [ + np.ones(self.params.num_joints + 2) * -1 + for _ in range(len(connections)) + ] + for i in range(len(connections)): + pose_entries[i][self.limb2joint[0][0]] = connections[i][0] + pose_entries[i][self.limb2joint[0][1]] = connections[i][1] + pose_entries[i][-1] = 2 + pose_entries[i][-2] = np.sum( + all_keypoints[connections[i][0:2], + 2]) + connections[i][2] + else: + kpt_a_id = self.limb2joint[part_id][0] + kpt_b_id = self.limb2joint[part_id][1] + for i in range(len(connections)): + found_pose_list = [] + for j in range(len(pose_entries)): + if pose_entries[j][kpt_a_id] == connections[i][ + 0] and pose_entries[j][kpt_b_id] == -1: + pose_entries[j][kpt_b_id] = connections[i][1] + pose_entries[j][-1] += 1 + pose_entries[j][-2] += all_keypoints[ + connections[i][1], 2] + connections[i][2] + found_pose_list.append( + (j, all_keypoints[connections[i][1], 2] + + connections[i][2])) + + if pose_entries[j][kpt_b_id] == connections[i][ + 1] and pose_entries[j][kpt_a_id] == -1: + pose_entries[j][kpt_a_id] = connections[i][0] + pose_entries[j][-1] += 1 + pose_entries[j][-2] += all_keypoints[ + connections[i][1], 2] + connections[i][2] + found_pose_list.append( + (j, all_keypoints[connections[i][1], 2] + + connections[i][2])) + + if len(found_pose_list) == 0: + pose_entry = np.ones(self.params.num_joints + 2) * -1 + pose_entry[kpt_a_id] = connections[i][0] + pose_entry[kpt_b_id] = connections[i][1] + pose_entry[-1] = 2 + pose_entry[-2] = np.sum( + all_keypoints[connections[i][0:2], + 2]) + connections[i][2] + pose_entries.append(pose_entry) + + elif len(found_pose_list) == 2: + # merge two pose entries + found_pose_list.sort(key=lambda x: x[0], reverse=True) + pose_entry = np.ones(self.params.num_joints + 2) * -1 + + entry_id1, score1 = found_pose_list[0] + entry_id2, score2 = found_pose_list[1] + assert score1 == score2 + + pose_entry1 = pose_entries.pop(entry_id1) + pose_entry2 = pose_entries.pop(entry_id2) + + num_kpt = 0 + score = pose_entry1[-2] + pose_entry2[-2] - score1 + + for j in range(self.params.num_joints): + kpt_id1 = int(pose_entry1[j]) + kpt_id2 = int(pose_entry2[j]) + + if kpt_id1 == -1 and kpt_id2 == -1: + continue + elif kpt_id1 == -1 and kpt_id2 != -1: + pose_entry[j] = kpt_id2 + num_kpt += 1 + elif kpt_id2 == -1 and kpt_id1 != -1: + pose_entry[j] = kpt_id1 + num_kpt += 1 + else: + # both have the same joint-id, + # choose the one with higher score. + if all_keypoints[kpt_id1, + 2] > all_keypoints[kpt_id2, + 2]: + pose_entry[j] = kpt_id1 + else: + pose_entry[j] = kpt_id2 + num_kpt += 1 + + pose_entry[-2] = score + pose_entry[-1] = num_kpt + + pose_entries.append(pose_entry) + + ans = self.output_format(all_keypoints, pose_entries) + scores = [person[-2] for person in pose_entries] + + return ans, scores + + def get_keypoints(self, heatmaps): + """Extract keypoints from heatmaps. + + Note: + batch size: N + number of keypoints: K + heatmap height: H + heatmap width: W + + Args: + heatmaps (torch.Tensor[NxKxHxW]): model output heatmaps. + + Returns: + list(tuple): list of tuples containing keypoint detection + results (x, y, score, id). 
+ """ + + keypoint_num = 0 + all_keypoints_by_type = [[] for _ in range(heatmaps.size(1))] + + heatmaps = self.nms(heatmaps) + N, K, H, W = heatmaps.size() + val_k, ind_k = self.top_k_value(heatmaps, self.params.max_num_people) + + x = ind_k % W + y = ind_k // W + + loc_k = torch.stack((x, y), dim=3) + + for kpt_idx in range(self.params.num_joints): + for m in range(self.params.max_num_people): + if val_k[0][kpt_idx][m] < self.params.detection_threshold: + break + else: + x = loc_k[0][kpt_idx][m][0].item() + y = loc_k[0][kpt_idx][m][1].item() + score = val_k[0][kpt_idx][m].item() + all_keypoints_by_type[kpt_idx].append( + (x, y, score, keypoint_num)) + keypoint_num += 1 + + return all_keypoints_by_type + + def define_limb(self, skeleton): + if self.add_neck: + # Heatmap indices to find each limb (joint connection). + self.limb2joint = [[1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7], + [1, 8], [8, 9], [9, 10], [1, 11], [11, 12], + [12, 13], [1, 0], [0, 14], [14, 16], [0, 15], + [15, 17], [2, 16], [5, 17]] + + # PAF indices containing the x and y coordinates of the PAF for a + # given limb. + self.limb2paf = [[12, 13], [20, 21], [14, 15], [16, 17], [22, 23], + [24, 25], [0, 1], [2, 3], [4, 5], [6, 7], [8, 9], + [10, 11], [28, 29], [30, 31], [34, 35], [32, 33], + [36, 37], [18, 19], [26, 27]] + + elif skeleton is None: + # Heatmap indices to find each limb (joint connection). + self.limb2joint = [[15, 13], [13, 11], [16, 14], [14, 12], + [11, 12], [5, 11], [6, 12], [5, 6], [5, 7], + [6, 8], [7, 9], [8, 10], [1, 2], [0, 1], [0, 2], + [1, 3], [2, 4], [3, 5], [4, 6]] + + # PAF indices containing the x and y coordinates of the PAF for a + # given limb. + self.limb2paf = [[0, 1], [2, 3], [4, 5], [6, 7], [8, 9], [10, 11], + [12, 13], [14, 15], [16, 17], [18, 19], [20, 21], + [22, 23], [24, 25], [26, 27], [28, 29], [30, 31], + [32, 33], [34, 35], [36, 37]] + + else: + # Heatmap indices to find each limb (joint connection). + self.limb2joint = skeleton + + # PAF indices containing the x and y coordinates of the PAF for a + # given limb. + self.limb2paf = np.array(range(len(self.limb2joint * + 2))).reshape(-1, 2).tolist() + + self.NUM_LIMBS = len(self.limb2joint) + + def parse(self, + heatmaps, + pafs, + skeleton=None, + adjust=True, + refine=True, + filter=False): + """Group keypoints into poses given heatmap and paf. + + Note: + batch size: N (currently we only support N==1) + number of people: M + number of keypoints: K + number of paf maps: P + heatmap height: H + heatmap width: W + + Args: + heatmaps (torch.Tensor[NxKxHxW]): model output heatmaps. + pafs (torch.Tensor[NxPxHxW]): model output pafs. + + Returns: + tuple: A tuple containing keypoint grouping results. + + - ans (list(np.array([M,K,4]))): Keypoint predictions. + - scores (list): Score of people. + """ + + assert heatmaps.shape[0] == 1, 'The batch size is ' \ + f'{heatmaps.shape[0]}, but we only support batch size==1.' 
+ + self.define_limb(skeleton) + + all_keypoints_by_type = self.get_keypoints(heatmaps) + pafs_np = np.transpose(pafs.detach().cpu().numpy()[0], [1, 2, 0]) + ans, scores = self.group_keypoints(all_keypoints_by_type, pafs_np) + + if len(ans) == 0: + return [], [] + + if filter: + ans = self.filter_pose(ans) + + if adjust: + if self.use_udp: + for i in range(len(ans)): + if ans[i].shape[0] > 0: + ans[i][..., :2] = post_dark_udp( + ans[i][..., :2].copy(), heatmaps[i:i + 1, :]) + else: + ans = self.adjust(ans, heatmaps) + + if refine: + ans = ans[0] + # for every detected person + for i in range(len(ans)): + heatmap_numpy = heatmaps[0].cpu().numpy() + _, image_height, image_width = heatmap_numpy.shape + y_coords = 2.0 * np.repeat( + np.arange(image_height)[:, None], image_width, + axis=1) / (image_height - 1.0) - 1.0 + x_coords = 2.0 * np.repeat( + np.arange(image_width)[None, :], image_height, + axis=0) / (image_width - 1.0) - 1.0 + coord_numpy = np.tile( + np.stack([x_coords, y_coords], axis=-1), + (self.params.num_joints, 1, 1, 1)) + ans[i] = self.refine( + heatmap_numpy, coord_numpy, ans[i], use_udp=self.use_udp) + ans = [ans] + + return ans, scores diff --git a/mmpose/core/post_processing/post_transforms.py b/mmpose/core/post_processing/post_transforms.py index ba6594f778..96091daa3e 100644 --- a/mmpose/core/post_processing/post_transforms.py +++ b/mmpose/core/post_processing/post_transforms.py @@ -186,7 +186,7 @@ def transform_preds(coords, center, scale, output_size, use_udp=False): scale_x = scale[0] / output_size[0] scale_y = scale[1] / output_size[1] - target_coords = np.ones_like(coords) + target_coords = coords.copy() target_coords[:, 0] = coords[:, 0] * scale_x + center[0] - scale[0] * 0.5 target_coords[:, 1] = coords[:, 1] * scale_y + center[1] - scale[1] * 0.5 diff --git a/mmpose/datasets/datasets/bottom_up/bottom_up_base_dataset.py b/mmpose/datasets/datasets/bottom_up/bottom_up_base_dataset.py index 3941f25a39..f26c59f683 100644 --- a/mmpose/datasets/datasets/bottom_up/bottom_up_base_dataset.py +++ b/mmpose/datasets/datasets/bottom_up/bottom_up_base_dataset.py @@ -44,6 +44,8 @@ def __init__(self, self.base_sigma = data_cfg['base_sigma'] self.int_sigma = False + self.ann_info['add_neck'] = data_cfg.get('add_neck', False) + self.ann_info['image_size'] = np.array(data_cfg['image_size']) self.ann_info['heatmap_size'] = np.array(data_cfg['heatmap_size']) self.ann_info['num_joints'] = data_cfg['num_joints'] @@ -52,6 +54,7 @@ def __init__(self, self.ann_info['inference_channel'] = data_cfg['inference_channel'] self.ann_info['dataset_channel'] = data_cfg['dataset_channel'] + self.ann_info['add_neck'] = data_cfg.get('add_neck', False) self.use_nms = data_cfg.get('use_nms', False) self.soft_nms = data_cfg.get('soft_nms', True) diff --git a/mmpose/datasets/datasets/bottom_up/bottom_up_coco.py b/mmpose/datasets/datasets/bottom_up/bottom_up_coco.py index 02fe73a816..99354b84d7 100644 --- a/mmpose/datasets/datasets/bottom_up/bottom_up_coco.py +++ b/mmpose/datasets/datasets/bottom_up/bottom_up_coco.py @@ -57,24 +57,47 @@ def __init__(self, test_mode=False): super().__init__(ann_file, img_prefix, data_cfg, pipeline, test_mode) - self.ann_info['flip_index'] = [ - 0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15 - ] - self.ann_info['use_different_joint_weights'] = False - self.ann_info['joint_weights'] = np.array( - [ - 1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2, - 1.2, 1.5, 1.5 - ], - dtype=np.float32).reshape((self.ann_info['num_joints'], 1)) - - # joint index starts 
from 1 - self.ann_info['skeleton'] = [[16, 14], [14, 12], [17, 15], [15, 13], - [12, 13], [6, 12], [7, 13], [6, 7], - [6, 8], [7, 9], [8, 10], [9, 11], [2, 3], - [1, 2], [1, 3], [2, 4], [3, 5], [4, 6], - [5, 7]] + + if self.ann_info['add_neck']: + self.ann_info['flip_index'] = [ + 0, 1, 5, 6, 7, 2, 3, 4, 11, 12, 13, 8, 9, 10, 15, 14, 17, 16 + ] + + # joint index starts from 0 + self.ann_info['skeleton'] = [[1, 8], [8, 9], [9, 10], [1, 11], + [11, 12], [12, 13], [1, 2], [2, 3], + [3, 4], [2, 16], [1, 5], [5, 6], + [6, 7], [5, 17], [1, 0], [0, 14], + [0, 15], [14, 16], [15, 17]] + + self.ann_info['joint_weights'] = np.array( + [ + 1., 1., 1., 1.2, 1.5, 1., 1.2, 1.5, 1., 1.2, 1.5, 1., 1.2, + 1.5, 1., 1., 1., 1. + ], + dtype=np.float32).reshape((self.ann_info['num_joints'] + 1, 1)) + + else: + self.ann_info['flip_index'] = [ + 0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15 + ] + + # joint index starts from 0 + self.ann_info['skeleton'] = [[15, 13], [13, 11], [16, + 14], [14, 12], + [11, 12], [5, 11], [6, 12], [5, 6], + [5, 7], [6, 8], [7, 9], [8, + 10], [1, 2], + [0, 1], [0, 2], [1, 3], [2, 4], + [3, 5], [4, 6]] + + self.ann_info['joint_weights'] = np.array( + [ + 1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., + 1.2, 1.2, 1.5, 1.5 + ], + dtype=np.float32).reshape((self.ann_info['num_joints'], 1)) # 'https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/' # 'pycocotools/cocoeval.py#L523' @@ -144,10 +167,40 @@ def _get_single(self, idx): mask = self._get_mask(anno, idx) anno = [ - obj for obj in anno + obj.copy() for obj in anno if obj['iscrowd'] == 0 or obj['num_keypoints'] > 0 ] + if self.ann_info['add_neck']: + reorder_map = [ + 0, 6, 8, 10, 5, 7, 9, 12, 14, 16, 11, 13, 15, 2, 1, 4, 3 + ] + for obj in anno: + keypoints = np.array(obj['keypoints']).reshape( + self.ann_info['num_joints'], 3) + converted_keypoints = np.zeros( + (self.ann_info['num_joints'] + 1, 3)) + for i, idx in enumerate(reorder_map): + if i == 0: + converted_keypoints[i] = keypoints[idx] + else: + converted_keypoints[i + 1] = keypoints[idx] + + # Add neck as a mean of shoulders + converted_keypoints[1, 0] = (keypoints[5, 0] + + keypoints[6, 0]) / 2 + converted_keypoints[1, 1] = (keypoints[5, 1] + + keypoints[6, 1]) / 2 + if keypoints[5][2] == 2 and keypoints[6][2] == 2: + converted_keypoints[1][2] = 2 + elif keypoints[5][2] == 0 or keypoints[6][2] == 0: + converted_keypoints[1][2] = 0 + else: + converted_keypoints[1][2] = 1 + + keypoints = list(converted_keypoints.reshape(-1)) + obj['keypoints'] = keypoints + joints = self._get_joints(anno) mask_list = [mask.copy() for _ in range(self.ann_info['num_scales'])] joints_list = [ @@ -168,14 +221,24 @@ def _get_joints(self, anno): num_people = len(anno) if self.ann_info['scale_aware_sigma']: - joints = np.zeros((num_people, self.ann_info['num_joints'], 4), - dtype=np.float32) + if self.ann_info['add_neck']: + joints = np.zeros( + (num_people, self.ann_info['num_joints'] + 1, 4), + dtype=np.float32) + else: + joints = np.zeros((num_people, self.ann_info['num_joints'], 4), + dtype=np.float32) else: - joints = np.zeros((num_people, self.ann_info['num_joints'], 3), - dtype=np.float32) + if self.ann_info['add_neck']: + joints = np.zeros( + (num_people, self.ann_info['num_joints'] + 1, 3), + dtype=np.float32) + else: + joints = np.zeros((num_people, self.ann_info['num_joints'], 3), + dtype=np.float32) for i, obj in enumerate(anno): - joints[i, :self.ann_info['num_joints'], :3] = \ + joints[i, :, :3] = \ np.array(obj['keypoints']).reshape([-1, 3]) if 
self.ann_info['scale_aware_sigma']: # get person box @@ -255,11 +318,14 @@ def evaluate(self, outputs, res_folder, metric='mAP', **kwargs): kpts = defaultdict(list) # iterate over images + order_map = [0, 15, 14, 17, 16, 5, 2, 6, 3, 7, 4, 11, 8, 12, 9, 13, 10] for idx, _preds in enumerate(preds): str_image_path = image_paths[idx] image_id = self.name2id[os.path.basename(str_image_path)] # iterate over people for idx_person, kpt in enumerate(_preds): + if self.ann_info['add_neck']: + kpt = kpt[order_map] # use bbox area area = (np.max(kpt[:, 0]) - np.min(kpt[:, 0])) * ( np.max(kpt[:, 1]) - np.min(kpt[:, 1])) diff --git a/mmpose/datasets/datasets/bottom_up/bottom_up_coco_wholebody.py b/mmpose/datasets/datasets/bottom_up/bottom_up_coco_wholebody.py index 8abccabbba..0aac6f1c38 100644 --- a/mmpose/datasets/datasets/bottom_up/bottom_up_coco_wholebody.py +++ b/mmpose/datasets/datasets/bottom_up/bottom_up_coco_wholebody.py @@ -49,6 +49,43 @@ def __init__(self, self.ann_info['flip_index'] = self.get_flip_index_from_flip_pairs( self.ann_info['flip_pairs']) + # joint index starts from 0 + skeleton_body = [[15, 13], [13, 11], [16, 14], [14, 12], [11, 12], + [5, 11], [6, 12], [5, 6], [5, 7], [6, 8], [7, 9], + [8, 10], [1, 2], [0, 1], [0, 2], [1, 3], [2, 4], + [3, 5], [4, 6]] + skeleton_foot = [[15, 17], [15, 18], [15, 19], [16, 20], [16, 21], + [16, 22]] + skeleton_face = [[0, 53], [53, 52], [52, 51], [51, 50], [50, 65], + [65, 66], [66, 67], [67, 68], [65, 70], [70, 69], + [50, 62], [62, 61], [61, 60], [60, 59], [62, 63], + [63, 64], [50, 45], [45, 46], [46, 47], [47, 48], + [48, 49], [50, 44], [44, 43], [43, 42], [42, 41], + [41, 40], [53, 56], [56, 57], [57, 58], [56, 55], + [55, 54], [56, 74], [74, 75], [75, 76], [76, 77], + [74, 73], [73, 72], [72, 71], [74, 85], [85, 86], + [86, 87], [85, 84], [84, 83], [85, 89], [89, 88], + [89, 90], [89, 80], [80, 79], [79, 78], [80, 81], + [81, 82], [80, 31], [31, 32], [32, 33], [33, 34], + [34, 35], [35, 36], [36, 37], [37, 38], [38, 39], + [31, 30], [30, 29], [29, 28], [28, 27], [27, 26], + [26, 25], [25, 24], [24, 23]] + skeleton_lefthand = [[9, 91], [91, 92], [92, 93], [93, 94], [94, 95], + [91, 96], [96, 97], [97, 98], [98, 99], [91, 100], + [100, 101], [101, 102], [102, 103], [91, 104], + [104, 105], [105, 106], [106, 107], [91, 108], + [108, 109], [109, 110], [110, 111]] + skeleton_righthand = [[10, 112], [112, 113], [113, 114], [114, 115], + [115, 116], [112, 117], [117, 118], [118, 119], + [119, 120], [112, 121], [121, 122], [122, 123], + [123, 124], [112, 125], [125, 126], [126, 127], + [127, 128], [112, 129], [129, 130], [130, 131], + [131, 132]] + + self.ann_info['skeleton'] = ( + skeleton_body + skeleton_foot + skeleton_face + skeleton_lefthand + + skeleton_righthand) + self.ann_info['use_different_joint_weights'] = False self.ann_info['joint_weights'] = \ np.ones((self.ann_info['num_joints'], 1), dtype=np.float32) @@ -166,6 +203,22 @@ def _get_joints(self, anno): return joints + def _get_part_score(self, keypoints): + """Get part score for new evaluation tools.""" + kpt_score = 0 + valid_num = 0 + num_joints = int(len(keypoints) / 3) + for n_jt in range(0, num_joints): + t_s = keypoints[n_jt * 3 + 2] + if t_s > 0.2: + kpt_score = kpt_score + t_s + valid_num = valid_num + 1 + if valid_num != 0: + kpt_score = kpt_score / valid_num + part_score = kpt_score + + return float(part_score) + def _coco_keypoint_results_one_category_kernel(self, data_pack): """Get coco keypoint results.""" cat_id = data_pack['cat_id'] @@ -211,6 +264,18 @@ def 
_coco_keypoint_results_one_category_kernel(self, data_pack): key_point[cuts[4]:cuts[5]].tolist(), 'score': img_kpt['score'], + # 'score': + # self._get_part_score(key_point[cuts[0]:cuts[1]]), + # 'foot_score': + # self._get_part_score(key_point[cuts[1]:cuts[2]]), + # 'face_score': + # self._get_part_score(key_point[cuts[2]:cuts[3]]), + # 'lefthand_score': + # self._get_part_score(key_point[cuts[3]:cuts[4]]), + # 'righthand_score': + # self._get_part_score(key_point[cuts[4]:cuts[5]]), + # 'wholebody_score': + # img_kpt['score'], 'bbox': [left_top[0], left_top[1], w, h] }) diff --git a/mmpose/datasets/pipelines/bottom_up_transform.py b/mmpose/datasets/pipelines/bottom_up_transform.py index de43945e42..461959483d 100644 --- a/mmpose/datasets/pipelines/bottom_up_transform.py +++ b/mmpose/datasets/pipelines/bottom_up_transform.py @@ -138,13 +138,26 @@ class HeatmapGenerator: Unbiased Data Processing for Human Pose Estimation (CVPR 2020). """ - def __init__(self, output_size, num_joints, sigma=-1, use_udp=False): + def __init__(self, + output_size, + num_joints, + sigma=-1, + use_udp=False, + add_neck=False, + with_bg=False): self.output_size = output_size self.num_joints = num_joints + self.add_neck = add_neck + self.with_bg = with_bg + + if self.add_neck: + self.num_joints += 1 + if sigma < 0: sigma = self.output_size / 64 self.sigma = sigma size = 6 * sigma + 3 + self.use_udp = use_udp if use_udp: self.x = np.arange(0, size, 1, np.float32) @@ -157,8 +170,15 @@ def __init__(self, output_size, num_joints, sigma=-1, use_udp=False): def __call__(self, joints): """Generate heatmaps.""" - hms = np.zeros((self.num_joints, self.output_size, self.output_size), - dtype=np.float32) + if self.with_bg: + hms = np.zeros( + (self.num_joints + 1, self.output_size, self.output_size), + dtype=np.float32) + else: + hms = np.zeros( + (self.num_joints, self.output_size, self.output_size), + dtype=np.float32) + sigma = self.sigma for p in joints: for idx, pt in enumerate(p): @@ -189,6 +209,8 @@ def __call__(self, joints): hms[idx, aa:bb, cc:dd] = np.maximum(hms[idx, aa:bb, cc:dd], g[a:b, c:d]) + if self.with_bg: + hms[-1] = 1 - np.max(hms[:-1], axis=0) return hms @@ -283,17 +305,20 @@ def _accumulate_paf_map_(self, pafs, src, dst, count): min_y = max(np.floor(min(src[1], dst[1]) - self.limb_width), 0) max_y = min( np.ceil(max(src[1], dst[1]) + self.limb_width), - self.output_size + 1) + self.output_size - 1) range_x = list(range(int(min_x), int(max_x + 1), 1)) range_y = list(range(int(min_y), int(max_y + 1), 1)) - xx, yy = np.meshgrid(range_x, range_y) - delta_x = xx - src[0] - delta_y = yy - src[1] - dist = np.abs(delta_x * unit_limb_vec[1] - delta_y * unit_limb_vec[0]) - mask_local = (dist < self.limb_width) + mask = np.zeros_like(count, dtype=bool) - mask[xx, yy] = mask_local + if len(range_x) > 0 and len(range_y) > 0: + xx, yy = np.meshgrid(range_x, range_y) + delta_x = xx - src[0] + delta_y = yy - src[1] + dist = np.abs(delta_x * unit_limb_vec[1] - + delta_y * unit_limb_vec[0]) + mask_local = (dist < self.limb_width) + mask[yy, xx] = mask_local pafs[0, mask] += unit_limb_vec[0] pafs[1, mask] += unit_limb_vec[1] @@ -312,8 +337,8 @@ def __call__(self, joints): dtype=np.float32) for p in joints: - src = p[sk[0] - 1] - dst = p[sk[1] - 1] + src = p[sk[0]] + dst = p[sk[1]] if src[2] > 0 and dst[2] > 0: self._accumulate_paf_map_(pafs[2 * idx:2 * idx + 2], src[:2], dst[:2], count) @@ -347,9 +372,9 @@ def __call__(self, results): assert len(mask) == len(self.output_size) if np.random.random() < self.flip_prob: - image 
= image[:, ::-1] - np.zeros_like(image) + image = image[:, ::-1].copy() - np.zeros_like(image) for i, _output_size in enumerate(self.output_size): - mask[i] = mask[i][:, ::-1] + mask[i] = mask[i][:, ::-1].copy() joints[i] = joints[i][:, self.flip_index] joints[i][:, :, 0] = _output_size - joints[i][:, :, 0] - 1 results['img'], results['mask'], results[ @@ -517,14 +542,17 @@ class BottomUpGenerateHeatmapTarget: Unbiased Data Processing for Human Pose Estimation (CVPR 2020). """ - def __init__(self, sigma, use_udp=False): + def __init__(self, sigma, use_udp=False, add_neck=False, with_bg=False): self.sigma = sigma self.use_udp = use_udp + self.add_neck = add_neck + self.with_bg = with_bg def _generate(self, num_joints, heatmap_size): """Get heatmap generator.""" heatmap_generator = [ - HeatmapGenerator(output_size, num_joints, self.sigma, self.use_udp) + HeatmapGenerator(output_size, num_joints, self.sigma, self.use_udp, + self.add_neck, self.with_bg) for output_size in heatmap_size ] return heatmap_generator @@ -535,19 +563,22 @@ def __call__(self, results): self._generate(results['ann_info']['num_joints'], results['ann_info']['heatmap_size']) target_list = list() - joints_list = results['joints'] + mask_list, joints_list = results['mask'], results['joints'] for scale_id in range(results['ann_info']['num_scales']): heatmaps = heatmap_generator[scale_id](joints_list[scale_id]) target_list.append(heatmaps.astype(np.float32)) - results['target'] = target_list + mask_list[scale_id] = mask_list[scale_id].astype(np.float32) + + results['targets'] = target_list + results['masks'] = mask_list return results @PIPELINES.register_module() class BottomUpGenerateTarget: - """Generate multi-scale heatmap target for bottom-up. + """Generate multi-scale heatmap target for associate embedding. Args: sigma (int): Sigma of heatmap Gaussian @@ -625,21 +656,20 @@ def __call__(self, results): if self.skeleton is None: assert results['ann_info']['skeleton'] is not None self.skeleton = results['ann_info']['skeleton'] - else: - assert np.array( - self.skeleton).max() < results['ann_info']['num_joints'] paf_generator = \ self._generate(results['ann_info']['heatmap_size'], self.skeleton) target_list = list() - joints_list = results['joints'] + mask_list, joints_list = results['mask'], results['joints'] for scale_id in range(results['ann_info']['num_scales']): pafs = paf_generator[scale_id](joints_list[scale_id]) target_list.append(pafs.astype(np.float32)) + mask_list[scale_id] = mask_list[scale_id].astype(np.float32) - results['target'] = target_list + results['targets'] = target_list + results['masks'] = mask_list return results @@ -651,16 +681,19 @@ class BottomUpGetImgSize: `results['ann_info']['image_size']×current_scale`. Args: - test_scale_factor (List[float]): Multi scale + test_scale_factor (List[float]): Multi scale. + max_input_size (int): Constraint of the max input size. current_scale (int): default 1 use_udp (bool): To use unbiased data processing. Paper ref: Huang et al. The Devil is in the Details: Delving into Unbiased Data Processing for Human Pose Estimation (CVPR 2020). 
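The PAF targets generated above encode, for every limb, the unit vector from the source joint at all pixels lying within limb_width of the joint-to-joint segment. A small NumPy sketch of that rasterisation; the real pipeline additionally restricts the mask to the segment's bounding box and averages overlapping limbs, and the shapes and names here are illustrative:

import numpy as np

def paf_for_limb(src, dst, size=64, limb_width=1.0):
    # Rasterise one limb into a 2-channel PAF of shape (2, size, size).
    paf = np.zeros((2, size, size), dtype=np.float32)
    limb_vec = np.asarray(dst, np.float32) - np.asarray(src, np.float32)
    norm = np.linalg.norm(limb_vec)
    if norm == 0:
        return paf
    unit = limb_vec / norm
    xx, yy = np.meshgrid(np.arange(size), np.arange(size))
    # distance of every pixel to the line through src along the limb direction
    dist = np.abs((xx - src[0]) * unit[1] - (yy - src[1]) * unit[0])
    mask = dist < limb_width
    paf[0][mask] = unit[0]
    paf[1][mask] = unit[1]
    return paf

print(paf_for_limb((10, 10), (40, 10)).shape)  # (2, 64, 64)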
""" - def __init__(self, test_scale_factor, current_scale=1, use_udp=False): + def __init__(self, test_scale_factor, max_input_size=None, + current_scale=1, use_udp=False): self.test_scale_factor = test_scale_factor self.min_scale = min(test_scale_factor) + self.max_input_size = max_input_size self.current_scale = current_scale self.use_udp = use_udp @@ -701,7 +734,18 @@ def __call__(self, results): center = (scale_w / 2.0, scale_h / 2.0) else: center = np.array([round(w / 2.0), round(h / 2.0)]) - results['ann_info']['test_scale_factor'] = self.test_scale_factor + + # calculate the test scale factor + if self.max_input_size is not None: + test_scale_factor = np.array(self.test_scale_factor) + accept_scale_w = (test_scale_factor * w_resized) < self.max_input_size + accept_scale_h = (test_scale_factor * h_resized) < self.max_input_size + keep = (accept_scale_w * accept_scale_h) > 0 + test_scale_factor = test_scale_factor[keep].tolist() + else: + test_scale_factor = self.test_scale_factor + + results['ann_info']['test_scale_factor'] = test_scale_factor results['ann_info']['base_size'] = (w_resized, h_resized) results['ann_info']['center'] = center results['ann_info']['scale'] = np.array([scale_w, scale_h]) diff --git a/mmpose/datasets/pipelines/shared_transform.py b/mmpose/datasets/pipelines/shared_transform.py index 15e837c9a8..352418e331 100644 --- a/mmpose/datasets/pipelines/shared_transform.py +++ b/mmpose/datasets/pipelines/shared_transform.py @@ -414,29 +414,31 @@ class MultitaskGatherTarget: pipeline_indices (list[int]): Pipeline index of each head. """ - def __init__(self, pipeline_list, pipeline_indices): + def __init__(self, + pipeline_list, + pipeline_indices=None, + keys=('target', 'target_weight')): + self.keys = keys self.pipelines = [] for pipeline in pipeline_list: self.pipelines.append(Compose(pipeline)) - self.pipeline_indices = pipeline_indices + if pipeline_indices is None: + self.pipeline_indices = list(range(len(pipeline_list))) + else: + self.pipeline_indices = pipeline_indices def __call__(self, results): # generate target and target weights using all pipelines - _target, _target_weight = [], [] + pipeline_outputs = [] for pipeline in self.pipelines: - results_head = pipeline(results) - _target.append(results_head['target']) - _target_weight.append(results_head['target_weight']) - - # reorganize generated target, target_weights according - # to self.pipelines_indices - target, target_weight = [], [] - for ind in self.pipeline_indices: - target.append(_target[ind]) - target_weight.append(_target_weight[ind]) - - results['target'] = target - results['target_weight'] = target_weight + pipeline_output = pipeline(results) + pipeline_outputs.append(pipeline_output.copy()) + + for key in self.keys: + result_key = [] + for ind in self.pipeline_indices: + result_key.append(pipeline_outputs[ind].get(key, None)) + results[key] = result_key return results diff --git a/mmpose/models/backbones/__init__.py b/mmpose/models/backbones/__init__.py index eb3959d495..45bb35fb24 100644 --- a/mmpose/models/backbones/__init__.py +++ b/mmpose/models/backbones/__init__.py @@ -2,9 +2,12 @@ from .cpm import CPM from .hourglass import HourglassNet from .hrnet import HRNet +from .lightweight_openpose import LightweightOpenPoseNetwork from .mobilenet_v2 import MobileNetV2 from .mobilenet_v3 import MobileNetV3 from .mspn import MSPN +from .openpose_v1 import OpenPoseNetworkV1 +from .openpose_v2 import OpenPoseNetworkV2 from .regnet import RegNet from .resnest import ResNeSt from .resnet import 
ResNet, ResNetV1d @@ -22,5 +25,6 @@ 'AlexNet', 'HourglassNet', 'HRNet', 'MobileNetV2', 'MobileNetV3', 'RegNet', 'ResNet', 'ResNetV1d', 'ResNeXt', 'SCNet', 'SEResNet', 'SEResNeXt', 'ShuffleNetV1', 'ShuffleNetV2', 'CPM', 'RSN', 'MSPN', 'ResNeSt', 'VGG', - 'TCN' + 'TCN', 'OpenPoseNetworkV1', 'OpenPoseNetworkV2', + 'LightweightOpenPoseNetwork' ] diff --git a/mmpose/models/backbones/cpm.py b/mmpose/models/backbones/cpm.py index 3aa09523ed..1659589bac 100644 --- a/mmpose/models/backbones/cpm.py +++ b/mmpose/models/backbones/cpm.py @@ -14,22 +14,34 @@ class CpmBlock(nn.Module): """CpmBlock for Convolutional Pose Machine. - Generate module recursively and use BasicBlock as the base unit. - Args: in_channels (int): Input channels of this block. - out_channels (int): Output channels of this block. + channels (list): Output channels of each conv module. + kernels (list): Kernel sizes of each conv module. """ - def __init__(self, in_channels, out_channels, norm_cfg=None): + def __init__(self, + in_channels, + channels=(128, 128, 128), + kernels=(11, 11, 11), + norm_cfg=None): super().__init__() - self.model = nn.Sequential( - ConvModule( - in_channels, out_channels, 11, padding=5, norm_cfg=norm_cfg), - ConvModule( - out_channels, out_channels, 11, padding=5, norm_cfg=norm_cfg), - ConvModule( - out_channels, out_channels, 11, padding=5, norm_cfg=norm_cfg)) + + assert len(channels) == len(kernels) + layers = [] + for i in range(len(channels)): + if i == 0: + input_channels = in_channels + else: + input_channels = channels[i - 1] + layers.append( + ConvModule( + input_channels, + channels[i], + kernels[i], + padding=(kernels[i] - 1) // 2, + norm_cfg=norm_cfg)) + self.model = nn.Sequential(*layers) def forward(self, x): """Model forward function.""" @@ -107,8 +119,11 @@ def __init__(self, nn.MaxPool2d(kernel_size=3, stride=2, padding=1)) self.cpm_stages = nn.ModuleList([ - CpmBlock(middle_channels + out_channels, feat_channels, norm_cfg) - for _ in range(num_stages - 1) + CpmBlock( + middle_channels + out_channels, + channels=[feat_channels, feat_channels, feat_channels], + kernels=[11, 11, 11], + norm_cfg=norm_cfg) for _ in range(num_stages - 1) ]) self.middle_conv = nn.ModuleList([ diff --git a/mmpose/models/backbones/lightweight_openpose.py b/mmpose/models/backbones/lightweight_openpose.py new file mode 100644 index 0000000000..3ca73ab22b --- /dev/null +++ b/mmpose/models/backbones/lightweight_openpose.py @@ -0,0 +1,487 @@ +import copy + +import torch +import torch.nn as nn +from mmcv.cnn import ConvModule, constant_init, normal_init +from torch.nn.modules.batchnorm import _BatchNorm + +from mmpose.utils import get_root_logger +from ..builder import BACKBONES +from .base_backbone import BaseBackbone +from .utils import load_checkpoint + + +class CpmLayer(nn.Module): + """A CPM-type layer. + + Args: + in_channels (int): The input channels. + out_channels (int): The output channels. 
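CpmLayer below builds its body from 3x3 depthwise plus 1x1 pointwise ELU conv pairs around a residual sum. A minimal plain-PyTorch sketch of one such pair (the channel count is illustrative):

import torch
import torch.nn as nn

class DepthwiseSeparable(nn.Module):
    # One 3x3 depthwise + 1x1 pointwise conv pair with ELU activations.
    def __init__(self, channels):
        super().__init__()
        self.depthwise = nn.Conv2d(channels, channels, 3, padding=1,
                                   groups=channels, bias=False)
        self.pointwise = nn.Conv2d(channels, channels, 1, bias=False)
        self.act = nn.ELU(inplace=True)

    def forward(self, x):
        return self.act(self.pointwise(self.act(self.depthwise(x))))

x = torch.rand(1, 128, 46, 46)
print(DepthwiseSeparable(128)(x).shape)  # torch.Size([1, 128, 46, 46])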
+ """ + + def __init__(self, in_channels, out_channels): + super().__init__() + self.pre_conv = ConvModule( + in_channels, out_channels, 1, padding=0, norm_cfg=None, bias=True) + self.feat = nn.Sequential( + ConvModule( + out_channels, + out_channels, + 3, + padding=1, + groups=out_channels, + bias=False, + act_cfg=dict(type='ELU'), + norm_cfg=None), + ConvModule( + out_channels, + out_channels, + 1, + padding=0, + bias=False, + act_cfg=dict(type='ELU'), + norm_cfg=None), + ConvModule( + out_channels, + out_channels, + 3, + padding=1, + groups=out_channels, + bias=False, + act_cfg=dict(type='ELU'), + norm_cfg=None), + ConvModule( + out_channels, + out_channels, + 1, + padding=0, + bias=False, + act_cfg=dict(type='ELU'), + norm_cfg=None), + ConvModule( + out_channels, + out_channels, + 3, + padding=1, + groups=out_channels, + bias=False, + act_cfg=dict(type='ELU'), + norm_cfg=None), + ConvModule( + out_channels, + out_channels, + 1, + padding=0, + bias=False, + act_cfg=dict(type='ELU'), + norm_cfg=None)) + self.out_conv = ConvModule( + out_channels, out_channels, 3, padding=1, norm_cfg=None, bias=True) + + def forward(self, x): + x = self.pre_conv(x) + x = self.out_conv(x + self.feat(x)) + return x + + +class InitialStage(nn.Module): + """The initial stage. + + Args: + in_channels (int): The input channels. + mid_channels (int): The middle-layer channels. + out_channels_cm (int): The output channels for CM ( + confidence map, or heatmap). + out_channels_paf (int): The output channels for PAF ( + part-affinity field). + """ + + def __init__(self, in_channels, mid_channels, out_channels_cm, + out_channels_paf): + super().__init__() + self.feat = nn.Sequential( + ConvModule( + in_channels, + in_channels, + 3, + padding=1, + norm_cfg=None, + bias=True), + ConvModule( + in_channels, + in_channels, + 3, + padding=1, + norm_cfg=None, + bias=True), + ConvModule( + in_channels, + in_channels, + 3, + padding=1, + norm_cfg=None, + bias=True)) + self.cm_out_conv = nn.Sequential( + ConvModule( + in_channels, + mid_channels, + kernel_size=1, + padding=0, + norm_cfg=None, + bias=True), + ConvModule( + mid_channels, + out_channels_cm, + kernel_size=1, + padding=0, + norm_cfg=None, + act_cfg=None, + bias=True)) + self.paf_out_conv = nn.Sequential( + ConvModule( + in_channels, + mid_channels, + kernel_size=1, + padding=0, + norm_cfg=None, + bias=True), + ConvModule( + mid_channels, + out_channels_paf, + kernel_size=1, + padding=0, + norm_cfg=None, + act_cfg=None, + bias=True)) + + def forward(self, x): + features = self.feat(x) + cm_output = self.cm_out_conv(features) + paf_output = self.paf_out_conv(features) + return [cm_output, paf_output] + + +class RefinementStageBlock(nn.Module): + """The block for the refinement stage. + + Args: + in_channels (int): The input channels. + out_channels (int): The output channels. + norm_cfg (dict): Dictionary to construct and config norm layer. 
+ """ + + def __init__(self, + in_channels, + out_channels, + norm_cfg=dict(type='BN', requires_grad=True)): + super().__init__() + self.pre_conv = ConvModule( + in_channels, out_channels, 1, padding=0, norm_cfg=None, bias=True) + self.feat = nn.Sequential( + ConvModule( + out_channels, + out_channels, + 3, + padding=1, + norm_cfg=norm_cfg, + bias=True), + ConvModule( + out_channels, + out_channels, + 3, + dilation=2, + padding=2, + norm_cfg=norm_cfg, + bias=True)) + + def forward(self, x): + pre_features = self.pre_conv(x) + features = self.feat(pre_features) + return pre_features + features + + +class RefinementStage(nn.Module): + """The refinement stage. + + Args: + in_channels (int): The input channels. + mid_channels (int): The middle-layer channels. + out_channels_cm (int): The output channels for CM ( + confidence map, or heatmap). + out_channels_paf (int): The output channels for PAF ( + part-affinity field). + norm_cfg (dict): Dictionary to construct and config norm layer. + """ + + def __init__(self, + in_channels, + mid_channels, + out_channels_cm, + out_channels_paf, + norm_cfg=dict(type='BN', requires_grad=True)): + super().__init__() + self.feat = nn.Sequential( + RefinementStageBlock(in_channels, mid_channels, norm_cfg), + RefinementStageBlock(mid_channels, mid_channels, norm_cfg), + RefinementStageBlock(mid_channels, mid_channels, norm_cfg), + RefinementStageBlock(mid_channels, mid_channels, norm_cfg), + RefinementStageBlock(mid_channels, mid_channels, norm_cfg)) + self.cm_out_conv = nn.Sequential( + ConvModule( + mid_channels, + mid_channels, + 1, + padding=0, + norm_cfg=None, + bias=True), + ConvModule( + mid_channels, + out_channels_cm, + 1, + padding=0, + norm_cfg=None, + act_cfg=None, + bias=True)) + self.paf_out_conv = nn.Sequential( + ConvModule( + mid_channels, + mid_channels, + 1, + padding=0, + norm_cfg=None, + bias=True), + ConvModule( + mid_channels, + out_channels_paf, + 1, + padding=0, + norm_cfg=None, + act_cfg=None, + bias=True)) + + def forward(self, x): + features = self.feat(x) + cm_output = self.cm_out_conv(features) + paf_output = self.paf_out_conv(features) + return [cm_output, paf_output] + + +@BACKBONES.register_module() +class LightweightOpenPoseNetwork(BaseBackbone): + """Lightweight OpenPose backbone Network. + + Real-time 2D Multi-Person Pose Estimation on + CPU: Lightweight OpenPose + + More details can be found in the `paper + `__ . + + Args: + in_channels (int): The input channels. + out_channels_cm (int): The output channels for CM ( + confidence map, or heatmap). + out_channels_paf (int): The output channels for PAF ( + part-affinity field). + stem_feat_channels (int): Feature channel of the stem network. + num_stages (int): Number of stages. + norm_cfg (dict): Dictionary to construct and config norm layer. + + Example: + >>> from mmpose.models import LightweightOpenPoseNetwork + >>> import torch + >>> self = LightweightOpenPoseNetwork(3, 19, 38) + >>> self.eval() + >>> inputs = torch.rand(1, 3, 368, 368) + >>> level_outputs = self.forward(inputs) + >>> for level_output in level_outputs: + ... 
print(tuple(level_output.shape)) + (1, 19, 46, 46) + (1, 19, 46, 46) + (1, 38, 46, 46) + (1, 38, 46, 46) + """ + + def __init__(self, + in_channels, + out_channels_cm=19, + out_channels_paf=38, + stem_feat_channels=128, + num_stages=2, + norm_cfg=dict(type='BN', requires_grad=True)): + # Protect mutable default arguments + norm_cfg = copy.deepcopy(norm_cfg) + super().__init__() + + assert in_channels == 3 + + self.num_stages = num_stages + assert self.num_stages >= 1 + + self.features = nn.Sequential( + ConvModule( + in_channels, + 32, + 3, + stride=2, + padding=1, + norm_cfg=norm_cfg, + bias=False), + # conv_dw(32, 64) + ConvModule( + 32, 32, 3, padding=1, groups=32, bias=False, + norm_cfg=norm_cfg), + ConvModule(32, 64, 1, padding=0, bias=False, norm_cfg=norm_cfg), + # conv_dw(64, 128, stride=2) + ConvModule( + 64, + 64, + 3, + stride=2, + padding=1, + groups=64, + bias=False, + norm_cfg=norm_cfg), + ConvModule(64, 128, 1, padding=0, bias=False, norm_cfg=norm_cfg), + # conv_dw(128, 128) + ConvModule( + 128, + 128, + 3, + padding=1, + groups=128, + bias=False, + norm_cfg=norm_cfg), + ConvModule(128, 128, 1, padding=0, bias=False, norm_cfg=norm_cfg), + # conv_dw(128, 256, stride=2) + ConvModule( + 128, + 128, + 3, + stride=2, + padding=1, + groups=128, + bias=False, + norm_cfg=norm_cfg), + ConvModule(128, 256, 1, padding=0, bias=False, norm_cfg=norm_cfg), + # conv_dw(256, 256) + ConvModule( + 256, + 256, + 3, + padding=1, + groups=256, + bias=False, + norm_cfg=norm_cfg), + ConvModule(256, 256, 1, padding=0, bias=False, norm_cfg=norm_cfg), + # conv_dw(256, 512) + ConvModule( + 256, + 256, + 3, + padding=1, + groups=256, + bias=False, + norm_cfg=norm_cfg), + ConvModule(256, 512, 1, padding=0, bias=False, norm_cfg=norm_cfg), + # conv_dw(512, 512, dilation=2, padding=2) + ConvModule( + 512, + 512, + 3, + padding=2, + dilation=2, + groups=512, + bias=False, + norm_cfg=norm_cfg), + ConvModule(512, 512, 1, padding=0, bias=False, norm_cfg=norm_cfg), + # conv_dw(512, 512) + ConvModule( + 512, + 512, + 3, + padding=1, + groups=512, + bias=False, + norm_cfg=norm_cfg), + ConvModule(512, 512, 1, padding=0, bias=False, norm_cfg=norm_cfg), + # conv_dw(512, 512) + ConvModule( + 512, + 512, + 3, + padding=1, + groups=512, + bias=False, + norm_cfg=norm_cfg), + ConvModule(512, 512, 1, padding=0, bias=False, norm_cfg=norm_cfg), + # conv_dw(512, 512) + ConvModule( + 512, + 512, + 3, + padding=1, + groups=512, + bias=False, + norm_cfg=norm_cfg), + ConvModule(512, 512, 1, padding=0, bias=False, norm_cfg=norm_cfg), + # conv_dw(512, 512) + ConvModule( + 512, + 512, + 3, + padding=1, + groups=512, + bias=False, + norm_cfg=norm_cfg), + ConvModule(512, 512, 1, padding=0, bias=False, norm_cfg=norm_cfg)) + + self.cpm = CpmLayer(512, stem_feat_channels) + + self.initial_stage = InitialStage(stem_feat_channels, 512, + out_channels_cm, out_channels_paf) + self.refinement_stages = nn.ModuleList() + for idx in range(num_stages - 1): + self.refinement_stages.append( + RefinementStage( + stem_feat_channels + out_channels_cm + out_channels_paf, + stem_feat_channels, + out_channels_cm, + out_channels_paf, + norm_cfg=norm_cfg)) + + def init_weights(self, pretrained=None): + """Initialize the weights in backbone. + + Args: + pretrained (str, optional): Path to pre-trained weights. + Defaults to None. 
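The init_weights methods added in this patch follow the usual convention: convolutions get a small normal initialisation and norm layers are set to identity before an optional checkpoint is loaded. An equivalent plain-PyTorch sketch (the patch itself uses the mmcv normal_init/constant_init helpers):

import torch.nn as nn

def init_pose_backbone(module):
    for m in module.modules():
        if isinstance(m, nn.Conv2d):
            nn.init.normal_(m.weight, std=0.001)
            if m.bias is not None:
                nn.init.zeros_(m.bias)
        elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
            nn.init.constant_(m.weight, 1)
            nn.init.constant_(m.bias, 0)

net = nn.Sequential(nn.Conv2d(3, 32, 3, padding=1), nn.BatchNorm2d(32))
init_pose_backbone(net)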
+ """ + for m in self.modules(): + if isinstance(m, nn.Conv2d): + normal_init(m, std=0.001) + elif isinstance(m, (_BatchNorm, nn.GroupNorm)): + constant_init(m, 1) + if isinstance(pretrained, str): + logger = get_root_logger() + load_checkpoint(self, pretrained, strict=False, logger=logger) + elif pretrained is not None: + raise TypeError('pretrained must be a str or None') + + def forward(self, x): + """Model forward function.""" + stem_feat = self.cpm(self.features(x)) + + cm_outputs = [] + paf_outputs = [] + + cm_output, paf_output = self.initial_stage(stem_feat) + cm_outputs.append(cm_output) + paf_outputs.append(paf_output) + + for refinement_stage in self.refinement_stages: + cm_output, paf_output = refinement_stage( + torch.cat([stem_feat, cm_outputs[-1], paf_outputs[-1]], dim=1)) + cm_outputs.append(cm_output) + paf_outputs.append(paf_output) + + return [*cm_outputs, *paf_outputs] diff --git a/mmpose/models/backbones/openpose_v1.py b/mmpose/models/backbones/openpose_v1.py new file mode 100644 index 0000000000..3a49fbc0ef --- /dev/null +++ b/mmpose/models/backbones/openpose_v1.py @@ -0,0 +1,186 @@ +import copy + +import torch +import torch.nn as nn +from mmcv.cnn import ConvModule, constant_init, normal_init +from torch.nn.modules.batchnorm import _BatchNorm + +from mmpose.utils import get_root_logger +from ..builder import BACKBONES +from .base_backbone import BaseBackbone +from .cpm import CpmBlock +from .utils import load_checkpoint + + +@BACKBONES.register_module() +class OpenPoseNetworkV1(BaseBackbone): + """OpenPose backbone Network. + + Realtime Multi-Person 2D Pose Estimation using Part Affinity Fields. + More details can be found in the `paper + `__ . + + Based on the officially released model + 'https://github.com/CMU-Perceptual-Computing-Lab/openpose/ + blob/master/models/pose/coco/pose_deploy_linevec.prototxt' + + Args: + in_channels (int): The input channels. + out_channels_cm (int): The output channels for CM ( + confidence map, or heatmap). + out_channels_paf (int): The output channels for PAF ( + part-affinity field). + stem_feat_channels (int): Feature channel of the stem network. + num_stages (int): Number of stages. + norm_cfg (dict): Dictionary to construct and config norm layer. + + Example: + >>> from mmpose.models import OpenPoseNetworkV1 + >>> import torch + >>> self = OpenPoseNetwork(3, 19, 38) + >>> self.eval() + >>> inputs = torch.rand(1, 3, 368, 368) + >>> level_outputs = self.forward(inputs) + >>> for level_output in level_outputs: + ... 
print(tuple(level_output.shape)) + (1, 19, 46, 46) + (1, 19, 46, 46) + (1, 19, 46, 46) + (1, 19, 46, 46) + (1, 19, 46, 46) + (1, 19, 46, 46) + (1, 38, 46, 46) + (1, 38, 46, 46) + (1, 38, 46, 46) + (1, 38, 46, 46) + (1, 38, 46, 46) + (1, 38, 46, 46) + """ + + def __init__(self, + in_channels, + out_channels_cm=19, + out_channels_paf=38, + stem_feat_channels=128, + num_stages=6, + norm_cfg=dict(type='BN', requires_grad=True)): + # Protect mutable default arguments + norm_cfg = copy.deepcopy(norm_cfg) + super().__init__() + + assert in_channels == 3 + + self.num_stages = num_stages + assert self.num_stages >= 1 + + self.features = nn.Sequential( + ConvModule( + in_channels, 64, 3, padding=1, norm_cfg=norm_cfg, bias=True), + ConvModule(64, 64, 3, padding=1, norm_cfg=norm_cfg, bias=True), + nn.MaxPool2d(kernel_size=2, stride=2, padding=0), + ConvModule(64, 128, 3, padding=1, norm_cfg=norm_cfg, bias=True), + ConvModule(128, 128, 3, padding=1, norm_cfg=norm_cfg, bias=True), + nn.MaxPool2d(kernel_size=2, stride=2, padding=0), + ConvModule(128, 256, 3, padding=1, norm_cfg=norm_cfg, bias=True), + ConvModule(256, 256, 3, padding=1, norm_cfg=norm_cfg, bias=True), + ConvModule(256, 256, 3, padding=1, norm_cfg=norm_cfg, bias=True), + ConvModule(256, 256, 3, padding=1, norm_cfg=norm_cfg, bias=True), + nn.MaxPool2d(kernel_size=2, stride=2, padding=0), + ConvModule(256, 512, 3, padding=1, norm_cfg=norm_cfg, bias=True), + ConvModule(512, 512, 3, padding=1, norm_cfg=norm_cfg, bias=True), + ConvModule(512, 256, 3, padding=1, norm_cfg=norm_cfg, bias=True), + ConvModule( + 256, + stem_feat_channels, + 3, + padding=1, + norm_cfg=norm_cfg, + bias=True)) + + # stage 0 + self.cm_stages = nn.ModuleList([ + CpmBlock(stem_feat_channels, [ + stem_feat_channels, stem_feat_channels, stem_feat_channels, 512 + ], [3, 3, 3, 1], norm_cfg) + ]) + self.paf_stages = nn.ModuleList([ + CpmBlock(stem_feat_channels, [ + stem_feat_channels, stem_feat_channels, stem_feat_channels, 512 + ], [3, 3, 3, 1], norm_cfg) + ]) + + # stage 1 to n-1 + for _ in range(1, self.num_stages): + self.cm_stages.append( + CpmBlock( + stem_feat_channels + out_channels_cm + out_channels_paf, [ + stem_feat_channels, stem_feat_channels, + stem_feat_channels, stem_feat_channels, + stem_feat_channels, stem_feat_channels + ], [7, 7, 7, 7, 7, 1], norm_cfg)) + self.paf_stages.append( + CpmBlock( + stem_feat_channels + out_channels_cm + out_channels_paf, [ + stem_feat_channels, stem_feat_channels, + stem_feat_channels, stem_feat_channels, + stem_feat_channels, stem_feat_channels + ], [7, 7, 7, 7, 7, 1], norm_cfg)) + + self.cm_out_convs = nn.ModuleList() + self.paf_out_convs = nn.ModuleList() + + for i in range(self.num_stages): + if i == 0: + input_channels = 512 + else: + input_channels = stem_feat_channels + self.cm_out_convs.append( + ConvModule(input_channels, out_channels_cm, 1, act_cfg=None)) + self.paf_out_convs.append( + ConvModule(input_channels, out_channels_paf, 1, act_cfg=None)) + + def init_weights(self, pretrained=None): + """Initialize the weights in backbone. + + Args: + pretrained (str, optional): Path to pre-trained weights. + Defaults to None. 
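The forward pass of OpenPoseNetworkV1 that follows feeds every stage after the first with the stem features concatenated with the previous stage's CM and PAF predictions. A stripped-down sketch of that wiring, with 1x1 convs standing in for the CpmBlock stages and channel counts matching the defaults above:

import torch
import torch.nn as nn

stem_ch, cm_ch, paf_ch = 128, 19, 38
stem = torch.rand(1, stem_ch, 46, 46)

first_cm = nn.Conv2d(stem_ch, cm_ch, 1)
first_paf = nn.Conv2d(stem_ch, paf_ch, 1)
refine_cm = nn.Conv2d(stem_ch + cm_ch + paf_ch, cm_ch, 1)
refine_paf = nn.Conv2d(stem_ch + cm_ch + paf_ch, paf_ch, 1)

cm, paf = first_cm(stem), first_paf(stem)
for _ in range(5):  # stages 1..5 refine the previous predictions
    x = torch.cat([stem, cm, paf], dim=1)
    cm, paf = refine_cm(x), refine_paf(x)
print(cm.shape, paf.shape)  # (1, 19, 46, 46) (1, 38, 46, 46)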
+ """ + if isinstance(pretrained, str): + logger = get_root_logger() + load_checkpoint(self, pretrained, strict=False, logger=logger) + elif pretrained is None: + for m in self.modules(): + if isinstance(m, nn.Conv2d): + normal_init(m, std=0.001) + elif isinstance(m, (_BatchNorm, nn.GroupNorm)): + constant_init(m, 1) + else: + raise TypeError('pretrained must be a str or None') + + def forward(self, x): + """Model forward function.""" + stem_feat = self.features(x) + out_feats = [] + out_feats.append(stem_feat) + + cm_outputs = [] + paf_outputs = [] + + for ind in range(self.num_stages): + cm_stage = self.cm_stages[ind] + paf_stage = self.paf_stages[ind] + + cm_out_conv = self.cm_out_convs[ind] + paf_out_conv = self.paf_out_convs[ind] + + cm_output = cm_out_conv(cm_stage(out_feats[-1])) + cm_outputs.append(cm_output) + paf_output = paf_out_conv(paf_stage(out_feats[-1])) + paf_outputs.append(paf_output) + + out_feat = torch.cat([stem_feat, cm_output, paf_output], 1) + + out_feats.append(out_feat) + + return [*cm_outputs, *paf_outputs] diff --git a/mmpose/models/backbones/openpose_v2.py b/mmpose/models/backbones/openpose_v2.py new file mode 100644 index 0000000000..4166c698f7 --- /dev/null +++ b/mmpose/models/backbones/openpose_v2.py @@ -0,0 +1,303 @@ +import copy + +import torch +import torch.nn as nn +from mmcv.cnn import ConvModule, constant_init, normal_init +from torch.nn.modules.batchnorm import _BatchNorm + +from mmpose.utils import get_root_logger +from ..builder import BACKBONES +from .base_backbone import BaseBackbone +from .utils import load_checkpoint + + +class MconvBlock(nn.Module): + """MconvBlock for replacing convolutions of 7x7 kernel. + + Args: + in_channels (int): Input channels of this block. + channels (list): Output channels of each conv module. + kernels (list): Kernel sizes of each conv module. + """ + + def __init__(self, + in_channels, + channels=(96, 96, 96), + kernels=(3, 3, 3), + norm_cfg=dict(type='BN', requires_grad=True), + act_cfg=dict(type='PReLU')): + super().__init__() + + assert len(channels) == len(kernels) + + self.num_layers = len(channels) + + self.model = nn.ModuleList() + for i in range(self.num_layers): + if i == 0: + input_channels = in_channels + else: + input_channels = channels[i - 1] + self.model.append( + ConvModule( + input_channels, + channels[i], + kernels[i], + padding=(kernels[i] - 1) // 2, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + + def forward(self, x): + """Model forward function.""" + feat = [] + feat.append(x) + for i in range(self.num_layers): + feat.append(self.model[i](feat[-1])) + out = torch.cat([*feat[1:]], 1) + return out + + +class MconvStage(nn.Module): + """MconvStage. + + Args: + in_channels (int): Input channels of this block. + channels (list): Output channels of each conv module. + kernels (list): Kernel sizes of each conv module. 
+ """ + + def __init__(self, + in_channels, + out_channels, + num_blocks=5, + block_channels=(96, 96, 96), + block_kernels=(3, 3, 3), + norm_cfg=dict(type='BN', requires_grad=True), + act_cfg=dict(type='PReLU')): + super().__init__() + + layers = [] + for i in range(num_blocks): + + if i == 0: + input_channels = in_channels + else: + input_channels = sum(block_channels) + + layers.append( + MconvBlock( + input_channels, + channels=block_channels, + kernels=block_kernels, + norm_cfg=dict(type='BN', requires_grad=True), + act_cfg=dict(type='PReLU'))) + + layers.append( + ConvModule( + sum(block_channels), + out_channels, + kernel_size=1, + padding=0, + norm_cfg=norm_cfg, + act_cfg=act_cfg)) + + self.model = nn.Sequential(*layers) + + def forward(self, x): + """Model forward function.""" + out = self.model(x) + return out + + +@BACKBONES.register_module() +class OpenPoseNetworkV2(BaseBackbone): + """OpenPose backbone Network. + + Open{P}ose: realtime multi-person 2{D} pose estimation + using {P}art {A}ffinity {F}ields. + More details can be found in the `paper + `__ . + + Based on the officially released model + 'https://raw.githubusercontent.com/CMU-Perceptual-Computing-Lab/' + 'openpose/master/models/pose/body_25/pose_deploy.prototxt' + + Args: + in_channels (int): The input channels. + out_channels_cm (int): The output channels for CM ( + confidence map, or heatmap). + out_channels_paf (int): The output channels for PAF ( + part-affinity field). + stem_feat_channels (int): Feature channel of the stem network. + num_stages (int): Number of stages. + stage_types (list): Types can be 'CM' or 'PAF'. + num_blocks (int|list): Number of blocks in each stage. If + `num_blocks' is int, the same `num_blocks' will be used + for all stages. + block_channels (int|list): Number of block channels in each + stage. If `block_channels' is int, the same `block_channels' + will be used for all stages. + norm_cfg (dict): Dictionary to construct and config norm layer. + act_cfg (dict): Config dict for activation layer. + + Example: + >>> from mmpose.models import OpenPoseNetworkV2 + >>> import torch + >>> self = OpenPoseNetworkV2(3) + >>> self.eval() + >>> inputs = torch.rand(1, 3, 368, 368) + >>> level_outputs = self.forward(inputs) + >>> for level_output in level_outputs: + ... 
print(tuple(level_output.shape)) + (1, 38, 46, 46) + (1, 38, 46, 46) + (1, 38, 46, 46) + (1, 38, 46, 46) + (1, 38, 46, 46) + (1, 19, 46, 46) + """ + + def __init__(self, + in_channels, + out_channels_cm=19, + out_channels_paf=38, + stem_feat_channels=128, + num_stages=6, + stage_types=('PAF', 'PAF', 'PAF', 'PAF', 'PAF', 'CM'), + num_blocks=5, + block_channels=96, + norm_cfg=dict(type='BN', requires_grad=True), + act_cfg=dict(type='PReLU')): + # Protect mutable default arguments + norm_cfg = copy.deepcopy(norm_cfg) + super().__init__() + + assert in_channels == 3 + assert num_stages == len(stage_types) + + if isinstance(num_blocks, int): + num_blocks = [num_blocks] * num_stages + if isinstance(block_channels, int): + block_channels = [block_channels] * num_stages + + self.num_stages = num_stages + assert self.num_stages >= 1 + + self.features = nn.Sequential( + ConvModule( + in_channels, 64, 3, padding=1, norm_cfg=norm_cfg, bias=True), + ConvModule(64, 64, 3, padding=1, norm_cfg=norm_cfg, bias=True), + nn.MaxPool2d(kernel_size=2, stride=2, padding=0), + ConvModule(64, 128, 3, padding=1, norm_cfg=norm_cfg, bias=True), + ConvModule(128, 128, 3, padding=1, norm_cfg=norm_cfg, bias=True), + nn.MaxPool2d(kernel_size=2, stride=2, padding=0), + ConvModule(128, 256, 3, padding=1, norm_cfg=norm_cfg, bias=True), + ConvModule(256, 256, 3, padding=1, norm_cfg=norm_cfg, bias=True), + ConvModule(256, 256, 3, padding=1, norm_cfg=norm_cfg, bias=True), + ConvModule(256, 256, 3, padding=1, norm_cfg=norm_cfg, bias=True), + nn.MaxPool2d(kernel_size=2, stride=2, padding=0), + ConvModule(256, 512, 3, padding=1, norm_cfg=norm_cfg, bias=True), + ConvModule( + 512, + 512, + 3, + padding=1, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + bias=True), + ConvModule( + 512, + 256, + 3, + padding=1, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + bias=True), + ConvModule( + 256, + stem_feat_channels, + 3, + padding=1, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + bias=True)) + + self.stages = nn.ModuleList() + self.out_convs = nn.ModuleList() + + for i, stage_type in enumerate(stage_types): + if i == 0: + input_channels = stem_feat_channels + else: + if stage_types[i - 1] == 'CM': + input_channels = stem_feat_channels + out_channels_cm + else: + # stage_types[i-1] == 'PAF': + input_channels = stem_feat_channels + out_channels_paf + + if stage_type.upper() == 'CM': + self.stages.append( + MconvStage( + input_channels, + 256, + num_blocks=num_blocks[i], + block_channels=[block_channels[i]] * num_blocks[i], + block_kernels=[3] * num_blocks[i], + norm_cfg=dict(type='BN', requires_grad=True), + act_cfg=dict(type='PReLU'))) + self.out_convs.append( + ConvModule(256, out_channels_cm, 1, act_cfg=None)) + + elif stage_type.upper() == 'PAF': + self.stages.append( + MconvStage( + input_channels, + 256, + num_blocks=num_blocks[i], + block_channels=[block_channels[i]] * num_blocks[i], + block_kernels=[3] * num_blocks[i], + norm_cfg=dict(type='BN', requires_grad=True), + act_cfg=dict(type='PReLU'))) + self.out_convs.append( + ConvModule(256, out_channels_paf, 1, act_cfg=None)) + + else: + raise ValueError("stage_type should be either 'CM' or 'PAF'.") + + def init_weights(self, pretrained=None): + """Initialize the weights in backbone. + + Args: + pretrained (str, optional): Path to pre-trained weights. + Defaults to None. 
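For clarity, how the per-stage input channels of OpenPoseNetworkV2 work out with the defaults above: stage 0 sees only the stem features, and every later stage sees the stem concatenated with the previous stage's output (PAF or CM, depending on stage_types):

stem_feat_channels, out_cm, out_paf = 128, 19, 38
stage_types = ('PAF', 'PAF', 'PAF', 'PAF', 'PAF', 'CM')

in_channels = []
for i, _ in enumerate(stage_types):
    if i == 0:
        in_channels.append(stem_feat_channels)
    elif stage_types[i - 1] == 'CM':
        in_channels.append(stem_feat_channels + out_cm)
    else:
        in_channels.append(stem_feat_channels + out_paf)
print(in_channels)  # [128, 166, 166, 166, 166, 166]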
+ """ + if isinstance(pretrained, str): + logger = get_root_logger() + load_checkpoint(self, pretrained, strict=False, logger=logger) + elif pretrained is None: + for m in self.modules(): + if isinstance(m, nn.Conv2d): + normal_init(m, std=0.001) + elif isinstance(m, (_BatchNorm, nn.GroupNorm)): + constant_init(m, 1) + else: + raise TypeError('pretrained must be a str or None') + + def forward(self, x): + """Model forward function.""" + stem_feat = self.features(x) + out_feats = [] + out_feats.append(stem_feat) + + outputs = [] + + for ind in range(self.num_stages): + single_stage = self.stages[ind] + single_out_conv = self.out_convs[ind] + output = single_out_conv(single_stage(out_feats[-1])) + outputs.append(output) + + out_feat = torch.cat([stem_feat, output], 1) + out_feats.append(out_feat) + + return [*outputs] diff --git a/mmpose/models/detectors/__init__.py b/mmpose/models/detectors/__init__.py index 5420dfd1c6..cfa964e181 100644 --- a/mmpose/models/detectors/__init__.py +++ b/mmpose/models/detectors/__init__.py @@ -2,10 +2,11 @@ from .interhand_3d import Interhand3D from .mesh import ParametricMesh from .multi_task import MultiTask +from .paf import PartAffinityField from .pose_lifter import PoseLifter from .top_down import TopDown __all__ = [ 'TopDown', 'AssociativeEmbedding', 'ParametricMesh', 'MultiTask', - 'PoseLifter', 'Interhand3D' + 'PoseLifter', 'Interhand3D', 'PartAffinityField' ] diff --git a/mmpose/models/detectors/associative_embedding.py b/mmpose/models/detectors/associative_embedding.py index 30c07a66f4..078fe1cdc8 100644 --- a/mmpose/models/detectors/associative_embedding.py +++ b/mmpose/models/detectors/associative_embedding.py @@ -5,8 +5,9 @@ from mmcv.image import imwrite from mmcv.visualization.image import imshow -from mmpose.core.evaluation import (aggregate_results, get_group_preds, - get_multi_stage_outputs) +from mmpose.core.evaluation import (aggregate_scale, aggregate_stage_flip, + flip_feature_maps, get_group_preds, + split_ae_outputs) from mmpose.core.post_processing.group import HeatmapParser from mmpose.core.visualization import imshow_keypoints from .. import builder @@ -48,7 +49,6 @@ def __init__(self, self.backbone = builder.build_backbone(backbone) if keypoint_head is not None: - if 'loss_keypoint' not in keypoint_head and loss_pose is not None: warnings.warn( '`loss_pose` for BottomUp is deprecated, ' @@ -98,11 +98,11 @@ def forward(self, heatmaps height: H max_num_people: M Args: - img(torch.Tensor[NxCximgHximgW]): Input image. - targets(List(torch.Tensor[NxKxHxW])): Multi-scale target heatmaps. - masks(List(torch.Tensor[NxHxW])): Masks of multi-scale target + img (torch.Tensor[NxCximgHximgW]): Input image. + targets (List(torch.Tensor[NxKxHxW])): Multi-scale target heatmaps. + masks (List(torch.Tensor[NxHxW])): Masks of multi-scale target heatmaps - joints(List(torch.Tensor[NxMxKx2])): Joints of multi-scale target + joints (List(torch.Tensor[NxMxKx2])): Joints of multi-scale target heatmaps for ae loss img_metas(dict):Information about val&test By default this includes: @@ -144,13 +144,13 @@ def forward_train(self, img, targets, masks, joints, img_metas, **kwargs): max_num_people: M Args: - img(torch.Tensor[NxCximgHximgW]): Input image. - targets(List(torch.Tensor[NxKxHxW])): Multi-scale target heatmaps. - masks(List(torch.Tensor[NxHxW])): Masks of multi-scale target + img (torch.Tensor[NxCximgHximgW]): Input image. + targets (List(torch.Tensor[NxKxHxW])): Multi-scale target heatmaps. 
+ masks (List(torch.Tensor[NxHxW])): Masks of multi-scale target heatmaps - joints(List(torch.Tensor[NxMxKx2])): Joints of multi-scale target + joints (List(torch.Tensor[NxMxKx2])): Joints of multi-scale target heatmaps for ae loss - img_metas(dict):Information about val&test + img_metas (dict):Information about val&test By default this includes: - "image_file": image path - "aug_data": input @@ -225,8 +225,9 @@ def forward_test(self, img, img_metas, return_heatmap=False, **kwargs): result = {} - aggregated_heatmaps = None - tags_list = [] + scale_heatmaps_list = [] + scale_tags_list = [] + for idx, s in enumerate(sorted(test_scale_factor, reverse=True)): image_resized = aug_data[idx].to(img.device) @@ -234,47 +235,82 @@ def forward_test(self, img, img_metas, return_heatmap=False, **kwargs): if self.with_keypoint: outputs = self.keypoint_head(features) + heatmaps, tags = split_ae_outputs(outputs, + self.test_cfg['num_joints'], + self.test_cfg['with_heatmaps'], + self.test_cfg['with_ae']) + if self.test_cfg.get('flip_test', True): # use flip test features_flipped = self.backbone( torch.flip(image_resized, [3])) if self.with_keypoint: outputs_flipped = self.keypoint_head(features_flipped) + + heatmaps_flipped, tags_flipped = split_ae_outputs( + outputs_flipped, self.test_cfg['num_joints'], + self.test_cfg['with_heatmaps'], self.test_cfg['with_ae']) + + heatmaps_flipped = flip_feature_maps( + heatmaps_flipped, flip_index=img_metas['flip_index']) + if self.test_cfg['tag_per_joint']: + tags_flipped = flip_feature_maps( + tags_flipped, flip_index=img_metas['flip_index']) + else: + tags_flipped = flip_feature_maps( + tags_flipped, flip_index=None, flip_output=True) + else: - outputs_flipped = None - - _, heatmaps, tags = get_multi_stage_outputs( - outputs, - outputs_flipped, - self.test_cfg['num_joints'], - self.test_cfg['with_heatmaps'], - self.test_cfg['with_ae'], - self.test_cfg['tag_per_joint'], - img_metas['flip_index'], - self.test_cfg['project2image'], - base_size, - align_corners=self.use_udp) - - aggregated_heatmaps, tags_list = aggregate_results( - s, - aggregated_heatmaps, - tags_list, + heatmaps_flipped = None + tags_flipped = None + + aggregated_heatmaps = aggregate_stage_flip( heatmaps, + heatmaps_flipped, + index=-1, + project2image=self.test_cfg['project2image'], + size_projected=base_size, + align_corners=self.test_cfg.get('align_corners', True), + aggregate_stage='average', + aggregate_flip='average') + + aggregated_tags = aggregate_stage_flip( tags, - test_scale_factor, - self.test_cfg['project2image'], - self.test_cfg.get('flip_test', True), - align_corners=self.use_udp) + tags_flipped, + index=-1, + project2image=self.test_cfg['project2image'], + size_projected=base_size, + align_corners=self.test_cfg.get('align_corners', True), + aggregate_stage='concat', + aggregate_flip='concat') + + if s == 1 or len(test_scale_factor) == 1: + if isinstance(aggregated_tags, list): + scale_tags_list.extend(aggregated_tags) + else: + scale_tags_list.append(aggregated_tags) + + if isinstance(aggregated_heatmaps, list): + scale_heatmaps_list.extend(aggregated_heatmaps) + else: + scale_heatmaps_list.append(aggregated_heatmaps) + + aggregated_heatmaps = aggregate_scale( + scale_heatmaps_list, + align_corners=self.test_cfg.get('align_corners', True), + aggregate_scale='average') - # average heatmaps of different scales - aggregated_heatmaps = aggregated_heatmaps / float( - len(test_scale_factor)) - tags = torch.cat(tags_list, dim=4) + aggregated_tags = aggregate_scale( + scale_tags_list, + 
align_corners=self.test_cfg.get('align_corners', True), + aggregate_scale='unsqueeze_concat') # perform grouping - grouped, scores = self.parser.parse(aggregated_heatmaps, tags, + grouped, scores = self.parser.parse(aggregated_heatmaps, + aggregated_tags, self.test_cfg['adjust'], - self.test_cfg['refine']) + self.test_cfg['refine'], + self.test_cfg.get('filter', False)) preds = get_group_preds( grouped, diff --git a/mmpose/models/detectors/paf.py b/mmpose/models/detectors/paf.py new file mode 100644 index 0000000000..2d06fb41f8 --- /dev/null +++ b/mmpose/models/detectors/paf.py @@ -0,0 +1,400 @@ +import warnings + +import mmcv +import torch +from mmcv.image import imwrite +from mmcv.visualization.image import imshow + +from mmpose.core.evaluation import (aggregate_scale, aggregate_stage_flip, + flip_feature_maps, + flip_part_affinity_fields, get_group_preds) +from mmpose.core.post_processing.group import PAFParser +from mmpose.core.visualization import imshow_keypoints +from .. import builder +from ..builder import POSENETS +from .base import BasePose + +try: + from mmcv.runner import auto_fp16 +except ImportError: + warnings.warn('auto_fp16 from mmpose will be deprecated from v0.15.0' + 'Please install mmcv>=1.1.4') + from mmpose.core import auto_fp16 + + +@POSENETS.register_module() +class PartAffinityField(BasePose): + """Bottom-up PAF (part affinity field) pose detectors. + + Paper ref: Cao, Zhe, et al. "OpenPose: realtime multi-person + 2D pose estimation using Part Affinity Fields." (TPAMI'2019) + + Args: + backbone (dict): Backbone modules to extract feature. + keypoint_head (dict): Keypoint head to process feature. + train_cfg (dict): Config for training. Default: None. + test_cfg (dict): Config for testing. Default: None. + pretrained (str): Path to the pretrained models. + loss_pose (None): Deprecated arguments. Please use + `loss_keypoint` for heads instead. + """ + + def __init__(self, + backbone, + keypoint_head=None, + train_cfg=None, + test_cfg=None, + pretrained=None, + loss_pose=None): + super().__init__() + self.fp16_enabled = False + + self.backbone = builder.build_backbone(backbone) + + if keypoint_head is not None: + if 'loss_keypoint' not in keypoint_head and loss_pose is not None: + warnings.warn( + '`loss_pose` for BottomUp is deprecated, ' + 'use `loss_keypoint` for heads instead. See ' + 'https://github.com/open-mmlab/mmpose/pull/382' + ' for more information.', DeprecationWarning) + keypoint_head['loss_keypoint'] = loss_pose + + self.keypoint_head = builder.build_head(keypoint_head) + + self.train_cfg = train_cfg + self.test_cfg = test_cfg + self.use_udp = test_cfg.get('use_udp', False) + self.parser = PAFParser(self.test_cfg) + self.init_weights(pretrained=pretrained) + + @property + def with_keypoint(self): + """Check if has keypoint_head.""" + return hasattr(self, 'keypoint_head') + + def init_weights(self, pretrained=None): + """Weight initialization for model.""" + self.backbone.init_weights(pretrained) + if self.with_keypoint: + self.keypoint_head.init_weights() + + @auto_fp16(apply_to=('img', )) + def forward(self, + img=None, + targets=None, + masks=None, + img_metas=None, + return_loss=True, + return_heatmap=False, + **kwargs): + """Calls either forward_train or forward_test depending on whether + return_loss is True. + Note: + batch_size: N + num_keypoints: K + num_img_channel: C + img_width: imgW + img_height: imgH + heatmaps weight: W + heatmaps height: H + max_num_people: M + Args: + img(torch.Tensor[NxCximgHximgW]): Input image. 
+ targets (list(list)): List of heatmaps + and pafs, each of which multi-scale targets. + masks (list(list(torch.Tensor[NxHxW]))): Masks of multi-scale + target heatmaps. + img_metas(dict):Information about val&test + By default this includes: + - "image_file": image path + - "aug_data": input + - "test_scale_factor": test scale factor + - "base_size": base size of input + - "center": center of image + - "scale": scale of image + - "flip_index": flip index of keypoints + + return loss(bool): Option to 'return_loss'. 'return_loss=True' for + training, 'return_loss=False' for validation & test + return_heatmap (bool) : Option to return heatmap. + + Returns: + dict|tuple: if 'return_loss' is true, then return losses. + Otherwise, return predicted poses, scores, image + paths and heatmaps. + """ + + if return_loss: + return self.forward_train(img, targets, masks, img_metas, **kwargs) + return self.forward_test( + img, img_metas, return_heatmap=return_heatmap, **kwargs) + + def forward_train(self, img, targets, masks, img_metas, **kwargs): + """Forward the bottom-up model and calculate the loss. + + Note: + batch_size: N + num_keypoints: K + num_img_channel: C + img_width: imgW + img_height: imgH + heatmaps weight: W + heatmaps height: H + max_num_people: M + + Args: + img (torch.Tensor[NxCximgHximgW]): Input image. + targets (list(list)): List of heatmaps + and pafs, each of which multi-scale targets. + masks (list(list(torch.Tensor[NxHxW]))): Masks of multi-scale + target heatmaps. + img_metas (dict):Information about val&test + By default this includes: + - "image_file": image path + - "aug_data": input + - "test_scale_factor": test scale factor + - "base_size": base size of input + - "center": center of image + - "scale": scale of image + - "flip_index": flip index of keypoints + + Returns: + dict: The total loss for bottom-up + """ + + output = self.backbone(img) + + if self.with_keypoint: + output = self.keypoint_head(output) + + # if return loss + losses = dict() + if self.with_keypoint: + keypoint_losses = self.keypoint_head.get_loss( + output, targets, masks) + losses.update(keypoint_losses) + + return losses + + def forward_dummy(self, img): + """Used for computing network FLOPs. + + See ``tools/get_flops.py``. + + Args: + img (torch.Tensor): Input image. + + Returns: + Tensor: Outputs. + """ + output = self.backbone(img) + if self.with_keypoint: + output = self.keypoint_head(output) + return output + + def forward_test(self, img, img_metas, return_heatmap=False, **kwargs): + """Inference the bottom-up model. 
+ + Note: + Batchsize = N (currently support batchsize = 1) + num_img_channel: C + img_width: imgW + img_height: imgH + + Args: + flip_index (List(int)): + aug_data (List(Tensor[NxCximgHximgW])): Multi-scale image + test_scale_factor (List(float)): Multi-scale factor + base_size (Tuple(int)): Base size of image when scale is 1 + center (np.ndarray): center of image + scale (np.ndarray): the scale of image + """ + assert img.size(0) == 1 + assert len(img_metas) == 1 + + img_metas = img_metas[0] + + aug_data = img_metas['aug_data'] + + test_scale_factor = img_metas['test_scale_factor'] + base_size = img_metas['base_size'] + center = img_metas['center'] + scale = img_metas['scale'] + + result = {} + + scale_heatmaps_list = [] + scale_pafs_list = [] + + for idx, s in enumerate(sorted(test_scale_factor, reverse=True)): + image_resized = aug_data[idx].to(img.device) + + features = self.backbone(image_resized) + if self.with_keypoint: + outputs = self.keypoint_head(features) + # ignore back-ground confidence maps + heatmaps = [ + hm[:, :-1] if self.test_cfg['with_bg'] else hm + for hm in outputs['heatmaps'][-1] + ] + pafs = outputs['pafs'][-1] + + if self.test_cfg.get('flip_test', True): + # use flip test + features_flipped = self.backbone( + torch.flip(image_resized, [3])) + if self.with_keypoint: + outputs_flipped = self.keypoint_head(features_flipped) + # ignore back-ground confidence maps + heatmaps_flipped = [ + hm[:, :-1] if self.test_cfg['with_bg'] else hm + for hm in outputs_flipped['heatmaps'][-1] + ] + pafs_flipped = outputs_flipped['pafs'][-1] + + heatmaps_flipped = flip_feature_maps( + heatmaps_flipped, flip_index=img_metas['flip_index']) + pafs_flipped = flip_part_affinity_fields( + pafs_flipped, + flip_index=img_metas['flip_index'], + skeleton=img_metas['skeleton']) + + else: + heatmaps_flipped = None + pafs_flipped = None + + aggregated_heatmaps = aggregate_stage_flip( + heatmaps, + heatmaps_flipped, + index=-1, + project2image=self.test_cfg['project2image'], + size_projected=base_size, + align_corners=self.test_cfg.get('align_corners', True), + aggregate_stage='average', + aggregate_flip='average') + + aggregated_pafs = aggregate_stage_flip( + pafs, + pafs_flipped, + index=-1, + project2image=self.test_cfg['project2image'], + size_projected=base_size, + align_corners=self.test_cfg.get('align_corners', True), + aggregate_stage='average', + aggregate_flip='average') + + if isinstance(aggregated_pafs, list): + scale_pafs_list.extend(aggregated_pafs) + else: + scale_pafs_list.append(aggregated_pafs) + + if isinstance(aggregated_heatmaps, list): + scale_heatmaps_list.extend(aggregated_heatmaps) + else: + scale_heatmaps_list.append(aggregated_heatmaps) + + # average heatmaps of different scales + aggregated_heatmaps = aggregate_scale( + scale_heatmaps_list, + aggregate_scale='average', + align_corners=self.test_cfg.get('align_corners', True)) + + aggregated_pafs = aggregate_scale( + scale_pafs_list, + aggregate_scale='average', + align_corners=self.test_cfg.get('align_corners', True)) + + # perform grouping + grouped, scores = self.parser.parse(aggregated_heatmaps, + aggregated_pafs, + img_metas['skeleton'], + self.test_cfg['adjust'], + self.test_cfg['refine'], + self.test_cfg.get('filter', False)) + + preds = get_group_preds( + grouped, + center, + scale, [aggregated_heatmaps.size(3), + aggregated_heatmaps.size(2)], + use_udp=self.use_udp) + + image_paths = [] + image_paths.append(img_metas['image_file']) + + if return_heatmap: + output_heatmap = 
aggregated_heatmaps.detach().cpu().numpy() + else: + output_heatmap = None + + result['preds'] = preds + result['scores'] = scores + result['image_paths'] = image_paths + result['output_heatmap'] = output_heatmap + + return result + + def show_result(self, + img, + result, + skeleton=None, + kpt_score_thr=0.3, + bbox_color=None, + pose_kpt_color=None, + pose_limb_color=None, + radius=4, + thickness=1, + font_scale=0.5, + win_name='', + show=False, + show_keypoint_weight=False, + wait_time=0, + out_file=None): + """Draw `result` over `img`. + + Args: + img (str or Tensor): The image to be displayed. + result (list[dict]): The results to draw over `img` + (bbox_result, pose_result). + skeleton (list[list]): The connection of keypoints. + kpt_score_thr (float, optional): Minimum score of keypoints + to be shown. Default: 0.3. + pose_kpt_color (np.array[Nx3]`): Color of N keypoints. + If None, do not draw keypoints. + pose_limb_color (np.array[Mx3]): Color of M limbs. + If None, do not draw limbs. + radius (int): Radius of circles. + thickness (int): Thickness of lines. + font_scale (float): Font scales of texts. + win_name (str): The window name. + show (bool): Whether to show the image. Default: False. + show_keypoint_weight (bool): Whether to change the transparency + using the predicted confidence scores of keypoints. + wait_time (int): Value of waitKey param. + Default: 0. + out_file (str or None): The filename to write the image. + Default: None. + + Returns: + Tensor: Visualized image only if not `show` or `out_file` + """ + + img = mmcv.imread(img) + img = img.copy() + img_h, img_w, _ = img.shape + + pose_result = [] + for res in result: + pose_result.append(res['keypoints']) + + imshow_keypoints(img, pose_result, skeleton, kpt_score_thr, + pose_kpt_color, pose_limb_color, radius, thickness) + + if show: + imshow(img, win_name, wait_time) + + if out_file is not None: + imwrite(img, out_file) + + return img diff --git a/mmpose/models/heads/__init__.py b/mmpose/models/heads/__init__.py index 5f4da4484b..fa9727dae2 100644 --- a/mmpose/models/heads/__init__.py +++ b/mmpose/models/heads/__init__.py @@ -1,8 +1,10 @@ from .ae_higher_resolution_head import AEHigherResolutionHead from .ae_simple_head import AESimpleHead +from .deconv_head import DeconvHead from .deeppose_regression_head import DeepposeRegressionHead from .hmr_head import HMRMeshHead from .interhand_3d_head import Interhand3DHead +from .paf_head import PAFHead from .temporal_regression_head import TemporalRegressionHead from .topdown_heatmap_base_head import TopdownHeatmapBaseHead from .topdown_heatmap_multi_stage_head import (TopdownHeatmapMSMUHead, @@ -13,5 +15,6 @@ 'TopdownHeatmapSimpleHead', 'TopdownHeatmapMultiStageHead', 'TopdownHeatmapMSMUHead', 'TopdownHeatmapBaseHead', 'AEHigherResolutionHead', 'AESimpleHead', 'DeepposeRegressionHead', - 'TemporalRegressionHead', 'Interhand3DHead', 'HMRMeshHead' + 'TemporalRegressionHead', 'Interhand3DHead', 'HMRMeshHead', 'PAFHead', + 'DeconvHead' ] diff --git a/mmpose/models/heads/ae_higher_resolution_head.py b/mmpose/models/heads/ae_higher_resolution_head.py index a4c7a55036..fc510f9f86 100644 --- a/mmpose/models/heads/ae_higher_resolution_head.py +++ b/mmpose/models/heads/ae_higher_resolution_head.py @@ -171,7 +171,7 @@ def _get_deconv_cfg(deconv_kernel): return deconv_kernel, padding, output_padding - def get_loss(self, output, targets, masks, joints): + def get_loss(self, outputs, targets, masks, joints): """Calculate bottom-up keypoint loss. 
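# ---- Illustrative sketch, not part of this diff: building the newly exported heads ----
# With DeconvHead and PAFHead registered in mmpose/models/heads/__init__.py above,
# either head can be constructed from a config dict through the HEADS registry.
# The channel and spatial sizes are arbitrary example values; assumes this branch of
# mmpose (with DeconvHead and MaskedMSELoss) is installed.
import torch

from mmpose.models.builder import build_head

head = build_head(
    dict(
        type='DeconvHead',
        in_channels=32,
        out_channels=17,
        num_deconv_layers=0,  # no upsampling, only the final 1x1 conv
        loss_keypoint=dict(type='MaskedMSELoss', use_mask=True)))
out = head(torch.randn(1, 32, 16, 16))
assert out[0].shape == (1, 17, 16, 16)  # forward() returns a single-entry list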
Note: @@ -182,18 +182,18 @@ def get_loss(self, output, targets, masks, joints): heatmaps weight: W Args: - output (torch.Tensor[NxKxHxW]): Output heatmaps. - targets(List(torch.Tensor[NxKxHxW])): Multi-scale target heatmaps. - masks(List(torch.Tensor[NxHxW])): Masks of multi-scale target - heatmaps - joints(List(torch.Tensor[NxMxKx2])): Joints of multi-scale target - heatmaps for ae loss + outputs (List(torch.Tensor[NxKxHxW])): Multi-scale output heatmaps. + targets (List(torch.Tensor[NxKxHxW])): Multi-scale target heatmaps. + masks (List(torch.Tensor[NxHxW])): Masks of multi-scale target + heatmaps + joints (List(torch.Tensor[NxMxKx2])): Joints of multi-scale target + heatmaps for ae loss """ losses = dict() heatmaps_losses, push_losses, pull_losses = self.loss( - output, targets, masks, joints) + outputs, targets, masks, joints) for idx in range(len(targets)): if heatmaps_losses[idx] is not None: diff --git a/mmpose/models/heads/ae_simple_head.py b/mmpose/models/heads/ae_simple_head.py index 6b90eecd3e..058e2303e6 100644 --- a/mmpose/models/heads/ae_simple_head.py +++ b/mmpose/models/heads/ae_simple_head.py @@ -1,13 +1,9 @@ -import torch.nn as nn -from mmcv.cnn import (build_conv_layer, build_upsample_layer, constant_init, - normal_init) - -from mmpose.models.builder import build_loss from ..builder import HEADS +from .deconv_head import DeconvHead @HEADS.register_module() -class AESimpleHead(nn.Module): +class AESimpleHead(DeconvHead): """Associative embedding simple head. paper ref: Alejandro Newell et al. "Associative Embedding: End-to-end Learning for Joint Detection @@ -39,53 +35,23 @@ def __init__(self, with_ae_loss=None, extra=None, loss_keypoint=None): - super().__init__() - - self.loss = build_loss(loss_keypoint) - self.in_channels = in_channels dim_tag = num_joints if tag_per_joint else 1 if with_ae_loss[0]: out_channels = num_joints + dim_tag else: out_channels = num_joints - if extra is not None and not isinstance(extra, dict): - raise TypeError('extra should be dict or None.') - - if num_deconv_layers > 0: - self.deconv_layers = self._make_deconv_layer( - num_deconv_layers, - num_deconv_filters, - num_deconv_kernels, - ) - elif num_deconv_layers == 0: - self.deconv_layers = nn.Identity() - else: - raise ValueError( - f'num_deconv_layers ({num_deconv_layers}) should >= 0.') - - if extra is not None and 'final_conv_kernel' in extra: - assert extra['final_conv_kernel'] in [1, 3] - if extra['final_conv_kernel'] == 3: - padding = 1 - else: - padding = 0 - kernel_size = extra['final_conv_kernel'] - else: - kernel_size = 1 - padding = 0 - - self.final_layer = build_conv_layer( - cfg=dict(type='Conv2d'), - in_channels=num_deconv_filters[-1] - if num_deconv_layers > 0 else in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=1, - padding=padding) + super().__init__( + in_channels, + out_channels, + num_deconv_layers=num_deconv_layers, + num_deconv_filters=num_deconv_filters, + num_deconv_kernels=num_deconv_kernels, + extra=extra, + loss_keypoint=loss_keypoint) - def get_loss(self, output, targets, masks, joints): + def get_loss(self, outputs, targets, masks, joints): """Calculate bottom-up keypoint loss. Note: @@ -96,18 +62,18 @@ def get_loss(self, output, targets, masks, joints): heatmaps weight: W Args: - output (torch.Tensor[NxKxHxW]): Output heatmaps. - targets(List(torch.Tensor[NxKxHxW])): Multi-scale target heatmaps. 
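# Worked example (illustrative, not in the diff) of the out_channels computation in
# AESimpleHead.__init__ above: with num_joints=17, tag_per_joint=True and
# with_ae_loss=[True], dim_tag = 17 and out_channels = 17 + 17 = 34 (heatmap channels
# plus per-joint tag channels). With tag_per_joint=False the tag dimension collapses
# to 1 (out_channels = 18), and with with_ae_loss=[False] only the 17 heatmap channels
# remain. The resulting value is what gets passed on to DeconvHead.__init__.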
- masks(List(torch.Tensor[NxHxW])): Masks of multi-scale target - heatmaps + outputs (list(torch.Tensor[NxKxHxW])): Multi-scale output heatmaps. + targets (List(torch.Tensor[NxKxHxW])): Multi-scale target heatmaps. + masks (List(torch.Tensor[NxHxW])): Masks of multi-scale target + heatmaps joints(List(torch.Tensor[NxMxKx2])): Joints of multi-scale target - heatmaps for ae loss + heatmaps for ae loss """ losses = dict() heatmaps_losses, push_losses, pull_losses = self.loss( - output, targets, masks, joints) + outputs, targets, masks, joints) for idx in range(len(targets)): if heatmaps_losses[idx] is not None: @@ -130,74 +96,3 @@ def get_loss(self, output, targets, masks, joints): losses['pull_loss'] += pull_loss return losses - - def forward(self, x): - """Forward function.""" - if isinstance(x, list): - x = x[0] - final_outputs = [] - x = self.deconv_layers(x) - y = self.final_layer(x) - final_outputs.append(y) - return final_outputs - - def _make_deconv_layer(self, num_layers, num_filters, num_kernels): - """Make deconv layers.""" - if num_layers != len(num_filters): - error_msg = f'num_layers({num_layers}) ' \ - f'!= length of num_filters({len(num_filters)})' - raise ValueError(error_msg) - if num_layers != len(num_kernels): - error_msg = f'num_layers({num_layers}) ' \ - f'!= length of num_kernels({len(num_kernels)})' - raise ValueError(error_msg) - - layers = [] - for i in range(num_layers): - kernel, padding, output_padding = \ - self._get_deconv_cfg(num_kernels[i]) - - planes = num_filters[i] - layers.append( - build_upsample_layer( - dict(type='deconv'), - in_channels=self.in_channels, - out_channels=planes, - kernel_size=kernel, - stride=2, - padding=padding, - output_padding=output_padding, - bias=False)) - layers.append(nn.BatchNorm2d(planes)) - layers.append(nn.ReLU(inplace=True)) - self.in_channels = planes - - return nn.Sequential(*layers) - - @staticmethod - def _get_deconv_cfg(deconv_kernel): - """Get configurations for deconv layers.""" - if deconv_kernel == 4: - padding = 1 - output_padding = 0 - elif deconv_kernel == 3: - padding = 1 - output_padding = 1 - elif deconv_kernel == 2: - padding = 0 - output_padding = 0 - else: - raise ValueError(f'Not supported num_kernels ({deconv_kernel}).') - - return deconv_kernel, padding, output_padding - - def init_weights(self): - """Initialize model weights.""" - for _, m in self.deconv_layers.named_modules(): - if isinstance(m, nn.ConvTranspose2d): - normal_init(m, std=0.001) - elif isinstance(m, nn.BatchNorm2d): - constant_init(m, 1) - for m in self.final_layer.modules(): - if isinstance(m, nn.Conv2d): - normal_init(m, std=0.001, bias=0) diff --git a/mmpose/models/heads/deconv_head.py b/mmpose/models/heads/deconv_head.py new file mode 100644 index 0000000000..5f1fd79f9e --- /dev/null +++ b/mmpose/models/heads/deconv_head.py @@ -0,0 +1,292 @@ +import torch +import torch.nn as nn +from mmcv.cnn import (build_conv_layer, build_norm_layer, build_upsample_layer, + constant_init, normal_init) + +from mmpose.models.builder import HEADS, build_loss +from mmpose.models.utils.ops import resize + + +@HEADS.register_module() +class DeconvHead(nn.Module): + """Simple deconv head. + + Args: + in_channels (int): Number of input channels. + out_channels (int): Number of output channels. + num_deconv_layers (int): Number of deconv layers. + num_deconv_layers should >= 0. Note that 0 means + no deconv layers. + num_deconv_filters (list|tuple): Number of filters. 
+ If num_deconv_layers > 0, the length of + num_deconv_kernels (list|tuple): Kernel sizes. + in_index (int|Sequence[int]): Input feature index. Default: 0 + input_transform (str|None): Transformation type of input features. + Options: 'resize_concat', 'multiple_select', None. + 'resize_concat': Multiple feature maps will be resized to the + same size as the first one and then concat together. + Usually used in FCN head of HRNet. + 'multiple_select': Multiple feature maps will be bundle into + a list and passed into decode head. + None: Only one select feature map is allowed. + Default: None. + align_corners (bool): align_corners argument of F.interpolate. + Default: False. + loss_keypoint (dict): Config for loss. Default: None. + """ + + def __init__(self, + in_channels=3, + out_channels=17, + num_deconv_layers=3, + num_deconv_filters=(256, 256, 256), + num_deconv_kernels=(4, 4, 4), + extra=None, + in_index=0, + input_transform=None, + align_corners=False, + loss_keypoint=None): + super().__init__() + + self.in_channels = in_channels + self.loss = build_loss(loss_keypoint) + + self._init_inputs(in_channels, in_index, input_transform) + self.in_index = in_index + self.align_corners = align_corners + + if extra is not None and not isinstance(extra, dict): + raise TypeError('extra should be dict or None.') + + if num_deconv_layers > 0: + self.deconv_layers = self._make_deconv_layer( + num_deconv_layers, + num_deconv_filters, + num_deconv_kernels, + ) + elif num_deconv_layers == 0: + self.deconv_layers = nn.Identity() + else: + raise ValueError( + f'num_deconv_layers ({num_deconv_layers}) should >= 0.') + + identity_final_layer = False + if extra is not None and 'final_conv_kernel' in extra: + assert extra['final_conv_kernel'] in [0, 1, 3] + if extra['final_conv_kernel'] == 3: + padding = 1 + elif extra['final_conv_kernel'] == 1: + padding = 0 + else: + # 0 for Identity mapping. + identity_final_layer = True + kernel_size = extra['final_conv_kernel'] + else: + kernel_size = 1 + padding = 0 + + if identity_final_layer: + self.final_layer = nn.Identity() + else: + conv_channels = num_deconv_filters[ + -1] if num_deconv_layers > 0 else self.in_channels + + layers = [] + if extra is not None: + num_conv_layers = extra.get('num_conv_layers', 0) + num_conv_kernels = extra.get('num_conv_kernels', + [1] * num_conv_layers) + + for i in range(num_conv_layers): + layers.append( + build_conv_layer( + dict(type='Conv2d'), + in_channels=conv_channels, + out_channels=conv_channels, + kernel_size=num_conv_kernels[i], + stride=1, + padding=(num_conv_kernels[i] - 1) // 2)) + layers.append( + build_norm_layer(dict(type='BN'), conv_channels)[1]) + layers.append(nn.ReLU(inplace=True)) + + layers.append( + build_conv_layer( + cfg=dict(type='Conv2d'), + in_channels=conv_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=1, + padding=padding)) + + if len(layers) > 1: + self.final_layer = nn.Sequential(*layers) + else: + self.final_layer = layers[0] + + def _init_inputs(self, in_channels, in_index, input_transform): + """Check and initialize input transforms. + + The in_channels, in_index and input_transform must match. + Specifically, when input_transform is None, only single feature map + will be selected. So in_channels and in_index must be of type int. + When input_transform is not None, in_channels and in_index must be + list or tuple, with the same length. + + Args: + in_channels (int|Sequence[int]): Input channels. + in_index (int|Sequence[int]): Input feature index. 
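# ---- Illustrative sketch, not part of this diff: how `extra` shapes the final layer ----
# The constructor above interprets extra['final_conv_kernel'] (0 -> identity, 1 -> 1x1
# conv, 3 -> 3x3 conv) and extra['num_conv_layers'] (extra conv-BN-ReLU blocks before
# the final conv). Channel and spatial sizes below are arbitrary example values;
# assumes this branch of mmpose is installed.
import torch

from mmpose.models.heads import DeconvHead

head = DeconvHead(
    in_channels=32,
    out_channels=17,
    num_deconv_layers=2,
    num_deconv_filters=(64, 64),
    num_deconv_kernels=(4, 4),
    extra=dict(final_conv_kernel=3, num_conv_layers=1, num_conv_kernels=[3]),
    loss_keypoint=dict(type='MaskedMSELoss'))
out = head(torch.randn(1, 32, 16, 16))
assert out[0].shape == (1, 17, 64, 64)  # two stride-2 deconvs: 16 -> 32 -> 64

# final_conv_kernel=0 turns the final layer into nn.Identity(), so the head emits the
# last deconv features directly (64 channels here, not out_channels).
head_id = DeconvHead(
    in_channels=32,
    out_channels=17,
    num_deconv_layers=2,
    num_deconv_filters=(64, 64),
    num_deconv_kernels=(4, 4),
    extra=dict(final_conv_kernel=0),
    loss_keypoint=dict(type='MaskedMSELoss'))
assert head_id(torch.randn(1, 32, 16, 16))[0].shape == (1, 64, 64, 64)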
+ input_transform (str|None): Transformation type of input features. + Options: 'resize_concat', 'multiple_select', None. + 'resize_concat': Multiple feature maps will be resize to the + same size as first one and than concat together. + Usually used in FCN head of HRNet. + 'multiple_select': Multiple feature maps will be bundle into + a list and passed into decode head. + None: Only one select feature map is allowed. + """ + + if input_transform is not None: + assert input_transform in ['resize_concat', 'multiple_select'] + self.input_transform = input_transform + self.in_index = in_index + if input_transform is not None: + assert isinstance(in_channels, (list, tuple)) + assert isinstance(in_index, (list, tuple)) + assert len(in_channels) == len(in_index) + if input_transform == 'resize_concat': + self.in_channels = sum(in_channels) + else: + self.in_channels = in_channels + else: + assert isinstance(in_channels, int) + assert isinstance(in_index, int) + self.in_channels = in_channels + + def _transform_inputs(self, inputs): + """Transform inputs for decoder. + + Args: + inputs (list[Tensor] | Tensor): multi-level img features. + + Returns: + Tensor: The transformed inputs + """ + if not isinstance(inputs, list): + return inputs + + if self.input_transform == 'resize_concat': + inputs = [inputs[i] for i in self.in_index] + upsampled_inputs = [ + resize( + input=x, + size=inputs[0].shape[2:], + mode='bilinear', + align_corners=self.align_corners) for x in inputs + ] + inputs = torch.cat(upsampled_inputs, dim=1) + elif self.input_transform == 'multiple_select': + inputs = [inputs[i] for i in self.in_index] + else: + inputs = inputs[self.in_index] + + return inputs + + def _make_deconv_layer(self, num_layers, num_filters, num_kernels): + """Make deconv layers.""" + if num_layers != len(num_filters): + error_msg = f'num_layers({num_layers}) ' \ + f'!= length of num_filters({len(num_filters)})' + raise ValueError(error_msg) + if num_layers != len(num_kernels): + error_msg = f'num_layers({num_layers}) ' \ + f'!= length of num_kernels({len(num_kernels)})' + raise ValueError(error_msg) + + layers = [] + for i in range(num_layers): + kernel, padding, output_padding = \ + self._get_deconv_cfg(num_kernels[i]) + + planes = num_filters[i] + layers.append( + build_upsample_layer( + dict(type='deconv'), + in_channels=self.in_channels, + out_channels=planes, + kernel_size=kernel, + stride=2, + padding=padding, + output_padding=output_padding, + bias=False)) + layers.append(nn.BatchNorm2d(planes)) + layers.append(nn.ReLU(inplace=True)) + self.in_channels = planes + + return nn.Sequential(*layers) + + @staticmethod + def _get_deconv_cfg(deconv_kernel): + """Get configurations for deconv layers.""" + if deconv_kernel == 4: + padding = 1 + output_padding = 0 + elif deconv_kernel == 3: + padding = 1 + output_padding = 1 + elif deconv_kernel == 2: + padding = 0 + output_padding = 0 + else: + raise ValueError(f'Not supported num_kernels ({deconv_kernel}).') + + return deconv_kernel, padding, output_padding + + def get_loss(self, outputs, targets, masks): + """Calculate bottom-up masked mse loss. + + Note: + batch_size: N + num_channels: C + heatmaps height: H + heatmaps weight: W + + Args: + outputs (List(torch.Tensor[NxCxHxW])): Multi-scale outputs. + targets (List(torch.Tensor[NxCxHxW])): Multi-scale targets. + masks (List(torch.Tensor[NxHxW])): Masks of multi-scale targets. 
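# ---- Illustrative sketch, not part of this diff: the input transforms above ----
# 'resize_concat' selects the feature maps listed in in_index, resizes them to the
# first selected map and concatenates along channels (so in_channels must be the
# per-level channel counts). 'multiple_select' only gathers the selected maps.
# Example values are arbitrary; assumes this branch of mmpose is installed.
import torch

from mmpose.models.heads import DeconvHead

head = DeconvHead(
    in_channels=[16, 32],
    in_index=[0, 1],
    input_transform='resize_concat',
    out_channels=17,
    num_deconv_layers=0,
    loss_keypoint=dict(type='MaskedMSELoss'))
feats = [torch.randn(1, 16, 32, 32), torch.randn(1, 32, 16, 16)]
out = head(feats)  # the 16x16 map is upsampled to 32x32 and concatenated (48 channels)
assert out[0].shape == (1, 17, 32, 32)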
+ """ + + losses = dict() + + for idx in range(len(targets)): + if 'loss' not in losses: + losses['loss'] = self.loss(outputs[idx], targets[idx], + masks[idx]) + else: + losses['loss'] += self.loss(outputs[idx], targets[idx], + masks[idx]) + + return losses + + def forward(self, x): + """Forward function.""" + x = self._transform_inputs(x) + final_outputs = [] + x = self.deconv_layers(x) + y = self.final_layer(x) + final_outputs.append(y) + return final_outputs + + def init_weights(self): + """Initialize model weights.""" + for _, m in self.deconv_layers.named_modules(): + if isinstance(m, nn.ConvTranspose2d): + normal_init(m, std=0.001) + elif isinstance(m, nn.BatchNorm2d): + constant_init(m, 1) + for m in self.final_layer.modules(): + if isinstance(m, nn.Conv2d): + normal_init(m, std=0.001, bias=0) + elif isinstance(m, nn.BatchNorm2d): + constant_init(m, 1) diff --git a/mmpose/models/heads/paf_head.py b/mmpose/models/heads/paf_head.py new file mode 100644 index 0000000000..f652df9729 --- /dev/null +++ b/mmpose/models/heads/paf_head.py @@ -0,0 +1,113 @@ +import torch.nn as nn + +from mmpose.models.builder import HEADS, build_head + + +@HEADS.register_module() +class PAFHead(nn.Module): + """Bottom-up PAF (Part Affinity Fields) head. + + Paper ref: Cao, Zhe, et al. "OpenPose: realtime multi-person + 2D pose estimation using Part Affinity Fields." (TPAMI'2019) + + Args: + heatmap_heads_cfg (list(dict)): Configs of heatmap heads. + paf_heads_cfg (list(dict)): Configs of paf heads. + heatmap_index (list(int)): The correspondence between heatmap heads + and input features. + paf_index (list(int)): The correspondence between paf heads + and input features. + """ + + def __init__(self, heatmap_heads_cfg, paf_heads_cfg, heatmap_index, + paf_index): + super().__init__() + + assert len(heatmap_heads_cfg) == len(heatmap_index) + assert len(paf_heads_cfg) == len(paf_index) + + # build heatmap heads + self.heatmap_heads_list = nn.ModuleList() + for head_cfg in heatmap_heads_cfg: + self.heatmap_heads_list.append(build_head(head_cfg)) + + # build paf heads + self.paf_heads_list = nn.ModuleList() + for head_cfg in paf_heads_cfg: + self.paf_heads_list.append(build_head(head_cfg)) + + self.heatmap_index = heatmap_index + self.paf_index = paf_index + + def get_loss(self, outputs, targets, masks): + """Calculate heatmap and paf loss. + + Note: + batch_size: N + num_channels: C + heatmaps height: H + heatmaps weight: W + + Args: + outputs (dict): Outputs of network, including heatmaps and pafs. + targets (list(list)): List of heatmaps + and pafs, each of which multi-scale targets. + masks (list(list(torch.Tensor[NxHxW]))): Masks of multi-scale + target heatmaps. 
+ """ + + losses = dict() + + heatmap_outputs = outputs['heatmaps'] + heatmap_targets = targets[:len(self.heatmap_heads_list)] + heatmap_masks = masks[:len(self.heatmap_heads_list)] + for idx, head in enumerate(self.heatmap_heads_list): + heatmap_losses = head.get_loss(heatmap_outputs[idx], + heatmap_targets[idx], + heatmap_masks[idx]) + if 'heatmap_loss' not in losses: + losses['heatmap_loss'] = heatmap_losses['loss'] + else: + losses['heatmap_loss'] += heatmap_losses['loss'] + + paf_outputs = outputs['pafs'] + paf_targets = targets[len(self.heatmap_heads_list):] + paf_masks = masks[len(self.heatmap_heads_list):] + for idx, head in enumerate(self.paf_heads_list): + paf_losses = head.get_loss(paf_outputs[idx], paf_targets[idx], + paf_masks[idx]) + if 'paf_loss' not in losses: + losses['paf_loss'] = paf_losses['loss'] + else: + losses['paf_loss'] += paf_losses['loss'] + + return losses + + def forward(self, x): + """Forward function.""" + if not isinstance(x, list): + x = [x] + + assert max(self.heatmap_index) < len(x) + assert max(self.paf_index) < len(x) + + final_outputs = {'heatmaps': [], 'pafs': []} + + for idx, head in enumerate(self.heatmap_heads_list): + features = x[self.heatmap_index[idx]] + output = head(features) + final_outputs['heatmaps'].append(output) + + for idx, head in enumerate(self.paf_heads_list): + features = x[self.paf_index[idx]] + output = head(features) + final_outputs['pafs'].append(output) + + return final_outputs + + def init_weights(self): + for head in self.heatmap_heads_list: + head.init_weights() + + for head in self.paf_heads_list: + head.init_weights() diff --git a/mmpose/models/heads/topdown_heatmap_simple_head.py b/mmpose/models/heads/topdown_heatmap_simple_head.py index 6f253fede8..7cf02e1d97 100644 --- a/mmpose/models/heads/topdown_heatmap_simple_head.py +++ b/mmpose/models/heads/topdown_heatmap_simple_head.py @@ -28,11 +28,11 @@ class TopdownHeatmapSimpleHead(TopdownHeatmapBaseHead): num_deconv_filters (list|tuple): Number of filters. If num_deconv_layers > 0, the length of num_deconv_kernels (list|tuple): Kernel sizes. - in_index (int|Sequence[int]): Input feature index. Default: -1 + in_index (int|Sequence[int]): Input feature index. Default: 0 input_transform (str|None): Transformation type of input features. Options: 'resize_concat', 'multiple_select', None. - 'resize_concat': Multiple feature maps will be resize to the - same size as first one and than concat together. + 'resize_concat': Multiple feature maps will be resized to the + same size as the first one and then concat together. Usually used in FCN head of HRNet. 'multiple_select': Multiple feature maps will be bundle into a list and passed into decode head. diff --git a/mmpose/models/losses/mse_loss.py b/mmpose/models/losses/mse_loss.py index 4232d13590..910d97fcf5 100644 --- a/mmpose/models/losses/mse_loss.py +++ b/mmpose/models/losses/mse_loss.py @@ -150,3 +150,40 @@ def forward(self, output, target, target_weight): losses = torch.cat(losses, dim=1) return self._ohkm(losses) * self.loss_weight + + +@LOSSES.register_module() +class MaskedMSELoss(nn.Module): + """MSE loss for the bottom-up outputs with mask. + + Args: + use_mask (bool): Option to use mask of target. Default: True. + loss_weight (float): Weight of the loss. Default: 1.0. + supervise_empty (bool): Whether to supervise empty channels. 
+ """ + + def __init__(self, use_mask=True, loss_weight=1., supervise_empty=True): + super().__init__() + self.criterion = nn.MSELoss() + self.use_mask = use_mask + self.loss_weight = loss_weight + self.supervise_empty = supervise_empty + + def forward(self, output, target, mask): + """Forward function.""" + assert output.size() == target.size() + + if self.use_mask: + loss = self.criterion( + output, target) * mask[:, None, :, :].expand_as(output) + # if not self.supervise_empty: + # empty_mask = (target.sum(dim=[2, 3], keepdim=True) > 0).float() + # mask = empty_mask.expand_as( + # output) * mask[:, None, :, :].expand_as(output) + # else: + # mask = mask[:, None, :, :].expand_as(output) + # loss = self.criterion(output * mask, target * mask) + else: + loss = self.criterion(output, target) + + return loss * self.loss_weight diff --git a/tests/test_backbones/test_cpm.py b/tests/test_backbones/test_cpm.py index 3a9481539a..dc6dc45dea 100644 --- a/tests/test_backbones/test_cpm.py +++ b/tests/test_backbones/test_cpm.py @@ -2,6 +2,24 @@ import torch from mmpose.models import CPM +from mmpose.models.backbones.cpm import CpmBlock + + +def test_cpm_block(): + with pytest.raises(AssertionError): + # len(channels) == len(kernels) + CpmBlock( + 3, channels=[3, 3, 3], kernels=[ + 1, + ]) + + # Test CPM Block + model = CpmBlock(3, channels=[3, 3, 3], kernels=[1, 1, 1]) + model.train() + + imgs = torch.randn(1, 3, 10, 10) + feat = model(imgs) + assert feat.shape == torch.Size([1, 3, 10, 10]) def test_cpm_backbone(): diff --git a/tests/test_backbones/test_lightweight_openpose.py b/tests/test_backbones/test_lightweight_openpose.py new file mode 100644 index 0000000000..18ac28c300 --- /dev/null +++ b/tests/test_backbones/test_lightweight_openpose.py @@ -0,0 +1,25 @@ +import pytest +import torch + +from mmpose.models import LightweightOpenPoseNetwork + + +def test_lightweight_openpose_network_backbone(): + with pytest.raises(AssertionError): + # OpenPoseNetwork's num_stacks should larger than 0 + LightweightOpenPoseNetwork(in_channels=3, num_stages=-1) + + with pytest.raises(AssertionError): + # OpenPoseNetwork's in_channels should be 3 + LightweightOpenPoseNetwork(in_channels=2) + + # Test OpenPoseNetwork + model = LightweightOpenPoseNetwork(in_channels=3) + model.init_weights() + model.train() + + imgs = torch.randn(1, 3, 368, 368) + feat = model(imgs) + assert len(feat) == 4 + assert feat[0].shape == torch.Size([1, 19, 46, 46]) + assert feat[-1].shape == torch.Size([1, 38, 46, 46]) diff --git a/tests/test_backbones/test_openpose_v1.py b/tests/test_backbones/test_openpose_v1.py new file mode 100644 index 0000000000..1599fd58d4 --- /dev/null +++ b/tests/test_backbones/test_openpose_v1.py @@ -0,0 +1,25 @@ +import pytest +import torch + +from mmpose.models import OpenPoseNetworkV1 + + +def test_openpose_network_v1_backbone(): + with pytest.raises(AssertionError): + # OpenPoseNetwork's num_stacks should larger than 0 + OpenPoseNetworkV1(in_channels=3, num_stages=-1) + + with pytest.raises(AssertionError): + # OpenPoseNetwork's in_channels should be 3 + OpenPoseNetworkV1(in_channels=2) + + # Test OpenPoseNetwork + model = OpenPoseNetworkV1(in_channels=3) + model.init_weights() + model.train() + + imgs = torch.randn(1, 3, 368, 368) + feat = model(imgs) + assert len(feat) == 12 + assert feat[0].shape == torch.Size([1, 19, 46, 46]) + assert feat[-1].shape == torch.Size([1, 38, 46, 46]) diff --git a/tests/test_backbones/test_openpose_v2.py b/tests/test_backbones/test_openpose_v2.py new file mode 100644 index 
0000000000..0af920acb2 --- /dev/null +++ b/tests/test_backbones/test_openpose_v2.py @@ -0,0 +1,38 @@ +import pytest +import torch + +from mmpose.models import OpenPoseNetworkV2 + + +def test_openpose_network_v2_backbone(): + with pytest.raises(AssertionError): + # OpenPoseNetwork's num_stacks should larger than 0 + OpenPoseNetworkV2(in_channels=3, num_stages=-1) + + with pytest.raises(AssertionError): + # OpenPoseNetwork's in_channels should be 3 + OpenPoseNetworkV2(in_channels=2) + + with pytest.raises(AssertionError): + # len(stage_types) == num_stages + OpenPoseNetworkV2( + in_channels=3, num_stages=3, stage_types=('PAF', 'CM')) + + with pytest.raises(ValueError): + # stage_type should be either 'CM' or 'PAF'. + OpenPoseNetworkV2( + in_channels=3, num_stages=2, stage_types=('PAF', 'CC')) + + # Test OpenPoseNetwork + model = OpenPoseNetworkV2(in_channels=3) + model.init_weights() + model.train() + + imgs = torch.randn(1, 3, 368, 368) + feat = model(imgs) + assert len(feat) == 6 + assert feat[0].shape == torch.Size([1, 38, 46, 46]) + assert feat[-1].shape == torch.Size([1, 19, 46, 46]) + + +test_openpose_network_v2_backbone() diff --git a/tests/test_evaluation/test_bottom_up_eval.py b/tests/test_evaluation/test_bottom_up_eval.py index bcb3788ade..9993219880 100644 --- a/tests/test_evaluation/test_bottom_up_eval.py +++ b/tests/test_evaluation/test_bottom_up_eval.py @@ -1,117 +1,96 @@ -import copy - import numpy as np +import pytest import torch -from mmpose.core import (aggregate_results, get_group_preds, - get_multi_stage_outputs) +from mmpose.core import (aggregate_scale, aggregate_stage_flip, + flip_feature_maps, flip_part_affinity_fields, + get_group_preds, split_ae_outputs) + + +def test_split_ae_outputs(): + fake_outputs = [torch.zeros((1, 4, 2, 2))] + heatmaps, tags = split_ae_outputs( + fake_outputs, num_joints=4, with_heatmaps=[False], with_ae=[True]) + + +def test_flip_feature_maps(): + fake_outputs = [torch.zeros((1, 4, 2, 2))] + _ = flip_feature_maps(fake_outputs, None) + _ = flip_feature_maps(fake_outputs, flip_index=[1, 0]) -def test_get_multi_stage_outputs(): +def test_flip_part_affinity_fields(): + fake_outputs = [torch.zeros((1, 4, 2, 2))] + + _ = flip_part_affinity_fields(fake_outputs, None, skeleton=[]) + _ = flip_part_affinity_fields( + fake_outputs, flip_index=[1, 0], skeleton=[[0, 1]]) + + with pytest.raises(ValueError): + _ = flip_part_affinity_fields( + fake_outputs, flip_index=[1, 0], skeleton=[[0, 0]]) + + +def test_aggregate_stage_flip(): fake_outputs = [torch.zeros((1, 4, 2, 2))] fake_flip_outputs = [torch.ones((1, 4, 2, 2))] - # outputs_flip - outputs, heatmaps, tags = \ - get_multi_stage_outputs(outputs=copy.deepcopy(fake_outputs), - outputs_flip=None, - num_joints=4, with_heatmaps=[False], - with_ae=[True]) - assert heatmaps == [] - outputs, heatmaps, tags = \ - get_multi_stage_outputs(outputs=copy.deepcopy(fake_outputs), - outputs_flip=None, - num_joints=2, with_heatmaps=[True], - with_ae=[True]) - assert len(heatmaps) == 1 - flip_index = [1, 0] - outputs, heatmaps, tags = \ - get_multi_stage_outputs(outputs=copy.deepcopy(fake_outputs), - outputs_flip=fake_flip_outputs, - num_joints=2, with_heatmaps=[True], - with_ae=[True], flip_index=flip_index) - assert len(heatmaps) == 2 - outputs, heatmaps, tags = \ - get_multi_stage_outputs(outputs=copy.deepcopy(fake_outputs), - tag_per_joint=False, - outputs_flip=fake_flip_outputs, - num_joints=2, with_heatmaps=[True], - with_ae=[True], flip_index=flip_index) - assert len(heatmaps) == 2 - # with heatmaps & with ae - 
fake_outputs = [torch.zeros((1, 4, 2, 2)), torch.ones((1, 2, 4, 4))] - fake_flip_outputs = [torch.ones((1, 4, 2, 2)), torch.ones((1, 2, 4, 4))] - outputs, heatmaps, tags = \ - get_multi_stage_outputs(outputs=copy.deepcopy(fake_outputs), - outputs_flip=None, - num_joints=2, with_heatmaps=[True, False], - with_ae=[True, True]) - assert torch.allclose(heatmaps[0], torch.tensor(0.)) - outputs, heatmaps, tags = \ - get_multi_stage_outputs(outputs=copy.deepcopy(fake_outputs), - outputs_flip=fake_flip_outputs, - num_joints=2, with_heatmaps=[True, True], - with_ae=[True, False]) - assert torch.allclose(heatmaps[0], torch.tensor(0.5)) - outputs, heatmaps, tags = \ - get_multi_stage_outputs(outputs=copy.deepcopy(fake_outputs), - outputs_flip=fake_flip_outputs, - num_joints=2, with_heatmaps=[True, False], - with_ae=[True, False], flip_index=flip_index) - assert torch.allclose(heatmaps[0], torch.tensor(0.)) - # size_projected - outputs, heatmaps, tags = \ - get_multi_stage_outputs(outputs=copy.deepcopy(fake_outputs), - outputs_flip=None, - num_joints=2, with_heatmaps=[True, True], - with_ae=[True, False], - size_projected=(8, 8)) - assert heatmaps[0].shape == torch.Size([1, 2, 8, 8]) - outputs, heatmaps, tags = \ - get_multi_stage_outputs(outputs=copy.deepcopy(fake_outputs), - outputs_flip=fake_flip_outputs, - num_joints=2, with_heatmaps=[True, True], - with_ae=[True, False], - align_corners=True) - assert torch.allclose(heatmaps[0], torch.tensor(0.5)) - - -def test_aggregate_results(): - fake_heatmaps = [torch.zeros((1, 2, 2, 2))] - fake_tags = [torch.zeros((1, 2, 2, 2))] - aggregated_heatmaps, tags_list = \ - aggregate_results(scale=1, aggregated_heatmaps=None, tags_list=[], - heatmaps=fake_heatmaps, tags=fake_tags, - test_scale_factor=[1], project2image=True, - flip_test=False) - assert torch.allclose(aggregated_heatmaps, torch.tensor(0.)) - fake_aggr_heatmaps = torch.ones(1, 2, 2, 2) - aggregated_heatmaps, tags_list = \ - aggregate_results(scale=1, aggregated_heatmaps=fake_aggr_heatmaps, - tags_list=[], heatmaps=fake_heatmaps, - tags=fake_tags, test_scale_factor=[1], - project2image=True, flip_test=False) - assert torch.allclose(aggregated_heatmaps, torch.tensor(1.)) - aggregated_heatmaps, tags_list = \ - aggregate_results(scale=1, aggregated_heatmaps=fake_aggr_heatmaps, - tags_list=[], heatmaps=fake_heatmaps, - tags=fake_tags, test_scale_factor=[1], - project2image=True, flip_test=False, - align_corners=True) - assert torch.allclose(aggregated_heatmaps, torch.tensor(1.)) - fake_heatmaps = [torch.zeros((1, 2, 2, 2)), torch.ones((1, 2, 2, 2))] - fake_aggr_heatmaps = torch.ones(1, 2, 4, 4) - aggregated_heatmaps, tags_list = \ - aggregate_results(scale=1, aggregated_heatmaps=fake_aggr_heatmaps, - tags_list=[], heatmaps=fake_heatmaps, - tags=fake_tags, test_scale_factor=[1], - project2image=False, flip_test=True) - assert aggregated_heatmaps.shape == torch.Size((1, 2, 4, 4)) - aggregated_heatmaps, tags_list = \ - aggregate_results(scale=2, aggregated_heatmaps=fake_aggr_heatmaps, - tags_list=[], heatmaps=fake_heatmaps, - tags=fake_tags, test_scale_factor=[1, 2], - project2image=False, flip_test=True) - assert aggregated_heatmaps.shape == torch.Size((1, 2, 4, 4)) + output = aggregate_stage_flip( + fake_outputs, + fake_flip_outputs, + index=-1, + project2image=True, + size_projected=(4, 4), + align_corners=False, + aggregate_stage='concat', + aggregate_flip='average') + assert isinstance(output, list) + + output = aggregate_stage_flip( + fake_outputs, + fake_flip_outputs, + index=-1, + 
project2image=True, + size_projected=(4, 4), + align_corners=False, + aggregate_stage='average', + aggregate_flip='average') + assert isinstance(output, list) + + output = aggregate_stage_flip( + fake_outputs, + fake_flip_outputs, + index=-1, + project2image=True, + size_projected=(4, 4), + align_corners=False, + aggregate_stage='average', + aggregate_flip='concat') + assert isinstance(output, list) + + output = aggregate_stage_flip( + fake_outputs, + fake_flip_outputs, + index=-1, + project2image=True, + size_projected=(4, 4), + align_corners=False, + aggregate_stage='concat', + aggregate_flip='concat') + assert isinstance(output, list) + + +def test_aggregate_scale(): + fake_outputs = [torch.zeros((1, 4, 2, 2)), torch.zeros((1, 4, 2, 2))] + output = aggregate_scale( + fake_outputs, align_corners=False, aggregate_scale='average') + assert isinstance(output, torch.Tensor) + assert output.shape == fake_outputs[0].shape + + output = aggregate_scale( + fake_outputs, align_corners=False, aggregate_scale='unsqueeze_concat') + + assert isinstance(output, torch.Tensor) + assert len(output.shape) == len(fake_outputs[0].shape) + 1 def test_get_group_preds(): diff --git a/tests/test_model/test_bottom_up_forward.py b/tests/test_model/test_bottom_up_forward.py index c72bf8601a..a8ffe4617c 100644 --- a/tests/test_model/test_bottom_up_forward.py +++ b/tests/test_model/test_bottom_up_forward.py @@ -4,7 +4,7 @@ from mmpose.models.detectors import AssociativeEmbedding -def test_bottomup_forward(): +def test_ae_forward(): model_cfg = dict( type='AssociativeEmbedding', pretrained=None, diff --git a/tests/test_model/test_bottom_up_head.py b/tests/test_model/test_bottom_up_head.py index 41b2f4ef1e..df9b0093ba 100644 --- a/tests/test_model/test_bottom_up_head.py +++ b/tests/test_model/test_bottom_up_head.py @@ -31,7 +31,7 @@ def test_ae_simple_head(): in_channels=512, num_joints=17, with_ae_loss=[True], - extra={'final_conv_kernel': 0}, + extra={'final_conv_kernel': -1}, loss_keypoint=dict( type='MultiLossFactory', num_joints=17, diff --git a/tests/test_pipelines/test_bottom_up_pipelines.py b/tests/test_pipelines/test_bottom_up_pipelines.py index ec855a4b79..b1b14eea36 100644 --- a/tests/test_pipelines/test_bottom_up_pipelines.py +++ b/tests/test_pipelines/test_bottom_up_pipelines.py @@ -276,15 +276,15 @@ def test_BottomUpGenerateHeatmapTarget(): generate_heatmap_target = BottomUpGenerateHeatmapTarget(2) results_generate_heatmap_target = generate_heatmap_target(results) - assert 'target' in results_generate_heatmap_target - assert len(results_generate_heatmap_target['target'] + assert 'targets' in results_generate_heatmap_target + assert len(results_generate_heatmap_target['targets'] ) == results['ann_info']['num_scales'] def test_BottomUpGeneratePAFTarget(): ann_info = {} - ann_info['skeleton'] = [[1, 2], [3, 4]] + ann_info['skeleton'] = [[0, 1], [2, 3]] ann_info['heatmap_size'] = np.array([5]) ann_info['num_joints'] = 4 ann_info['num_scales'] = 1 @@ -305,7 +305,7 @@ def test_BottomUpGeneratePAFTarget(): generate_paf_target = BottomUpGeneratePAFTarget(1) results_generate_paf_target = generate_paf_target(results) sqrt = np.sqrt(2) / 2 - assert (results_generate_paf_target['target'] == np.array( + assert (results_generate_paf_target['targets'] == np.array( [[[sqrt, sqrt, 0, sqrt, sqrt], [sqrt, sqrt, sqrt, sqrt, sqrt], [0, sqrt, sqrt, sqrt, 0], [sqrt, sqrt, sqrt, sqrt, sqrt], [sqrt, sqrt, 0, sqrt, sqrt]],