Skip to content

Commit

Permalink
ae inference align with master
Browse files Browse the repository at this point in the history
  • Loading branch information
ly015 committed Feb 7, 2023
1 parent d83c4ba commit 4740c5c
Show file tree
Hide file tree
Showing 5 changed files with 147 additions and 78 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@
decoder=dict(codec, heatmap_size=codec['input_size'])),
test_cfg=dict(
multiscale_test=False,
flip_test=True,
flip_test=False,
shift_heatmap=True,
restore_heatmap_size=True,
align_corners=False))
Expand All @@ -113,9 +113,14 @@
dict(
type='BottomupResize',
input_size=codec['input_size'],
size_factor=32,
size_factor=64,
resize_mode='expand'),
dict(type='PackPoseInputs')
dict(
type='PackPoseInputs',
meta_keys=('id', 'img_id', 'img_path', 'crowd_index', 'ori_shape',
'img_shape', 'input_size', 'input_center', 'input_scale',
'flip', 'flip_direction', 'flip_indices', 'raw_ann_info',
'skeleton_links'))
]

# data loaders
Expand All @@ -142,7 +147,7 @@
type=dataset_type,
data_root=data_root,
data_mode=data_mode,
ann_file='annotations/person_keypoints_val2017.json',
ann_file='annotations/person_keypoints_val2017_tiny_clean.json',
data_prefix=dict(img='val2017/'),
test_mode=True,
pipeline=val_pipeline,
Expand All @@ -152,7 +157,8 @@
# evaluators
val_evaluator = dict(
type='CocoMetric',
ann_file=data_root + 'annotations/person_keypoints_val2017.json',
ann_file=data_root +
'annotations/person_keypoints_val2017_tiny_clean.json',
nms_mode='none',
score_mode='keypoint',
)
Expand Down
107 changes: 58 additions & 49 deletions demo/bottomup_demo.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# Copyright (c) OpenMMLab. All rights reserved.

import mimetypes
import os
import os.path as osp
import tempfile
from argparse import ArgumentParser

Expand Down Expand Up @@ -120,57 +122,64 @@ def main():
visualizer = VISUALIZERS.build(model.cfg.visualizer)
visualizer.set_dataset_meta(model.dataset_meta)

input_type = mimetypes.guess_type(args.input)[0].split('/')[0]
if input_type == 'image':
pred_instances = process_one_image(
args, args.input, model, visualizer, show_interval=0)
pred_instances_list = split_instances(pred_instances)

elif input_type == 'video':
tmp_folder = tempfile.TemporaryDirectory()
video = mmcv.VideoReader(args.input)
progressbar = mmengine.ProgressBar(len(video))
video.cvt2frames(tmp_folder.name, show_progress=False)
output_root = args.output_root
args.output_root = tmp_folder.name
pred_instances_list = []

for frame_id, img_fname in enumerate(os.listdir(tmp_folder.name)):
pred_instances = process_one_image(
args,
f'{tmp_folder.name}/{img_fname}',
model,
visualizer,
show_interval=1)
progressbar.update()
pred_instances_list.append(
dict(
frame_id=frame_id,
instances=split_instances(pred_instances)))

if output_root:
mmcv.frames2video(
tmp_folder.name,
f'{output_root}/{os.path.basename(args.input)}',
fps=video.fps,
fourcc='mp4v',
show_progress=False)
tmp_folder.cleanup()

if osp.isfile(args.input):
inputs = [args.input]
else:
args.save_predictions = False
raise ValueError(
f'file {os.path.basename(args.input)} has invalid format.')
inputs = [osp.join(args.input, fn) for fn in os.listdir(args.input)]

if args.save_predictions:
with open(args.pred_save_path, 'w') as f:
json.dump(
dict(
meta_info=model.dataset_meta,
instance_info=pred_instances_list),
f,
indent='\t')
print(f'predictions have been saved at {args.pred_save_path}')
for fn in inputs:

input_type = mimetypes.guess_type(fn)[0].split('/')[0]
if input_type == 'image':
pred_instances = process_one_image(
args, fn, model, visualizer, show_interval=0)
pred_instances_list = split_instances(pred_instances)

elif input_type == 'video':
tmp_folder = tempfile.TemporaryDirectory()
video = mmcv.VideoReader(fn)
progressbar = mmengine.ProgressBar(len(video))
video.cvt2frames(tmp_folder.name, show_progress=False)
output_root = args.output_root
args.output_root = tmp_folder.name
pred_instances_list = []

for frame_id, img_fname in enumerate(os.listdir(tmp_folder.name)):
pred_instances = process_one_image(
args,
f'{tmp_folder.name}/{img_fname}',
model,
visualizer,
show_interval=1)
progressbar.update()
pred_instances_list.append(
dict(
frame_id=frame_id,
instances=split_instances(pred_instances)))

if output_root:
mmcv.frames2video(
tmp_folder.name,
f'{output_root}/{os.path.basename(fn)}',
fps=video.fps,
fourcc='mp4v',
show_progress=False)
tmp_folder.cleanup()

else:
args.save_predictions = False
raise ValueError(
f'file {os.path.basename(fn)} has invalid format.')

if args.save_predictions:
with open(args.pred_save_path, 'w') as f:
json.dump(
dict(
meta_info=model.dataset_meta,
instance_info=pred_instances_list),
f,
indent='\t')
print(f'predictions have been saved at {args.pred_save_path}')


if __name__ == '__main__':
Expand Down
78 changes: 57 additions & 21 deletions mmpose/codecs/associative_embedding.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
# Copyright (c) OpenMMLab. All rights reserved.
from collections import namedtuple
from copy import deepcopy
from itertools import product
from typing import Any, List, Optional, Tuple

import numpy as np
import torch
from mmengine import dump
from munkres import Munkres
from torch import Tensor

Expand Down Expand Up @@ -75,7 +77,9 @@ def _init_group():
tag_list=[])
return _group

for i in keypoint_order:
group_history = []

for idx, i in enumerate(keypoint_order):
# Get all valid candidates of the i-th keypoint
valid = vals[i] > val_thr
if not valid.any():
Expand All @@ -87,12 +91,22 @@ def _init_group():

if len(groups) == 0: # Initialize the group pool
for tag, val, loc in zip(tags_i, vals_i, locs_i):

# Check if the keypoint belongs to existing groups
if len(groups):
prev_tags = np.stack([g.tag_list[0] for g in groups])
dists = np.linalg.norm(prev_tags - tag, ord=2, axis=1)
if dists.min() < 1:
continue

group = _init_group()
group.kpts[i] = loc
group.scores[i] = val
group.tag_list.append(tag)

groups.append(group)
costs_copy = None
matches = None

else: # Match keypoints to existing groups
groups = groups[:max_groups]
Expand All @@ -101,17 +115,18 @@ def _init_group():
# Calculate distance matrix between group tags and tag candidates
# of the i-th keypoint
# Shape: (M', 1, L) , (1, G, L) -> (M', G, L)
diff = tags_i[:, None] - np.array(group_tags)[None]
diff = (tags_i[:, None] -
np.array(group_tags)[None]).astype(np.float64)
dists = np.linalg.norm(diff, ord=2, axis=2)
num_kpts, num_groups = dists.shape[:2]

# Experimental cost function for keypoint-group matching
# Experimental cost function for keypoint-group matching
costs = np.round(dists) * 100 - vals_i[..., None]

if num_kpts > num_groups:
padding = np.full((num_kpts, num_kpts - num_groups),
1e10,
dtype=np.float32)
padding = np.full((num_kpts, num_kpts - num_groups), 1e10)
costs = np.concatenate((costs, padding), axis=1)
costs_copy = costs.copy()

# Match keypoints and groups by Munkres algorithm
matches = munkres.compute(costs)
Expand All @@ -121,13 +136,30 @@ def _init_group():
# Add the keypoint to the matched group
group = groups[group_idx]
else:
# Initialize a new group with unmatched keypoint
group = _init_group()
groups.append(group)

group.kpts[i] = locs_i[kpt_idx]
group.scores[i] = vals_i[kpt_idx]
group.tag_list.append(tags_i[kpt_idx])
# if dists[kpt_idx].min() < 0.2:
if False:
group = None
else:
# Initialize a new group with unmatched keypoint
group = _init_group()
groups.append(group)
if group is not None:
group.kpts[i] = locs_i[kpt_idx]
group.scores[i] = vals_i[kpt_idx]
group.tag_list.append(tags_i[kpt_idx])

out = {
'idx': idx,
'i': i,
'costs': costs_copy,
'matches': matches,
'kpts': np.array([g.kpts for g in groups]),
'scores': np.array([g.scores for g in groups]),
'tag_list': [np.array(g.tag_list) for g in groups],
}
group_history.append(deepcopy(out))

dump(group_history, 'group_history.pkl')

groups = groups[:max_groups]
if groups:
Expand Down Expand Up @@ -210,7 +242,7 @@ def __init__(
decode_gaussian_kernel: int = 3,
decode_keypoint_thr: float = 0.1,
decode_tag_thr: float = 1.0,
decode_topk: int = 20,
decode_topk: int = 30,
decode_max_instances: Optional[int] = None,
) -> None:
super().__init__()
Expand Down Expand Up @@ -336,6 +368,12 @@ def _get_batch_topk(self, batch_heatmaps: Tensor, batch_tags: Tensor,
B, K, H, W = batch_heatmaps.shape
L = batch_tags.shape[1] // K

# Heatmap NMS
dump(batch_heatmaps.cpu().numpy(), 'heatmaps.pkl')
batch_heatmaps = batch_heatmap_nms(batch_heatmaps,
self.decode_nms_kernel)
dump(batch_heatmaps.cpu().numpy(), 'heatmaps_nms.pkl')

# shape of topk_val, top_indices: (B, K, TopK)
topk_vals, topk_indices = batch_heatmaps.flatten(-2, -1).topk(
k, dim=-1)
Expand Down Expand Up @@ -433,9 +471,8 @@ def _fill_missing_keypoints(self, keypoints: np.ndarray,
cost_map = np.round(dist_map) * 100 - heatmaps[k] # H, W
y, x = np.unravel_index(np.argmin(cost_map), shape=(H, W))
keypoints[n, k] = [x, y]
keypoint_scores[n, k] = heatmaps[k, y, x]

return keypoints, keypoint_scores
return keypoints

def batch_decode(self, batch_heatmaps: Tensor, batch_tags: Tensor
) -> Tuple[List[np.ndarray], List[np.ndarray]]:
Expand All @@ -457,15 +494,12 @@ def batch_decode(self, batch_heatmaps: Tensor, batch_tags: Tensor
batch, each is in shape (N, K). It usually represents the
confidence of the keypoint prediction
"""

B, _, H, W = batch_heatmaps.shape
assert batch_tags.shape[0] == B and batch_tags.shape[2:4] == (H, W), (
f'Mismatched shapes of heatmap ({batch_heatmaps.shape}) and '
f'tagging map ({batch_tags.shape})')

# Heatmap NMS
batch_heatmaps = batch_heatmap_nms(batch_heatmaps,
self.decode_nms_kernel)

# Get top-k in each heatmap and convert to numpy
batch_topk_vals, batch_topk_tags, batch_topk_locs = to_numpy(
self._get_batch_topk(
Expand All @@ -489,7 +523,7 @@ def batch_decode(self, batch_heatmaps: Tensor, batch_tags: Tensor

if keypoints.size > 0:
# identify missing keypoints
keypoints, scores = self._fill_missing_keypoints(
keypoints = self._fill_missing_keypoints(
keypoints, scores, heatmaps, tags)

# refine keypoint coordinates according to heatmap distribution
Expand All @@ -500,6 +534,8 @@ def batch_decode(self, batch_heatmaps: Tensor, batch_tags: Tensor
blur_kernel_size=self.decode_gaussian_kernel)
else:
keypoints = refine_keypoints(keypoints, heatmaps)
# keypoints += 0.75
keypoints += 0.5

batch_keypoints.append(keypoints)
batch_keypoint_scores.append(scores)
Expand Down
12 changes: 10 additions & 2 deletions mmpose/datasets/transforms/bottomup_transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -478,6 +478,7 @@ def transform(self, results: Dict) -> Optional[dict]:
output_size=actual_input_size)
else:
center = np.array([img_w / 2, img_h / 2], dtype=np.float32)
center = np.round(center)
scale = np.array([
img_w * padded_input_size[0] / actual_input_size[0],
img_h * padded_input_size[1] / actual_input_size[1]
Expand All @@ -489,11 +490,18 @@ def transform(self, results: Dict) -> Optional[dict]:
rot=0,
output_size=padded_input_size)

_img = cv2.warpAffine(
img, warp_mat, padded_input_size, flags=cv2.INTER_LINEAR)
_img = cv2.warpAffine(img, warp_mat, padded_input_size)

imgs.append(_img)

# print('#' * 20)
# print('w,h: ', img_w, img_h, 'center: ', center, 'scale: ',
# scale,
# 'actual_input_size: ', actual_input_size,
# 'padded_input_size: ', padded_input_size)
# print(warp_mat)
# print('#' * 20)

# Store the transform information w.r.t. the main input size
if i == 0:
results['img_shape'] = padded_input_size[::-1]
Expand Down
Loading

0 comments on commit 4740c5c

Please sign in to comment.