Skip to content

Commit

Permalink
[Refactor] Support CID (open-mmlab#1907)
Browse files Browse the repository at this point in the history
  • Loading branch information
Ben-Louis authored Feb 17, 2023
1 parent 6dc6106 commit 77a52a0
Show file tree
Hide file tree
Showing 25 changed files with 2,089 additions and 140 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
# inherit the shared runtime defaults
_base_ = ['../../../_base_/default_runtime.py']

# training schedule: 140 epochs in total, validation every 10 epochs
train_cfg = dict(
    max_epochs=140,
    val_interval=10,
)

# optimizer: Adam with an initial learning rate of 1e-3
optim_wrapper = dict(optimizer=dict(type='Adam', lr=1e-3))

# learning-rate policy: the LR is multiplied by `gamma` at each epoch listed
# in `milestones`; the schedule spans the same 140 epochs as `train_cfg`
param_scheduler = [
    dict(
        type='MultiStepLR',
        begin=0,
        end=140,
        milestones=[90, 120],
        gamma=0.1,
        by_epoch=True,
    ),
]

# reference total batch size used when the LR is rescaled automatically to
# the batch size actually used for training
auto_scale_lr = dict(base_batch_size=160)

# hooks: remember the best checkpoint, judged by `coco/AP` (higher is better)
default_hooks = dict(
    checkpoint=dict(
        save_best='coco/AP',
        rule='greater',
    ),
)

# codec settings: 512x512 network input, 128x128 heatmaps (1/4 of the input)
codec = dict(
    type='DecoupledHeatmap',
    input_size=(512, 512),
    heatmap_size=(128, 128),
)

# model settings: bottom-up estimator = HRNet backbone + CID head
model = dict(
    type='BottomupPoseEstimator',
    # per-channel normalisation statistics; images are converted BGR -> RGB
    data_preprocessor=dict(
        type='PoseDataPreprocessor',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True,
    ),
    backbone=dict(
        type='HRNet',
        in_channels=3,
        # four stages; the branch widths double from 32 up to 256 channels
        extra=dict(
            stage1=dict(
                num_modules=1,
                num_branches=1,
                block='BOTTLENECK',
                num_blocks=(4, ),
                num_channels=(64, ),
            ),
            stage2=dict(
                num_modules=1,
                num_branches=2,
                block='BASIC',
                num_blocks=(4, 4),
                num_channels=(32, 64),
            ),
            stage3=dict(
                num_modules=4,
                num_branches=3,
                block='BASIC',
                num_blocks=(4, 4, 4),
                num_channels=(32, 64, 128),
            ),
            stage4=dict(
                num_modules=3,
                num_branches=4,
                block='BASIC',
                num_blocks=(4, 4, 4, 4),
                num_channels=(32, 64, 128, 256),
                # all four branches are returned; the head below indexes
                # every one of them via `input_index`
                multiscale_output=True,
            ),
        ),
        # start from the published HRNet-W32 pretrained weights
        init_cfg=dict(
            type='Pretrained',
            checkpoint='https://download.openmmlab.com/mmpose/'
            'pretrain_models/hrnet_w32-36af842e.pth',
        ),
    ),
    head=dict(
        type='CIDHead',
        # must match the `stage4` channel widths of the backbone
        in_channels=(32, 64, 128, 256),
        num_keypoints=17,
        gfd_channels=32,
        # NOTE(review): presumably resizes the selected branches to a common
        # resolution and concatenates them -- confirm against CIDHead
        input_transform='resize_concat',
        input_index=(0, 1, 2, 3),
        # the decoupled heatmap loss is weighted 4x relative to the others
        coupled_heatmap_loss=dict(type='FocalHeatmapLoss', loss_weight=1.0),
        decoupled_heatmap_loss=dict(type='FocalHeatmapLoss', loss_weight=4.0),
        contrastive_loss=dict(
            type='InfoNCELoss',
            temperature=0.05,
            loss_weight=1.0,
        ),
        # predictions are decoded with the same codec that encodes targets
        decoder=codec,
    ),
    # NOTE(review): looks like a cap on the instances used per training
    # step -- confirm against the estimator/head implementation
    train_cfg=dict(max_train_instances=200),
    # single-scale inference with horizontal-flip augmentation
    test_cfg=dict(
        multiscale_test=False,
        flip_test=True,
        shift_heatmap=False,
        align_corners=False,
    ),
)

# base dataset settings: COCO keypoints, loaded in bottom-up mode
dataset_type = 'CocoDataset'
data_mode = 'bottomup'
data_root = 'data/coco/'

# training pipeline: load -> random affine to the codec input size ->
# random horizontal flip -> encode targets with the codec -> heatmap mask ->
# pack
train_pipeline = [
    dict(type='LoadImage', file_client_args={{_base_.file_client_args}}),
    dict(type='BottomupRandomAffine', input_size=codec['input_size']),
    dict(type='RandomFlip', direction='horizontal'),
    dict(type='GenerateTarget', encoder=codec),
    dict(type='BottomupGetHeatmapMask'),
    dict(type='PackPoseInputs'),
]

# validation pipeline: load -> resize -> pack, with no random augmentation
# and no target generation
val_pipeline = [
    dict(type='LoadImage', file_client_args={{_base_.file_client_args}}),
    dict(
        type='BottomupResize',
        input_size=codec['input_size'],
        size_factor=64,
        resize_mode='expand',
    ),
    dict(
        type='PackPoseInputs',
        # meta information packed alongside each validation sample
        meta_keys=(
            'id', 'img_id', 'img_path', 'crowd_index', 'ori_shape',
            'img_shape', 'input_size', 'input_center', 'input_scale',
            'flip', 'flip_direction', 'flip_indices', 'raw_ann_info',
            'skeleton_links',
        ),
    ),
]

# training loader: 20 samples per batch, shuffled, COCO train2017 split
train_dataloader = dict(
    batch_size=20,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='annotations/person_keypoints_train2017.json',
        data_prefix=dict(img='train2017/'),
        pipeline=train_pipeline,
    ),
)

# validation loader: one image at a time, fixed order, COCO val2017 split
val_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='annotations/person_keypoints_val2017.json',
        data_prefix=dict(img='val2017/'),
        test_mode=True,
        pipeline=val_pipeline,
    ),
)

# testing reuses the validation loader (same object, not a copy)
test_dataloader = val_dataloader

# evaluators: COCO keypoint metric computed on the val2017 annotations
val_evaluator = dict(
    type='CocoMetric',
    ann_file=data_root + 'annotations/person_keypoints_val2017.json',
    nms_thr=0.8,
    score_mode='keypoint',
)

# testing reuses the validation evaluator (same object, not a copy)
test_evaluator = val_evaluator
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
# inherit the shared runtime defaults
_base_ = ['../../../_base_/default_runtime.py']

# training schedule: 140 epochs in total, validation every 10 epochs
train_cfg = dict(
    max_epochs=140,
    val_interval=10,
)

# optimizer: Adam with an initial learning rate of 1e-3
optim_wrapper = dict(optimizer=dict(type='Adam', lr=1e-3))

# learning-rate policy: the LR is multiplied by `gamma` at each epoch listed
# in `milestones`; the schedule spans the same 140 epochs as `train_cfg`
param_scheduler = [
    dict(
        type='MultiStepLR',
        begin=0,
        end=140,
        milestones=[90, 120],
        gamma=0.1,
        by_epoch=True,
    ),
]

# reference total batch size used when the LR is rescaled automatically to
# the batch size actually used for training
auto_scale_lr = dict(base_batch_size=160)

# hooks: remember the best checkpoint, judged by `coco/AP` (higher is better)
default_hooks = dict(
    checkpoint=dict(
        save_best='coco/AP',
        rule='greater',
    ),
)

# codec settings: 512x512 network input, 128x128 heatmaps (1/4 of the input)
codec = dict(
    type='DecoupledHeatmap',
    input_size=(512, 512),
    heatmap_size=(128, 128),
)

# model settings: bottom-up estimator = HRNet backbone + CID head
model = dict(
    type='BottomupPoseEstimator',
    # per-channel normalisation statistics; images are converted BGR -> RGB
    data_preprocessor=dict(
        type='PoseDataPreprocessor',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True,
    ),
    backbone=dict(
        type='HRNet',
        in_channels=3,
        # four stages; the branch widths double from 48 up to 384 channels
        extra=dict(
            stage1=dict(
                num_modules=1,
                num_branches=1,
                block='BOTTLENECK',
                num_blocks=(4, ),
                num_channels=(64, ),
            ),
            stage2=dict(
                num_modules=1,
                num_branches=2,
                block='BASIC',
                num_blocks=(4, 4),
                num_channels=(48, 96),
            ),
            stage3=dict(
                num_modules=4,
                num_branches=3,
                block='BASIC',
                num_blocks=(4, 4, 4),
                num_channels=(48, 96, 192),
            ),
            stage4=dict(
                num_modules=3,
                num_branches=4,
                block='BASIC',
                num_blocks=(4, 4, 4, 4),
                num_channels=(48, 96, 192, 384),
                # all four branches are returned; the head below indexes
                # every one of them via `input_index`
                multiscale_output=True,
            ),
        ),
        # start from the published HRNet-W48 pretrained weights
        init_cfg=dict(
            type='Pretrained',
            checkpoint='https://download.openmmlab.com/mmpose/'
            'pretrain_models/hrnet_w48-8ef0771d.pth',
        ),
    ),
    head=dict(
        type='CIDHead',
        # must match the `stage4` channel widths of the backbone
        in_channels=(48, 96, 192, 384),
        num_keypoints=17,
        gfd_channels=48,
        # NOTE(review): presumably resizes the selected branches to a common
        # resolution and concatenates them -- confirm against CIDHead
        input_transform='resize_concat',
        input_index=(0, 1, 2, 3),
        # the decoupled heatmap loss is weighted 4x relative to the others
        coupled_heatmap_loss=dict(type='FocalHeatmapLoss', loss_weight=1.0),
        decoupled_heatmap_loss=dict(type='FocalHeatmapLoss', loss_weight=4.0),
        contrastive_loss=dict(
            type='InfoNCELoss',
            temperature=0.05,
            loss_weight=1.0,
        ),
        # predictions are decoded with the same codec that encodes targets
        decoder=codec,
    ),
    # NOTE(review): looks like a cap on the instances used per training
    # step -- confirm against the estimator/head implementation
    train_cfg=dict(max_train_instances=200),
    # single-scale inference with horizontal-flip augmentation
    test_cfg=dict(
        multiscale_test=False,
        flip_test=True,
        shift_heatmap=False,
        align_corners=False,
    ),
)

# base dataset settings: COCO keypoints, loaded in bottom-up mode
dataset_type = 'CocoDataset'
data_mode = 'bottomup'
data_root = 'data/coco/'

# training pipeline: load -> random affine to the codec input size ->
# random horizontal flip -> encode targets with the codec -> heatmap mask ->
# pack
train_pipeline = [
    dict(type='LoadImage', file_client_args={{_base_.file_client_args}}),
    dict(type='BottomupRandomAffine', input_size=codec['input_size']),
    dict(type='RandomFlip', direction='horizontal'),
    dict(type='GenerateTarget', encoder=codec),
    dict(type='BottomupGetHeatmapMask'),
    dict(type='PackPoseInputs'),
]

# validation pipeline: load -> resize -> pack, with no random augmentation
# and no target generation
val_pipeline = [
    dict(type='LoadImage', file_client_args={{_base_.file_client_args}}),
    dict(
        type='BottomupResize',
        input_size=codec['input_size'],
        size_factor=64,
        resize_mode='expand',
    ),
    dict(
        type='PackPoseInputs',
        # meta information packed alongside each validation sample
        meta_keys=(
            'id', 'img_id', 'img_path', 'crowd_index', 'ori_shape',
            'img_shape', 'input_size', 'input_center', 'input_scale',
            'flip', 'flip_direction', 'flip_indices', 'raw_ann_info',
            'skeleton_links',
        ),
    ),
]

# training loader: 20 samples per batch, shuffled, COCO train2017 split
train_dataloader = dict(
    batch_size=20,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='annotations/person_keypoints_train2017.json',
        data_prefix=dict(img='train2017/'),
        pipeline=train_pipeline,
    ),
)

# validation loader: one image at a time, fixed order, COCO val2017 split
val_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='annotations/person_keypoints_val2017.json',
        data_prefix=dict(img='val2017/'),
        test_mode=True,
        pipeline=val_pipeline,
    ),
)

# testing reuses the validation loader (same object, not a copy)
test_dataloader = val_dataloader

# evaluators: COCO keypoint metric computed on the val2017 annotations
val_evaluator = dict(
    type='CocoMetric',
    ann_file=data_root + 'annotations/person_keypoints_val2017.json',
    nms_thr=0.8,
    score_mode='keypoint',
)

# testing reuses the validation evaluator (same object, not a copy)
test_evaluator = val_evaluator
42 changes: 42 additions & 0 deletions configs/body_2d_keypoint/cid/coco/hrnet_coco.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
<!-- [ALGORITHM] -->

<details>
<summary align="right"><a href="https://openaccess.thecvf.com/content/CVPR2022/html/Wang_Contextual_Instance_Decoupling_for_Robust_Multi-Person_Pose_Estimation_CVPR_2022_paper.html">CID (CVPR'2022)</a></summary>

```bibtex
@InProceedings{Wang_2022_CVPR,
author = {Wang, Dongkai and Zhang, Shiliang},
title = {Contextual Instance Decoupling for Robust Multi-Person Pose Estimation},
booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
month = {June},
year = {2022},
pages = {11060-11068}
}
```

</details>

<!-- [DATASET] -->

<details>
<summary align="right"><a href="https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48">COCO (ECCV'2014)</a></summary>

```bibtex
@inproceedings{lin2014microsoft,
title={Microsoft coco: Common objects in context},
author={Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Doll{\'a}r, Piotr and Zitnick, C Lawrence},
booktitle={European conference on computer vision},
pages={740--755},
year={2014},
organization={Springer}
}
```

</details>

Results on COCO val2017 without multi-scale test

| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log |
| :-------------------------------------------- | :--------: | :---: | :-------------: | :-------------: | :---: | :-------------: | :-------------------------------------------: | :-------------------------------------------: |
| [CID](/configs/body_2d_keypoint/cid/coco/cid_hrnet-w32_8xb20-140e_coco-512x512.py) | 512x512 | 0.704 | 0.894 | 0.775 | 0.753 | 0.928 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/cid/coco/cid_hrnet-w32_8xb20-140e_coco-512x512_42b7e6e6-20230207.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/cid/coco/cid_hrnet-w32_8xb20-140e_coco-512x512_20230207.json) |
| [CID](/configs/body_2d_keypoint/cid/coco/cid_hrnet-w48_8xb20-140e_coco-512x512.py) | 512x512 | 0.715 | 0.900 | 0.782 | 0.765 | 0.935 | [ckpt](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/cid/coco/cid_hrnet-w48_8xb20-140e_coco-512x512_a36c3ecf-20230207.pth) | [log](https://download.openmmlab.com/mmpose/v1/body_2d_keypoint/cid/coco/cid_hrnet-w48_8xb20-140e_coco-512x512_20230207.json) |
4 changes: 3 additions & 1 deletion mmpose/codecs/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Copyright (c) OpenMMLab. All rights reserved.
from .associative_embedding import AssociativeEmbedding
from .decoupled_heatmap import DecoupledHeatmap
from .integral_regression_label import IntegralRegressionLabel
from .megvii_heatmap import MegviiHeatmap
from .msra_heatmap import MSRAHeatmap
Expand All @@ -10,5 +11,6 @@

__all__ = [
'MSRAHeatmap', 'MegviiHeatmap', 'UDPHeatmap', 'RegressionLabel',
'SimCCLabel', 'IntegralRegressionLabel', 'AssociativeEmbedding', 'SPR'
'SimCCLabel', 'IntegralRegressionLabel', 'AssociativeEmbedding', 'SPR',
'DecoupledHeatmap'
]
4 changes: 4 additions & 0 deletions mmpose/codecs/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ class BaseKeypointCodec(metaclass=ABCMeta):
the methods :meth:`encode` and :meth:`decode`.
"""

# pass additional encoding arguments to the `encode` method, beyond the
# mandatory `keypoints` and `keypoints_visible` arguments.
auxiliary_encode_keys = set()

@abstractmethod
def encode(self,
keypoints: np.ndarray,
Expand Down
Loading

0 comments on commit 77a52a0

Please sign in to comment.