add markdown

open-mmlab · Jul 20, 2023 · 2e97a63 · 2e97a63
1 parent 3559e99
commit 2e97a63
Show file tree

Hide file tree

Showing 5 changed files with 426 additions and 4 deletions.
diff --git a/configs/body_3d_keypoint/pose_lift/h36m/motionbert_h36m.md b/configs/body_3d_keypoint/pose_lift/h36m/motionbert_h36m.md
@@ -43,11 +43,13 @@ Testing results on Human3.6M dataset with ground truth 2D detections
 | [MotionBERT\*](/configs/body_3d_keypoint/pose_lift/h36m/pose-lift_motionbert-243frm_8xb32-120e_h36m.py) | 34.5  |     34.6      |  27.1   | [ckpt](https://download.openmmlab.com/mmpose/v1/body_3d_keypoint/pose_lift/h36m/motionbert_h36m-f554954f_20230531.pth) |
 | [MotionBERT-finetuned\*](/configs/body_3d_keypoint/pose_lift/h36m/pose-lift_motionbert-243frm_8xb32-120e_h36m.py) | 26.9  |     26.8      |  21.0   | [ckpt](https://download.openmmlab.com/mmpose/v1/body_3d_keypoint/pose_lift/h36m/motionbert_ft_h36m-d80af323_20230531.pth) |
 
-Testing results on Human3.6M dataset from the [official repo](https://github.com/Walter0807/MotionBERT) with ground truth 2D detections
+Testing results on Human3.6M dataset converted from the [official repo](https://github.com/Walter0807/MotionBERT)<sup>1</sup> with ground truth 2D detections
 
 | Arch                                                                                    | MPJPE | average MPJPE | P-MPJPE |                                           ckpt                                           |
 | :-------------------------------------------------------------------------------------- | :---: | :-----------: | :-----: | :--------------------------------------------------------------------------------------: |
-| [MotionBERT\*](/configs/body_3d_keypoint/pose_lift/h36m/pose-lift_motionbert-243frm_8xb32-120e_h36m.py) | 39.8  |     39.2      |  33.4   | [ckpt](https://download.openmmlab.com/mmpose/v1/body_3d_keypoint/pose_lift/h36m/motionbert_h36m-f554954f_20230531.pth) |
-| [MotionBERT-finetuned\*](/configs/body_3d_keypoint/pose_lift/h36m/pose-lift_motionbert-243frm_8xb32-120e_h36m.py) | 37.7  |     37.2      |  32.2   | [ckpt](https://download.openmmlab.com/mmpose/v1/body_3d_keypoint/pose_lift/h36m/motionbert_ft_h36m-d80af323_20230531.pth) |
+| [MotionBERT\*](/configs/body_3d_keypoint/pose_lift/h36m/pose-lift_motionbert-243frm_8xb32-120e_h36m-original.py) | 39.8  |     39.2      |  33.4   | [ckpt](https://download.openmmlab.com/mmpose/v1/body_3d_keypoint/pose_lift/h36m/motionbert_h36m-f554954f_20230531.pth) |
+| [MotionBERT-finetuned\*](/configs/body_3d_keypoint/pose_lift/h36m/pose-lift_motionbert-243frm_8xb32-120e_h36m-original.py) | 37.7  |     37.2      |  32.2   | [ckpt](https://download.openmmlab.com/mmpose/v1/body_3d_keypoint/pose_lift/h36m/motionbert_ft_h36m-d80af323_20230531.pth) |
+
+<sup>1</sup> To test with the dataset from official repo, please download the [test annotation file](https://download.openmmlab.com/mmpose/v1/body_3d_keypoint/pose_lift/h36m/h36m_test_original.npz), [train annotation file](https://download.openmmlab.com/mmpose/v1/body_3d_keypoint/pose_lift/h36m/h36m_train_original.npz) and [factors](https://download.openmmlab.com/mmpose/v1/body_3d_keypoint/pose_lift/h36m/h36m_factors.npy) under `$MMPOSE/data/h36m/annotation_body3d/fps50`.
 
 *Models with * are converted from the [official repo](https://github.com/Walter0807/MotionBERT). The config files of these models are only for validation. We don't ensure these config files' training accuracy and welcome you to contribute your reproduction results.*
diff --git a/configs/body_3d_keypoint/pose_lift/h36m/motionbert_h36m.yml b/configs/body_3d_keypoint/pose_lift/h36m/motionbert_h36m.yml
@@ -19,7 +19,7 @@ Models:
       P-MPJPE: 27.1
     Task: Body 3D Keypoint
   Weights: https://download.openmmlab.com/mmpose/v1/body_3d_keypoint/pose_lift/h36m/motionbert_h36m-f554954f_20230531.pth
-- Config: configs/body_3d_keypoint/pose_lift/h36m/pose-lift_motionbert_8xb32-120e_h36m.py
+- Config: configs/body_3d_keypoint/pose_lift/h36m/pose-lift_motionbert-ft_8xb32-120e_h36m.py
   In Collection: MotionBERT
   Metadata:
     Architecture: *id001

diff --git a/...s/body_3d_keypoint/pose_lift/h36m/pose-lift_motionbert-243frm_8xb32-120e_h36m-original.py b/...s/body_3d_keypoint/pose_lift/h36m/pose-lift_motionbert-243frm_8xb32-120e_h36m-original.py
@@ -0,0 +1,137 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+vis_backends = [
+    dict(type='LocalVisBackend'),
+]
+visualizer = dict(
+    type='Pose3dLocalVisualizer', vis_backends=vis_backends, name='visualizer')
+
+# runtime
+train_cfg = dict(max_epochs=120, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(
+    optimizer=dict(type='AdamW', lr=0.0002, weight_decay=0.01))
+
+# learning policy
+param_scheduler = [
+    dict(type='ExponentialLR', gamma=0.99, end=120, by_epoch=True)
+]
+
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(
+    checkpoint=dict(
+        type='CheckpointHook',
+        save_best='MPJPE',
+        rule='less',
+        max_keep_ckpts=1),
+    logger=dict(type='LoggerHook', interval=20),
+)
+
+# codec settings
+train_codec = dict(
+    type='MotionBERTLabel', num_keypoints=17, concat_vis=True, mode='train')
+val_codec = dict(
+    type='MotionBERTLabel', num_keypoints=17, concat_vis=True, rootrel=True)
+
+# model settings
+model = dict(
+    type='PoseLifter',
+    backbone=dict(
+        type='DSTFormer',
+        in_channels=3,
+        feat_size=512,
+        depth=5,
+        num_heads=8,
+        mlp_ratio=2,
+        seq_len=243,
+        att_fuse=True,
+    ),
+    head=dict(
+        type='MotionRegressionHead',
+        in_channels=512,
+        out_channels=3,
+        embedding_size=512,
+        loss=dict(type='MPJPEVelocityJointLoss'),
+        decoder=val_codec,
+    ),
+    test_cfg=dict(flip_test=True))
+
+# base dataset settings
+dataset_type = 'Human36mDataset'
+data_root = 'data/h36m/'
+
+# pipelines
+train_pipeline = [
+    dict(type='GenerateTarget', encoder=train_codec),
+    dict(
+        type='RandomFlipAroundRoot',
+        keypoints_flip_cfg=dict(center_mode='static', center_x=0.),
+        target_flip_cfg=dict(center_mode='static', center_x=0.),
+        flip_label=True),
+    dict(
+        type='PackPoseInputs',
+        meta_keys=('id', 'category_id', 'target_img_path', 'flip_indices',
+                   'factor', 'camera_param'))
+]
+val_pipeline = [
+    dict(type='GenerateTarget', encoder=val_codec),
+    dict(
+        type='PackPoseInputs',
+        meta_keys=('id', 'category_id', 'target_img_path', 'flip_indices',
+                   'factor', 'camera_param'))
+]
+
+# data loaders
+train_dataloader = dict(
+    batch_size=32,
+    prefetch_factor=4,
+    pin_memory=True,
+    num_workers=2,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        ann_file='annotation_body3d/fps50/h36m_train_original.npz',
+        seq_len=1,
+        multiple_target=243,
+        multiple_target_step=81,
+        camera_param_file='annotation_body3d/cameras.pkl',
+        data_root=data_root,
+        data_prefix=dict(img='images/'),
+        pipeline=train_pipeline,
+    ))
+
+val_dataloader = dict(
+    batch_size=32,
+    prefetch_factor=4,
+    pin_memory=True,
+    num_workers=2,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+    dataset=dict(
+        type=dataset_type,
+        ann_file='annotation_body3d/fps50/h36m_test_original.npz',
+        factor_file='annotation_body3d/fps50/h36m_factors.npy',
+        seq_len=1,
+        seq_step=1,
+        multiple_target=243,
+        camera_param_file='annotation_body3d/cameras.pkl',
+        data_root=data_root,
+        data_prefix=dict(img='images/'),
+        pipeline=val_pipeline,
+        test_mode=True,
+    ))
+test_dataloader = val_dataloader
+
+# evaluators
+skip_list = [
+    'S9_Greet', 'S9_SittingDown', 'S9_Wait_1', 'S9_Greeting', 'S9_Waiting_1'
+]
+val_evaluator = [
+    dict(type='MPJPE', mode='mpjpe', skip_list=skip_list),
+    dict(type='MPJPE', mode='p-mpjpe', skip_list=skip_list)
+]
+test_evaluator = val_evaluator
diff --git a/...body_3d_keypoint/pose_lift/h36m/pose-lift_motionbert-ft-243frm_8xb32-60e_h36m-original.py b/...body_3d_keypoint/pose_lift/h36m/pose-lift_motionbert-ft-243frm_8xb32-60e_h36m-original.py
@@ -0,0 +1,142 @@
+_base_ = ['../../../_base_/default_runtime.py']
+
+vis_backends = [
+    dict(type='LocalVisBackend'),
+]
+visualizer = dict(
+    type='Pose3dLocalVisualizer', vis_backends=vis_backends, name='visualizer')
+
+# runtime
+train_cfg = dict(max_epochs=60, val_interval=10)
+
+# optimizer
+optim_wrapper = dict(
+    optimizer=dict(type='AdamW', lr=0.0002, weight_decay=0.01))
+
+# learning policy
+param_scheduler = [
+    dict(type='ExponentialLR', gamma=0.99, end=60, by_epoch=True)
+]
+
+auto_scale_lr = dict(base_batch_size=512)
+
+# hooks
+default_hooks = dict(
+    checkpoint=dict(
+        type='CheckpointHook',
+        save_best='MPJPE',
+        rule='less',
+        max_keep_ckpts=1),
+    logger=dict(type='LoggerHook', interval=20),
+)
+
+# codec settings
+train_codec = dict(
+    type='MotionBERTLabel', num_keypoints=17, concat_vis=True, mode='train')
+val_codec = dict(
+    type='MotionBERTLabel', num_keypoints=17, concat_vis=True, rootrel=True)
+
+# model settings
+model = dict(
+    type='PoseLifter',
+    backbone=dict(
+        type='DSTFormer',
+        in_channels=3,
+        feat_size=512,
+        depth=5,
+        num_heads=8,
+        mlp_ratio=2,
+        seq_len=243,
+        att_fuse=True,
+    ),
+    head=dict(
+        type='MotionRegressionHead',
+        in_channels=512,
+        out_channels=3,
+        embedding_size=512,
+        loss=dict(type='MPJPEVelocityJointLoss'),
+        decoder=val_codec,
+    ),
+    test_cfg=dict(flip_test=True),
+    init_cfg=dict(
+        type='Pretrained',
+        checkpoint='https://download.openmmlab.com/mmpose/v1/body_3d_keypoint/'
+        'pose_lift/h36m/motionbert_pretrain_h36m-29ffebf5_20230719.pth'),
+)
+
+# base dataset settings
+dataset_type = 'Human36mDataset'
+data_root = 'data/h36m/'
+
+# pipelines
+train_pipeline = [
+    dict(type='GenerateTarget', encoder=train_codec),
+    dict(
+        type='RandomFlipAroundRoot',
+        keypoints_flip_cfg=dict(center_mode='static', center_x=0.),
+        target_flip_cfg=dict(center_mode='static', center_x=0.),
+        flip_label=True),
+    dict(
+        type='PackPoseInputs',
+        meta_keys=('id', 'category_id', 'target_img_path', 'flip_indices',
+                   'factor', 'camera_param'))
+]
+val_pipeline = [
+    dict(type='GenerateTarget', encoder=val_codec),
+    dict(
+        type='PackPoseInputs',
+        meta_keys=('id', 'category_id', 'target_img_path', 'flip_indices',
+                   'factor', 'camera_param'))
+]
+
+# data loaders
+train_dataloader = dict(
+    batch_size=32,
+    prefetch_factor=4,
+    pin_memory=True,
+    num_workers=2,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        ann_file='annotation_body3d/fps50/h36m_train_original.npz',
+        seq_len=1,
+        multiple_target=243,
+        multiple_target_step=81,
+        camera_param_file='annotation_body3d/cameras.pkl',
+        data_root=data_root,
+        data_prefix=dict(img='images/'),
+        pipeline=train_pipeline,
+    ))
+
+val_dataloader = dict(
+    batch_size=32,
+    prefetch_factor=4,
+    pin_memory=True,
+    num_workers=2,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
+    dataset=dict(
+        type=dataset_type,
+        ann_file='annotation_body3d/fps50/h36m_test_original.npz',
+        factor_file='annotation_body3d/fps50/h36m_factors.npy',
+        seq_len=1,
+        seq_step=1,
+        multiple_target=243,
+        camera_param_file='annotation_body3d/cameras.pkl',
+        data_root=data_root,
+        data_prefix=dict(img='images/'),
+        pipeline=val_pipeline,
+        test_mode=True,
+    ))
+test_dataloader = val_dataloader
+
+# evaluators
+skip_list = [
+    'S9_Greet', 'S9_SittingDown', 'S9_Wait_1', 'S9_Greeting', 'S9_Waiting_1'
+]
+val_evaluator = [
+    dict(type='MPJPE', mode='mpjpe', skip_list=skip_list),
+    dict(type='MPJPE', mode='p-mpjpe', skip_list=skip_list)
+]
+test_evaluator = val_evaluator