Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Postprocessor extension. #57

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
98 changes: 98 additions & 0 deletions config/coco_panoptic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
# dataset settings
#
# Base dataset config for COCO panoptic segmentation: it defines the train and
# test pipelines, the train/val/test dataloaders and the panoptic evaluators.
dataset_type = "CocoPanopticDataset"
# Default to a local copy of the dataset. Every dataloader and evaluator path
# below is derived from this root, so it must be reachable with the default
# `backend_args = None`.
data_root = "data/coco/"

# Example to use different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer the backend from the prefix (LMDB and Memcache are
# not supported yet)

# data_root = "s3://openmmlab/datasets/detection/coco/"

# Method 2: Use `backend_args`, `file_client_args` in versions before 3.0.0rc6
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection/',
#         'data/': 's3://openmmlab/datasets/detection/'
#     }))
backend_args = None

train_pipeline = [
    dict(type="LoadImageFromFile", backend_args=backend_args),
    dict(type="LoadPanopticAnnotations", backend_args=backend_args),
    dict(type="Resize", scale=(1333, 800), keep_ratio=True),
    dict(type="RandomFlip", prob=0.5),
    dict(type="PackDetInputs"),
]
test_pipeline = [
    dict(type="LoadImageFromFile", backend_args=backend_args),
    dict(type="Resize", scale=(1333, 800), keep_ratio=True),
    dict(type="LoadPanopticAnnotations", backend_args=backend_args),
    dict(
        type="PackDetInputs",
        meta_keys=("img_id", "img_path", "ori_shape", "img_shape", "scale_factor"),
    ),
]

train_dataloader = dict(
    batch_size=2,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type="DefaultSampler", shuffle=True),
    batch_sampler=dict(type="AspectRatioBatchSampler"),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file="annotations/panoptic_train2017.json",
        data_prefix=dict(img="train2017/", seg="annotations/panoptic_train2017/"),
        filter_cfg=dict(filter_empty_gt=True, min_size=32),
        pipeline=train_pipeline,
        backend_args=backend_args,
    ),
)
val_dataloader = dict(
    batch_size=1,
    num_workers=2,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type="DefaultSampler", shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file="annotations/panoptic_val2017.json",
        data_prefix=dict(img="val2017/", seg="annotations/panoptic_val2017/"),
        test_mode=True,
        pipeline=test_pipeline,
        backend_args=backend_args,
    ),
)
# Testing reuses the validation dataloader unchanged (same object).
test_dataloader = val_dataloader

# The evaluator receives full paths, hence the explicit `data_root +` prefix.
val_evaluator = dict(
    type="CocoPanopticMetric",
    ann_file=data_root + "annotations/panoptic_val2017.json",
    seg_prefix=data_root + "annotations/panoptic_val2017/",
    backend_args=backend_args,
)
# Testing reuses the validation evaluator unchanged (same object).
test_evaluator = val_evaluator

# To run inference on the test-dev split and format the output results
# for submission, uncomment the settings below.
# test_dataloader = dict(
# batch_size=1,
# num_workers=1,
# persistent_workers=True,
# drop_last=False,
# sampler=dict(type='DefaultSampler', shuffle=False),
# dataset=dict(
# type=dataset_type,
# data_root=data_root,
# ann_file='annotations/panoptic_image_info_test-dev2017.json',
# data_prefix=dict(img='test2017/'),
# test_mode=True,
# pipeline=test_pipeline))
# test_evaluator = dict(
# type='CocoPanopticMetric',
# format_only=True,
# ann_file=data_root + 'annotations/panoptic_image_info_test-dev2017.json',
# outfile_prefix='./work_dirs/coco_panoptic/test')
265 changes: 265 additions & 0 deletions config/mask2former_r50_8xb2-lsj-50e_coco-panoptic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,265 @@
# Inherit the dataset settings from the COCO panoptic base config.
_base_ = ["./coco_panoptic.py"]

# Target size shared by the batch-level padding configured below.
image_size = (1024, 1024)

# Batch-level augmentation: pad images, masks and semantic maps to `image_size`.
batch_augments = [
    {
        "type": "BatchFixedSizePad",
        "size": image_size,
        "img_pad_value": 0,
        "pad_mask": True,
        "mask_pad_value": 0,
        "pad_seg": True,
        "seg_pad_value": 255,
    }
]

# Input normalisation and padding settings handed to the model.
data_preprocessor = {
    "type": "DetDataPreprocessor",
    "mean": [123.675, 116.28, 103.53],
    "std": [58.395, 57.12, 57.375],
    "bgr_to_rgb": True,
    "pad_size_divisor": 32,
    "pad_mask": True,
    "mask_pad_value": 0,
    "pad_seg": True,
    "seg_pad_value": 255,
    "batch_augments": batch_augments,
}

# Class counts shared by the heads and the classification loss below.
# NOTE(review): the stuff-class count is 0, so `num_classes` covers thing
# classes only -- confirm this is intended for a panoptic config.
num_things_classes = 80
num_stuff_classes = 0
num_classes = num_things_classes + num_stuff_classes

# Mask2Former model definition: backbone, panoptic head, fusion head and the
# train/test settings.
model = {
    "type": "Mask2Former",
    "data_preprocessor": data_preprocessor,
    "backbone": {
        "type": "ResNet",
        "depth": 50,
        "num_stages": 4,
        "out_indices": (0, 1, 2, 3),
        "frozen_stages": -1,
        "norm_cfg": {"type": "BN", "requires_grad": False},
        "norm_eval": True,
        "style": "pytorch",
        "init_cfg": {
            "type": "Pretrained",
            "checkpoint": "torchvision://resnet50",
        },
    },
    "panoptic_head": {
        "type": "Mask2FormerHead",
        "in_channels": [256, 512, 1024, 2048],  # pass to pixel_decoder inside
        "strides": [4, 8, 16, 32],
        "feat_channels": 256,
        "out_channels": 256,
        "num_things_classes": num_things_classes,
        "num_stuff_classes": num_stuff_classes,
        "num_queries": 100,
        "num_transformer_feat_level": 3,
        "pixel_decoder": {
            "type": "MSDeformAttnPixelDecoder",
            "num_outs": 3,
            "norm_cfg": {"type": "GN", "num_groups": 32},
            "act_cfg": {"type": "ReLU"},
            "encoder": {  # DeformableDetrTransformerEncoder
                "num_layers": 6,
                "layer_cfg": {  # DeformableDetrTransformerEncoderLayer
                    "self_attn_cfg": {  # MultiScaleDeformableAttention
                        "embed_dims": 256,
                        "num_heads": 8,
                        "num_levels": 3,
                        "num_points": 4,
                        "dropout": 0.0,
                        "batch_first": True,
                    },
                    "ffn_cfg": {
                        "embed_dims": 256,
                        "feedforward_channels": 1024,
                        "num_fcs": 2,
                        "ffn_drop": 0.0,
                        "act_cfg": {"type": "ReLU", "inplace": True},
                    },
                },
            },
            "positional_encoding": {"num_feats": 128, "normalize": True},
        },
        "enforce_decoder_input_project": False,
        "positional_encoding": {"num_feats": 128, "normalize": True},
        "transformer_decoder": {  # Mask2FormerTransformerDecoder
            "return_intermediate": True,
            "num_layers": 9,
            "layer_cfg": {  # Mask2FormerTransformerDecoderLayer
                "self_attn_cfg": {  # MultiheadAttention
                    "embed_dims": 256,
                    "num_heads": 8,
                    "dropout": 0.0,
                    "batch_first": True,
                },
                "cross_attn_cfg": {  # MultiheadAttention
                    "embed_dims": 256,
                    "num_heads": 8,
                    "dropout": 0.0,
                    "batch_first": True,
                },
                "ffn_cfg": {
                    "embed_dims": 256,
                    "feedforward_channels": 2048,
                    "num_fcs": 2,
                    "ffn_drop": 0.0,
                    "act_cfg": {"type": "ReLU", "inplace": True},
                },
            },
            "init_cfg": None,
        },
        "loss_cls": {
            "type": "CrossEntropyLoss",
            "use_sigmoid": False,
            "loss_weight": 2.0,
            "reduction": "mean",
            # One weight per class plus a final, down-weighted extra entry.
            "class_weight": [1.0] * num_classes + [0.1],
        },
        "loss_mask": {
            "type": "CrossEntropyLoss",
            "use_sigmoid": True,
            "reduction": "mean",
            "loss_weight": 5.0,
        },
        "loss_dice": {
            "type": "DiceLoss",
            "use_sigmoid": True,
            "activate": True,
            "reduction": "mean",
            "naive_dice": True,
            "eps": 1.0,
            "loss_weight": 5.0,
        },
    },
    "panoptic_fusion_head": {
        "type": "MaskFormerFusionHead",
        "num_things_classes": num_things_classes,
        "num_stuff_classes": num_stuff_classes,
        "loss_panoptic": None,
        "init_cfg": None,
    },
    "train_cfg": {
        "num_points": 12544,
        "oversample_ratio": 3.0,
        "importance_sample_ratio": 0.75,
        "assigner": {
            "type": "HungarianAssigner",
            "match_costs": [
                {"type": "ClassificationCost", "weight": 2.0},
                {
                    "type": "CrossEntropyLossCost",
                    "weight": 5.0,
                    "use_sigmoid": True,
                },
                {
                    "type": "DiceCost",
                    "weight": 5.0,
                    "pred_act": True,
                    "eps": 1.0,
                },
            ],
        },
        "sampler": {"type": "MaskPseudoSampler"},
    },
    "test_cfg": {
        "panoptic_on": True,
        # For now, the dataset does not support
        # evaluating semantic segmentation metric.
        "semantic_on": False,
        "instance_on": True,
        # max_per_image is for instance segmentation.
        "max_per_image": 100,
        "iou_thr": 0.8,
        # In Mask2Former's panoptic postprocessing,
        # it will filter mask area where score is less than 0.5 .
        "filter_low_score": True,
    },
    "init_cfg": None,
}

# dataset settings
# In this file `data_root` is only used to build the evaluator annotation
# paths further down.
data_root = "data/coco/"

# Training pipeline; `{{_base_.backend_args}}` reuses the value from the base
# config.
train_pipeline = [
    {
        "type": "LoadImageFromFile",
        "to_float32": True,
        "backend_args": {{_base_.backend_args}},
    },
    {
        "type": "LoadPanopticAnnotations",
        "with_bbox": True,
        "with_mask": True,
        "with_seg": True,
        "backend_args": {{_base_.backend_args}},
    },
    {"type": "RandomFlip", "prob": 0.5},
    # large scale jittering
    {
        "type": "RandomResize",
        "scale": image_size,
        "ratio_range": (0.1, 2.0),
        "keep_ratio": True,
    },
    {
        "type": "RandomCrop",
        "crop_size": image_size,
        "crop_type": "absolute",
        "recompute_bbox": True,
        "allow_negative_crop": True,
    },
    {"type": "PackDetInputs"},
]

# Only the pipeline of the inherited train dataloader is replaced here.
train_dataloader = {"dataset": {"pipeline": train_pipeline}}

# Two evaluators: a panoptic metric and a COCO bbox/segm metric.
val_evaluator = [
    {
        "type": "CocoPanopticMetric",
        "ann_file": data_root + "annotations/panoptic_val2017.json",
        "seg_prefix": data_root + "annotations/panoptic_val2017/",
        "backend_args": {{_base_.backend_args}},
    },
    {
        "type": "CocoMetric",
        "ann_file": data_root + "annotations/instances_val2017.json",
        "metric": ["bbox", "segm"],
        "backend_args": {{_base_.backend_args}},
    },
]
# Testing reuses the validation evaluators unchanged (same object).
test_evaluator = val_evaluator

# optimizer
# Shared multipliers for the embedding parameters: unchanged learning rate,
# zero weight-decay multiplier.
embed_multi = {"lr_mult": 1.0, "decay_mult": 0.0}

optim_wrapper = {
    "type": "OptimWrapper",
    "optimizer": {
        "type": "AdamW",
        "lr": 0.0001,
        "weight_decay": 0.05,
        "eps": 1e-8,
        "betas": (0.9, 0.999),
    },
    "paramwise_cfg": {
        # Per-key overrides; the three embedding keys share `embed_multi`.
        "custom_keys": {
            "backbone": {"lr_mult": 0.1, "decay_mult": 1.0},
            "query_embed": embed_multi,
            "query_feat": embed_multi,
            "level_embed": embed_multi,
        },
        "norm_decay_mult": 0.0,
    },
    "clip_grad": {"max_norm": 0.01, "norm_type": 2},
}

# learning policy
max_iters = 368750
param_scheduler = {
    "type": "MultiStepLR",
    "begin": 0,
    "end": max_iters,
    "by_epoch": False,
    "milestones": [327778, 355092],
    "gamma": 0.1,
}

# Up to and including iteration 365000, evaluation runs every 5000 iterations.
# From iteration 365001 onwards the evaluation interval becomes 368750
# iterations, which means evaluation only runs once more, at the end of
# training.
interval = 5000
dynamic_intervals = [(max_iters - max_iters % interval + 1, max_iters)]
train_cfg = {
    "type": "IterBasedTrainLoop",
    "max_iters": max_iters,
    "val_interval": interval,
    "dynamic_intervals": dynamic_intervals,
}
val_cfg = {"type": "ValLoop"}
test_cfg = {"type": "TestLoop"}

# Checkpoints are written on the same iteration interval as evaluation.
default_hooks = {
    "checkpoint": {
        "type": "CheckpointHook",
        "by_epoch": False,
        "save_last": True,
        "max_keep_ckpts": 3,
        "interval": interval,
    }
}
log_processor = {"type": "LogProcessor", "window_size": 50, "by_epoch": False}

# Default setting for scaling LR automatically
# - `enable` means enable scaling LR automatically
# or not by default.
# - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
auto_scale_lr = {"enable": False, "base_batch_size": 16}
Loading