Commit ccda3790 authored by Yuxin Wu's avatar Yuxin Wu

[MaskRCNN] MaskRCNN head with GN

parent a50f2952
......@@ -63,8 +63,8 @@ _C.DATA.CLASS_NAMES = [] # NUM_CLASS strings. Needs to be populated later by da
_C.BACKBONE.WEIGHTS = '' # /path/to/weights.npz
_C.BACKBONE.RESNET_NUM_BLOCK = [3, 4, 6, 3] # for resnet50
# RESNET_NUM_BLOCK = [3, 4, 23, 3] # for resnet101
_C.BACKBONE.FREEZE_AFFINE = False # do not train affine parameters inside BN
_C.BACKBONE.NORM = 'FreezeBN' # options: FreezeBN, SyncBN
_C.BACKBONE.FREEZE_AFFINE = False # do not train affine parameters inside norm layers
_C.BACKBONE.NORM = 'FreezeBN' # options: FreezeBN, SyncBN, GN
# Use a base model with TF-preferred padding mode,
# which may pad more pixels on right/bottom than top/left.
......@@ -99,15 +99,15 @@ _C.PREPROC.PIXEL_STD = [58.395, 57.12, 57.375]
_C.RPN.ANCHOR_STRIDE = 16
_C.RPN.ANCHOR_SIZES = (32, 64, 128, 256, 512) # sqrtarea of the anchor box
_C.RPN.ANCHOR_RATIOS = (0.5, 1., 2.)
_C.RPN.POSITIVE_ANCHOR_THRES = 0.7
_C.RPN.NEGATIVE_ANCHOR_THRES = 0.3
_C.RPN.POSITIVE_ANCHOR_THRESH = 0.7
_C.RPN.NEGATIVE_ANCHOR_THRESH = 0.3
# rpn training -------------------------
_C.RPN.FG_RATIO = 0.5 # fg ratio among selected RPN anchors
_C.RPN.BATCH_PER_IM = 256 # total (across FPN levels) number of anchors that are marked valid
_C.RPN.MIN_SIZE = 0
_C.RPN.PROPOSAL_NMS_THRESH = 0.7
_C.RPN.CROWD_OVERLAP_THRES = 0.7 # boxes overlapping crowd will be ignored.
_C.RPN.CROWD_OVERLAP_THRESH = 0.7 # boxes overlapping crowd will be ignored.
_C.RPN.HEAD_DIM = 1024 # used in C4 only
# RPN proposal selection -------------------------------
......@@ -134,9 +134,11 @@ _C.FPN.NUM_CHANNEL = 256
# conv head and fc head are only used in FPN.
# For C4 models, the head is C5
_C.FPN.FRCNN_HEAD_FUNC = 'fastrcnn_2fc_head'
# choices: fastrcnn_2fc_head, fastrcnn_4conv1fc_head, fastrcnn_4conv1fc_gn_head
# choices: fastrcnn_2fc_head, fastrcnn_4conv1fc_{,gn_}head
_C.FPN.FRCNN_CONV_HEAD_DIM = 256
_C.FPN.FRCNN_FC_HEAD_DIM = 1024
_C.FPN.MRCNN_HEAD_FUNC = 'maskrcnn_up4conv_head'
# choices: maskrcnn_up4conv_{,gn_}head
# Mask-RCNN
_C.MRCNN.HEAD_DIM = 256
......@@ -168,6 +170,7 @@ def finalize_configs(is_training):
_C.PREPROC.MAX_SIZE = np.ceil(_C.PREPROC.MAX_SIZE / size_mult) * size_mult
assert _C.FPN.PROPOSAL_MODE in ['Level', 'Joint']
assert _C.FPN.FRCNN_HEAD_FUNC.endswith('_head')
assert _C.FPN.MRCNN_HEAD_FUNC.endswith('_head')
if is_training:
os.environ['TF_AUTOTUNE_THRESHOLD'] = '1'
......
......@@ -128,8 +128,8 @@ def get_anchor_labels(anchors, gt_boxes, crowd_boxes):
# the order of setting neg/pos labels matters
anchor_labels[anchors_with_max_iou_per_gt] = 1
anchor_labels[ious_max_per_anchor >= cfg.RPN.POSITIVE_ANCHOR_THRES] = 1
anchor_labels[ious_max_per_anchor < cfg.RPN.NEGATIVE_ANCHOR_THRES] = 0
anchor_labels[ious_max_per_anchor >= cfg.RPN.POSITIVE_ANCHOR_THRESH] = 1
anchor_labels[ious_max_per_anchor < cfg.RPN.NEGATIVE_ANCHOR_THRESH] = 0
# We can label all non-ignore candidate boxes which overlap crowd as ignore
# But detectron did not do this.
......@@ -137,7 +137,7 @@ def get_anchor_labels(anchors, gt_boxes, crowd_boxes):
# cand_inds = np.where(anchor_labels >= 0)[0]
# cand_anchors = anchors[cand_inds]
# ious = np_iou(cand_anchors, crowd_boxes)
# overlap_with_crowd = cand_inds[ious.max(axis=1) > cfg.RPN.CROWD_OVERLAP_THRES]
# overlap_with_crowd = cand_inds[ious.max(axis=1) > cfg.RPN.CROWD_OVERLAP_THRESH]
# anchor_labels[overlap_with_crowd] = -1
# Subsample fg labels: ignore some fg if fg is too many
......
......@@ -247,6 +247,7 @@ def fastrcnn_Xconv1fc_head(feature, num_classes, num_convs, norm=None):
Returns:
cls_logits (Nxnum_class), reg_logits (Nx num_class-1 x 4)
"""
assert norm in [None, 'GN'], norm
l = feature
with argscope(Conv2D, data_format='channels_first',
kernel_initializer=tf.variance_scaling_initializer(
......
......@@ -8,32 +8,10 @@ from tensorpack.models import (
from tensorpack.tfutils.scope_utils import under_name_scope
from tensorpack.tfutils.summary import add_moving_summary
from basemodel import GroupNorm
from config import config as cfg
@layer_register(log_shape=True)
def maskrcnn_upXconv_head(feature, num_category, num_convs):
    """
    Mask head built from `num_convs` ReLU-activated Conv2D layers, followed by
    one stride-2 Conv2DTranspose and a final Conv2D with `num_category` outputs.

    Args:
        feature (NxCx s x s): size is 7 in C4 models and 14 in FPN models.
        num_category(int):
        num_convs (int): number of convolution layers

    Returns:
        mask_logits (N x num_category x 2s x 2s):
    """
    # c2's MSRAFill is fan_out
    msra_init = tf.variance_scaling_initializer(
        scale=2.0, mode='fan_out', distribution='normal')

    net = feature
    # Every layer below shares the channels_first layout and the initializer.
    with argscope([Conv2D, Conv2DTranspose],
                  data_format='channels_first',
                  kernel_initializer=msra_init):
        for idx in range(num_convs):
            net = Conv2D('fcn{}'.format(idx), net, cfg.MRCNN.HEAD_DIM, 3,
                         activation=tf.nn.relu)
        net = Conv2DTranspose('deconv', net, cfg.MRCNN.HEAD_DIM, 2,
                              strides=2, activation=tf.nn.relu)
        mask_logits = Conv2D('conv', net, num_category, 1)
    return mask_logits
@under_name_scope()
def maskrcnn_loss(mask_logits, fg_labels, fg_target_masks):
"""
......@@ -71,3 +49,38 @@ def maskrcnn_loss(mask_logits, fg_labels, fg_target_masks):
add_moving_summary(loss, accuracy, fg_pixel_ratio, pos_accuracy)
return loss
@layer_register(log_shape=True)
def maskrcnn_upXconv_head(feature, num_category, num_convs, norm=None):
    """
    Mask head built from `num_convs` ReLU-activated Conv2D layers (each
    optionally followed by GroupNorm), then one stride-2 Conv2DTranspose and a
    final Conv2D with `num_category` outputs.

    Args:
        feature (NxCx s x s): size is 7 in C4 models and 14 in FPN models.
        num_category(int):
        num_convs (int): number of convolution layers
        norm (str or None): either None or 'GN'

    Returns:
        mask_logits (N x num_category x 2s x 2s):
    """
    assert norm in [None, 'GN'], norm
    use_gn = norm is not None

    # c2's MSRAFill is fan_out
    msra_init = tf.variance_scaling_initializer(
        scale=2.0, mode='fan_out', distribution='normal')

    net = feature
    # Every conv/deconv layer below shares the channels_first layout and the initializer.
    with argscope([Conv2D, Conv2DTranspose],
                  data_format='channels_first',
                  kernel_initializer=msra_init):
        for idx in range(num_convs):
            net = Conv2D('fcn{}'.format(idx), net, cfg.MRCNN.HEAD_DIM, 3,
                         activation=tf.nn.relu)
            if use_gn:
                # GroupNorm is applied to the output of the ReLU-activated conv.
                net = GroupNorm('gn{}'.format(idx), net)
        net = Conv2DTranspose('deconv', net, cfg.MRCNN.HEAD_DIM, 2,
                              strides=2, activation=tf.nn.relu)
        mask_logits = Conv2D('conv', net, num_category, 1)
    return mask_logits
def maskrcnn_up4conv_head(*args, **kwargs):
    """
    Mask head with 4 conv layers and no normalization.

    All arguments are forwarded unchanged to `maskrcnn_upXconv_head`,
    with `num_convs` fixed to 4.
    """
    mask_logits = maskrcnn_upXconv_head(*args, num_convs=4, **kwargs)
    return mask_logits
def maskrcnn_up4conv_gn_head(*args, **kwargs):
    """
    Mask head with 4 conv layers, using GroupNorm.

    All arguments are forwarded unchanged to `maskrcnn_upXconv_head`,
    with `num_convs` fixed to 4 and `norm` fixed to 'GN'.
    """
    mask_logits = maskrcnn_upXconv_head(*args, num_convs=4, norm='GN', **kwargs)
    return mask_logits
......@@ -31,6 +31,7 @@ from basemodel import (
resnet_fpn_backbone)
import model_frcnn
import model_mrcnn
from model_frcnn import (
sample_fast_rcnn_targets,
fastrcnn_outputs, fastrcnn_losses, fastrcnn_predictions)
......@@ -357,8 +358,9 @@ class ResNetFPNModel(DetectionModel):
roi_feature_maskrcnn = multilevel_roi_align(
p23456[:4], fg_sampled_boxes, 14,
name_scope='multilevel_roi_align_mask')
mask_logits = maskrcnn_upXconv_head(
'maskrcnn', roi_feature_maskrcnn, cfg.DATA.NUM_CATEGORY, 4) # #fg x #cat x 28 x 28
maskrcnn_head_func = getattr(model_mrcnn, cfg.FPN.MRCNN_HEAD_FUNC)
mask_logits = maskrcnn_head_func(
'maskrcnn', roi_feature_maskrcnn, cfg.DATA.NUM_CATEGORY) # #fg x #cat x 28 x 28
target_masks_for_fg = crop_and_resize(
tf.expand_dims(gt_masks, 1),
......@@ -386,8 +388,9 @@ class ResNetFPNModel(DetectionModel):
if cfg.MODE_MASK:
# Cascade inference needs roi transform with refined boxes.
roi_feature_maskrcnn = multilevel_roi_align(p23456[:4], final_boxes, 14)
mask_logits = maskrcnn_upXconv_head(
'maskrcnn', roi_feature_maskrcnn, cfg.DATA.NUM_CATEGORY, 4) # #fg x #cat x 28 x 28
maskrcnn_head_func = getattr(model_mrcnn, cfg.FPN.MRCNN_HEAD_FUNC)
mask_logits = maskrcnn_head_func(
'maskrcnn', roi_feature_maskrcnn, cfg.DATA.NUM_CATEGORY) # #fg x #cat x 28 x 28
indices = tf.stack([tf.range(tf.size(final_labels)), tf.to_int32(final_labels) - 1], axis=1)
final_mask_logits = tf.gather_nd(mask_logits, indices) # #resultx28x28
tf.sigmoid(final_mask_logits, name='final_masks')
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment