[MaskRCNN] MaskRCNN head with GN

ccda3790 · Yuxin Wu · a50f2952 · ccda3790 · ccda3790 · ccda3790
Commit ccda3790 authored Jul 09, 2018 by Yuxin Wu
5 changed files
--- a/examples/FasterRCNN/config.py
+++ b/examples/FasterRCNN/config.py
@@ -63,8 +63,8 @@ _C.DATA.CLASS_NAMES = []  # NUM_CLASS strings. Needs to be populated later by da
 _C.BACKBONE.WEIGHTS = ''   # /path/to/weights.npz
 _C.BACKBONE.RESNET_NUM_BLOCK = [3, 4, 6, 3]     # for resnet50
 # RESNET_NUM_BLOCK = [3, 4, 23, 3]    # for resnet101
-_C.BACKBONE.FREEZE_AFFINE = False   # do not train affine parameters inside BN
+_C.BACKBONE.FREEZE_AFFINE = False   # whether to freeze (i.e. not train) the affine parameters inside norm layers
-_C.BACKBONE.NORM = 'FreezeBN'  # options: FreezeBN, SyncBN
+_C.BACKBONE.NORM = 'FreezeBN'  # options: FreezeBN, SyncBN, GN
 # Use a base model with TF-preferred padding mode,
 # which may pad more pixels on right/bottom than top/left.
@@ -99,15 +99,15 @@ _C.PREPROC.PIXEL_STD = [58.395, 57.12, 57.375]
 _C.RPN.ANCHOR_STRIDE = 16
 _C.RPN.ANCHOR_SIZES = (32, 64, 128, 256, 512)   # sqrtarea of the anchor box
 _C.RPN.ANCHOR_RATIOS = (0.5, 1., 2.)
-_C.RPN.POSITIVE_ANCHOR_THRES = 0.7
+_C.RPN.POSITIVE_ANCHOR_THRESH = 0.7
-_C.RPN.NEGATIVE_ANCHOR_THRES = 0.3
+_C.RPN.NEGATIVE_ANCHOR_THRESH = 0.3
 # rpn training -------------------------
 _C.RPN.FG_RATIO = 0.5  # fg ratio among selected RPN anchors
 _C.RPN.BATCH_PER_IM = 256  # total (across FPN levels) number of anchors that are marked valid
 _C.RPN.MIN_SIZE = 0
 _C.RPN.PROPOSAL_NMS_THRESH = 0.7
-_C.RPN.CROWD_OVERLAP_THRES = 0.7  # boxes overlapping crowd will be ignored.
+_C.RPN.CROWD_OVERLAP_THRESH = 0.7  # boxes overlapping crowd will be ignored.
 _C.RPN.HEAD_DIM = 1024      # used in C4 only
 # RPN proposal selection -------------------------------
@@ -134,9 +134,11 @@ _C.FPN.NUM_CHANNEL = 256
 # conv head and fc head are only used in FPN.
 # For C4 models, the head is C5
 _C.FPN.FRCNN_HEAD_FUNC = 'fastrcnn_2fc_head'
-# choices: fastrcnn_2fc_head, fastrcnn_4conv1fc_head, fastrcnn_4conv1fc_gn_head
+# choices: fastrcnn_2fc_head, fastrcnn_4conv1fc_{,gn_}head
 _C.FPN.FRCNN_CONV_HEAD_DIM = 256
 _C.FPN.FRCNN_FC_HEAD_DIM = 1024
+_C.FPN.MRCNN_HEAD_FUNC = 'maskrcnn_up4conv_head'
+# choices: maskrcnn_up4conv_{,gn_}head
 # Mask-RCNN
 _C.MRCNN.HEAD_DIM = 256
@@ -168,6 +170,7 @@ def finalize_configs(is_training):
        _C.PREPROC.MAX_SIZE = np.ceil(_C.PREPROC.MAX_SIZE / size_mult) * size_mult
        assert _C.FPN.PROPOSAL_MODE in ['Level', 'Joint']
        assert _C.FPN.FRCNN_HEAD_FUNC.endswith('_head')
+        assert _C.FPN.MRCNN_HEAD_FUNC.endswith('_head')
    if is_training:
        os.environ['TF_AUTOTUNE_THRESHOLD'] = '1'

--- a/examples/FasterRCNN/data.py
+++ b/examples/FasterRCNN/data.py
@@ -128,8 +128,8 @@ def get_anchor_labels(anchors, gt_boxes, crowd_boxes):
    # the order of setting neg/pos labels matter
    anchor_labels[anchors_with_max_iou_per_gt] = 1
-    anchor_labels[ious_max_per_anchor >= cfg.RPN.POSITIVE_ANCHOR_THRES] = 1
+    anchor_labels[ious_max_per_anchor >= cfg.RPN.POSITIVE_ANCHOR_THRESH] = 1
-    anchor_labels[ious_max_per_anchor < cfg.RPN.NEGATIVE_ANCHOR_THRES] = 0
+    anchor_labels[ious_max_per_anchor < cfg.RPN.NEGATIVE_ANCHOR_THRESH] = 0
    # We can label all non-ignore candidate boxes which overlap crowd as ignore
    # But detectron did not do this.
@@ -137,7 +137,7 @@ def get_anchor_labels(anchors, gt_boxes, crowd_boxes):
    #     cand_inds = np.where(anchor_labels >= 0)[0]
    #     cand_anchors = anchors[cand_inds]
    #     ious = np_iou(cand_anchors, crowd_boxes)
-    #     overlap_with_crowd = cand_inds[ious.max(axis=1) > cfg.RPN.CROWD_OVERLAP_THRES]
+    #     overlap_with_crowd = cand_inds[ious.max(axis=1) > cfg.RPN.CROWD_OVERLAP_THRESH]
    #     anchor_labels[overlap_with_crowd] = -1
    # Subsample fg labels: ignore some fg if fg is too many

--- a/examples/FasterRCNN/model_frcnn.py
+++ b/examples/FasterRCNN/model_frcnn.py
@@ -247,6 +247,7 @@ def fastrcnn_Xconv1fc_head(feature, num_classes, num_convs, norm=None):
    Returns:
        cls_logits (Nxnum_class), reg_logits (Nx num_class-1 x 4)
    """
+    assert norm in [None, 'GN'], norm
    l = feature
    with argscope(Conv2D, data_format='channels_first',
                  kernel_initializer=tf.variance_scaling_initializer(

--- a/examples/FasterRCNN/model_mrcnn.py
+++ b/examples/FasterRCNN/model_mrcnn.py
@@ -8,32 +8,10 @@ from tensorpack.models import (
 from tensorpack.tfutils.scope_utils import under_name_scope
 from tensorpack.tfutils.summary import add_moving_summary
+from basemodel import GroupNorm
 from config import config as cfg
-@layer_register(log_shape=True)
-def maskrcnn_upXconv_head(feature, num_category, num_convs):
-    """
-    Args:
-        feature (NxCx s x s): size is 7 in C4 models and 14 in FPN models.
-        num_category(int):
-        num_convs (int): number of convolution layers
-    Returns:
-        mask_logits (N x num_category x 2s x 2s):
-    """
-    l = feature
-    with argscope([Conv2D, Conv2DTranspose], data_format='channels_first',
-                  kernel_initializer=tf.variance_scaling_initializer(
-                      scale=2.0, mode='fan_out', distribution='normal')):
-        # c2's MSRAFill is fan_out
-        for k in range(num_convs):
-            l = Conv2D('fcn{}'.format(k), l, cfg.MRCNN.HEAD_DIM, 3, activation=tf.nn.relu)
-        l = Conv2DTranspose('deconv', l, cfg.MRCNN.HEAD_DIM, 2, strides=2, activation=tf.nn.relu)
-        l = Conv2D('conv', l, num_category, 1)
-    return l
 @under_name_scope()
 def maskrcnn_loss(mask_logits, fg_labels, fg_target_masks):
    """
@@ -71,3 +49,38 @@ def maskrcnn_loss(mask_logits, fg_labels, fg_target_masks):
    add_moving_summary(loss, accuracy, fg_pixel_ratio, pos_accuracy)
    return loss
+@layer_register(log_shape=True)
+def maskrcnn_upXconv_head(feature, num_category, num_convs, norm=None):
+    """
+    Args:
+        feature (N x C x s x s): the spatial size s is 7 in C4 models and 14 in FPN models.
+        num_category(int):
+        num_convs (int): number of convolution layers
+        norm (str or None): either None or 'GN'
+    Returns:
+        mask_logits (N x num_category x 2s x 2s):
+    """
+    assert norm in [None, 'GN'], norm
+    l = feature
+    with argscope([Conv2D, Conv2DTranspose], data_format='channels_first',
+                  kernel_initializer=tf.variance_scaling_initializer(
+                      scale=2.0, mode='fan_out', distribution='normal')):
+        # mode='fan_out' is used because c2's (Caffe2's) MSRAFill is fan_out
+        for k in range(num_convs):
+            l = Conv2D('fcn{}'.format(k), l, cfg.MRCNN.HEAD_DIM, 3, activation=tf.nn.relu)
+            if norm is not None:
+                l = GroupNorm('gn{}'.format(k), l)
+        l = Conv2DTranspose('deconv', l, cfg.MRCNN.HEAD_DIM, 2, strides=2, activation=tf.nn.relu)
+        l = Conv2D('conv', l, num_category, 1)
+    return l
+def maskrcnn_up4conv_head(*args, **kwargs):
+    return maskrcnn_upXconv_head(*args, num_convs=4, **kwargs)
+def maskrcnn_up4conv_gn_head(*args, **kwargs):
+    return maskrcnn_upXconv_head(*args, num_convs=4, norm='GN', **kwargs)
--- a/examples/FasterRCNN/train.py
+++ b/examples/FasterRCNN/train.py
@@ -31,6 +31,7 @@ from basemodel import (
    resnet_fpn_backbone)
 import model_frcnn
+import model_mrcnn
 from model_frcnn import (
    sample_fast_rcnn_targets,
    fastrcnn_outputs, fastrcnn_losses, fastrcnn_predictions)
@@ -357,8 +358,9 @@ class ResNetFPNModel(DetectionModel):
                roi_feature_maskrcnn = multilevel_roi_align(
                    p23456[:4], fg_sampled_boxes, 14,
                    name_scope='multilevel_roi_align_mask')
-                mask_logits = maskrcnn_upXconv_head(
+                maskrcnn_head_func = getattr(model_mrcnn, cfg.FPN.MRCNN_HEAD_FUNC)
-                    'maskrcnn', roi_feature_maskrcnn, cfg.DATA.NUM_CATEGORY, 4)   # #fg x #cat x 28 x 28
+                mask_logits = maskrcnn_head_func(
+                    'maskrcnn', roi_feature_maskrcnn, cfg.DATA.NUM_CATEGORY)   # #fg x #cat x 28 x 28
                target_masks_for_fg = crop_and_resize(
                    tf.expand_dims(gt_masks, 1),
@@ -386,8 +388,9 @@ class ResNetFPNModel(DetectionModel):
            if cfg.MODE_MASK:
                # Cascade inference needs roi transform with refined boxes.
                roi_feature_maskrcnn = multilevel_roi_align(p23456[:4], final_boxes, 14)
-                mask_logits = maskrcnn_upXconv_head(
+                maskrcnn_head_func = getattr(model_mrcnn, cfg.FPN.MRCNN_HEAD_FUNC)
-                    'maskrcnn', roi_feature_maskrcnn, cfg.DATA.NUM_CATEGORY, 4)   # #fg x #cat x 28 x 28
+                mask_logits = maskrcnn_head_func(
+                    'maskrcnn', roi_feature_maskrcnn, cfg.DATA.NUM_CATEGORY)   # #fg x #cat x 28 x 28
                indices = tf.stack([tf.range(tf.size(final_labels)), tf.to_int32(final_labels) - 1], axis=1)
                final_mask_logits = tf.gather_nd(mask_logits, indices)   # #resultx28x28
                tf.sigmoid(final_mask_logits, name='final_masks')