[FasterRCNN] add fpn feature layers

7d0152ca · Yuxin Wu · aa96b7ca · 7d0152ca · 7d0152ca · 7d0152ca
Commit 7d0152ca authored May 12, 2018 by Yuxin Wu
5 changed files
--- a/examples/FasterRCNN/basemodel.py
+++ b/examples/FasterRCNN/basemodel.py
@@ -92,7 +92,7 @@ def resnet_group(l, name, block_func, features, count, stride):
    return l
-def pretrained_resnet_conv4(image, num_blocks, freeze_c2=True):
+def pretrained_resnet_c4_backbone(image, num_blocks, freeze_c2=True):
    assert len(num_blocks) == 3
    with resnet_argscope():
        l = tf.pad(image, [[0, 0], [0, 0], [2, 3], [2, 3]])
@@ -114,3 +114,20 @@ def resnet_conv5(image, num_block):
    with resnet_argscope():
        l = resnet_group(image, 'group3', resnet_bottleneck, 512, num_block, 2)
        return l
+def pretrained_resnet_fpn_backbone(image, num_blocks, freeze_c2=True):
+    assert len(num_blocks) == 4
+    with resnet_argscope():
+        l = tf.pad(image, [[0, 0], [0, 0], [2, 3], [2, 3]])
+        l = Conv2D('conv0', l, 64, 7, strides=2, activation=BNReLU, padding='VALID')
+        l = tf.pad(l, [[0, 0], [0, 0], [0, 1], [0, 1]])
+        l = MaxPooling('pool0', l, 3, strides=2, padding='VALID')
+        c2 = resnet_group(l, 'group0', resnet_bottleneck, 64, num_blocks[0], 1)
+        if freeze_c2:
+            c2 = tf.stop_gradient(c2)
+        c3 = resnet_group(c2, 'group1', resnet_bottleneck, 128, num_blocks[1], 2)
+        c4 = resnet_group(c3, 'group2', resnet_bottleneck, 256, num_blocks[2], 2)
+        c5 = resnet_group(c4, 'group3', resnet_bottleneck, 512, num_blocks[3], 2)
+    # 32x downsampling up to now
+    return c2, c3, c4, c5
--- a/examples/FasterRCNN/config.py
+++ b/examples/FasterRCNN/config.py
@@ -71,3 +71,5 @@ RESULTS_PER_IM = 100
 # TODO Not Functioning. Don't USE
 MODE_FPN = False
+FPN_NUM_CHANNEL = 256
+FPN_SIZE_REQUIREMENT = 32
--- a/examples/FasterRCNN/data.py
+++ b/examples/FasterRCNN/data.py
@@ -312,6 +312,7 @@ def get_train_dataflow(add_mask=False):
            return None
        ret = [im] + list(anchor_inputs) + [boxes, klass]
+        # TODO pad im when FPN
        if add_mask:
            # augmentation will modify the polys in-place

--- a/examples/FasterRCNN/model.py
+++ b/examples/FasterRCNN/model.py
@@ -7,7 +7,8 @@ from tensorpack.tfutils.summary import add_moving_summary
 from tensorpack.tfutils.argscope import argscope
 from tensorpack.tfutils.scope_utils import under_name_scope
 from tensorpack.models import (
-    Conv2D, FullyConnected, GlobalAvgPooling, layer_register, Conv2DTranspose)
+    Conv2D, FullyConnected, GlobalAvgPooling, MaxPooling,
+    layer_register, Conv2DTranspose, FixedUnPooling)
 from utils.box_ops import pairwise_iou
 import config
@@ -554,6 +555,32 @@ def maskrcnn_loss(mask_logits, fg_labels, fg_target_masks):
    return loss
+def fpn_model(features):
+    assert len(features) == 4, features
+    num_channel = config.FPN_NUM_CHANNEL
+    def upsample2x(x):
+        # TODO may not be optimal in speed or math
+        return FixedUnPooling(x, 2, data_format='channels_first')
+    with argscope(Conv2D, data_format='channels_first',
+                  nl=tf.identity, use_bias=True,
+                  kernel_initializer=tf.variance_scaling_initializer(scale=1.)):
+        lat_2345 = [Conv2D('lateral_1x1_c{}'.format(i + 2), c, num_channel, 1)
+                    for i, c in enumerate(features)]
+        lat_sum_5432 = []
+        for idx, lat in enumerate(lat_2345[::-1]):
+            if idx == 0:
+                lat_sum_5432.append(lat)
+            else:
+                lat = lat + upsample2x(lat_sum_5432[-1])
+                lat_sum_5432.append(lat)
+        p2345 = [Conv2D('fpn_3x3_p{}'.format(i + 2), c, num_channel, 3)
+                 for i, c in enumerate(lat_sum_5432[::-1])]
+        p6 = MaxPooling('maxpool_p6', p2345[-1], pool_size=1, strides=2)
+        return p2345 + [p6]
 if __name__ == '__main__':
    """
    Demonstrate what's wrong with tf.image.crop_and_resize:

--- a/examples/FasterRCNN/train.py
+++ b/examples/FasterRCNN/train.py
@@ -17,6 +17,7 @@ assert six.PY3, "FasterRCNN requires Python 3!"
 from tensorpack import *
 from tensorpack.tfutils.summary import add_moving_summary
+from tensorpack.tfutils.scope_utils import under_name_scope
 from tensorpack.tfutils import optimizer
 import tensorpack.utils.viz as tpviz
 from tensorpack.utils.gpu import get_nr_gpu
@@ -24,7 +25,7 @@ from tensorpack.utils.gpu import get_nr_gpu
 from coco import COCODetection
 from basemodel import (
-    image_preprocess, pretrained_resnet_conv4, resnet_conv5)
+    image_preprocess, pretrained_resnet_c4_backbone, resnet_conv5)
 from model import (
    clip_boxes, decode_bbox_target, encode_bbox_target, crop_and_resize,
    rpn_head, rpn_losses,
@@ -75,18 +76,22 @@ class Model(ModelDesc):
        image = image_preprocess(image, bgr=True)
        return tf.transpose(image, [0, 3, 1, 2])
-    def _get_anchors(self, shape2d):
+    @under_name_scope()
+    def slice_to_featuremap(self, featuremap, anchors, anchor_labels, anchor_boxes):
        """
-        Returns:
+        Args:
-            FSxFSxNAx4 anchors,
+            Slice anchors/anchor_labels/anchor_boxes to the spatial size of this featuremap.
+            anchors (FS x FS x NA x 4):
+            anchor_labels (FS x FS x NA):
+            anchor_boxes (FS x FS x NA x 4):
        """
-        # FSxFSxNAx4 (FS=MAX_SIZE//ANCHOR_STRIDE)
+        shape2d = tf.shape(featuremap)[2:]  # h,w
-        with tf.name_scope('anchors'):
+        slice3d = tf.concat([shape2d, [-1]], axis=0)
-            all_anchors = tf.constant(get_all_anchors(), name='all_anchors', dtype=tf.float32)
+        slice4d = tf.concat([shape2d, [-1, -1]], axis=0)
-            fm_anchors = tf.slice(
+        anchors = tf.slice(anchors, [0, 0, 0, 0], slice4d)
-                all_anchors, [0, 0, 0, 0], tf.stack([
+        anchor_labels = tf.slice(anchor_labels, [0, 0, 0], slice3d)
-                    shape2d[0], shape2d[1], -1, -1]), name='fm_anchors')
+        anchor_boxes = tf.slice(anchor_boxes, [0, 0, 0, 0], slice4d)
-            return fm_anchors
+        return anchors, anchor_labels, anchor_boxes
    def build_graph(self, *inputs):
        is_training = get_current_tower_context().is_training
@@ -96,19 +101,11 @@ class Model(ModelDesc):
            image, anchor_labels, anchor_boxes, gt_boxes, gt_labels = inputs
        image = self._preprocess(image)     # 1CHW
-        featuremap = pretrained_resnet_conv4(image, config.RESNET_NUM_BLOCK[:3])
+        featuremap = pretrained_resnet_c4_backbone(image, config.RESNET_NUM_BLOCK[:3])
        rpn_label_logits, rpn_box_logits = rpn_head('rpn', featuremap, 1024, config.NUM_ANCHOR)
-        fm_shape = tf.shape(featuremap)[2:]    # h,w
+        fm_anchors, anchor_labels, anchor_boxes = self.slice_to_featuremap(
-        fm_anchors = self._get_anchors(fm_shape)
+            featuremap, get_all_anchors(), anchor_labels, anchor_boxes)
-        anchor_labels = tf.slice(
-            anchor_labels, [0, 0, 0],
-            tf.stack([fm_shape[0], fm_shape[1], -1]),
-            name='sliced_anchor_labels')
-        anchor_boxes = tf.slice(
-            anchor_boxes, [0, 0, 0, 0],
-            tf.stack([fm_shape[0], fm_shape[1], -1, -1]),
-            name='sliced_anchor_boxes')
        anchor_boxes_encoded = encode_bbox_target(anchor_boxes, fm_anchors)
        image_shape2d = tf.shape(image)[2:]     # h,w