[MaskRCNN] Slice FPN p2-4; Set config from cmdline

e9fa7eb6 · Yuxin Wu · c0ae913e · e9fa7eb6 · e9fa7eb6 · e9fa7eb6
Commit e9fa7eb6 authored Jun 24, 2018 by Yuxin Wu
5 changed files
--- a/examples/FasterRCNN/README.md
+++ b/examples/FasterRCNN/README.md
@@ -57,7 +57,6 @@ MaskRCNN results contain both bbox and segm mAP.
 |Backbone|`FASTRCNN_BATCH`|resolution |schedule|mAP (bbox/segm)|Time          |
 |   -    |    -           |    -      |   -    |   -           |   -          |
 |R50-C4  |64              |(600, 1024)|280k    |33.1           |18h on 8 V100s|
-|R50-C4  |512             |(800, 1333)|280k    |35.6           |55h on 8 P100s|
 |R50-C4  |512             |(800, 1333)|360k    |36.6           |49h on 8 V100s|
 |R50-FPN |512             |(800, 1333)|360k    |37.5           |28h on 8 V100s|
 |R50-C4  |256             |(800, 1333)|280k    |36.8/32.1      |39h on 8 P100s|
@@ -69,7 +68,6 @@ MaskRCNN results contain both bbox and segm mAP.
 The two R50-C4 360k models have the same configuration __and mAP__
 as the `R50-C4-2x` entries in
 [Detectron Model Zoo](https://github.com/facebookresearch/Detectron/blob/master/MODEL_ZOO.md#end-to-end-faster--mask-r-cnn-baselines).
-<!-- So far this is the only public TensorFlow implementation that can reproduce mAP in Detectron. -->
 The other models listed here do not correspond to any configurations in Detectron.
 ## Notes

--- a/examples/FasterRCNN/basemodel.py
+++ b/examples/FasterRCNN/basemodel.py
@@ -146,4 +146,5 @@ def resnet_fpn_backbone(image, num_blocks, freeze_c2=True):
        c4 = resnet_group('group2', c3, resnet_bottleneck, 256, num_blocks[2], 2)
        c5 = resnet_group('group3', c4, resnet_bottleneck, 512, num_blocks[3], 2)
    # 32x downsampling up to now
+    # size of c5: ceil(input/32)
    return c2, c3, c4, c5
--- a/examples/FasterRCNN/common.py
+++ b/examples/FasterRCNN/common.py
@@ -2,6 +2,7 @@
 # File: common.py
 import numpy as np
+import six
 import cv2
 from tensorpack.dataflow import RNGDataFlow
@@ -139,6 +140,16 @@ def filter_boxes_inside_shape(boxes, shape):
    return indices, boxes[indices, :]
+def write_config_from_args(configs):
+    """
+    Overwrite attributes of the ``config`` module from ``KEY=VALUE`` strings.
+
+    Args:
+        configs (list[str] or None): items such as ``"BASEDIR=/path/to/COCO"``.
+            ``None`` or an empty list leaves ``config`` untouched.
+
+    If the existing attribute is a string, the new value is stored as-is.
+    Otherwise the value text is evaluated as a Python expression.
+
+    Raises:
+        ValueError: if an item contains no ``=``.
+        AssertionError: if the key is not an existing attribute of ``config``.
+    """
+    # argparse yields None (not an empty list) when the option is omitted.
+    for cfg in configs or []:
+        if '=' not in cfg:
+            raise ValueError("Expected KEY=VALUE, got: {}".format(cfg))
+        # maxsplit is passed positionally: Python 2's str.split takes no keywords.
+        k, v = cfg.split('=', 1)
+        assert k in dir(config), "Unknown config key: {}".format(k)
+        oldv = getattr(config, k)
+        # string_types also matches Python 2 `str`, which text_type does not.
+        if not isinstance(oldv, six.string_types):
+            # NOTE: eval() executes arbitrary code; only use with a trusted command line.
+            v = eval(v)
+        setattr(config, k, v)
 def print_config():
    logger.info("Config: ------------------------------------------")
    for k in dir(config):

--- a/examples/FasterRCNN/config.py
+++ b/examples/FasterRCNN/config.py
@@ -20,9 +20,11 @@ CLASS_NAMES = []  # NUM_CLASS strings. Needs to be populated later by data loade
 RESNET_NUM_BLOCK = [3, 4, 6, 3]     # for resnet50
 # RESNET_NUM_BLOCK = [3, 4, 23, 3]    # for resnet101
 FREEZE_AFFINE = False   # do not train affine parameters inside BN
-# Use a base model with TF-preferred pad mode
+# Use a base model with TF-preferred pad mode,
 # which may pad more pixels on right/bottom than top/left.
-# This is probably not good for alignment but we'll have to live with it.
+# TF_PAD_MODE=False is better for performance but will require a different base model.
+# See https://github.com/tensorflow/tensorflow/issues/18213
 TF_PAD_MODE = True
 # schedule -----------------------
@@ -41,7 +43,7 @@ MAX_SIZE = 1333
 # anchors -------------------------
 ANCHOR_STRIDE = 16
 ANCHOR_STRIDES_FPN = (4, 8, 16, 32, 64)  # strides for each FPN level. Must be the same length as ANCHOR_SIZES
-FPN_RESOLUTION_REQUIREMENT = 32    # image size into the backbone has to be multiple of this number
+FPN_RESOLUTION_REQUIREMENT = 32          # image size into the backbone has to be multiple of this number
 ANCHOR_SIZES = (32, 64, 128, 256, 512)   # sqrtarea of the anchor box
 ANCHOR_RATIOS = (0.5, 1., 2.)
 NUM_ANCHOR = len(ANCHOR_SIZES) * len(ANCHOR_RATIOS)

--- a/examples/FasterRCNN/train.py
+++ b/examples/FasterRCNN/train.py
@@ -45,7 +45,7 @@ from data import (
 from viz import (
    draw_annotation, draw_proposal_recall,
    draw_predictions, draw_final_outputs)
-from common import print_config
+from common import print_config, write_config_from_args
 from eval import (
    eval_coco, detect_one_image, print_evaluation_scores, DetectionResult)
 import config
@@ -332,6 +332,14 @@ class ResNetFPNModel(DetectionModel):
        c2345 = resnet_fpn_backbone(image, config.RESNET_NUM_BLOCK)
        p23456 = fpn_model('fpn', c2345)
+        # images are padded for p5, which are too large for p2-p4
+        for i, stride in enumerate(config.ANCHOR_STRIDES_FPN[:3]):
+            pi = p23456[i]
+            target_shape = tf.to_int32(tf.ceil(tf.to_float(image_shape2d) * (1.0 / stride)))
+            p23456[i] = tf.slice(pi, [0, 0, 0, 0],
+                                 tf.concat([[-1, -1], target_shape], axis=0))
+            p23456[i].set_shape([1, pi.shape[1], None, None])
        # Multi-Level RPN Proposals
        multilevel_proposals = []
        rpn_loss_collection = []
@@ -429,8 +437,7 @@ class ResNetFPNModel(DetectionModel):
                image_shape2d, rcnn_boxes, fastrcnn_label_logits, fastrcnn_box_logits)
            if config.MODE_MASK:
                # Cascade inference needs roi transform with refined boxes.
-                roi_feature_maskrcnn = multilevel_roi_align(
+                roi_feature_maskrcnn = multilevel_roi_align(p23456[:4], final_boxes, 14)
-                    p23456[:4], final_boxes, 14)
                mask_logits = maskrcnn_upXconv_head(
                    'maskrcnn', roi_feature_maskrcnn, config.NUM_CLASS, 4)   # #fg x #cat x 28 x 28
                indices = tf.stack([tf.range(tf.size(final_labels)), tf.to_int32(final_labels) - 1], axis=1)
@@ -560,15 +567,16 @@ if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--load', help='load a model for evaluation or training')
    parser.add_argument('--logdir', help='log directory', default='train_log/maskrcnn')
-    parser.add_argument('--datadir', help='override config.BASEDIR')
    parser.add_argument('--visualize', action='store_true', help='visualize intermediate results')
    parser.add_argument('--evaluate', help="Run evaluation on COCO. "
                                           "This argument is the path to the output json evaluation file")
    parser.add_argument('--predict', help="Run prediction on a given image. "
                                          "This argument is the path to the input image file")
+    parser.add_argument('--config', help="A list of key=value to overwrite those defined in config.py",
+                        nargs='+')
    args = parser.parse_args()
-    if args.datadir:
+    write_config_from_args(args.config)
-        config.BASEDIR = args.datadir
    if args.visualize or args.evaluate or args.predict:
        # autotune is too slow for inference
@@ -619,8 +627,8 @@ if __name__ == '__main__':
            mult = 0.1 ** (idx + 1)
            lr_schedule.append(
                (steps * factor // stepnum, config.BASE_LR * mult))
-        logger.info("Warmup Up Schedule: " + str(warmup_schedule))
+        logger.info("Warm Up Schedule (steps, value): " + str(warmup_schedule))
-        logger.info("LR Schedule: " + str(lr_schedule))
+        logger.info("LR Schedule (epochs, value): " + str(lr_schedule))
        callbacks = [
            PeriodicCallback(