[MaskRCNN] small inference-time changes

2c6af2d1 · Yuxin Wu · 1aaadca9 · 2c6af2d1 · 2c6af2d1 · 2c6af2d1
Commit 2c6af2d1 authored Aug 25, 2019 by Yuxin Wu
5 changed files
--- a/examples/FasterRCNN/NOTES.md
+++ b/examples/FasterRCNN/NOTES.md
@@ -68,7 +68,7 @@ Efficiency:
 	If all images have the same spatial size (in which case the per-GPU computation is *still different*),
 	then an 85%~90% scaling efficiency is observed when using 8 V100s and `HorovodTrainer`.

-1. This implementation does not use specialized CUDA ops (e.g. AffineChannel, ROIAlign).
+1. This implementation does not use specialized CUDA ops (e.g. NMS, ROIAlign).
   Therefore it might be slower than other highly-optimized implementations.

 1. To reduce RAM usage on host: (1) make sure you're using the "spawn" method as

--- a/examples/FasterRCNN/dataset/coco.py
+++ b/examples/FasterRCNN/dataset/coco.py
@@ -15,10 +15,11 @@ __all__ = ['register_coco']


 class COCODetection(DatasetSplit):
-    # handle the weird (but standard) split of train and val
+    # handle a few special splits whose names do not match the directory names
    _INSTANCE_TO_BASEDIR = {
        'valminusminival2014': 'val2014',
        'minival2014': 'val2014',
+        'val2017_100': 'val2017',
    }

    """
@@ -230,7 +231,7 @@ def register_coco(basedir):
    class_names = ["BG"] + class_names

    for split in ["train2017", "val2017", "train2014", "val2014",
-                  "valminusminival2014", "minival2014", "trainsingle"]:
+                  "valminusminival2014", "minival2014", "val2017_100"]:
        name = "coco_" + split
        DatasetRegistry.register(name, lambda x=split: COCODetection(basedir, x))
        DatasetRegistry.register_metadata(name, 'class_names', class_names)

--- a/examples/FasterRCNN/modeling/model_fpn.py
+++ b/examples/FasterRCNN/modeling/model_fpn.py
@@ -33,17 +33,18 @@ def fpn_model(features):
    use_gn = cfg.FPN.NORM == 'GN'

    def upsample2x(name, x):
-        return FixedUnPooling(
-            name, x, 2, unpool_mat=np.ones((2, 2), dtype='float32'),
-            data_format='channels_first')
-
-        # tf.image.resize is, again, not aligned.
-        # with tf.name_scope(name):
-        #     shape2d = tf.shape(x)[2:]
-        #     x = tf.transpose(x, [0, 2, 3, 1])
-        #     x = tf.image.resize_nearest_neighbor(x, shape2d * 2, align_corners=True)
-        #     x = tf.transpose(x, [0, 3, 1, 2])
-        #     return x
+        try:
+            resize = tf.compat.v2.image.resize_images
+            with tf.name_scope(name):
+                shp2d = tf.shape(x)[2:]
+                x = tf.transpose(x, [0, 2, 3, 1])
+                x = resize(x, shp2d * 2, 'nearest')
+                x = tf.transpose(x, [0, 3, 1, 2])
+                return x
+        except AttributeError:
+            return FixedUnPooling(
+                name, x, 2, unpool_mat=np.ones((2, 2), dtype='float32'),
+                data_format='channels_first')

    with argscope(Conv2D, data_format='channels_first',
                  activation=tf.identity, use_bias=True,

--- a/examples/FasterRCNN/modeling/model_frcnn.py
+++ b/examples/FasterRCNN/modeling/model_frcnn.py
@@ -201,9 +201,10 @@ def fastrcnn_predictions(boxes, scores):
    filtered_scores = tf.gather_nd(scores, filtered_ids)  # F,
    cls_per_box = tf.slice(filtered_ids, [0, 0], [-1, 1])
    offsets = tf.cast(cls_per_box, tf.float32) * (max_coord + 1)  # F,1
+    nms_boxes = filtered_boxes + offsets
    with tf.device('/cpu:0'):
        selection = tf.image.non_max_suppression(
-            filtered_boxes + offsets,
+            nms_boxes,
            filtered_scores,
            cfg.TEST.RESULTS_PER_IM,
            cfg.TEST.FRCNN_NMS_THRESH)

--- a/examples/FasterRCNN/modeling/model_rpn.py
+++ b/examples/FasterRCNN/modeling/model_rpn.py
@@ -130,26 +130,25 @@ def generate_rpn_proposals(boxes, scores, img_shape,
    topk_boxes = tf.gather(boxes, topk_indices)
    topk_boxes = clip_boxes(topk_boxes, img_shape)

-    topk_boxes_x1y1x2y2 = tf.reshape(topk_boxes, (-1, 2, 2))
-    topk_boxes_x1y1, topk_boxes_x2y2 = tf.split(topk_boxes_x1y1x2y2, 2, axis=1)
-    # nx1x2 each
-    wbhb = tf.squeeze(topk_boxes_x2y2 - topk_boxes_x1y1, axis=1)
-    valid = tf.reduce_all(wbhb > cfg.RPN.MIN_SIZE, axis=1)  # n,
-    topk_valid_boxes_x1y1x2y2 = tf.boolean_mask(topk_boxes_x1y1x2y2, valid)
-    topk_valid_scores = tf.boolean_mask(topk_scores, valid)
-
-    # TODO not needed
-    topk_valid_boxes_y1x1y2x2 = tf.reshape(
-        tf.reverse(topk_valid_boxes_x1y1x2y2, axis=[2]),
-        (-1, 4), name='nms_input_boxes')
-    with tf.device('/cpu:0'):
+    if cfg.RPN.MIN_SIZE > 0:
+        topk_boxes_x1y1x2y2 = tf.reshape(topk_boxes, (-1, 2, 2))
+        topk_boxes_x1y1, topk_boxes_x2y2 = tf.split(topk_boxes_x1y1x2y2, 2, axis=1)
+        # nx1x2 each
+        wbhb = tf.squeeze(topk_boxes_x2y2 - topk_boxes_x1y1, axis=1)
+        valid = tf.reduce_all(wbhb > cfg.RPN.MIN_SIZE, axis=1)  # n,
+        topk_valid_boxes = tf.boolean_mask(topk_boxes, valid)
+        topk_valid_scores = tf.boolean_mask(topk_scores, valid)
+    else:
+        topk_valid_boxes = topk_boxes
+        topk_valid_scores = topk_scores
+
+    with tf.device('/cpu:0'):  # TODO try the GPU kernel
        nms_indices = tf.image.non_max_suppression(
-            topk_valid_boxes_y1x1y2x2,
+            topk_valid_boxes,
            topk_valid_scores,
            max_output_size=post_nms_topk,
            iou_threshold=cfg.RPN.PROPOSAL_NMS_THRESH)

-    topk_valid_boxes = tf.reshape(topk_valid_boxes_x1y1x2y2, (-1, 4))
    proposal_boxes = tf.gather(topk_valid_boxes, nms_indices)
    proposal_scores = tf.gather(topk_valid_scores, nms_indices)
    tf.sigmoid(proposal_scores, name='probs')  # for visualization