[MaskRCNN] make accurate_paste optional

82a8953e · Yuxin Wu · 2c6af2d1 · 82a8953e · 82a8953e · 82a8953e
Commit 82a8953e authored Aug 26, 2019 by Yuxin Wu
9 changed files
--- a/examples/FasterRCNN/NOTES.md
+++ b/examples/FasterRCNN/NOTES.md
@@ -70,6 +70,7 @@ Efficiency:

 1. This implementation does not use specialized CUDA ops (e.g. NMS, ROIAlign).
   Therefore it might be slower than other highly-optimized implementations.
+   (A CUDA kernel for NMS is currently only available in TF master.)

 1. To reduce RAM usage on host: (1) make sure you're using the "spawn" method as
   set in `train.py`; (2) reduce `buffer_size` or `NUM_WORKERS` in `data.py`

--- a/examples/FasterRCNN/config.py
+++ b/examples/FasterRCNN/config.py
@@ -202,6 +202,7 @@ _C.FPN.MRCNN_HEAD_FUNC = 'maskrcnn_up4conv_head'   # choices: maskrcnn_up4conv_{

 # Mask R-CNN
 _C.MRCNN.HEAD_DIM = 256
+_C.MRCNN.ACCURATE_PASTE = True  # slightly more aligned results, but very slow on numpy

 # Cascade R-CNN, only available in FPN mode
 _C.FPN.CASCADE = False

--- a/examples/FasterRCNN/eval.py
+++ b/examples/FasterRCNN/eval.py
@@ -70,7 +70,7 @@ def _paste_mask(box, mask, shape):
    """
    assert mask.shape[0] == mask.shape[1], mask.shape

-    if True:
+    if cfg.MRCNN.ACCURATE_PASTE:
        # This method is accurate but much slower.
        mask = np.pad(mask, [(1, 1), (1, 1)], mode='constant')
        box = _scale_box(box, float(mask.shape[0]) / (mask.shape[0] - 2))
@@ -82,6 +82,7 @@ def _paste_mask(box, mask, shape):
        xs = np.arange(0.0, w) + 0.5
        ys = (ys - box[1]) / (box[3] - box[1]) * mask.shape[0]
        xs = (xs - box[0]) / (box[2] - box[0]) * mask.shape[1]
+        # This wastes a lot of compute since most indices are out-of-border
        res = mask_continuous(xs, ys)
        return (res >= 0.5).astype('uint8')
    else:
@@ -124,12 +125,12 @@ def predict_image(img, model_func):
    resized_img = resizer.augment(img)
    scale = np.sqrt(resized_img.shape[0] * 1.0 / img.shape[0] * resized_img.shape[1] / img.shape[1])
    boxes, probs, labels, *masks = model_func(resized_img)
+
+    # Some slow numpy postprocessing:
    boxes = boxes / scale
    # boxes are already clipped inside the graph, but after the floating point scaling, this may not be true any more.
    boxes = clip_boxes(boxes, orig_shape)
-
    if masks:
-        # has mask
        full_masks = [_paste_mask(box, mask, orig_shape)
                      for box, mask in zip(boxes, masks[0])]
        masks = full_masks

--- a/examples/FasterRCNN/modeling/model_box.py
+++ b/examples/FasterRCNN/modeling/model_box.py
@@ -134,16 +134,8 @@ def crop_and_resize(image, boxes, box_ind, crop_size, pad_border=True):

        return tf.concat([ny0, nx0, ny0 + nh, nx0 + nw], axis=1)

-    # Expand bbox to a minium size of 1
-    # boxes_x1y1, boxes_x2y2 = tf.split(boxes, 2, axis=1)
-    # boxes_wh = boxes_x2y2 - boxes_x1y1
-    # boxes_center = tf.reshape((boxes_x2y2 + boxes_x1y1) * 0.5, [-1, 2])
-    # boxes_newwh = tf.maximum(boxes_wh, 1.)
-    # boxes_x1y1new = boxes_center - boxes_newwh * 0.5
-    # boxes_x2y2new = boxes_center + boxes_newwh * 0.5
-    # boxes = tf.concat([boxes_x1y1new, boxes_x2y2new], axis=1)
-
    image_shape = tf.shape(image)[2:]
+
    boxes = transform_fpcoor_for_tf(boxes, image_shape, [crop_size, crop_size])
    image = tf.transpose(image, [0, 2, 3, 1])   # nhwc
    ret = tf.image.crop_and_resize(
@@ -169,7 +161,11 @@ def roi_align(featuremap, boxes, resolution):
        featuremap, boxes,
        tf.zeros([tf.shape(boxes)[0]], dtype=tf.int32),
        resolution * 2)
-    ret = tf.nn.avg_pool(ret, [1, 1, 2, 2], [1, 1, 2, 2], padding='SAME', data_format='NCHW')
+    try:
+        avgpool = tf.nn.avg_pool2d
+    except AttributeError:
+        avgpool = tf.nn.avg_pool
+    ret = avgpool(ret, [1, 1, 2, 2], [1, 1, 2, 2], padding='SAME', data_format='NCHW')
    return ret



--- a/examples/FasterRCNN/modeling/model_frcnn.py
+++ b/examples/FasterRCNN/modeling/model_frcnn.py
@@ -151,10 +151,7 @@ def fastrcnn_losses(labels, label_logits, fg_boxes, fg_box_logits):
    num_fg = tf.size(fg_inds, out_type=tf.int64)
    empty_fg = tf.equal(num_fg, 0)
    if int(fg_box_logits.shape[1]) > 1:
-        indices = tf.stack(
-            [tf.range(num_fg), fg_labels], axis=1)  # #fgx2
-        fg_box_logits = tf.gather_nd(fg_box_logits, indices)
-    else:
+        fg_box_logits = tf.batch_gather(fg_box_logits, tf.expand_dims(fg_labels, axis=1))
    fg_box_logits = tf.reshape(fg_box_logits, [-1, 4])

    with tf.name_scope('label_metrics'), tf.device('/cpu:0'):
@@ -202,7 +199,6 @@ def fastrcnn_predictions(boxes, scores):
    cls_per_box = tf.slice(filtered_ids, [0, 0], [-1, 1])
    offsets = tf.cast(cls_per_box, tf.float32) * (max_coord + 1)  # F,1
    nms_boxes = filtered_boxes + offsets
-    with tf.device('/cpu:0'):
    selection = tf.image.non_max_suppression(
        nms_boxes,
        filtered_scores,

--- a/examples/FasterRCNN/modeling/model_mrcnn.py
+++ b/examples/FasterRCNN/modeling/model_mrcnn.py
@@ -20,9 +20,8 @@ def maskrcnn_loss(mask_logits, fg_labels, fg_target_masks):
        fg_labels: #fg, in 1~#class, int64
        fg_target_masks: #fgxhxw, float32
    """
-    num_fg = tf.size(fg_labels, out_type=tf.int64)
-    indices = tf.stack([tf.range(num_fg), fg_labels - 1], axis=1)  # #fgx2
-    mask_logits = tf.gather_nd(mask_logits, indices)  # #fgxhxw
+    mask_logits = tf.batch_gather(mask_logits, tf.reshape(fg_labels, [-1, 1]) - 1)
+    mask_logits = tf.squeeze(mask_logits, axis=1)
    mask_probs = tf.sigmoid(mask_logits)

    # add some training visualizations to tensorboard

--- a/examples/FasterRCNN/modeling/model_rpn.py
+++ b/examples/FasterRCNN/modeling/model_rpn.py
@@ -142,7 +142,6 @@ def generate_rpn_proposals(boxes, scores, img_shape,
        topk_valid_boxes = topk_boxes
        topk_valid_scores = topk_scores

-    with tf.device('/cpu:0'):  # TODO try the GPU kernel
    nms_indices = tf.image.non_max_suppression(
        topk_valid_boxes,
        topk_valid_scores,

--- a/examples/FasterRCNN/predict.py
+++ b/examples/FasterRCNN/predict.py
@@ -166,7 +166,7 @@ if __name__ == '__main__':
            df = get_eval_dataflow(cfg.DATA.VAL[0])
            df.reset_state()
            predictor = OfflinePredictor(predcfg)
-            for img in tqdm.tqdm(df, total=len(df)):
-                # This include post-processing time, which is done on CPU and not optimized
+            for _, img in enumerate(tqdm.tqdm(df, total=len(df), smoothing=0.5)):
+                # This includes post-processing time, which is done on CPU and not optimized
                # To exclude it, modify `predict_image`.
                predict_image(img[0], predictor)
--- a/tensorpack/predict/base.py
+++ b/tensorpack/predict/base.py
@@ -79,17 +79,16 @@ class AsyncPredictorBase(PredictorBase):


 class OnlinePredictor(PredictorBase):
-    """ A predictor which directly use an existing session and given tensors.
+    """
+    A predictor which directly uses an existing session and given tensors.
+
+    Attributes:
+        sess: The tf.Session object associated with this predictor.
    """

    ACCEPT_OPTIONS = False
    """ See Session.make_callable """

-    sess = None
-    """
-    The tf.Session object associated with this predictor.
-    """
-
    def __init__(self, input_tensors, output_tensors,
                 return_input=False, sess=None):
        """