Commit 2c6af2d1 authored by Yuxin Wu

[MaskRCNN] small inference-time changes

parent 1aaadca9
......@@ -68,7 +68,7 @@ Efficiency:
If all images have the same spatial size (in which case the per-GPU computation is *still different*),
then an 85%~90% scaling efficiency is observed when using 8 V100s and `HorovodTrainer`.
1. This implementation does not use specialized CUDA ops (e.g. AffineChannel, ROIAlign).
1. This implementation does not use specialized CUDA ops (e.g. NMS, ROIAlign).
Therefore it might be slower than other highly-optimized implementations.
1. To reduce RAM usage on host: (1) make sure you're using the "spawn" method as
......
......@@ -15,10 +15,11 @@ __all__ = ['register_coco']
class COCODetection(DatasetSplit):
# handle the weird (but standard) split of train and val
# handle a few special splits whose names do not match the directory names
_INSTANCE_TO_BASEDIR = {
'valminusminival2014': 'val2014',
'minival2014': 'val2014',
'val2017_100': 'val2017',
}
"""
......@@ -230,7 +231,7 @@ def register_coco(basedir):
class_names = ["BG"] + class_names
for split in ["train2017", "val2017", "train2014", "val2014",
"valminusminival2014", "minival2014", "trainsingle"]:
"valminusminival2014", "minival2014", "val2017_100"]:
name = "coco_" + split
DatasetRegistry.register(name, lambda x=split: COCODetection(basedir, x))
DatasetRegistry.register_metadata(name, 'class_names', class_names)
......
......@@ -33,17 +33,18 @@ def fpn_model(features):
use_gn = cfg.FPN.NORM == 'GN'
def upsample2x(name, x):
return FixedUnPooling(
name, x, 2, unpool_mat=np.ones((2, 2), dtype='float32'),
data_format='channels_first')
# tf.image.resize is, again, not aligned.
# with tf.name_scope(name):
# shape2d = tf.shape(x)[2:]
# x = tf.transpose(x, [0, 2, 3, 1])
# x = tf.image.resize_nearest_neighbor(x, shape2d * 2, align_corners=True)
# x = tf.transpose(x, [0, 3, 1, 2])
# return x
try:
resize = tf.compat.v2.image.resize_images
with tf.name_scope(name):
shp2d = tf.shape(x)[2:]
x = tf.transpose(x, [0, 2, 3, 1])
x = resize(x, shp2d * 2, 'nearest')
x = tf.transpose(x, [0, 3, 1, 2])
return x
except AttributeError:
return FixedUnPooling(
name, x, 2, unpool_mat=np.ones((2, 2), dtype='float32'),
data_format='channels_first')
with argscope(Conv2D, data_format='channels_first',
activation=tf.identity, use_bias=True,
......
......@@ -201,9 +201,10 @@ def fastrcnn_predictions(boxes, scores):
filtered_scores = tf.gather_nd(scores, filtered_ids) # F,
cls_per_box = tf.slice(filtered_ids, [0, 0], [-1, 1])
offsets = tf.cast(cls_per_box, tf.float32) * (max_coord + 1) # F,1
nms_boxes = filtered_boxes + offsets
with tf.device('/cpu:0'):
selection = tf.image.non_max_suppression(
filtered_boxes + offsets,
nms_boxes,
filtered_scores,
cfg.TEST.RESULTS_PER_IM,
cfg.TEST.FRCNN_NMS_THRESH)
......
......@@ -130,26 +130,25 @@ def generate_rpn_proposals(boxes, scores, img_shape,
topk_boxes = tf.gather(boxes, topk_indices)
topk_boxes = clip_boxes(topk_boxes, img_shape)
topk_boxes_x1y1x2y2 = tf.reshape(topk_boxes, (-1, 2, 2))
topk_boxes_x1y1, topk_boxes_x2y2 = tf.split(topk_boxes_x1y1x2y2, 2, axis=1)
# nx1x2 each
wbhb = tf.squeeze(topk_boxes_x2y2 - topk_boxes_x1y1, axis=1)
valid = tf.reduce_all(wbhb > cfg.RPN.MIN_SIZE, axis=1) # n,
topk_valid_boxes_x1y1x2y2 = tf.boolean_mask(topk_boxes_x1y1x2y2, valid)
topk_valid_scores = tf.boolean_mask(topk_scores, valid)
# TODO not needed
topk_valid_boxes_y1x1y2x2 = tf.reshape(
tf.reverse(topk_valid_boxes_x1y1x2y2, axis=[2]),
(-1, 4), name='nms_input_boxes')
with tf.device('/cpu:0'):
if cfg.RPN.MIN_SIZE > 0:
topk_boxes_x1y1x2y2 = tf.reshape(topk_boxes, (-1, 2, 2))
topk_boxes_x1y1, topk_boxes_x2y2 = tf.split(topk_boxes_x1y1x2y2, 2, axis=1)
# nx1x2 each
wbhb = tf.squeeze(topk_boxes_x2y2 - topk_boxes_x1y1, axis=1)
valid = tf.reduce_all(wbhb > cfg.RPN.MIN_SIZE, axis=1) # n,
topk_valid_boxes = tf.boolean_mask(topk_boxes, valid)
topk_valid_scores = tf.boolean_mask(topk_scores, valid)
else:
topk_valid_boxes = topk_boxes
topk_valid_scores = topk_scores
with tf.device('/cpu:0'): # TODO try the GPU kernel
nms_indices = tf.image.non_max_suppression(
topk_valid_boxes_y1x1y2x2,
topk_valid_boxes,
topk_valid_scores,
max_output_size=post_nms_topk,
iou_threshold=cfg.RPN.PROPOSAL_NMS_THRESH)
topk_valid_boxes = tf.reshape(topk_valid_boxes_x1y1x2y2, (-1, 4))
proposal_boxes = tf.gather(topk_valid_boxes, nms_indices)
proposal_scores = tf.gather(topk_valid_scores, nms_indices)
tf.sigmoid(proposal_scores, name='probs') # for visualization
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment