Commit 2c6af2d1 authored by Yuxin Wu

[MaskRCNN] small inference-time changes

parent 1aaadca9
@@ -68,7 +68,7 @@ Efficiency:
 If all images have the same spatial size (in which case the per-GPU computation is *still different*),
 then a 85%~90% scaling efficiency is observed when using 8 V100s and `HorovodTrainer`.
-1. This implementation does not use specialized CUDA ops (e.g. AffineChannel, ROIAlign).
+1. This implementation does not use specialized CUDA ops (e.g. NMS, ROIAlign).
    Therefore it might be slower than other highly-optimized implementations.
 1. To reduce RAM usage on host: (1) make sure you're using the "spawn" method as
...
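A side note on the RAM tip above (the sentence is cut off by the hunk boundary): the "spawn" method refers to Python's multiprocessing start method. A minimal, hedged sketch of how it is usually selected, not the repo's exact instructions:

```python
import multiprocessing as mp

if __name__ == '__main__':
    # "spawn" starts fresh interpreter processes instead of fork()ing the parent,
    # so data-loading workers do not inherit (and duplicate) the parent's memory.
    mp.set_start_method('spawn')
```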
@@ -15,10 +15,11 @@ __all__ = ['register_coco']
 class COCODetection(DatasetSplit):
-    # handle the weird (but standard) split of train and val
+    # handle a few special splits whose names do not match the directory names
     _INSTANCE_TO_BASEDIR = {
         'valminusminival2014': 'val2014',
         'minival2014': 'val2014',
+        'val2017_100': 'val2017',
     }
     """
@@ -230,7 +231,7 @@ def register_coco(basedir):
     class_names = ["BG"] + class_names
     for split in ["train2017", "val2017", "train2014", "val2014",
-                  "valminusminival2014", "minival2014", "trainsingle"]:
+                  "valminusminival2014", "minival2014", "val2017_100"]:
         name = "coco_" + split
         DatasetRegistry.register(name, lambda x=split: COCODetection(basedir, x))
         DatasetRegistry.register_metadata(name, 'class_names', class_names)
...
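A note on the registration loop above (mine, not part of the commit): the `lambda x=split:` default argument is what captures each split name at definition time. A plain closure over `split` would be late-bound, and every registered dataset would end up loading the last split in the list. A minimal sketch of the difference:

```python
# Minimal sketch of why the default argument matters (illustrative names only).
splits = ["train2017", "val2017", "val2017_100"]

late_bound = [lambda: s for s in splits]        # every lambda sees the final value of s
early_bound = [lambda s=s: s for s in splits]   # default arg captures s at definition time

print([f() for f in late_bound])    # ['val2017_100', 'val2017_100', 'val2017_100']
print([f() for f in early_bound])   # ['train2017', 'val2017', 'val2017_100']
```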
@@ -33,18 +33,19 @@ def fpn_model(features):
     use_gn = cfg.FPN.NORM == 'GN'
     def upsample2x(name, x):
-        return FixedUnPooling(
-            name, x, 2, unpool_mat=np.ones((2, 2), dtype='float32'),
-            data_format='channels_first')
-        # tf.image.resize is, again, not aligned.
-        # with tf.name_scope(name):
-        #     shape2d = tf.shape(x)[2:]
-        #     x = tf.transpose(x, [0, 2, 3, 1])
-        #     x = tf.image.resize_nearest_neighbor(x, shape2d * 2, align_corners=True)
-        #     x = tf.transpose(x, [0, 3, 1, 2])
-        #     return x
+        try:
+            resize = tf.compat.v2.image.resize_images
+            with tf.name_scope(name):
+                shp2d = tf.shape(x)[2:]
+                x = tf.transpose(x, [0, 2, 3, 1])
+                x = resize(x, shp2d * 2, 'nearest')
+                x = tf.transpose(x, [0, 3, 1, 2])
+                return x
+        except AttributeError:
+            return FixedUnPooling(
+                name, x, 2, unpool_mat=np.ones((2, 2), dtype='float32'),
+                data_format='channels_first')
     with argscope(Conv2D, data_format='channels_first',
                   activation=tf.identity, use_bias=True,
                   kernel_initializer=tf.variance_scaling_initializer(scale=1.)):
...
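For context (my sketch, not repo code): the new `upsample2x` transposes NCHW to NHWC, resizes to twice the spatial size with 'nearest', and transposes back. For an integer 2x factor this is element-for-element identical to `FixedUnPooling` with a 2x2 matrix of ones, so the `except AttributeError` fallback for older TF builds (without `tf.compat.v2`) produces the same tensor:

```python
import numpy as np

# Toy NCHW feature map: nearest-neighbor 2x upsampling repeats each pixel into a
# 2x2 block, which is exactly what unpooling with a 2x2 ones matrix produces.
x = np.arange(4, dtype=np.float32).reshape(1, 1, 2, 2)

nearest = x.repeat(2, axis=2).repeat(2, axis=3)           # nearest-neighbor resize to 4x4
unpool = np.kron(x, np.ones((2, 2), dtype=np.float32))    # "unpool" with a 2x2 ones kernel

assert np.array_equal(nearest, unpool)
```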
@@ -201,9 +201,10 @@ def fastrcnn_predictions(boxes, scores):
     filtered_scores = tf.gather_nd(scores, filtered_ids)   # F,
     cls_per_box = tf.slice(filtered_ids, [0, 0], [-1, 1])
     offsets = tf.cast(cls_per_box, tf.float32) * (max_coord + 1)  # F,1
+    nms_boxes = filtered_boxes + offsets
     with tf.device('/cpu:0'):
         selection = tf.image.non_max_suppression(
-            filtered_boxes + offsets,
+            nms_boxes,
             filtered_scores,
             cfg.TEST.RESULTS_PER_IM,
             cfg.TEST.FRCNN_NMS_THRESH)
...
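The `offsets` term above implements per-class NMS in a single call: each box is shifted by `class_id * (max_coord + 1)`, so boxes of different classes can never overlap and only suppress boxes of their own class. A minimal TF1-style sketch of the idea (illustrative values, not repo code):

```python
import tensorflow as tf

# Two nearly identical boxes that belong to different classes.
boxes = tf.constant([[10., 10., 50., 50.], [12., 12., 52., 52.]])
scores = tf.constant([0.9, 0.8])
class_ids = tf.constant([[0.], [1.]])               # one box per class

max_coord = tf.reduce_max(boxes)
nms_boxes = boxes + class_ids * (max_coord + 1)     # push each class into its own coordinate range
keep = tf.image.non_max_suppression(nms_boxes, scores,
                                    max_output_size=10, iou_threshold=0.5)

with tf.Session() as sess:                          # TF1-style session, as used by this codebase
    print(sess.run(keep))                           # [0 1]: both boxes survive, they no longer overlap
```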
@@ -130,26 +130,25 @@ def generate_rpn_proposals(boxes, scores, img_shape,
     topk_boxes = tf.gather(boxes, topk_indices)
     topk_boxes = clip_boxes(topk_boxes, img_shape)
-    topk_boxes_x1y1x2y2 = tf.reshape(topk_boxes, (-1, 2, 2))
-    topk_boxes_x1y1, topk_boxes_x2y2 = tf.split(topk_boxes_x1y1x2y2, 2, axis=1)
-    # nx1x2 each
-    wbhb = tf.squeeze(topk_boxes_x2y2 - topk_boxes_x1y1, axis=1)
-    valid = tf.reduce_all(wbhb > cfg.RPN.MIN_SIZE, axis=1)  # n,
-    topk_valid_boxes_x1y1x2y2 = tf.boolean_mask(topk_boxes_x1y1x2y2, valid)
-    topk_valid_scores = tf.boolean_mask(topk_scores, valid)
-    # TODO not needed
-    topk_valid_boxes_y1x1y2x2 = tf.reshape(
-        tf.reverse(topk_valid_boxes_x1y1x2y2, axis=[2]),
-        (-1, 4), name='nms_input_boxes')
-    with tf.device('/cpu:0'):
+    if cfg.RPN.MIN_SIZE > 0:
+        topk_boxes_x1y1x2y2 = tf.reshape(topk_boxes, (-1, 2, 2))
+        topk_boxes_x1y1, topk_boxes_x2y2 = tf.split(topk_boxes_x1y1x2y2, 2, axis=1)
+        # nx1x2 each
+        wbhb = tf.squeeze(topk_boxes_x2y2 - topk_boxes_x1y1, axis=1)
+        valid = tf.reduce_all(wbhb > cfg.RPN.MIN_SIZE, axis=1)  # n,
+        topk_valid_boxes = tf.boolean_mask(topk_boxes, valid)
+        topk_valid_scores = tf.boolean_mask(topk_scores, valid)
+    else:
+        topk_valid_boxes = topk_boxes
+        topk_valid_scores = topk_scores
+    with tf.device('/cpu:0'):  # TODO try the GPU kernel
         nms_indices = tf.image.non_max_suppression(
-            topk_valid_boxes_y1x1y2x2,
+            topk_valid_boxes,
             topk_valid_scores,
             max_output_size=post_nms_topk,
             iou_threshold=cfg.RPN.PROPOSAL_NMS_THRESH)
-    topk_valid_boxes = tf.reshape(topk_valid_boxes_x1y1x2y2, (-1, 4))
     proposal_boxes = tf.gather(topk_valid_boxes, nms_indices)
     proposal_scores = tf.gather(topk_valid_scores, nms_indices)
     tf.sigmoid(proposal_scores, name='probs')  # for visualization
...
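One more hedged note (not from the commit): the deleted x1y1x2y2 -> y1x1y2x2 reversal before NMS was indeed unnecessary, because IoU, and therefore the set of boxes NMS keeps, is unchanged when x and y are swapped consistently for all boxes. A small self-contained check:

```python
import numpy as np

def iou(a, b):
    # intersection-over-union of two boxes given as [x1, y1, x2, y2]
    ix1, iy1 = max(a[0], b[0]), max(a[1], b[1])
    ix2, iy2 = min(a[2], b[2]), min(a[3], b[3])
    inter = max(ix2 - ix1, 0) * max(iy2 - iy1, 0)
    area = lambda r: (r[2] - r[0]) * (r[3] - r[1])
    return inter / (area(a) + area(b) - inter)

a = np.array([10., 10., 50., 40.])
b = np.array([20., 15., 60., 45.])
swap = [1, 0, 3, 2]                      # x1y1x2y2 -> y1x1y2x2
print(iou(a, b), iou(a[swap], b[swap]))  # identical, so NMS makes the same decisions
```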