Commit 2c6af2d1 authored by Yuxin Wu

[MaskRCNN] small inference-time changes

parent 1aaadca9
```diff
@@ -68,7 +68,7 @@ Efficiency:
 If all images have the same spatial size (in which case the per-GPU computation is *still different*),
 then a 85%~90% scaling efficiency is observed when using 8 V100s and `HorovodTrainer`.
-1. This implementation does not use specialized CUDA ops (e.g. AffineChannel, ROIAlign).
+1. This implementation does not use specialized CUDA ops (e.g. NMS, ROIAlign).
    Therefore it might be slower than other highly-optimized implementations.
 1. To reduce RAM usage on host: (1) make sure you're using the "spawn" method as
...
```
```diff
@@ -15,10 +15,11 @@ __all__ = ['register_coco']
 class COCODetection(DatasetSplit):
-    # handle the weird (but standard) split of train and val
+    # handle a few special splits whose names do not match the directory names
     _INSTANCE_TO_BASEDIR = {
         'valminusminival2014': 'val2014',
         'minival2014': 'val2014',
+        'val2017_100': 'val2017',
     }
     """
...
@@ -230,7 +231,7 @@ def register_coco(basedir):
     class_names = ["BG"] + class_names
     for split in ["train2017", "val2017", "train2014", "val2014",
-                  "valminusminival2014", "minival2014", "trainsingle"]:
+                  "valminusminival2014", "minival2014", "val2017_100"]:
         name = "coco_" + split
         DatasetRegistry.register(name, lambda x=split: COCODetection(basedir, x))
         DatasetRegistry.register_metadata(name, 'class_names', class_names)
...
```
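The registration loop above binds the loop variable through a default argument (`lambda x=split: ...`). Without that, every registered lambda would capture the variable itself and see its final value. A minimal, self-contained sketch of the pitfall, using a plain dict in place of tensorpack's `DatasetRegistry` (the `registry` name and keys here are illustrative only):

```python
# Sketch of the late-binding pitfall that `lambda x=split:` avoids.
registry = {}
splits = ["train2017", "val2017", "val2017_100"]

for split in splits:
    registry["late_" + split] = lambda: split          # captures the variable, not its value
    registry["bound_" + split] = lambda x=split: x     # default argument freezes the value

print(registry["late_train2017"]())    # 'val2017_100' -- every late lambda sees the last value
print(registry["bound_train2017"]())   # 'train2017'   -- the value at registration time
```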
```diff
@@ -33,17 +33,18 @@ def fpn_model(features):
     use_gn = cfg.FPN.NORM == 'GN'
     def upsample2x(name, x):
-        return FixedUnPooling(
-            name, x, 2, unpool_mat=np.ones((2, 2), dtype='float32'),
-            data_format='channels_first')
-        # tf.image.resize is, again, not aligned.
-        # with tf.name_scope(name):
-        #     shape2d = tf.shape(x)[2:]
-        #     x = tf.transpose(x, [0, 2, 3, 1])
-        #     x = tf.image.resize_nearest_neighbor(x, shape2d * 2, align_corners=True)
-        #     x = tf.transpose(x, [0, 3, 1, 2])
-        #     return x
+        try:
+            resize = tf.compat.v2.image.resize_images
+            with tf.name_scope(name):
+                shp2d = tf.shape(x)[2:]
+                x = tf.transpose(x, [0, 2, 3, 1])
+                x = resize(x, shp2d * 2, 'nearest')
+                x = tf.transpose(x, [0, 3, 1, 2])
+                return x
+        except AttributeError:
+            return FixedUnPooling(
+                name, x, 2, unpool_mat=np.ones((2, 2), dtype='float32'),
+                data_format='channels_first')
     with argscope(Conv2D, data_format='channels_first',
                   activation=tf.identity, use_bias=True,
...
```
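The new `upsample2x` prefers `tf.compat.v2.image.resize_images(..., 'nearest')` when the installed TF provides it, and falls back to `FixedUnPooling` otherwise. For an exact 2x nearest-neighbor upsample the two should produce the same output, since both amount to replicating every pixel into a 2x2 block. A small NumPy sketch (not tensorpack code) of that equivalence:

```python
import numpy as np

# Nearest-neighbor 2x upsampling of an NCHW tensor is the same as tiling every
# pixel into a 2x2 block, i.e. FixedUnPooling with a ones((2, 2)) unpool matrix.
x = np.arange(6, dtype=np.float32).reshape(1, 1, 2, 3)       # NCHW input
unpooled = np.kron(x, np.ones((2, 2), dtype=np.float32))     # each value repeated into a 2x2 block
nearest = x.repeat(2, axis=2).repeat(2, axis=3)              # nearest-neighbor resize to 2x
assert np.array_equal(unpooled, nearest)
```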
```diff
@@ -201,9 +201,10 @@ def fastrcnn_predictions(boxes, scores):
     filtered_scores = tf.gather_nd(scores, filtered_ids)   # F,
     cls_per_box = tf.slice(filtered_ids, [0, 0], [-1, 1])
     offsets = tf.cast(cls_per_box, tf.float32) * (max_coord + 1)  # F,1
+    nms_boxes = filtered_boxes + offsets
     with tf.device('/cpu:0'):
         selection = tf.image.non_max_suppression(
-            filtered_boxes + offsets,
+            nms_boxes,
             filtered_scores,
             cfg.TEST.RESULTS_PER_IM,
             cfg.TEST.FRCNN_NMS_THRESH)
...
```
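The only change here is giving the offset boxes an explicit `nms_boxes` name before the CPU-pinned NMS. The offset trick itself is what lets a single `tf.image.non_max_suppression` call act per class: shifting each box by `class_id * (max_coord + 1)` guarantees boxes of different classes can never overlap, so they can never suppress each other. A NumPy sketch with illustrative values (not tensorpack code):

```python
import numpy as np

max_coord = 800.0                                   # illustrative image size
offset = lambda cls: cls * (max_coord + 1)          # per-class coordinate shift

box = np.array([10., 20., 50., 60.])                # same geometric box, two different classes
b1, b2 = box + offset(1), box + offset(2)

# Intersection of the two shifted boxes is empty, so their IoU is 0 and one
# NMS call over all classes never suppresses a box of another class.
lt = np.maximum(b1[:2], b2[:2])
rb = np.minimum(b1[2:], b2[2:])
inter = np.prod(np.clip(rb - lt, 0, None))
print(inter)                                        # 0.0
```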
```diff
@@ -130,26 +130,25 @@ def generate_rpn_proposals(boxes, scores, img_shape,
     topk_boxes = tf.gather(boxes, topk_indices)
     topk_boxes = clip_boxes(topk_boxes, img_shape)
-    topk_boxes_x1y1x2y2 = tf.reshape(topk_boxes, (-1, 2, 2))
-    topk_boxes_x1y1, topk_boxes_x2y2 = tf.split(topk_boxes_x1y1x2y2, 2, axis=1)
-    # nx1x2 each
-    wbhb = tf.squeeze(topk_boxes_x2y2 - topk_boxes_x1y1, axis=1)
-    valid = tf.reduce_all(wbhb > cfg.RPN.MIN_SIZE, axis=1)  # n,
-    topk_valid_boxes_x1y1x2y2 = tf.boolean_mask(topk_boxes_x1y1x2y2, valid)
-    topk_valid_scores = tf.boolean_mask(topk_scores, valid)
-
-    # TODO not needed
-    topk_valid_boxes_y1x1y2x2 = tf.reshape(
-        tf.reverse(topk_valid_boxes_x1y1x2y2, axis=[2]),
-        (-1, 4), name='nms_input_boxes')
-    with tf.device('/cpu:0'):
+    if cfg.RPN.MIN_SIZE > 0:
+        topk_boxes_x1y1x2y2 = tf.reshape(topk_boxes, (-1, 2, 2))
+        topk_boxes_x1y1, topk_boxes_x2y2 = tf.split(topk_boxes_x1y1x2y2, 2, axis=1)
+        # nx1x2 each
+        wbhb = tf.squeeze(topk_boxes_x2y2 - topk_boxes_x1y1, axis=1)
+        valid = tf.reduce_all(wbhb > cfg.RPN.MIN_SIZE, axis=1)  # n,
+        topk_valid_boxes = tf.boolean_mask(topk_boxes, valid)
+        topk_valid_scores = tf.boolean_mask(topk_scores, valid)
+    else:
+        topk_valid_boxes = topk_boxes
+        topk_valid_scores = topk_scores
+    with tf.device('/cpu:0'):  # TODO try the GPU kernel
         nms_indices = tf.image.non_max_suppression(
-            topk_valid_boxes_y1x1y2x2,
+            topk_valid_boxes,
             topk_valid_scores,
             max_output_size=post_nms_topk,
             iou_threshold=cfg.RPN.PROPOSAL_NMS_THRESH)
-    topk_valid_boxes = tf.reshape(topk_valid_boxes_x1y1x2y2, (-1, 4))
     proposal_boxes = tf.gather(topk_valid_boxes, nms_indices)
     proposal_scores = tf.gather(topk_valid_scores, nms_indices)
     tf.sigmoid(proposal_scores, name='probs')  # for visualization
...
```
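Two things change in `generate_rpn_proposals`: the width/height filter is skipped entirely when `cfg.RPN.MIN_SIZE` is 0, and the boxes go into NMS in their original (x1, y1, x2, y2) order instead of being reversed to (y1, x1, y2, x2) first. Dropping the reversal is safe because IoU, and hence the set of suppressed boxes, is unchanged when the x and y axes are swapped consistently in both boxes. A NumPy sketch of both points (the `iou` helper is illustrative, not tensorpack code):

```python
import numpy as np

boxes = np.array([[10., 20., 50., 80.],
                  [30., 40., 31., 41.]])            # second box is only 1x1 pixel
min_size = 4                                        # stand-in for cfg.RPN.MIN_SIZE

# The width/height filter that is now applied only when MIN_SIZE > 0:
wh = boxes[:, 2:] - boxes[:, :2]
valid = np.all(wh > min_size, axis=1)
print(valid)                                        # [ True False]

# Why dropping the (x1,y1,x2,y2) -> (y1,x1,y2,x2) reversal before NMS is safe:
def iou(a, b):
    lt, rb = np.maximum(a[:2], b[:2]), np.minimum(a[2:], b[2:])
    inter = np.prod(np.clip(rb - lt, 0, None))
    area = lambda box: np.prod(box[2:] - box[:2])
    return inter / (area(a) + area(b) - inter)

a, b = np.array([10., 20., 50., 80.]), np.array([30., 40., 70., 90.])
swap = lambda box: box[[1, 0, 3, 2]]                # swap x/y axes consistently
assert np.isclose(iou(a, b), iou(swap(a), swap(b))) # IoU, hence NMS output, is unchanged
```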