Commit 82a8953e authored by Yuxin Wu

[MaskRCNN] make accurate_paste optional

parent 2c6af2d1
...@@ -70,6 +70,7 @@ Efficiency: ...@@ -70,6 +70,7 @@ Efficiency:
1. This implementation does not use specialized CUDA ops (e.g. NMS, ROIAlign). 1. This implementation does not use specialized CUDA ops (e.g. NMS, ROIAlign).
Therefore it might be slower than other highly-optimized implementations. Therefore it might be slower than other highly-optimized implementations.
(CUDA kernel of NMS is currently only available in TF master)
1. To reduce RAM usage on host: (1) make sure you're using the "spawn" method as 1. To reduce RAM usage on host: (1) make sure you're using the "spawn" method as
set in `train.py`; (2) reduce `buffer_size` or `NUM_WORKERS` in `data.py` set in `train.py`; (2) reduce `buffer_size` or `NUM_WORKERS` in `data.py`
......
...@@ -202,6 +202,7 @@ _C.FPN.MRCNN_HEAD_FUNC = 'maskrcnn_up4conv_head' # choices: maskrcnn_up4conv_{ ...@@ -202,6 +202,7 @@ _C.FPN.MRCNN_HEAD_FUNC = 'maskrcnn_up4conv_head' # choices: maskrcnn_up4conv_{
# Mask R-CNN # Mask R-CNN
_C.MRCNN.HEAD_DIM = 256 _C.MRCNN.HEAD_DIM = 256
_C.MRCNN.ACCURATE_PASTE = True # slightly more aligned results, but very slow on numpy
# Cascade R-CNN, only available in FPN mode # Cascade R-CNN, only available in FPN mode
_C.FPN.CASCADE = False _C.FPN.CASCADE = False
......
...@@ -70,7 +70,7 @@ def _paste_mask(box, mask, shape): ...@@ -70,7 +70,7 @@ def _paste_mask(box, mask, shape):
""" """
assert mask.shape[0] == mask.shape[1], mask.shape assert mask.shape[0] == mask.shape[1], mask.shape
if True: if cfg.MRCNN.ACCURATE_PASTE:
# This method is accurate but much slower. # This method is accurate but much slower.
mask = np.pad(mask, [(1, 1), (1, 1)], mode='constant') mask = np.pad(mask, [(1, 1), (1, 1)], mode='constant')
box = _scale_box(box, float(mask.shape[0]) / (mask.shape[0] - 2)) box = _scale_box(box, float(mask.shape[0]) / (mask.shape[0] - 2))
...@@ -82,6 +82,7 @@ def _paste_mask(box, mask, shape): ...@@ -82,6 +82,7 @@ def _paste_mask(box, mask, shape):
xs = np.arange(0.0, w) + 0.5 xs = np.arange(0.0, w) + 0.5
ys = (ys - box[1]) / (box[3] - box[1]) * mask.shape[0] ys = (ys - box[1]) / (box[3] - box[1]) * mask.shape[0]
xs = (xs - box[0]) / (box[2] - box[0]) * mask.shape[1] xs = (xs - box[0]) / (box[2] - box[0]) * mask.shape[1]
# Waste a lot of compute since most indices are out-of-border
res = mask_continuous(xs, ys) res = mask_continuous(xs, ys)
return (res >= 0.5).astype('uint8') return (res >= 0.5).astype('uint8')
else: else:
...@@ -124,12 +125,12 @@ def predict_image(img, model_func): ...@@ -124,12 +125,12 @@ def predict_image(img, model_func):
resized_img = resizer.augment(img) resized_img = resizer.augment(img)
scale = np.sqrt(resized_img.shape[0] * 1.0 / img.shape[0] * resized_img.shape[1] / img.shape[1]) scale = np.sqrt(resized_img.shape[0] * 1.0 / img.shape[0] * resized_img.shape[1] / img.shape[1])
boxes, probs, labels, *masks = model_func(resized_img) boxes, probs, labels, *masks = model_func(resized_img)
# Some slow numpy postprocessing:
boxes = boxes / scale boxes = boxes / scale
# boxes are already clipped inside the graph, but after the floating point scaling, this may not be true any more. # boxes are already clipped inside the graph, but after the floating point scaling, this may not be true any more.
boxes = clip_boxes(boxes, orig_shape) boxes = clip_boxes(boxes, orig_shape)
if masks: if masks:
# has mask
full_masks = [_paste_mask(box, mask, orig_shape) full_masks = [_paste_mask(box, mask, orig_shape)
for box, mask in zip(boxes, masks[0])] for box, mask in zip(boxes, masks[0])]
masks = full_masks masks = full_masks
......
...@@ -134,16 +134,8 @@ def crop_and_resize(image, boxes, box_ind, crop_size, pad_border=True): ...@@ -134,16 +134,8 @@ def crop_and_resize(image, boxes, box_ind, crop_size, pad_border=True):
return tf.concat([ny0, nx0, ny0 + nh, nx0 + nw], axis=1) return tf.concat([ny0, nx0, ny0 + nh, nx0 + nw], axis=1)
# Expand bbox to a minimum size of 1
# boxes_x1y1, boxes_x2y2 = tf.split(boxes, 2, axis=1)
# boxes_wh = boxes_x2y2 - boxes_x1y1
# boxes_center = tf.reshape((boxes_x2y2 + boxes_x1y1) * 0.5, [-1, 2])
# boxes_newwh = tf.maximum(boxes_wh, 1.)
# boxes_x1y1new = boxes_center - boxes_newwh * 0.5
# boxes_x2y2new = boxes_center + boxes_newwh * 0.5
# boxes = tf.concat([boxes_x1y1new, boxes_x2y2new], axis=1)
image_shape = tf.shape(image)[2:] image_shape = tf.shape(image)[2:]
boxes = transform_fpcoor_for_tf(boxes, image_shape, [crop_size, crop_size]) boxes = transform_fpcoor_for_tf(boxes, image_shape, [crop_size, crop_size])
image = tf.transpose(image, [0, 2, 3, 1]) # nhwc image = tf.transpose(image, [0, 2, 3, 1]) # nhwc
ret = tf.image.crop_and_resize( ret = tf.image.crop_and_resize(
...@@ -169,7 +161,11 @@ def roi_align(featuremap, boxes, resolution): ...@@ -169,7 +161,11 @@ def roi_align(featuremap, boxes, resolution):
featuremap, boxes, featuremap, boxes,
tf.zeros([tf.shape(boxes)[0]], dtype=tf.int32), tf.zeros([tf.shape(boxes)[0]], dtype=tf.int32),
resolution * 2) resolution * 2)
ret = tf.nn.avg_pool(ret, [1, 1, 2, 2], [1, 1, 2, 2], padding='SAME', data_format='NCHW') try:
avgpool = tf.nn.avg_pool2d
except AttributeError:
avgpool = tf.nn.avg_pool
ret = avgpool(ret, [1, 1, 2, 2], [1, 1, 2, 2], padding='SAME', data_format='NCHW')
return ret return ret
......
...@@ -151,10 +151,7 @@ def fastrcnn_losses(labels, label_logits, fg_boxes, fg_box_logits): ...@@ -151,10 +151,7 @@ def fastrcnn_losses(labels, label_logits, fg_boxes, fg_box_logits):
num_fg = tf.size(fg_inds, out_type=tf.int64) num_fg = tf.size(fg_inds, out_type=tf.int64)
empty_fg = tf.equal(num_fg, 0) empty_fg = tf.equal(num_fg, 0)
if int(fg_box_logits.shape[1]) > 1: if int(fg_box_logits.shape[1]) > 1:
indices = tf.stack( fg_box_logits = tf.batch_gather(fg_box_logits, tf.expand_dims(fg_labels, axis=1))
[tf.range(num_fg), fg_labels], axis=1) # #fgx2
fg_box_logits = tf.gather_nd(fg_box_logits, indices)
else:
fg_box_logits = tf.reshape(fg_box_logits, [-1, 4]) fg_box_logits = tf.reshape(fg_box_logits, [-1, 4])
with tf.name_scope('label_metrics'), tf.device('/cpu:0'): with tf.name_scope('label_metrics'), tf.device('/cpu:0'):
...@@ -202,7 +199,6 @@ def fastrcnn_predictions(boxes, scores): ...@@ -202,7 +199,6 @@ def fastrcnn_predictions(boxes, scores):
cls_per_box = tf.slice(filtered_ids, [0, 0], [-1, 1]) cls_per_box = tf.slice(filtered_ids, [0, 0], [-1, 1])
offsets = tf.cast(cls_per_box, tf.float32) * (max_coord + 1) # F,1 offsets = tf.cast(cls_per_box, tf.float32) * (max_coord + 1) # F,1
nms_boxes = filtered_boxes + offsets nms_boxes = filtered_boxes + offsets
with tf.device('/cpu:0'):
selection = tf.image.non_max_suppression( selection = tf.image.non_max_suppression(
nms_boxes, nms_boxes,
filtered_scores, filtered_scores,
......
...@@ -20,9 +20,8 @@ def maskrcnn_loss(mask_logits, fg_labels, fg_target_masks): ...@@ -20,9 +20,8 @@ def maskrcnn_loss(mask_logits, fg_labels, fg_target_masks):
fg_labels: #fg, in 1~#class, int64 fg_labels: #fg, in 1~#class, int64
fg_target_masks: #fgxhxw, float32 fg_target_masks: #fgxhxw, float32
""" """
num_fg = tf.size(fg_labels, out_type=tf.int64) mask_logits = tf.batch_gather(mask_logits, tf.reshape(fg_labels, [-1, 1]) - 1)
indices = tf.stack([tf.range(num_fg), fg_labels - 1], axis=1) # #fgx2 mask_logits = tf.squeeze(mask_logits, axis=1)
mask_logits = tf.gather_nd(mask_logits, indices) # #fgxhxw
mask_probs = tf.sigmoid(mask_logits) mask_probs = tf.sigmoid(mask_logits)
# add some training visualizations to tensorboard # add some training visualizations to tensorboard
......
...@@ -142,7 +142,6 @@ def generate_rpn_proposals(boxes, scores, img_shape, ...@@ -142,7 +142,6 @@ def generate_rpn_proposals(boxes, scores, img_shape,
topk_valid_boxes = topk_boxes topk_valid_boxes = topk_boxes
topk_valid_scores = topk_scores topk_valid_scores = topk_scores
with tf.device('/cpu:0'): # TODO try the GPU kernel
nms_indices = tf.image.non_max_suppression( nms_indices = tf.image.non_max_suppression(
topk_valid_boxes, topk_valid_boxes,
topk_valid_scores, topk_valid_scores,
......
...@@ -166,7 +166,7 @@ if __name__ == '__main__': ...@@ -166,7 +166,7 @@ if __name__ == '__main__':
df = get_eval_dataflow(cfg.DATA.VAL[0]) df = get_eval_dataflow(cfg.DATA.VAL[0])
df.reset_state() df.reset_state()
predictor = OfflinePredictor(predcfg) predictor = OfflinePredictor(predcfg)
for img in tqdm.tqdm(df, total=len(df)): for _, img in enumerate(tqdm.tqdm(df, total=len(df), smoothing=0.5)):
# This include post-processing time, which is done on CPU and not optimized # This includes post-processing time, which is done on CPU and not optimized
# To exclude it, modify `predict_image`. # To exclude it, modify `predict_image`.
predict_image(img[0], predictor) predict_image(img[0], predictor)
...@@ -79,17 +79,16 @@ class AsyncPredictorBase(PredictorBase): ...@@ -79,17 +79,16 @@ class AsyncPredictorBase(PredictorBase):
class OnlinePredictor(PredictorBase): class OnlinePredictor(PredictorBase):
""" A predictor which directly uses an existing session and given tensors. """
A predictor which directly uses an existing session and given tensors.
Attributes:
sess: The tf.Session object associated with this predictor.
""" """
ACCEPT_OPTIONS = False ACCEPT_OPTIONS = False
""" See Session.make_callable """ """ See Session.make_callable """
sess = None
"""
The tf.Session object associated with this predictor.
"""
def __init__(self, input_tensors, output_tensors, def __init__(self, input_tensors, output_tensors,
return_input=False, sess=None): return_input=False, sess=None):
""" """
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment