Commit 82a8953e authored by Yuxin Wu's avatar Yuxin Wu

[MaskRCNN] make accurate_paste optional

parent 2c6af2d1
......@@ -70,6 +70,7 @@ Efficiency:
1. This implementation does not use specialized CUDA ops (e.g. NMS, ROIAlign).
Therefore it might be slower than other highly-optimized implementations.
(CUDA kernel of NMS is currently only available in TF master)
1. To reduce RAM usage on host: (1) make sure you're using the "spawn" method as
set in `train.py`; (2) reduce `buffer_size` or `NUM_WORKERS` in `data.py`
......
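The README item above refers to Python's multiprocessing start method. A minimal hedged sketch of what opting into "spawn" typically looks like (the exact call in `train.py` may differ):

```python
# Hedged sketch: forcing the "spawn" start method so worker processes
# re-import modules instead of fork-copying the parent's memory.
import multiprocessing as mp

if __name__ == '__main__':
    mp.set_start_method('spawn')
    # ... build dataflow / launch training here ...
```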
......@@ -202,6 +202,7 @@ _C.FPN.MRCNN_HEAD_FUNC = 'maskrcnn_up4conv_head' # choices: maskrcnn_up4conv_{
# Mask R-CNN
_C.MRCNN.HEAD_DIM = 256
_C.MRCNN.ACCURATE_PASTE = True # slightly more aligned results, but very slow on numpy
# Cascade R-CNN, only available in FPN mode
_C.FPN.CASCADE = False
......
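With the new flag, the slow-but-accurate paste can be disabled without editing code. A hedged example of toggling it, assuming the import path of this example's `config.py` and its usual `--config KEY=VALUE` mechanism:

```python
# 1) In Python, before building the graph (import path follows this example's config.py):
from config import config as cfg
cfg.MRCNN.ACCURATE_PASTE = False   # fall back to the faster, slightly less aligned paste

# 2) Or from the command line, assuming the example's --config KEY=VALUE mechanism:
#    ./predict.py --config MRCNN.ACCURATE_PASTE=False ...
```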
......@@ -70,7 +70,7 @@ def _paste_mask(box, mask, shape):
"""
assert mask.shape[0] == mask.shape[1], mask.shape
if True:
if cfg.MRCNN.ACCURATE_PASTE:
# This method is accurate but much slower.
mask = np.pad(mask, [(1, 1), (1, 1)], mode='constant')
box = _scale_box(box, float(mask.shape[0]) / (mask.shape[0] - 2))
......@@ -82,6 +82,7 @@ def _paste_mask(box, mask, shape):
xs = np.arange(0.0, w) + 0.5
ys = (ys - box[1]) / (box[3] - box[1]) * mask.shape[0]
xs = (xs - box[0]) / (box[2] - box[0]) * mask.shape[1]
# Waste a lot of compute since most indices are out-of-border
res = mask_continuous(xs, ys)
return (res >= 0.5).astype('uint8')
else:
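For context, the `else:` branch is the Detectron-style fast path: resize the small mask to the box size with OpenCV and drop it into a full-image canvas. A hedged sketch of that idea, not the exact code behind the `else:` above:

```python
import cv2
import numpy as np

def _fast_paste_mask(box, mask, shape):
    """Hedged sketch of a Detectron-style fast paste.
    Assumes the box was already clipped to the image (as in predict_image)."""
    x0, y0 = int(box[0] + 0.5), int(box[1] + 0.5)
    x1, y1 = max(x0, int(box[2] - 0.5)), max(y0, int(box[3] - 0.5))  # inclusive, at least 1x1
    w, h = x1 + 1 - x0, y1 + 1 - y0
    mask = (cv2.resize(mask, (w, h)) > 0.5).astype('uint8')  # resize mask to the box size
    ret = np.zeros(shape, dtype='uint8')
    ret[y0:y1 + 1, x0:x1 + 1] = mask                          # paste into the full image
    return ret
```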
......@@ -124,12 +125,12 @@ def predict_image(img, model_func):
resized_img = resizer.augment(img)
scale = np.sqrt(resized_img.shape[0] * 1.0 / img.shape[0] * resized_img.shape[1] / img.shape[1])
boxes, probs, labels, *masks = model_func(resized_img)
# Some slow numpy postprocessing:
boxes = boxes / scale
# boxes are already clipped inside the graph, but after the floating point scaling, this may not be true any more.
boxes = clip_boxes(boxes, orig_shape)
if masks:
# has mask
full_masks = [_paste_mask(box, mask, orig_shape)
for box, mask in zip(boxes, masks[0])]
masks = full_masks
......
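`clip_boxes` here only needs to clamp the rescaled x1y1x2y2 boxes back inside the original image. A hedged numpy sketch consistent with how it is used above:

```python
import numpy as np

def clip_boxes(boxes, shape):
    """Hedged sketch: clamp float x1y1x2y2 boxes into an (h, w) image."""
    h, w = shape[:2]
    boxes = boxes.copy()
    boxes[:, 0::2] = np.clip(boxes[:, 0::2], 0, w)   # x coordinates
    boxes[:, 1::2] = np.clip(boxes[:, 1::2], 0, h)   # y coordinates
    return boxes
```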
......@@ -134,16 +134,8 @@ def crop_and_resize(image, boxes, box_ind, crop_size, pad_border=True):
return tf.concat([ny0, nx0, ny0 + nh, nx0 + nw], axis=1)
# Expand bbox to a minimum size of 1
# boxes_x1y1, boxes_x2y2 = tf.split(boxes, 2, axis=1)
# boxes_wh = boxes_x2y2 - boxes_x1y1
# boxes_center = tf.reshape((boxes_x2y2 + boxes_x1y1) * 0.5, [-1, 2])
# boxes_newwh = tf.maximum(boxes_wh, 1.)
# boxes_x1y1new = boxes_center - boxes_newwh * 0.5
# boxes_x2y2new = boxes_center + boxes_newwh * 0.5
# boxes = tf.concat([boxes_x1y1new, boxes_x2y2new], axis=1)
image_shape = tf.shape(image)[2:]
boxes = transform_fpcoor_for_tf(boxes, image_shape, [crop_size, crop_size])
image = tf.transpose(image, [0, 2, 3, 1]) # nhwc
ret = tf.image.crop_and_resize(
......@@ -169,7 +161,11 @@ def roi_align(featuremap, boxes, resolution):
featuremap, boxes,
tf.zeros([tf.shape(boxes)[0]], dtype=tf.int32),
resolution * 2)
ret = tf.nn.avg_pool(ret, [1, 1, 2, 2], [1, 1, 2, 2], padding='SAME', data_format='NCHW')
try:
avgpool = tf.nn.avg_pool2d
except AttributeError:
avgpool = tf.nn.avg_pool
ret = avgpool(ret, [1, 1, 2, 2], [1, 1, 2, 2], padding='SAME', data_format='NCHW')
return ret
......
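The try/except above is plain feature detection: `tf.nn.avg_pool2d` only exists in newer TF releases, while `tf.nn.avg_pool` is the older spelling of the same op, so the code uses whichever is available. The 2x2 average pooling over the 2x-resolution crop is what averages the four sampled points per output bin in this ROIAlign. An equivalent, more compact shim:

```python
import tensorflow as tf

# Same intent as the try/except above: prefer avg_pool2d when the installed TF has it.
avgpool = getattr(tf.nn, 'avg_pool2d', tf.nn.avg_pool)
```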
......@@ -151,11 +151,8 @@ def fastrcnn_losses(labels, label_logits, fg_boxes, fg_box_logits):
num_fg = tf.size(fg_inds, out_type=tf.int64)
empty_fg = tf.equal(num_fg, 0)
if int(fg_box_logits.shape[1]) > 1:
indices = tf.stack(
[tf.range(num_fg), fg_labels], axis=1) # #fgx2
fg_box_logits = tf.gather_nd(fg_box_logits, indices)
else:
fg_box_logits = tf.reshape(fg_box_logits, [-1, 4])
fg_box_logits = tf.batch_gather(fg_box_logits, tf.expand_dims(fg_labels, axis=1))
fg_box_logits = tf.reshape(fg_box_logits, [-1, 4])
with tf.name_scope('label_metrics'), tf.device('/cpu:0'):
prediction = tf.argmax(label_logits, axis=1, name='label_prediction')
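The rewrite above replaces the explicit `(row, class)` index construction for `tf.gather_nd` with `tf.batch_gather`, which picks one class-specific box regression per foreground ROI. A minimal equivalence check, assuming TF 1.x graph mode where `tf.batch_gather` is available:

```python
import numpy as np
import tensorflow as tf

fg_box_logits = tf.constant(np.random.rand(5, 3, 4).astype('float32'))  # #fg x #class x 4
fg_labels = tf.constant([0, 2, 1, 1, 0], dtype=tf.int64)
num_fg = tf.size(fg_labels, out_type=tf.int64)

# old formulation: build #fg x 2 indices of (row, class) and gather_nd
old = tf.gather_nd(fg_box_logits, tf.stack([tf.range(num_fg), fg_labels], axis=1))

# new formulation: batch_gather one class per row, then flatten to #fg x 4
new = tf.reshape(tf.batch_gather(fg_box_logits, tf.expand_dims(fg_labels, axis=1)), [-1, 4])

with tf.Session() as sess:
    a, b = sess.run([old, new])
    assert np.allclose(a, b)   # both select the same class-specific boxes
```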
......@@ -202,12 +199,11 @@ def fastrcnn_predictions(boxes, scores):
cls_per_box = tf.slice(filtered_ids, [0, 0], [-1, 1])
offsets = tf.cast(cls_per_box, tf.float32) * (max_coord + 1) # F,1
nms_boxes = filtered_boxes + offsets
with tf.device('/cpu:0'):
selection = tf.image.non_max_suppression(
nms_boxes,
filtered_scores,
cfg.TEST.RESULTS_PER_IM,
cfg.TEST.FRCNN_NMS_THRESH)
selection = tf.image.non_max_suppression(
nms_boxes,
filtered_scores,
cfg.TEST.RESULTS_PER_IM,
cfg.TEST.FRCNN_NMS_THRESH)
final_scores = tf.gather(filtered_scores, selection, name='scores')
final_labels = tf.add(tf.gather(cls_per_box[:, 0], selection), 1, name='labels')
final_boxes = tf.gather(filtered_boxes, selection, name='boxes')
......
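Dropping the `tf.device('/cpu:0')` pin lets TF place the NMS op on a GPU kernel when one is available (hence the README note about TF master). The surrounding code runs a single `tf.image.non_max_suppression` across all classes by first offsetting each class's boxes so boxes of different classes can never overlap. A small hedged illustration of that trick (values are made up):

```python
import tensorflow as tf

boxes = tf.constant([[0., 0., 10., 10.],
                     [1., 1., 9., 9.]])          # heavily overlapping boxes ...
classes = tf.constant([[0.], [1.]])              # ... but of different classes
scores = tf.constant([0.9, 0.8])

max_coord = tf.reduce_max(boxes)
offset_boxes = boxes + classes * (max_coord + 1)  # shift class 1 far away from class 0

keep = tf.image.non_max_suppression(offset_boxes, scores,
                                    max_output_size=10, iou_threshold=0.5)
# With the offset, both boxes survive one NMS call; without it, the lower-scored
# box would be suppressed even though it belongs to a different class.
```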
......@@ -20,9 +20,8 @@ def maskrcnn_loss(mask_logits, fg_labels, fg_target_masks):
fg_labels: #fg, in 1~#class, int64
fg_target_masks: #fgxhxw, float32
"""
num_fg = tf.size(fg_labels, out_type=tf.int64)
indices = tf.stack([tf.range(num_fg), fg_labels - 1], axis=1) # #fgx2
mask_logits = tf.gather_nd(mask_logits, indices) # #fgxhxw
mask_logits = tf.batch_gather(mask_logits, tf.reshape(fg_labels, [-1, 1]) - 1)
mask_logits = tf.squeeze(mask_logits, axis=1)
mask_probs = tf.sigmoid(mask_logits)
# add some training visualizations to tensorboard
......
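The same `batch_gather` pattern selects, for each foreground ROI, the mask channel of its ground-truth class. A shape-level sketch, again assuming TF 1.x:

```python
import tensorflow as tf

mask_logits = tf.zeros([3, 5, 28, 28])               # #fg x #class x h x w (dummy values)
fg_labels = tf.constant([1, 4, 2], dtype=tf.int64)   # 1-based class ids
picked = tf.batch_gather(mask_logits, tf.reshape(fg_labels, [-1, 1]) - 1)  # #fg x 1 x h x w
picked = tf.squeeze(picked, axis=1)                  # #fg x h x w, one mask per ROI
```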
......@@ -142,12 +142,11 @@ def generate_rpn_proposals(boxes, scores, img_shape,
topk_valid_boxes = topk_boxes
topk_valid_scores = topk_scores
with tf.device('/cpu:0'): # TODO try the GPU kernel
nms_indices = tf.image.non_max_suppression(
topk_valid_boxes,
topk_valid_scores,
max_output_size=post_nms_topk,
iou_threshold=cfg.RPN.PROPOSAL_NMS_THRESH)
nms_indices = tf.image.non_max_suppression(
topk_valid_boxes,
topk_valid_scores,
max_output_size=post_nms_topk,
iou_threshold=cfg.RPN.PROPOSAL_NMS_THRESH)
proposal_boxes = tf.gather(topk_valid_boxes, nms_indices)
proposal_scores = tf.gather(topk_valid_scores, nms_indices)
......
......@@ -166,7 +166,7 @@ if __name__ == '__main__':
df = get_eval_dataflow(cfg.DATA.VAL[0])
df.reset_state()
predictor = OfflinePredictor(predcfg)
for img in tqdm.tqdm(df, total=len(df)):
# This include post-processing time, which is done on CPU and not optimized
for _, img in enumerate(tqdm.tqdm(df, total=len(df), smoothing=0.5)):
# This includes post-processing time, which is done on CPU and not optimized
# To exclude it, modify `predict_image`.
predict_image(img[0], predictor)
......@@ -79,17 +79,16 @@ class AsyncPredictorBase(PredictorBase):
class OnlinePredictor(PredictorBase):
""" A predictor which directly use an existing session and given tensors.
"""
A predictor which directly use an existing session and given tensors.
Attributes:
sess: The tf.Session object associated with this predictor.
"""
ACCEPT_OPTIONS = False
""" See Session.make_callable """
sess = None
"""
The tf.Session object associated with this predictor.
"""
def __init__(self, input_tensors, output_tensors,
return_input=False, sess=None):
"""
......
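For context on the docstring change above, `OnlinePredictor` wraps existing input/output tensors in a given (or default) session. A hedged toy usage, assuming TF 1.x; the placeholder graph is illustrative only, not from this commit:

```python
import tensorflow as tf
from tensorpack.predict import OnlinePredictor

# toy graph; tensor names are illustrative
x = tf.placeholder(tf.float32, [None, 3], name='x')
y = tf.identity(x * 2.0, name='y')

sess = tf.Session()
pred = OnlinePredictor([x], [y], sess=sess)   # pred.sess is the session documented above
out, = pred([[1., 2., 3.]])                   # pass one value per input tensor
```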