Commit 08b0dfb6 authored by Yuxin Wu

[MaskRCNN] split models into separate files

parent 70b70736
@@ -6,7 +6,7 @@ This is a minimal implementation that simply contains these files:
+ common.py: common data preparation utilities
+ basemodel.py: implement backbones
+ model_box.py: implement box-related symbolic functions
+ model.py: implement RPN/Faster-RCNN/FPN/Mask-RCNN
+ model_{fpn,rpn,mrcnn,frcnn}.py: implement the FPN, RPN, Mask-RCNN and Fast-RCNN models
+ train.py: main training script
+ utils/: third-party helper functions
+ eval.py: evaluation utilities
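For orientation, here is a minimal sketch (an editorial illustration, not part of the commit) of how train.py consumes the split modules; it uses only the import names that appear in the train.py diff further down this page:

from model_fpn import fpn_model, multilevel_roi_align, multilevel_rpn_losses, generate_fpn_proposals
from model_rpn import rpn_head, rpn_losses, generate_rpn_proposals
from model_mrcnn import maskrcnn_upXconv_head, maskrcnn_loss
from model_frcnn import sample_fast_rcnn_targets, fastrcnn_outputs, fastrcnn_losses, fastrcnn_predictions
from model_box import clip_boxes, decode_bbox_target, encode_bbox_target, crop_and_resize, roi_align, RPNAnchors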
# -*- coding: utf-8 -*-

import numpy as np
import tensorflow as tf
import itertools

from tensorpack.tfutils.summary import add_moving_summary
from tensorpack.tfutils.argscope import argscope
from tensorpack.tfutils.scope_utils import under_name_scope
from tensorpack.models import (
    Conv2D, layer_register, FixedUnPooling, MaxPooling)

from model_rpn import rpn_losses, generate_rpn_proposals
from model_box import roi_align
from utils.box_ops import area as tf_area
from config import config as cfg


@layer_register(log_shape=True)
def fpn_model(features):
    """
    Args:
        features ([tf.Tensor]): ResNet features c2-c5
    Returns:
        [tf.Tensor]: FPN features p2-p6
    """
    assert len(features) == 4, features
    num_channel = cfg.FPN.NUM_CHANNEL

    def upsample2x(name, x):
        return FixedUnPooling(
            name, x, 2, unpool_mat=np.ones((2, 2), dtype='float32'),
            data_format='channels_first')

        # tf.image.resize is, again, not aligned.
        # with tf.name_scope(name):
        #     shape2d = tf.shape(x)[2:]
        #     x = tf.transpose(x, [0, 2, 3, 1])
        #     x = tf.image.resize_nearest_neighbor(x, shape2d * 2, align_corners=True)
        #     x = tf.transpose(x, [0, 3, 1, 2])
        #     return x

    with argscope(Conv2D, data_format='channels_first',
                  activation=tf.identity, use_bias=True,
                  kernel_initializer=tf.variance_scaling_initializer(scale=1.)):
        lat_2345 = [Conv2D('lateral_1x1_c{}'.format(i + 2), c, num_channel, 1)
                    for i, c in enumerate(features)]
        lat_sum_5432 = []
        for idx, lat in enumerate(lat_2345[::-1]):
            if idx == 0:
                lat_sum_5432.append(lat)
            else:
                lat = lat + upsample2x('upsample_lat{}'.format(6 - idx), lat_sum_5432[-1])
                lat_sum_5432.append(lat)
        p2345 = [Conv2D('posthoc_3x3_p{}'.format(i + 2), c, num_channel, 3)
                 for i, c in enumerate(lat_sum_5432[::-1])]
        p6 = MaxPooling('maxpool_p6', p2345[-1], pool_size=1, strides=2, data_format='channels_first')
        return p2345 + [p6]


@under_name_scope()
def fpn_map_rois_to_levels(boxes):
    """
    Assign boxes to level 2~5.

    Args:
        boxes (nx4):

    Returns:
        [tf.Tensor]: 4 tensors for level 2-5. Each tensor is a vector of indices of boxes in its level.
        [tf.Tensor]: 4 tensors, the gathered boxes in each level.

    Be careful that the returned tensor could be empty.
    """
    sqrtarea = tf.sqrt(tf_area(boxes))
    level = tf.to_int32(tf.floor(
        4 + tf.log(sqrtarea * (1. / 224) + 1e-6) * (1.0 / np.log(2))))

    # RoI levels range from 2~5 (not 6)
    level_ids = [
        tf.where(level <= 2),
        tf.where(tf.equal(level, 3)),   # == is not supported
        tf.where(tf.equal(level, 4)),
        tf.where(level >= 5)]
    level_ids = [tf.reshape(x, [-1], name='roi_level{}_id'.format(i + 2))
                 for i, x in enumerate(level_ids)]
    num_in_levels = [tf.size(x, name='num_roi_level{}'.format(i + 2))
                     for i, x in enumerate(level_ids)]
    add_moving_summary(*num_in_levels)

    level_boxes = [tf.gather(boxes, ids) for ids in level_ids]
    return level_ids, level_boxes


@under_name_scope()
def multilevel_roi_align(features, rcnn_boxes, resolution):
    """
    Args:
        features ([tf.Tensor]): 4 FPN feature level 2-5
        rcnn_boxes (tf.Tensor): nx4 boxes
        resolution (int): output spatial resolution
    Returns:
        NxC x res x res
    """
    assert len(features) == 4, features
    # Reassign rcnn_boxes to levels
    level_ids, level_boxes = fpn_map_rois_to_levels(rcnn_boxes)
    all_rois = []

    # Crop patches from corresponding levels
    for i, boxes, featuremap in zip(itertools.count(), level_boxes, features):
        with tf.name_scope('roi_level{}'.format(i + 2)):
            boxes_on_featuremap = boxes * (1.0 / cfg.FPN.ANCHOR_STRIDES[i])
            all_rois.append(roi_align(featuremap, boxes_on_featuremap, resolution))
    all_rois = tf.concat(all_rois, axis=0)  # NCHW

    # Unshuffle to the original order, to match the original samples
    level_id_perm = tf.concat(level_ids, axis=0)  # A permutation of 1~N
    level_id_invert_perm = tf.invert_permutation(level_id_perm)
    all_rois = tf.gather(all_rois, level_id_invert_perm)
    return all_rois


def multilevel_rpn_losses(
        multilevel_anchors, multilevel_label_logits, multilevel_box_logits):
    """
    Args:
        multilevel_anchors: #lvl RPNAnchors
        multilevel_label_logits: #lvl tensors of shape HxWxA
        multilevel_box_logits: #lvl tensors of shape HxWxAx4

    Returns:
        label_loss, box_loss
    """
    num_lvl = len(cfg.FPN.ANCHOR_STRIDES)
    assert len(multilevel_anchors) == num_lvl
    assert len(multilevel_label_logits) == num_lvl
    assert len(multilevel_box_logits) == num_lvl

    losses = []
    for lvl in range(num_lvl):
        with tf.name_scope('RPNLoss_Lvl{}'.format(lvl + 2)):
            anchors = multilevel_anchors[lvl]
            label_loss, box_loss = rpn_losses(
                anchors.gt_labels, anchors.encoded_gt_boxes(),
                multilevel_label_logits[lvl], multilevel_box_logits[lvl])
            losses.extend([label_loss, box_loss])

    with tf.name_scope('rpn_losses'):
        total_label_loss = tf.add_n(losses[::2], name='label_loss')
        total_box_loss = tf.add_n(losses[1::2], name='box_loss')
        add_moving_summary(total_label_loss, total_box_loss)
    return total_label_loss, total_box_loss


def generate_fpn_proposals(
        multilevel_anchors, multilevel_label_logits, multilevel_box_logits,
        image_shape2d, pre_nms_topk, post_nms_topk):
    """
    Args:
        multilevel_anchors: #lvl RPNAnchors
        multilevel_label_logits: #lvl tensors of shape HxWxA
        multilevel_box_logits: #lvl tensors of shape HxWxAx4

    Returns:
        boxes: kx4 float
        scores: k logits
    """
    num_lvl = len(cfg.FPN.ANCHOR_STRIDES)
    assert len(multilevel_anchors) == num_lvl
    assert len(multilevel_label_logits) == num_lvl
    assert len(multilevel_box_logits) == num_lvl

    all_boxes = []
    all_scores = []
    for lvl in range(num_lvl):
        with tf.name_scope('FPNProposal_Lvl{}'.format(lvl + 2)):
            anchors = multilevel_anchors[lvl]
            pred_boxes_decoded = anchors.decode_logits(multilevel_box_logits[lvl])

            proposal_boxes, proposal_scores = generate_rpn_proposals(
                tf.reshape(pred_boxes_decoded, [-1, 4]),
                tf.reshape(multilevel_label_logits[lvl], [-1]),
                image_shape2d, pre_nms_topk)
            all_boxes.append(proposal_boxes)
            all_scores.append(proposal_scores)

    proposal_boxes = tf.concat(all_boxes, axis=0)  # nx4
    proposal_scores = tf.concat(all_scores, axis=0)  # n
    proposal_topk = tf.minimum(tf.size(proposal_scores), post_nms_topk)
    proposal_scores, topk_indices = tf.nn.top_k(proposal_scores, k=proposal_topk, sorted=False)
    proposal_boxes = tf.gather(proposal_boxes, topk_indices)
    return proposal_boxes, proposal_scores
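A note on fpn_map_rois_to_levels above (paraphrasing the FPN paper, not text from the commit): the expression 4 + log(sqrtarea / 224) / log(2) is the paper's RoI-to-level assignment

\[ k = \left\lfloor k_0 + \log_2\!\left(\sqrt{wh} / 224\right) \right\rfloor, \qquad k_0 = 4, \]

clipped to [2, 5] by the `level <= 2` / `level >= 5` branches, so a 224x224 RoI is pooled from P4 and each halving of the box size moves it one level down.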
@@ -2,165 +2,17 @@
# File: model.py

import tensorflow as tf
import numpy as np
import itertools

from tensorpack.tfutils.summary import add_moving_summary
from tensorpack.tfutils.argscope import argscope
from tensorpack.tfutils.scope_utils import under_name_scope, auto_reuse_variable_scope
from tensorpack.tfutils.scope_utils import under_name_scope
from tensorpack.models import (
    Conv2D, FullyConnected, MaxPooling,
    layer_register, Conv2DTranspose, FixedUnPooling)
    Conv2D, FullyConnected, layer_register)

from utils.box_ops import pairwise_iou
from utils.box_ops import area as tf_area
from model_box import roi_align, clip_boxes
from config import config as cfg


@layer_register(log_shape=True)
@auto_reuse_variable_scope
def rpn_head(featuremap, channel, num_anchors):
    """
    Returns:
        label_logits: fHxfWxNA
        box_logits: fHxfWxNAx4
    """
    with argscope(Conv2D, data_format='channels_first',
                  kernel_initializer=tf.random_normal_initializer(stddev=0.01)):
        hidden = Conv2D('conv0', featuremap, channel, 3, activation=tf.nn.relu)

        label_logits = Conv2D('class', hidden, num_anchors, 1)
        box_logits = Conv2D('box', hidden, 4 * num_anchors, 1)
        # 1, NA(*4), im/16, im/16 (NCHW)

        label_logits = tf.transpose(label_logits, [0, 2, 3, 1])  # 1xfHxfWxNA
        label_logits = tf.squeeze(label_logits, 0)  # fHxfWxNA

        shp = tf.shape(box_logits)  # 1x(NAx4)xfHxfW
        box_logits = tf.transpose(box_logits, [0, 2, 3, 1])  # 1xfHxfWx(NAx4)
        box_logits = tf.reshape(box_logits, tf.stack([shp[2], shp[3], num_anchors, 4]))  # fHxfWxNAx4
    return label_logits, box_logits


@under_name_scope()
def rpn_losses(anchor_labels, anchor_boxes, label_logits, box_logits):
    """
    Args:
        anchor_labels: fHxfWxNA
        anchor_boxes: fHxfWxNAx4, encoded
        label_logits: fHxfWxNA
        box_logits: fHxfWxNAx4

    Returns:
        label_loss, box_loss
    """
    with tf.device('/cpu:0'):
        valid_mask = tf.stop_gradient(tf.not_equal(anchor_labels, -1))
        pos_mask = tf.stop_gradient(tf.equal(anchor_labels, 1))
        nr_valid = tf.stop_gradient(tf.count_nonzero(valid_mask, dtype=tf.int32), name='num_valid_anchor')
        nr_pos = tf.identity(tf.count_nonzero(pos_mask, dtype=tf.int32), name='num_pos_anchor')
        # nr_pos is guaranteed >0 in C4. But in FPN, even nr_valid could be 0.

        valid_anchor_labels = tf.boolean_mask(anchor_labels, valid_mask)
    valid_label_logits = tf.boolean_mask(label_logits, valid_mask)

    with tf.name_scope('label_metrics'):
        valid_label_prob = tf.nn.sigmoid(valid_label_logits)
        summaries = []
        with tf.device('/cpu:0'):
            for th in [0.5, 0.2, 0.1]:
                valid_prediction = tf.cast(valid_label_prob > th, tf.int32)
                nr_pos_prediction = tf.reduce_sum(valid_prediction, name='num_pos_prediction')
                pos_prediction_corr = tf.count_nonzero(
                    tf.logical_and(
                        valid_label_prob > th,
                        tf.equal(valid_prediction, valid_anchor_labels)),
                    dtype=tf.int32)
                placeholder = 0.5   # A small value will make summaries appear lower.
                recall = tf.to_float(tf.truediv(pos_prediction_corr, nr_pos))
                recall = tf.where(tf.equal(nr_pos, 0), placeholder, recall, name='recall_th{}'.format(th))
                precision = tf.to_float(tf.truediv(pos_prediction_corr, nr_pos_prediction))
                precision = tf.where(tf.equal(nr_pos_prediction, 0),
                                     placeholder, precision, name='precision_th{}'.format(th))
                summaries.extend([precision, recall])
        add_moving_summary(*summaries)

    # Per-level loss summaries in FPN may appear lower due to the use of a small placeholder.
    # But the total loss is still the same. TODO make the summary op smarter
    placeholder = 0.
    label_loss = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=tf.to_float(valid_anchor_labels), logits=valid_label_logits)
    label_loss = tf.reduce_sum(label_loss) * (1. / cfg.RPN.BATCH_PER_IM)
    label_loss = tf.where(tf.equal(nr_valid, 0), placeholder, label_loss, name='label_loss')

    pos_anchor_boxes = tf.boolean_mask(anchor_boxes, pos_mask)
    pos_box_logits = tf.boolean_mask(box_logits, pos_mask)
    delta = 1.0 / 9
    box_loss = tf.losses.huber_loss(
        pos_anchor_boxes, pos_box_logits, delta=delta,
        reduction=tf.losses.Reduction.SUM) / delta
    box_loss = box_loss * (1. / cfg.RPN.BATCH_PER_IM)
    box_loss = tf.where(tf.equal(nr_pos, 0), placeholder, box_loss, name='box_loss')

    add_moving_summary(label_loss, box_loss, nr_valid, nr_pos)
    return label_loss, box_loss


@under_name_scope()
def generate_rpn_proposals(boxes, scores, img_shape,
                           pre_nms_topk, post_nms_topk=None):
    """
    Sample RPN proposals by the following steps:
    1. Pick top k1 by scores
    2. NMS them
    3. Pick top k2 by scores. Default k2 == k1, i.e. does not filter the NMS output.

    Args:
        boxes: nx4 float dtype, the proposal boxes. Decoded to floatbox already
        scores: n float, the logits
        img_shape: [h, w]
        pre_nms_topk, post_nms_topk (int): See above.

    Returns:
        boxes: kx4 float
        scores: k logits
    """
    assert boxes.shape.ndims == 2, boxes.shape
    if post_nms_topk is None:
        post_nms_topk = pre_nms_topk

    topk = tf.minimum(pre_nms_topk, tf.size(scores))
    topk_scores, topk_indices = tf.nn.top_k(scores, k=topk, sorted=False)
    topk_boxes = tf.gather(boxes, topk_indices)
    topk_boxes = clip_boxes(topk_boxes, img_shape)

    topk_boxes_x1y1x2y2 = tf.reshape(topk_boxes, (-1, 2, 2))
    topk_boxes_x1y1, topk_boxes_x2y2 = tf.split(topk_boxes_x1y1x2y2, 2, axis=1)
    # nx1x2 each
    wbhb = tf.squeeze(topk_boxes_x2y2 - topk_boxes_x1y1, axis=1)
    valid = tf.reduce_all(wbhb > cfg.RPN.MIN_SIZE, axis=1)  # n,
    topk_valid_boxes_x1y1x2y2 = tf.boolean_mask(topk_boxes_x1y1x2y2, valid)
    topk_valid_scores = tf.boolean_mask(topk_scores, valid)

    # TODO not needed
    topk_valid_boxes_y1x1y2x2 = tf.reshape(
        tf.reverse(topk_valid_boxes_x1y1x2y2, axis=[2]),
        (-1, 4), name='nms_input_boxes')
    nms_indices = tf.image.non_max_suppression(
        topk_valid_boxes_y1x1y2x2,
        # TODO use exp to work around a bug in TF1.9: https://github.com/tensorflow/tensorflow/issues/19578
        tf.exp(topk_valid_scores),
        max_output_size=post_nms_topk,
        iou_threshold=cfg.RPN.PROPOSAL_NMS_THRESH)

    topk_valid_boxes = tf.reshape(topk_valid_boxes_x1y1x2y2, (-1, 4))
    final_boxes = tf.gather(topk_valid_boxes, nms_indices)
    final_scores = tf.gather(topk_valid_scores, nms_indices)
    tf.sigmoid(final_scores, name='probs')  # for visualization
    return tf.stop_gradient(final_boxes, name='boxes'), tf.stop_gradient(final_scores, name='scores')


@under_name_scope()
def proposal_metrics(iou):
    """
@@ -401,243 +253,3 @@ def fastrcnn_predictions(boxes, probs):
    filtered_selection = tf.gather(selected_indices, topk_indices)
    filtered_selection = tf.reverse(filtered_selection, axis=[1], name='filtered_indices')
    return filtered_selection, topk_probs


@layer_register(log_shape=True)
def maskrcnn_upXconv_head(feature, num_category, num_convs):
    """
    Args:
        feature (NxCx s x s): size is 7 in C4 models and 14 in FPN models.
        num_category(int):
        num_convs (int): number of convolution layers

    Returns:
        mask_logits (N x num_category x 2s x 2s):
    """
    l = feature
    with argscope([Conv2D, Conv2DTranspose], data_format='channels_first',
                  kernel_initializer=tf.variance_scaling_initializer(
                      scale=2.0, mode='fan_out', distribution='normal')):
        # c2's MSRAFill is fan_out
        for k in range(num_convs):
            l = Conv2D('fcn{}'.format(k), l, cfg.MRCNN.HEAD_DIM, 3, activation=tf.nn.relu)
        l = Conv2DTranspose('deconv', l, cfg.MRCNN.HEAD_DIM, 2, strides=2, activation=tf.nn.relu)
        l = Conv2D('conv', l, num_category, 1)
    return l


@under_name_scope()
def maskrcnn_loss(mask_logits, fg_labels, fg_target_masks):
    """
    Args:
        mask_logits: #fg x #category xhxw
        fg_labels: #fg, in 1~#class
        fg_target_masks: #fgxhxw, int
    """
    num_fg = tf.size(fg_labels)
    indices = tf.stack([tf.range(num_fg), tf.to_int32(fg_labels) - 1], axis=1)  # #fgx2
    mask_logits = tf.gather_nd(mask_logits, indices)  # #fgxhxw
    mask_probs = tf.sigmoid(mask_logits)

    # add some training visualizations to tensorboard
    with tf.name_scope('mask_viz'):
        viz = tf.concat([fg_target_masks, mask_probs], axis=1)
        viz = tf.expand_dims(viz, 3)
        viz = tf.cast(viz * 255, tf.uint8, name='viz')
        tf.summary.image('mask_truth|pred', viz, max_outputs=10)

    loss = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=fg_target_masks, logits=mask_logits)
    loss = tf.reduce_mean(loss, name='maskrcnn_loss')

    pred_label = mask_probs > 0.5
    truth_label = fg_target_masks > 0.5
    accuracy = tf.reduce_mean(
        tf.to_float(tf.equal(pred_label, truth_label)),
        name='accuracy')
    pos_accuracy = tf.logical_and(
        tf.equal(pred_label, truth_label),
        tf.equal(truth_label, True))
    pos_accuracy = tf.reduce_mean(tf.to_float(pos_accuracy), name='pos_accuracy')
    fg_pixel_ratio = tf.reduce_mean(tf.to_float(truth_label), name='fg_pixel_ratio')
    add_moving_summary(loss, accuracy, fg_pixel_ratio, pos_accuracy)
    return loss


@layer_register(log_shape=True)
def fpn_model(features):
    """
    Args:
        features ([tf.Tensor]): ResNet features c2-c5
    Returns:
        [tf.Tensor]: FPN features p2-p6
    """
    assert len(features) == 4, features
    num_channel = cfg.FPN.NUM_CHANNEL

    def upsample2x(name, x):
        return FixedUnPooling(
            name, x, 2, unpool_mat=np.ones((2, 2), dtype='float32'),
            data_format='channels_first')

        # tf.image.resize is, again, not aligned.
        # with tf.name_scope(name):
        #     shape2d = tf.shape(x)[2:]
        #     x = tf.transpose(x, [0, 2, 3, 1])
        #     x = tf.image.resize_nearest_neighbor(x, shape2d * 2, align_corners=True)
        #     x = tf.transpose(x, [0, 3, 1, 2])
        #     return x

    with argscope(Conv2D, data_format='channels_first',
                  activation=tf.identity, use_bias=True,
                  kernel_initializer=tf.variance_scaling_initializer(scale=1.)):
        lat_2345 = [Conv2D('lateral_1x1_c{}'.format(i + 2), c, num_channel, 1)
                    for i, c in enumerate(features)]
        lat_sum_5432 = []
        for idx, lat in enumerate(lat_2345[::-1]):
            if idx == 0:
                lat_sum_5432.append(lat)
            else:
                lat = lat + upsample2x('upsample_lat{}'.format(6 - idx), lat_sum_5432[-1])
                lat_sum_5432.append(lat)
        p2345 = [Conv2D('posthoc_3x3_p{}'.format(i + 2), c, num_channel, 3)
                 for i, c in enumerate(lat_sum_5432[::-1])]
        p6 = MaxPooling('maxpool_p6', p2345[-1], pool_size=1, strides=2, data_format='channels_first')
        return p2345 + [p6]


@under_name_scope()
def fpn_map_rois_to_levels(boxes):
    """
    Assign boxes to level 2~5.

    Args:
        boxes (nx4):

    Returns:
        [tf.Tensor]: 4 tensors for level 2-5. Each tensor is a vector of indices of boxes in its level.
        [tf.Tensor]: 4 tensors, the gathered boxes in each level.

    Be careful that the returned tensor could be empty.
    """
    sqrtarea = tf.sqrt(tf_area(boxes))
    level = tf.to_int32(tf.floor(
        4 + tf.log(sqrtarea * (1. / 224) + 1e-6) * (1.0 / np.log(2))))

    # RoI levels range from 2~5 (not 6)
    level_ids = [
        tf.where(level <= 2),
        tf.where(tf.equal(level, 3)),   # == is not supported
        tf.where(tf.equal(level, 4)),
        tf.where(level >= 5)]
    level_ids = [tf.reshape(x, [-1], name='roi_level{}_id'.format(i + 2))
                 for i, x in enumerate(level_ids)]
    num_in_levels = [tf.size(x, name='num_roi_level{}'.format(i + 2))
                     for i, x in enumerate(level_ids)]
    add_moving_summary(*num_in_levels)

    level_boxes = [tf.gather(boxes, ids) for ids in level_ids]
    return level_ids, level_boxes


@under_name_scope()
def multilevel_roi_align(features, rcnn_boxes, resolution):
    """
    Args:
        features ([tf.Tensor]): 4 FPN feature level 2-5
        rcnn_boxes (tf.Tensor): nx4 boxes
        resolution (int): output spatial resolution
    Returns:
        NxC x res x res
    """
    assert len(features) == 4, features
    # Reassign rcnn_boxes to levels
    level_ids, level_boxes = fpn_map_rois_to_levels(rcnn_boxes)
    all_rois = []

    # Crop patches from corresponding levels
    for i, boxes, featuremap in zip(itertools.count(), level_boxes, features):
        with tf.name_scope('roi_level{}'.format(i + 2)):
            boxes_on_featuremap = boxes * (1.0 / cfg.FPN.ANCHOR_STRIDES[i])
            all_rois.append(roi_align(featuremap, boxes_on_featuremap, resolution))
    all_rois = tf.concat(all_rois, axis=0)  # NCHW

    # Unshuffle to the original order, to match the original samples
    level_id_perm = tf.concat(level_ids, axis=0)  # A permutation of 1~N
    level_id_invert_perm = tf.invert_permutation(level_id_perm)
    all_rois = tf.gather(all_rois, level_id_invert_perm)
    return all_rois


def multilevel_rpn_losses(
        multilevel_anchors, multilevel_label_logits, multilevel_box_logits):
    """
    Args:
        multilevel_anchors: #lvl RPNAnchors
        multilevel_label_logits: #lvl tensors of shape HxWxA
        multilevel_box_logits: #lvl tensors of shape HxWxAx4

    Returns:
        label_loss, box_loss
    """
    num_lvl = len(cfg.FPN.ANCHOR_STRIDES)
    assert len(multilevel_anchors) == num_lvl
    assert len(multilevel_label_logits) == num_lvl
    assert len(multilevel_box_logits) == num_lvl

    losses = []
    for lvl in range(num_lvl):
        with tf.name_scope('RPNLoss_Lvl{}'.format(lvl + 2)):
            anchors = multilevel_anchors[lvl]
            label_loss, box_loss = rpn_losses(
                anchors.gt_labels, anchors.encoded_gt_boxes(),
                multilevel_label_logits[lvl], multilevel_box_logits[lvl])
            losses.extend([label_loss, box_loss])

    with tf.name_scope('rpn_losses'):
        total_label_loss = tf.add_n(losses[::2], name='label_loss')
        total_box_loss = tf.add_n(losses[1::2], name='box_loss')
        add_moving_summary(total_label_loss, total_box_loss)
    return total_label_loss, total_box_loss


def generate_fpn_proposals(
        multilevel_anchors, multilevel_label_logits, multilevel_box_logits,
        image_shape2d, pre_nms_topk, post_nms_topk):
    """
    Args:
        multilevel_anchors: #lvl RPNAnchors
        multilevel_label_logits: #lvl tensors of shape HxWxA
        multilevel_box_logits: #lvl tensors of shape HxWxAx4

    Returns:
        boxes: kx4 float
        scores: k logits
    """
    num_lvl = len(cfg.FPN.ANCHOR_STRIDES)
    assert len(multilevel_anchors) == num_lvl
    assert len(multilevel_label_logits) == num_lvl
    assert len(multilevel_box_logits) == num_lvl

    all_boxes = []
    all_scores = []
    for lvl in range(num_lvl):
        with tf.name_scope('FPNProposal_Lvl{}'.format(lvl + 2)):
            anchors = multilevel_anchors[lvl]
            pred_boxes_decoded = anchors.decode_logits(multilevel_box_logits[lvl])

            proposal_boxes, proposal_scores = generate_rpn_proposals(
                tf.reshape(pred_boxes_decoded, [-1, 4]),
                tf.reshape(multilevel_label_logits[lvl], [-1]),
                image_shape2d, pre_nms_topk)
            all_boxes.append(proposal_boxes)
            all_scores.append(proposal_scores)

    proposal_boxes = tf.concat(all_boxes, axis=0)  # nx4
    proposal_scores = tf.concat(all_scores, axis=0)  # n
    proposal_topk = tf.minimum(tf.size(proposal_scores), post_nms_topk)
    proposal_scores, topk_indices = tf.nn.top_k(proposal_scores, k=proposal_topk, sorted=False)
    proposal_boxes = tf.gather(proposal_boxes, topk_indices)
    return proposal_boxes, proposal_scores
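A small standalone check (an editorial illustration, not from the commit) of the invert_permutation trick that multilevel_roi_align uses to restore the original box order after concatenating the per-level RoIs; plain NumPy with hypothetical toy data:

import numpy as np

# After concatenating the per-level index vectors, position j holds the
# original index of the j-th pooled RoI (a made-up routing for 5 boxes):
level_id_perm = np.array([2, 4, 0, 1, 3])
rois_in_level_order = np.array([20, 40, 0, 10, 30])   # stand-ins for the pooled RoI features

# Inverting the permutation tells us, for each original box, where its RoI ended up.
invert_perm = np.argsort(level_id_perm)                # same result as tf.invert_permutation here
restored = rois_in_level_order[invert_perm]
print(restored)   # [ 0 10 20 30 40] -- back in the original box order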
# -*- coding: utf-8 -*-

import tensorflow as tf

from tensorpack.tfutils.argscope import argscope
from tensorpack.models import (
    Conv2D, layer_register, Conv2DTranspose)
from tensorpack.tfutils.scope_utils import under_name_scope
from tensorpack.tfutils.summary import add_moving_summary

from config import config as cfg


@layer_register(log_shape=True)
def maskrcnn_upXconv_head(feature, num_category, num_convs):
    """
    Args:
        feature (NxCx s x s): size is 7 in C4 models and 14 in FPN models.
        num_category(int):
        num_convs (int): number of convolution layers

    Returns:
        mask_logits (N x num_category x 2s x 2s):
    """
    l = feature
    with argscope([Conv2D, Conv2DTranspose], data_format='channels_first',
                  kernel_initializer=tf.variance_scaling_initializer(
                      scale=2.0, mode='fan_out', distribution='normal')):
        # c2's MSRAFill is fan_out
        for k in range(num_convs):
            l = Conv2D('fcn{}'.format(k), l, cfg.MRCNN.HEAD_DIM, 3, activation=tf.nn.relu)
        l = Conv2DTranspose('deconv', l, cfg.MRCNN.HEAD_DIM, 2, strides=2, activation=tf.nn.relu)
        l = Conv2D('conv', l, num_category, 1)
    return l


@under_name_scope()
def maskrcnn_loss(mask_logits, fg_labels, fg_target_masks):
    """
    Args:
        mask_logits: #fg x #category xhxw
        fg_labels: #fg, in 1~#class
        fg_target_masks: #fgxhxw, int
    """
    num_fg = tf.size(fg_labels)
    indices = tf.stack([tf.range(num_fg), tf.to_int32(fg_labels) - 1], axis=1)  # #fgx2
    mask_logits = tf.gather_nd(mask_logits, indices)  # #fgxhxw
    mask_probs = tf.sigmoid(mask_logits)

    # add some training visualizations to tensorboard
    with tf.name_scope('mask_viz'):
        viz = tf.concat([fg_target_masks, mask_probs], axis=1)
        viz = tf.expand_dims(viz, 3)
        viz = tf.cast(viz * 255, tf.uint8, name='viz')
        tf.summary.image('mask_truth|pred', viz, max_outputs=10)

    loss = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=fg_target_masks, logits=mask_logits)
    loss = tf.reduce_mean(loss, name='maskrcnn_loss')

    pred_label = mask_probs > 0.5
    truth_label = fg_target_masks > 0.5
    accuracy = tf.reduce_mean(
        tf.to_float(tf.equal(pred_label, truth_label)),
        name='accuracy')
    pos_accuracy = tf.logical_and(
        tf.equal(pred_label, truth_label),
        tf.equal(truth_label, True))
    pos_accuracy = tf.reduce_mean(tf.to_float(pos_accuracy), name='pos_accuracy')
    fg_pixel_ratio = tf.reduce_mean(tf.to_float(truth_label), name='fg_pixel_ratio')
    add_moving_summary(loss, accuracy, fg_pixel_ratio, pos_accuracy)
    return loss
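A tiny standalone illustration (an editorial sketch, not part of the commit) of the gather_nd step in maskrcnn_loss, which keeps, for every foreground RoI, only the mask channel of its ground-truth class; NumPy stand-in with made-up shapes:

import numpy as np

num_fg, num_category, h, w = 3, 80, 2, 2
mask_logits = np.random.randn(num_fg, num_category, h, w).astype('float32')
fg_labels = np.array([3, 1, 57])          # hypothetical ground-truth classes, in 1..num_category

# Same selection as tf.gather_nd(mask_logits, tf.stack([tf.range(num_fg), fg_labels - 1], axis=1)):
picked = mask_logits[np.arange(num_fg), fg_labels - 1]   # shape (num_fg, h, w)
assert picked.shape == (num_fg, h, w)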
# -*- coding: utf-8 -*-

import tensorflow as tf

from tensorpack.tfutils.summary import add_moving_summary
from tensorpack.tfutils.argscope import argscope
from tensorpack.tfutils.scope_utils import under_name_scope, auto_reuse_variable_scope
from tensorpack.models import Conv2D, layer_register

from model_box import clip_boxes
from config import config as cfg


@layer_register(log_shape=True)
@auto_reuse_variable_scope
def rpn_head(featuremap, channel, num_anchors):
    """
    Returns:
        label_logits: fHxfWxNA
        box_logits: fHxfWxNAx4
    """
    with argscope(Conv2D, data_format='channels_first',
                  kernel_initializer=tf.random_normal_initializer(stddev=0.01)):
        hidden = Conv2D('conv0', featuremap, channel, 3, activation=tf.nn.relu)

        label_logits = Conv2D('class', hidden, num_anchors, 1)
        box_logits = Conv2D('box', hidden, 4 * num_anchors, 1)
        # 1, NA(*4), im/16, im/16 (NCHW)

        label_logits = tf.transpose(label_logits, [0, 2, 3, 1])  # 1xfHxfWxNA
        label_logits = tf.squeeze(label_logits, 0)  # fHxfWxNA

        shp = tf.shape(box_logits)  # 1x(NAx4)xfHxfW
        box_logits = tf.transpose(box_logits, [0, 2, 3, 1])  # 1xfHxfWx(NAx4)
        box_logits = tf.reshape(box_logits, tf.stack([shp[2], shp[3], num_anchors, 4]))  # fHxfWxNAx4
    return label_logits, box_logits


@under_name_scope()
def rpn_losses(anchor_labels, anchor_boxes, label_logits, box_logits):
    """
    Args:
        anchor_labels: fHxfWxNA
        anchor_boxes: fHxfWxNAx4, encoded
        label_logits: fHxfWxNA
        box_logits: fHxfWxNAx4

    Returns:
        label_loss, box_loss
    """
    with tf.device('/cpu:0'):
        valid_mask = tf.stop_gradient(tf.not_equal(anchor_labels, -1))
        pos_mask = tf.stop_gradient(tf.equal(anchor_labels, 1))
        nr_valid = tf.stop_gradient(tf.count_nonzero(valid_mask, dtype=tf.int32), name='num_valid_anchor')
        nr_pos = tf.identity(tf.count_nonzero(pos_mask, dtype=tf.int32), name='num_pos_anchor')
        # nr_pos is guaranteed >0 in C4. But in FPN, even nr_valid could be 0.

        valid_anchor_labels = tf.boolean_mask(anchor_labels, valid_mask)
    valid_label_logits = tf.boolean_mask(label_logits, valid_mask)

    with tf.name_scope('label_metrics'):
        valid_label_prob = tf.nn.sigmoid(valid_label_logits)
        summaries = []
        with tf.device('/cpu:0'):
            for th in [0.5, 0.2, 0.1]:
                valid_prediction = tf.cast(valid_label_prob > th, tf.int32)
                nr_pos_prediction = tf.reduce_sum(valid_prediction, name='num_pos_prediction')
                pos_prediction_corr = tf.count_nonzero(
                    tf.logical_and(
                        valid_label_prob > th,
                        tf.equal(valid_prediction, valid_anchor_labels)),
                    dtype=tf.int32)
                placeholder = 0.5   # A small value will make summaries appear lower.
                recall = tf.to_float(tf.truediv(pos_prediction_corr, nr_pos))
                recall = tf.where(tf.equal(nr_pos, 0), placeholder, recall, name='recall_th{}'.format(th))
                precision = tf.to_float(tf.truediv(pos_prediction_corr, nr_pos_prediction))
                precision = tf.where(tf.equal(nr_pos_prediction, 0),
                                     placeholder, precision, name='precision_th{}'.format(th))
                summaries.extend([precision, recall])
        add_moving_summary(*summaries)

    # Per-level loss summaries in FPN may appear lower due to the use of a small placeholder.
    # But the total loss is still the same. TODO make the summary op smarter
    placeholder = 0.
    label_loss = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=tf.to_float(valid_anchor_labels), logits=valid_label_logits)
    label_loss = tf.reduce_sum(label_loss) * (1. / cfg.RPN.BATCH_PER_IM)
    label_loss = tf.where(tf.equal(nr_valid, 0), placeholder, label_loss, name='label_loss')

    pos_anchor_boxes = tf.boolean_mask(anchor_boxes, pos_mask)
    pos_box_logits = tf.boolean_mask(box_logits, pos_mask)
    delta = 1.0 / 9
    box_loss = tf.losses.huber_loss(
        pos_anchor_boxes, pos_box_logits, delta=delta,
        reduction=tf.losses.Reduction.SUM) / delta
    box_loss = box_loss * (1. / cfg.RPN.BATCH_PER_IM)
    box_loss = tf.where(tf.equal(nr_pos, 0), placeholder, box_loss, name='box_loss')

    add_moving_summary(label_loss, box_loss, nr_valid, nr_pos)
    return label_loss, box_loss


@under_name_scope()
def generate_rpn_proposals(boxes, scores, img_shape,
                           pre_nms_topk, post_nms_topk=None):
    """
    Sample RPN proposals by the following steps:
    1. Pick top k1 by scores
    2. NMS them
    3. Pick top k2 by scores. Default k2 == k1, i.e. does not filter the NMS output.

    Args:
        boxes: nx4 float dtype, the proposal boxes. Decoded to floatbox already
        scores: n float, the logits
        img_shape: [h, w]
        pre_nms_topk, post_nms_topk (int): See above.

    Returns:
        boxes: kx4 float
        scores: k logits
    """
    assert boxes.shape.ndims == 2, boxes.shape
    if post_nms_topk is None:
        post_nms_topk = pre_nms_topk

    topk = tf.minimum(pre_nms_topk, tf.size(scores))
    topk_scores, topk_indices = tf.nn.top_k(scores, k=topk, sorted=False)
    topk_boxes = tf.gather(boxes, topk_indices)
    topk_boxes = clip_boxes(topk_boxes, img_shape)

    topk_boxes_x1y1x2y2 = tf.reshape(topk_boxes, (-1, 2, 2))
    topk_boxes_x1y1, topk_boxes_x2y2 = tf.split(topk_boxes_x1y1x2y2, 2, axis=1)
    # nx1x2 each
    wbhb = tf.squeeze(topk_boxes_x2y2 - topk_boxes_x1y1, axis=1)
    valid = tf.reduce_all(wbhb > cfg.RPN.MIN_SIZE, axis=1)  # n,
    topk_valid_boxes_x1y1x2y2 = tf.boolean_mask(topk_boxes_x1y1x2y2, valid)
    topk_valid_scores = tf.boolean_mask(topk_scores, valid)

    # TODO not needed
    topk_valid_boxes_y1x1y2x2 = tf.reshape(
        tf.reverse(topk_valid_boxes_x1y1x2y2, axis=[2]),
        (-1, 4), name='nms_input_boxes')
    nms_indices = tf.image.non_max_suppression(
        topk_valid_boxes_y1x1y2x2,
        # TODO use exp to work around a bug in TF1.9: https://github.com/tensorflow/tensorflow/issues/19578
        tf.exp(topk_valid_scores),
        max_output_size=post_nms_topk,
        iou_threshold=cfg.RPN.PROPOSAL_NMS_THRESH)

    topk_valid_boxes = tf.reshape(topk_valid_boxes_x1y1x2y2, (-1, 4))
    final_boxes = tf.gather(topk_valid_boxes, nms_indices)
    final_scores = tf.gather(topk_valid_scores, nms_indices)
    tf.sigmoid(final_scores, name='probs')  # for visualization
    return tf.stop_gradient(final_boxes, name='boxes'), tf.stop_gradient(final_scores, name='scores')
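One observation about the box loss in rpn_losses above (an editorial note, not text from the commit): dividing the Huber loss with delta = 1/9 by delta reproduces the smooth-L1 loss with sigma = 3 used by py-faster-rcnn and Detectron, since with delta = 1/sigma^2

\[
\frac{1}{\delta}\,\mathrm{huber}_{\delta}(x)=
\begin{cases}
\dfrac{x^{2}}{2\delta}=\dfrac{(\sigma x)^{2}}{2}, & |x|\le \delta=\dfrac{1}{\sigma^{2}},\\
|x|-\dfrac{\delta}{2}=|x|-\dfrac{1}{2\sigma^{2}}, & \text{otherwise,}
\end{cases}
\qquad \delta=\tfrac{1}{9},\ \sigma=3 .
\]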
@@ -29,16 +29,20 @@ from coco import COCODetection
from basemodel import (
    image_preprocess, resnet_c4_backbone, resnet_conv5,
    resnet_fpn_backbone)
import model
from model import (
    rpn_head, rpn_losses,
    generate_rpn_proposals, sample_fast_rcnn_targets,
    fastrcnn_outputs, fastrcnn_losses, fastrcnn_predictions,
    maskrcnn_upXconv_head, maskrcnn_loss,
    fpn_model, multilevel_roi_align, multilevel_rpn_losses, generate_fpn_proposals)
import model_frcnn
from model_frcnn import (
    sample_fast_rcnn_targets,
    fastrcnn_outputs, fastrcnn_losses, fastrcnn_predictions)
from model_mrcnn import maskrcnn_upXconv_head, maskrcnn_loss
from model_rpn import rpn_head, rpn_losses, generate_rpn_proposals
from model_fpn import (
    fpn_model, multilevel_roi_align,
    multilevel_rpn_losses, generate_fpn_proposals)
from model_box import (
    clip_boxes, decode_bbox_target, encode_bbox_target,
    crop_and_resize, roi_align, RPNAnchors)
from data import (
    get_train_dataflow, get_eval_dataflow,
    get_all_anchors, get_all_anchors_fpn)
@@ -328,7 +332,7 @@ class ResNetFPNModel(DetectionModel):
        roi_feature_fastrcnn = multilevel_roi_align(p23456[:4], rcnn_boxes, 7)

        fastrcnn_head_func = getattr(model, cfg.FPN.FRCNN_HEAD_FUNC)
        fastrcnn_head_func = getattr(model_frcnn, cfg.FPN.FRCNN_HEAD_FUNC)
        fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_head_func(
            'fastrcnn', roi_feature_fastrcnn, cfg.DATA.NUM_CLASS)
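The changed line above only swaps the module that the Fast-RCNN head is looked up from; a tiny self-contained sketch (the function name below is a hypothetical example, not necessarily the config default) of the same string-to-function dispatch:

import types

model_frcnn_stub = types.SimpleNamespace()                       # stand-in for the model_frcnn module
model_frcnn_stub.fastrcnn_2fc_head = lambda *a, **kw: None       # hypothetical head-building function

FRCNN_HEAD_FUNC = 'fastrcnn_2fc_head'                            # e.g. a value cfg.FPN.FRCNN_HEAD_FUNC could hold
head_func = getattr(model_frcnn_stub, FRCNN_HEAD_FUNC)           # train.py does getattr(model_frcnn, cfg.FPN.FRCNN_HEAD_FUNC)
assert callable(head_func)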