Commit 21d54280 authored by Yuxin Wu

cleanups and rename

parent e3f463ab
@@ -92,7 +92,7 @@ def resnet_group(l, name, block_func, features, count, stride):
     return l
-def pretrained_resnet_c4_backbone(image, num_blocks, freeze_c2=True):
+def resnet_c4_backbone(image, num_blocks, freeze_c2=True):
     assert len(num_blocks) == 3
     with resnet_argscope():
         l = tf.pad(image, [[0, 0], [0, 0], [2, 3], [2, 3]])
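The [[2, 3], [2, 3]] padding in the renamed backbone applies to the H and W axes of the 1CHW image, adding 2 pixels before and 3 after on each spatial side ahead of the backbone's first convolution. A minimal shape-only sketch, with made-up input dimensions:

import tensorflow as tf

# Hypothetical NCHW input: batch=1, channels=3, 224x224.
image = tf.zeros([1, 3, 224, 224])
# Pad only the two spatial axes: 2 before, 3 after on each.
padded = tf.pad(image, [[0, 0], [0, 0], [2, 3], [2, 3]])
print(padded.shape)   # (1, 3, 229, 229)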
@@ -116,7 +116,7 @@ def resnet_conv5(image, num_block):
     return l
-def pretrained_resnet_fpn_backbone(image, num_blocks, freeze_c2=True):
+def resnet_fpn_backbone(image, num_blocks, freeze_c2=True):
     shape2d = tf.shape(image)[2:]
     mult = config.FPN_RESOLUTION_REQUIREMENT * 1.
     new_shape2d = tf.to_int32(tf.ceil(tf.to_float(shape2d) / mult) * mult)
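resnet_fpn_backbone rounds the input resolution up to a multiple of config.FPN_RESOLUTION_REQUIREMENT so that every pyramid level divides evenly. A plain-Python sketch of the same rounding, assuming a requirement of 32 (an example value, not taken from this diff):

import math

FPN_RESOLUTION_REQUIREMENT = 32   # assumed example value

def round_up(shape2d, mult=FPN_RESOLUTION_REQUIREMENT):
    # ceil(size / mult) * mult, mirroring the tf.ceil expression above
    return [int(math.ceil(s / float(mult)) * mult) for s in shape2d]

print(round_up([800, 1067]))   # [800, 1088]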
@@ -17,7 +17,7 @@ CLASS_NAMES = []  # NUM_CLASS strings. Will be populated later by coco loader
 # basemodel ----------------------
 RESNET_NUM_BLOCK = [3, 4, 6, 3]     # for resnet50
 # RESNET_NUM_BLOCK = [3, 4, 23, 3]  # for resnet101
 FREEZE_AFFINE = False   # do not train affine parameters inside BN
 # schedule -----------------------
 BASE_LR = 1e-2
@@ -25,7 +25,7 @@ WARMUP = 1000    # in steps
 STEPS_PER_EPOCH = 500
 LR_SCHEDULE = [150000, 230000, 280000]
 LR_SCHEDULE = [120000, 160000, 180000]    # "1x" schedule in detectron
-#LR_SCHEDULE = [240000, 320000, 360000]   # "2x" schedule in detectron
+LR_SCHEDULE = [240000, 320000, 360000]    # "2x" schedule in detectron
 # image resolution --------------------
 SHORT_EDGE_SIZE = 800
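The schedules are step-indexed boundaries in the Detectron convention: after warmup, the learning rate is usually multiplied by 0.1 at each boundary, and the last entry marks where training ends. A small sketch of reading such a schedule (the 0.1 decay factor is the Detectron convention, assumed here rather than stated in this file):

BASE_LR = 1e-2
LR_SCHEDULE = [240000, 320000, 360000]   # "2x": decay at 240k and 320k, stop at 360k

def lr_at(step, base_lr=BASE_LR, boundaries=LR_SCHEDULE, decay=0.1):
    lr = base_lr
    for b in boundaries[:-1]:        # the final entry only marks the end of training
        if step >= b:
            lr *= decay
    return lr

print(lr_at(100000), lr_at(250000), lr_at(330000))   # ~0.01, ~0.001, ~0.0001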
@@ -143,7 +143,7 @@ def print_evaluation_scores(json_file):
     cocoEval.summarize()
     fields = ['IoU=0.5:0.95', 'IoU=0.5', 'IoU=0.75', 'small', 'medium', 'large']
     for k in range(6):
-        ret['mAP(bbox)/' + fields[k]] = cocoEval.stat[k]
+        ret['mAP(bbox)/' + fields[k]] = cocoEval.stats[k]
     if config.MODE_MASK:
         cocoEval = COCOeval(coco, cocoDt, 'segm')
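For context on the stat -> stats fix: after summarize(), pycocotools stores a 12-element array in COCOeval.stats, whose first six entries are exactly the AP numbers read out above (AP over IoU 0.5:0.95, AP50, AP75, and AP for small/medium/large objects); entries 6-11 are the AR counterparts. A short sketch of the readout, assuming cocoEval has already been evaluated, accumulated and summarized:

fields = ['IoU=0.5:0.95', 'IoU=0.5', 'IoU=0.75', 'small', 'medium', 'large']
ret = {}
for k in range(6):
    # stats[0:6] = AP numbers, stats[6:12] = AR numbers
    ret['mAP(bbox)/' + fields[k]] = cocoEval.stats[k]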
@@ -424,8 +424,6 @@ def fastrcnn_2fc_head(feature, num_classes):
         cls_logits (Nxnum_class), reg_logits (Nx num_class-1 x 4)
     """
     dim = config.FASTRCNN_FC_HEAD_DIM
-    logger.info("fc-head-xavier-fanin")
-    #init = tf.random_normal_initializer(stddev=0.01)
     init = tf.variance_scaling_initializer()
     hidden = FullyConnected('fc6', feature, dim, kernel_initializer=init, nl=tf.nn.relu)
     hidden = FullyConnected('fc7', hidden, dim, kernel_initializer=init, nl=tf.nn.relu)
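The deleted log line ("fc-head-xavier-fanin") described the initializer that remains: with no arguments, tf.variance_scaling_initializer() in TF 1.x scales by fan-in with scale 1.0, i.e. weight stddev around sqrt(1 / fan_in). Spelled out explicitly (equivalent to the default call as far as the TF 1.x API goes):

import tensorflow as tf

# Fan-in scaling with unit scale; the distribution argument is left at its default.
init = tf.variance_scaling_initializer(scale=1.0, mode='fan_in')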
@@ -25,8 +25,8 @@ from tensorpack.utils.gpu import get_nr_gpu
 from coco import COCODetection
 from basemodel import (
-    image_preprocess, pretrained_resnet_c4_backbone, resnet_conv5,
-    pretrained_resnet_fpn_backbone)
+    image_preprocess, resnet_c4_backbone, resnet_conv5,
+    resnet_fpn_backbone)
 from model import (
     clip_boxes, decode_bbox_target, encode_bbox_target, crop_and_resize,
     rpn_head, rpn_losses,
@@ -182,7 +182,7 @@ class ResNetC4Model(DetectionModel):
         image, anchor_labels, anchor_boxes, gt_boxes, gt_labels = inputs
         image = self.preprocess(image)     # 1CHW
-        featuremap = pretrained_resnet_c4_backbone(image, config.RESNET_NUM_BLOCK[:3])
+        featuremap = resnet_c4_backbone(image, config.RESNET_NUM_BLOCK[:3])
         rpn_label_logits, rpn_box_logits = rpn_head('rpn', featuremap, 1024, config.NUM_ANCHOR)
         fm_anchors, anchor_labels, anchor_boxes = self.narrow_to_featuremap(
@@ -211,6 +211,7 @@ class ResNetC4Model(DetectionModel):
         roi_resized = roi_align(featuremap, boxes_on_featuremap, 14)
         # HACK to work around https://github.com/tensorflow/tensorflow/issues/14657
+        # which was fixed in TF 1.6
         def ff_true():
             feature_fastrcnn = resnet_conv5(roi_resized, config.RESNET_NUM_BLOCK[-1])    # nxcx7x7
             feature_gap = GlobalAvgPooling('gap', feature_fastrcnn, data_format='channels_first')
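The referenced hack wraps the per-RoI head in tf.cond so it only executes when at least one box survives, because several ops in TensorFlow before 1.6 crashed on empty tensors (issue 14657). A minimal sketch of that guard pattern; the names and the fallback shape are illustrative, not the exact ones used in this file:

import tensorflow as tf

def guarded_head(boxes, run_head, empty_like):
    # Run the real head only when there is at least one box; otherwise return
    # correctly-shaped empty tensors so downstream ops still type-check.
    return tf.cond(tf.size(boxes) > 0, run_head, empty_like)

# Illustrative usage against the code above:
# logits = guarded_head(boxes_on_featuremap,
#                       ff_true,                              # the real per-RoI branch
#                       lambda: tf.zeros([0, NUM_CLASS]))     # empty fallback of matching rank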
@@ -247,7 +248,7 @@ class ResNetC4Model(DetectionModel):
             # In training, mask branch shares the same C5 feature.
             fg_feature = tf.gather(feature_fastrcnn, fg_inds_wrt_sample)
             mask_logits = maskrcnn_upXconv_head(
-                'maskrcnn', fg_feature, config.NUM_CLASS, 0)   # #fg x #cat x 14x14
+                'maskrcnn', fg_feature, config.NUM_CLASS, num_convs=0)   # #fg x #cat x 14x14
             matched_gt_masks = tf.gather(gt_masks, fg_inds_wrt_gt)   # nfg x H x W
             target_masks_for_fg = crop_and_resize(
@@ -326,20 +327,19 @@ class ResNetFPNModel(DetectionModel):
         image = self.preprocess(image)     # 1CHW
         image_shape2d = tf.shape(image)[2:]     # h,w
-        c2345 = pretrained_resnet_fpn_backbone(image, config.RESNET_NUM_BLOCK)
+        c2345 = resnet_fpn_backbone(image, config.RESNET_NUM_BLOCK)
         p23456 = fpn_model('fpn', c2345)
         # Multi-Level RPN Proposals
-        multilevel_anchors = get_all_anchors_fpn()
-        assert len(multilevel_anchors) == num_fpn_level
         multilevel_proposals = []
         rpn_loss_collection = []
         for lvl in range(num_fpn_level):
             rpn_label_logits, rpn_box_logits = rpn_head(
                 'rpn', p23456[lvl], config.FPN_NUM_CHANNEL, len(config.ANCHOR_RATIOS))
             with tf.name_scope('FPN_lvl{}'.format(lvl + 2)):
+                anchors = tf.constant(get_all_anchors_fpn()[lvl], name='rpn_anchor_lvl{}'.format(lvl + 2))
                 anchors, anchor_labels, anchor_boxes = \
-                    self.narrow_to_featuremap(p23456[lvl], multilevel_anchors[lvl],
+                    self.narrow_to_featuremap(p23456[lvl], anchors,
                                               multilevel_anchor_labels[lvl],
                                               multilevel_anchor_boxes[lvl])
                 anchor_boxes_encoded = encode_bbox_target(anchor_boxes, anchors)
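narrow_to_featuremap (defined elsewhere in this file) exists because anchors, labels and target boxes are pre-generated for a maximum input size and must be cropped to the actual feature-map extent of each level. A rough sketch of that slicing idea, assuming (H_max, W_max, A, ...) layouts; the real helper may differ in details:

import tensorflow as tf

def narrow_to_featuremap_sketch(featuremap, anchors, anchor_labels, anchor_boxes):
    # featuremap: NCHW; anchors/boxes: (H_max, W_max, A, 4); labels: (H_max, W_max, A)
    shape2d = tf.shape(featuremap)[2:]              # (h, w) of the current level
    slice3d = tf.concat([shape2d, [-1]], axis=0)
    slice4d = tf.concat([shape2d, [-1, -1]], axis=0)
    anchors = tf.slice(anchors, [0, 0, 0, 0], slice4d)
    anchor_labels = tf.slice(anchor_labels, [0, 0, 0], slice3d)
    anchor_boxes = tf.slice(anchor_boxes, [0, 0, 0, 0], slice4d)
    return anchors, anchor_labels, anchor_boxes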
@@ -356,12 +356,11 @@ class ResNetFPNModel(DetectionModel):
                     rpn_label_logits, rpn_box_logits)
                 rpn_loss_collection.extend([label_loss, box_loss])
-        # merge proposals from multi levels
+        # Merge proposals from multi levels, pick top K
         proposal_boxes = tf.concat([x[0] for x in multilevel_proposals], axis=0)   # nx4
         proposal_scores = tf.concat([x[1] for x in multilevel_proposals], axis=0)  # n
         proposal_topk = tf.minimum(tf.size(proposal_scores),
                                    config.TRAIN_FPN_NMS_TOPK if is_training else config.TEST_FPN_NMS_TOPK)
         proposal_scores, topk_indices = tf.nn.top_k(proposal_scores, k=proposal_topk, sorted=False)
         proposal_boxes = tf.gather(proposal_boxes, topk_indices)
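The merge step pools (boxes, scores) from all levels and keeps the K best-scoring proposals overall, with K capped by the number of proposals actually available. A self-contained toy sketch of the same selection (all values made up):

import tensorflow as tf

multilevel_proposals = [                                    # per-level (boxes nx4, scores n)
    (tf.constant([[0., 0., 10., 10.], [5., 5., 20., 20.]]), tf.constant([0.9, 0.2])),
    (tf.constant([[1., 1., 30., 30.]]), tf.constant([0.6])),
]
TOPK = 2                                                    # stand-in for the *_FPN_NMS_TOPK configs

boxes = tf.concat([b for b, _ in multilevel_proposals], axis=0)     # (3, 4)
scores = tf.concat([s for _, s in multilevel_proposals], axis=0)    # (3,)
k = tf.minimum(tf.size(scores), TOPK)                               # never ask for more than exist
scores, idx = tf.nn.top_k(scores, k=k, sorted=False)
boxes = tf.gather(boxes, idx)                                       # the 0.9 and 0.6 proposals remain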
@@ -378,7 +377,7 @@ class ResNetFPNModel(DetectionModel):
             'fastrcnn', roi_feature_fastrcnn, config.NUM_CLASS)
         if is_training:
-            # rpn loss ...
+            # rpn loss is already defined above
             with tf.name_scope('rpn_losses'):
                 rpn_total_label_loss = tf.add_n(rpn_loss_collection[::2], name='label_loss')
                 rpn_total_box_loss = tf.add_n(rpn_loss_collection[1::2], name='box_loss')
@@ -428,6 +427,7 @@ class ResNetFPNModel(DetectionModel):
             final_boxes, final_labels = self.fastrcnn_inference(
                 image_shape2d, rcnn_boxes, fastrcnn_label_logits, fastrcnn_box_logits)
             if config.MODE_MASK:
+                # Cascade inference needs roi transform with refined boxes.
                 roi_feature_maskrcnn = multilevel_roi_align(
                     p23456[:4], final_boxes, 14)
                 mask_logits = maskrcnn_upXconv_head(
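multilevel_roi_align has to decide which pyramid level each refined box is pooled from; the FPN paper assigns a box of area w*h to level k = floor(k0 + log2(sqrt(w*h) / 224)) with k0 = 4, clipped to [2, 5]. A small sketch of that assignment rule (the 224 and k0 constants come from the FPN paper, not from this diff):

import numpy as np

def fpn_level_for_boxes(boxes, k0=4, canonical_size=224.0):
    # boxes: (n, 4) as x1, y1, x2, y2; returns the pyramid level (2..5) per box.
    sizes = np.sqrt((boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]))
    levels = np.floor(k0 + np.log2(sizes / canonical_size + 1e-6))
    return np.clip(levels, 2, 5).astype(np.int32)

print(fpn_level_for_boxes(np.array([[0., 0., 224., 224.],      # -> level 4
                                    [0., 0., 56., 56.]])))     # -> level 2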