Commit cc0f07a9 authored by Yuxin Wu

[FasterRCNN] rename a config; prefer arguments over constants

parent 26b13269
......@@ -22,7 +22,7 @@ ANCHOR_STRIDE = 16
# sqrtarea of the anchor box
ANCHOR_SIZES = (32, 64, 128, 256, 512)
ANCHOR_RATIOS = (0.5, 1., 2.)
NR_ANCHOR = len(ANCHOR_SIZES) * len(ANCHOR_RATIOS)
NUM_ANCHOR = len(ANCHOR_SIZES) * len(ANCHOR_RATIOS)
POSITIVE_ANCHOR_THRES = 0.7
NEGATIVE_ANCHOR_THRES = 0.3
......
......@@ -29,25 +29,29 @@ class MalformedData(BaseException):
@memoized
def get_all_anchors():
def get_all_anchors(
stride=config.ANCHOR_STRIDE,
sizes=config.ANCHOR_SIZES):
"""
Get all anchors in the largest possible image, shifted, floatbox
Returns:
anchors: SxSxNR_ANCHORx4, where S == MAX_SIZE//STRIDE, floatbox
anchors: SxSxNUM_ANCHORx4, where S == MAX_SIZE//STRIDE, floatbox
The layout in the NUM_ANCHOR dim is NUM_RATIO x NUM_SCALE.
"""
# Generates a NAx4 matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors
# are centered on stride / 2, have (approximate) sqrt areas of the specified
# sizes, and aspect ratios as given.
cell_anchors = generate_anchors(
config.ANCHOR_STRIDE,
scales=np.array(config.ANCHOR_SIZES, dtype=np.float) / config.ANCHOR_STRIDE,
stride,
scales=np.array(sizes, dtype=np.float) / stride,
ratios=np.array(config.ANCHOR_RATIOS, dtype=np.float))
# anchors are intbox here.
# anchors at featuremap [0,0] are centered at fpcoor (8,8) (half of stride)
field_size = config.MAX_SIZE // config.ANCHOR_STRIDE
shifts = np.arange(0, field_size) * config.ANCHOR_STRIDE
field_size = config.MAX_SIZE // stride
shifts = np.arange(0, field_size) * stride
shift_x, shift_y = np.meshgrid(shifts, shifts)
shift_x = shift_x.flatten()
shift_y = shift_y.flatten()
......@@ -179,12 +183,12 @@ def get_rpn_anchor_input(im, boxes, klass, is_crowd):
anchor_labels, anchor_boxes = get_anchor_labels(inside_anchors, non_crowd_boxes, crowd_boxes)
# Fill them back to original size: fHxfWxNA, fHxfWxNAx4
featuremap_labels = -np.ones((featureH * featureW * config.NR_ANCHOR, ), dtype='int32')
featuremap_labels = -np.ones((featureH * featureW * config.NUM_ANCHOR, ), dtype='int32')
featuremap_labels[inside_ind] = anchor_labels
featuremap_labels = featuremap_labels.reshape((featureH, featureW, config.NR_ANCHOR))
featuremap_boxes = np.zeros((featureH * featureW * config.NR_ANCHOR, 4), dtype='float32')
featuremap_labels = featuremap_labels.reshape((featureH, featureW, config.NUM_ANCHOR))
featuremap_boxes = np.zeros((featureH * featureW * config.NUM_ANCHOR, 4), dtype='float32')
featuremap_boxes[inside_ind, :] = anchor_boxes
featuremap_boxes = featuremap_boxes.reshape((featureH, featureW, config.NR_ANCHOR, 4))
featuremap_boxes = featuremap_boxes.reshape((featureH, featureW, config.NUM_ANCHOR, 4))
return featuremap_labels, featuremap_boxes
......
......@@ -96,11 +96,12 @@ def rpn_losses(anchor_labels, anchor_boxes, label_logits, box_logits):
@under_name_scope()
def decode_bbox_target(box_predictions, anchors):
def decode_bbox_target(box_predictions, anchors, stride):
"""
Args:
box_predictions: fHxfWxNAx4, logits
anchors: fHxfWxNAx4, floatbox
stride (int): the stride of the anchors
Returns:
box_decoded: (fHxfWxNA)x4, float32
......@@ -115,7 +116,7 @@ def decode_bbox_target(box_predictions, anchors):
xaya = tf.to_float(anchors_x2y2 + anchors_x1y1) * 0.5
wbhb = tf.exp(tf.minimum(
box_pred_twth, np.log(config.MAX_SIZE * 1.0 / config.ANCHOR_STRIDE))) * waha
box_pred_twth, np.log(config.MAX_SIZE * 1.0 / stride))) * waha
xbyb = box_pred_txty * waha + xaya
x1y1 = xbyb - wbhb * 0.5
x2y2 = xbyb + wbhb * 0.5
......
......@@ -54,8 +54,8 @@ class Model(ModelDesc):
def _get_inputs(self):
return [
InputDesc(tf.float32, (None, None, 3), 'image'),
InputDesc(tf.int32, (None, None, config.NR_ANCHOR), 'anchor_labels'),
InputDesc(tf.float32, (None, None, config.NR_ANCHOR, 4), 'anchor_boxes'),
InputDesc(tf.int32, (None, None, config.NUM_ANCHOR), 'anchor_labels'),
InputDesc(tf.float32, (None, None, config.NUM_ANCHOR, 4), 'anchor_boxes'),
InputDesc(tf.float32, (None, 4), 'gt_boxes'),
InputDesc(tf.int64, (None,), 'gt_labels'),
]
......@@ -88,11 +88,11 @@ class Model(ModelDesc):
anchor_boxes_encoded = encode_bbox_target(anchor_boxes, fm_anchors)
featuremap = pretrained_resnet_conv4(image, config.RESNET_NUM_BLOCK[:3])
rpn_label_logits, rpn_box_logits = rpn_head(featuremap, 1024, config.NR_ANCHOR)
rpn_label_logits, rpn_box_logits = rpn_head(featuremap, 1024, config.NUM_ANCHOR)
rpn_label_loss, rpn_box_loss = rpn_losses(
anchor_labels, anchor_boxes_encoded, rpn_label_logits, rpn_box_logits)
decoded_boxes = decode_bbox_target(rpn_box_logits, fm_anchors) # (fHxfWxNA)x4, floatbox
decoded_boxes = decode_bbox_target(rpn_box_logits, fm_anchors, config.ANCHOR_STRIDE) # (fHxfWxNA)x4, floatbox
proposal_boxes, proposal_scores = generate_rpn_proposals(
decoded_boxes,
tf.reshape(rpn_label_logits, [-1]),
......@@ -131,7 +131,7 @@ class Model(ModelDesc):
fg_boxes = tf.gather(proposal_boxes, fg_ind)
fg_box_logits = fg_box_logits / tf.constant(config.FASTRCNN_BBOX_REG_WEIGHTS)
decoded_boxes = decode_bbox_target(fg_box_logits, fg_boxes) # Nfx4, floatbox
decoded_boxes = decode_bbox_target(fg_box_logits, fg_boxes, config.ANCHOR_STRIDE) # Nfx4, floatbox
decoded_boxes = tf.identity(decoded_boxes, name='fastrcnn_fg_boxes')
def _get_optimizer(self):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment