Commit 94499e81 authored by Yuxin Wu's avatar Yuxin Wu

[MaskRCNN] add pad mode option; add weight decay for FPN

parent 0937a01f
......@@ -21,6 +21,12 @@ def maybe_freeze_affine(getter, *args, **kwargs):
return getter(*args, **kwargs)
def maybe_reverse_pad(topleft, bottomright):
    """Build the 2-element padding spec for one spatial axis of tf.pad.

    Args:
        topleft (int/Tensor): amount to pad on the top (or left) side.
        bottomright (int/Tensor): amount to pad on the bottom (or right) side.

    Returns:
        list: ``[topleft, bottomright]`` when ``config.TF_PAD_MODE`` is set
        (TF's preferred order, possibly padding more on bottom/right);
        otherwise the reversed pair ``[bottomright, topleft]``.
    """
    ordered = [topleft, bottomright]
    return ordered if config.TF_PAD_MODE else ordered[::-1]
@contextmanager
def resnet_argscope():
with argscope([Conv2D, MaxPooling, BatchNorm], data_format='channels_first'), \
......@@ -58,7 +64,8 @@ def resnet_shortcut(l, n_out, stride, activation=tf.identity):
data_format = get_arg_scope()['Conv2D']['data_format']
n_in = l.get_shape().as_list()[1 if data_format in ['NCHW', 'channels_first'] else 3]
if n_in != n_out: # change dimension when channel is not the same
if stride == 2:
# TF's SAME mode output ceil(x/stride), which is NOT what we want when x is odd and stride is 2
if not config.MODE_FPN and stride == 2:
l = l[:, :, :-1, :-1]
return Conv2D('convshortcut', l, n_out, 1,
strides=stride, padding='VALID', activation=activation)
......@@ -73,12 +80,13 @@ def resnet_bottleneck(l, ch_out, stride):
l, shortcut = l, l
l = Conv2D('conv1', l, ch_out, 1, activation=BNReLU)
if stride == 2:
l = tf.pad(l, [[0, 0], [0, 0], [0, 1], [0, 1]])
l = tf.pad(l, [[0, 0], [0, 0], maybe_reverse_pad(0, 1), maybe_reverse_pad(0, 1)])
l = Conv2D('conv2', l, ch_out, 3, strides=2, activation=BNReLU, padding='VALID')
else:
l = Conv2D('conv2', l, ch_out, 3, strides=stride, activation=BNReLU)
l = Conv2D('conv3', l, ch_out * 4, 1, activation=get_bn(zero_init=True))
return l + resnet_shortcut(shortcut, ch_out * 4, stride, activation=get_bn(zero_init=False))
ret = l + resnet_shortcut(shortcut, ch_out * 4, stride, activation=get_bn(zero_init=False))
return tf.nn.relu(ret, name='output')
def resnet_group(name, l, block_func, features, count, stride):
......@@ -87,17 +95,15 @@ def resnet_group(name, l, block_func, features, count, stride):
with tf.variable_scope('block{}'.format(i)):
l = block_func(l, features,
stride if i == 0 else 1)
# end of each block need an activation
l = tf.nn.relu(l)
return l
def resnet_c4_backbone(image, num_blocks, freeze_c2=True):
assert len(num_blocks) == 3
with resnet_argscope():
l = tf.pad(image, [[0, 0], [0, 0], [2, 3], [2, 3]])
l = tf.pad(image, [[0, 0], [0, 0], maybe_reverse_pad(2, 3), maybe_reverse_pad(2, 3)])
l = Conv2D('conv0', l, 64, 7, strides=2, activation=BNReLU, padding='VALID')
l = tf.pad(l, [[0, 0], [0, 0], [0, 1], [0, 1]])
l = tf.pad(l, [[0, 0], [0, 0], maybe_reverse_pad(0, 1), maybe_reverse_pad(0, 1)])
l = MaxPooling('pool0', l, 3, strides=2, padding='VALID')
c2 = resnet_group('group0', l, resnet_bottleneck, 64, num_blocks[0], 1)
# TODO replace var by const to enable optimization
......@@ -118,17 +124,19 @@ def resnet_conv5(image, num_block):
def resnet_fpn_backbone(image, num_blocks, freeze_c2=True):
shape2d = tf.shape(image)[2:]
mult = config.FPN_RESOLUTION_REQUIREMENT * 1.
mult = float(config.FPN_RESOLUTION_REQUIREMENT)
new_shape2d = tf.to_int32(tf.ceil(tf.to_float(shape2d) / mult) * mult)
pad_shape2d = new_shape2d - shape2d
assert len(num_blocks) == 4, num_blocks
with resnet_argscope():
chan = image.shape[1]
l = tf.pad(image, tf.stack(
[[0, 0], [0, 0], [2, 3 + pad_shape2d[0]], [2, 3 + pad_shape2d[1]]]))
[[0, 0], [0, 0],
maybe_reverse_pad(2, 3 + pad_shape2d[0]),
maybe_reverse_pad(2, 3 + pad_shape2d[1])]))
l.set_shape([None, chan, None, None])
l = Conv2D('conv0', l, 64, 7, strides=2, activation=BNReLU, padding='VALID')
l = tf.pad(l, [[0, 0], [0, 0], [0, 1], [0, 1]])
l = tf.pad(l, [[0, 0], [0, 0], maybe_reverse_pad(0, 1), maybe_reverse_pad(0, 1)])
l = MaxPooling('pool0', l, 3, strides=2, padding='VALID')
c2 = resnet_group('group0', l, resnet_bottleneck, 64, num_blocks[0], 1)
if freeze_c2:
......
......@@ -18,6 +18,10 @@ CLASS_NAMES = [] # NUM_CLASS strings. Needs to be populated later by data loade
RESNET_NUM_BLOCK = [3, 4, 6, 3] # for resnet50
# RESNET_NUM_BLOCK = [3, 4, 23, 3] # for resnet101
FREEZE_AFFINE = False # do not train affine parameters inside BN
# Use a base model with TF-preferred pad mode
# which may pad more pixels on right/bottom than top/left.
# This is probably not good for alignment but we'll have to live with it.
TF_PAD_MODE = True
# schedule -----------------------
BASE_LR = 1e-2
......
......@@ -418,7 +418,7 @@ class ResNetFPNModel(DetectionModel):
mrcnn_loss = 0.0
wd_cost = regularize_cost(
'(?:group1|group2|group3|rpn|fastrcnn|maskrcnn)/.*W',
'(?:group1|group2|group3|rpn|fpn|fastrcnn|maskrcnn)/.*W',
l2_regularizer(1e-4), name='wd_cost')
total_cost = tf.add_n(rpn_loss_collection + [
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment