Commit 7800cf1c authored by Yuxin Wu

refactor resnet group function

parent f002bfb9
...@@ -2,8 +2,8 @@ ...@@ -2,8 +2,8 @@
## Build the docs: ## Build the docs:
### Dependencies: ### Dependencies:
1. python3 1. Python3
2. `pip install -r requirements.txt` 2. `pip install -r requirements.txt`. These requirements are different from tensorpack dependencies.
### Build HTML docs: ### Build HTML docs:
`make html` `make html`
......
...@@ -91,7 +91,7 @@ class Model(ModelDesc): ...@@ -91,7 +91,7 @@ class Model(ModelDesc):
argscope(BatchNorm, decay=0.9, epsilon=1e-4), \ argscope(BatchNorm, decay=0.9, epsilon=1e-4), \
argscope(Conv2D, use_bias=False, nl=tf.identity): argscope(Conv2D, use_bias=False, nl=tf.identity):
logits = (LinearWrap(image) logits = (LinearWrap(image)
# use explicit padding here, because our training framework has # use explicit padding here, because our private training framework has
# different padding mechanisms from TensorFlow # different padding mechanisms from TensorFlow
.tf.pad([[0, 0], [3, 2], [3, 2], [0, 0]]) .tf.pad([[0, 0], [3, 2], [3, 2], [0, 0]])
.Conv2D('conv1', 64, 7, stride=2, padding='VALID', use_bias=True) .Conv2D('conv1', 64, 7, stride=2, padding='VALID', use_bias=True)
......
...@@ -5,7 +5,7 @@ This is a minimal implementation that simply contains these files: ...@@ -5,7 +5,7 @@ This is a minimal implementation that simply contains these files:
+ data.py: prepare data for training + data.py: prepare data for training
+ common.py: common data preparation utilities + common.py: common data preparation utilities
+ basemodel.py: implement resnet + basemodel.py: implement resnet
+ model.py: implement rpn/faster-rcnn/mask-rcnn + model.py: implement RPN/Faster-RCNN/Mask-RCNN
+ train.py: main training script + train.py: main training script
+ utils/: third-party helper functions + utils/: third-party helper functions
+ eval.py: evaluation utilities + eval.py: evaluation utilities
......
...@@ -36,8 +36,8 @@ Train: ...@@ -36,8 +36,8 @@ Train:
``` ```
./train.py --load /path/to/ImageNet-ResNet50.npz ./train.py --load /path/to/ImageNet-ResNet50.npz
``` ```
The code is only for training with 1, 2, 4 or 8 GPUs. The code is only valid for training with 1, 2, 4 or 8 GPUs.
Otherwise, you probably need different hyperparameters for the same performance. Not training with 8 GPUs may result in different performance from the table below.
Predict on an image (and show output in a window): Predict on an image (and show output in a window):
``` ```
...@@ -62,8 +62,8 @@ MaskRCNN results contain both bbox and segm mAP. ...@@ -62,8 +62,8 @@ MaskRCNN results contain both bbox and segm mAP.
|R50-C4 |512 |(800, 1333)|360k |36.6 |49h on 8 V100s| |R50-C4 |512 |(800, 1333)|360k |36.6 |49h on 8 V100s|
|R50-FPN |512 |(800, 1333)|360k |37.5 |28h on 8 V100s| |R50-FPN |512 |(800, 1333)|360k |37.5 |28h on 8 V100s|
|R50-C4 |256 |(800, 1333)|280k |36.8/32.1 |39h on 8 P100s| |R50-C4 |256 |(800, 1333)|280k |36.8/32.1 |39h on 8 P100s|
|R50-C4 |512 |(800, 1333)|360k |37.8/33.2 |51h on 8 V100s| |R50-C4 |512 |(800, 1333)|360k |37.8/33.1 |51h on 8 V100s|
|R50-FPN |512 |(800, 1333)|360k |38.1/34.9 |38h on 8 V100s| |R50-FPN |512 |(800, 1333)|360k |38.1/34.9 |38h on 8 V100s|
|R101-C4 |512 |(800, 1333)|280k |40.1/34.4 |70h on 8 P100s| |R101-C4 |512 |(800, 1333)|280k |40.1/34.4 |70h on 8 P100s|
|R101-C4 |512 |(800, 1333)|360k |40.8/35.1 |63h on 8 V100s| |R101-C4 |512 |(800, 1333)|360k |40.8/35.1 |63h on 8 V100s|
......
...@@ -81,7 +81,7 @@ def resnet_bottleneck(l, ch_out, stride): ...@@ -81,7 +81,7 @@ def resnet_bottleneck(l, ch_out, stride):
return l + resnet_shortcut(shortcut, ch_out * 4, stride, activation=get_bn(zero_init=False)) return l + resnet_shortcut(shortcut, ch_out * 4, stride, activation=get_bn(zero_init=False))
def resnet_group(l, name, block_func, features, count, stride): def resnet_group(name, l, block_func, features, count, stride):
with tf.variable_scope(name): with tf.variable_scope(name):
for i in range(0, count): for i in range(0, count):
with tf.variable_scope('block{}'.format(i)): with tf.variable_scope('block{}'.format(i)):
...@@ -99,12 +99,12 @@ def resnet_c4_backbone(image, num_blocks, freeze_c2=True): ...@@ -99,12 +99,12 @@ def resnet_c4_backbone(image, num_blocks, freeze_c2=True):
l = Conv2D('conv0', l, 64, 7, strides=2, activation=BNReLU, padding='VALID') l = Conv2D('conv0', l, 64, 7, strides=2, activation=BNReLU, padding='VALID')
l = tf.pad(l, [[0, 0], [0, 0], [0, 1], [0, 1]]) l = tf.pad(l, [[0, 0], [0, 0], [0, 1], [0, 1]])
l = MaxPooling('pool0', l, 3, strides=2, padding='VALID') l = MaxPooling('pool0', l, 3, strides=2, padding='VALID')
c2 = resnet_group(l, 'group0', resnet_bottleneck, 64, num_blocks[0], 1) c2 = resnet_group('group0', l, resnet_bottleneck, 64, num_blocks[0], 1)
# TODO replace var by const to enable optimization # TODO replace var by const to enable optimization
if freeze_c2: if freeze_c2:
c2 = tf.stop_gradient(c2) c2 = tf.stop_gradient(c2)
c3 = resnet_group(c2, 'group1', resnet_bottleneck, 128, num_blocks[1], 2) c3 = resnet_group('group1', c2, resnet_bottleneck, 128, num_blocks[1], 2)
c4 = resnet_group(c3, 'group2', resnet_bottleneck, 256, num_blocks[2], 2) c4 = resnet_group('group2', c3, resnet_bottleneck, 256, num_blocks[2], 2)
# 16x downsampling up to now # 16x downsampling up to now
return c4 return c4
...@@ -112,7 +112,7 @@ def resnet_c4_backbone(image, num_blocks, freeze_c2=True): ...@@ -112,7 +112,7 @@ def resnet_c4_backbone(image, num_blocks, freeze_c2=True):
@auto_reuse_variable_scope @auto_reuse_variable_scope
def resnet_conv5(image, num_block): def resnet_conv5(image, num_block):
with resnet_argscope(): with resnet_argscope():
l = resnet_group(image, 'group3', resnet_bottleneck, 512, num_block, 2) l = resnet_group('group3', image, resnet_bottleneck, 512, num_block, 2)
return l return l
...@@ -130,11 +130,11 @@ def resnet_fpn_backbone(image, num_blocks, freeze_c2=True): ...@@ -130,11 +130,11 @@ def resnet_fpn_backbone(image, num_blocks, freeze_c2=True):
l = Conv2D('conv0', l, 64, 7, strides=2, activation=BNReLU, padding='VALID') l = Conv2D('conv0', l, 64, 7, strides=2, activation=BNReLU, padding='VALID')
l = tf.pad(l, [[0, 0], [0, 0], [0, 1], [0, 1]]) l = tf.pad(l, [[0, 0], [0, 0], [0, 1], [0, 1]])
l = MaxPooling('pool0', l, 3, strides=2, padding='VALID') l = MaxPooling('pool0', l, 3, strides=2, padding='VALID')
c2 = resnet_group(l, 'group0', resnet_bottleneck, 64, num_blocks[0], 1) c2 = resnet_group('group0', l, resnet_bottleneck, 64, num_blocks[0], 1)
if freeze_c2: if freeze_c2:
c2 = tf.stop_gradient(c2) c2 = tf.stop_gradient(c2)
c3 = resnet_group(c2, 'group1', resnet_bottleneck, 128, num_blocks[1], 2) c3 = resnet_group('group1', c2, resnet_bottleneck, 128, num_blocks[1], 2)
c4 = resnet_group(c3, 'group2', resnet_bottleneck, 256, num_blocks[2], 2) c4 = resnet_group('group2', c3, resnet_bottleneck, 256, num_blocks[2], 2)
c5 = resnet_group(c4, 'group3', resnet_bottleneck, 512, num_blocks[3], 2) c5 = resnet_group('group3', c4, resnet_bottleneck, 512, num_blocks[3], 2)
# 32x downsampling up to now # 32x downsampling up to now
return c2, c3, c4, c5 return c2, c3, c4, c5
...@@ -164,8 +164,6 @@ class COCODetection(object): ...@@ -164,8 +164,6 @@ class COCODetection(object):
if add_mask: if add_mask:
img['segmentation'] = [obj['segmentation'] for obj in valid_objs] img['segmentation'] = [obj['segmentation'] for obj in valid_objs]
del objs
def print_class_histogram(self, imgs): def print_class_histogram(self, imgs):
nr_class = len(COCOMeta.class_names) nr_class = len(COCOMeta.class_names)
hist_bins = np.arange(nr_class + 1) hist_bins = np.arange(nr_class + 1)
...@@ -200,9 +198,7 @@ class COCODetection(object): ...@@ -200,9 +198,7 @@ class COCODetection(object):
if __name__ == '__main__': if __name__ == '__main__':
c = COCODetection('/home/wyx/data/coco', 'train2014') c = COCODetection(config.BASEDIR, 'train2014')
gt_boxes = c.load(add_gt=True, add_mask=True) gt_boxes = c.load(add_gt=True, add_mask=True)
import IPython as IP
IP.embed()
print("#Images:", len(gt_boxes)) print("#Images:", len(gt_boxes))
c.print_class_histogram(gt_boxes) c.print_class_histogram(gt_boxes)
...@@ -444,7 +444,10 @@ class ResNetFPNModel(DetectionModel): ...@@ -444,7 +444,10 @@ class ResNetFPNModel(DetectionModel):
def visualize(model_path, nr_visualize=50, output_dir='output'): def visualize(model_path, nr_visualize=50, output_dir='output'):
assert not config.MODE_FPN, "FPN visualize is not supported yet!" """
Visualize some intermediate results (proposals, raw predictions) inside the pipeline.
Does not support FPN.
"""
df = get_train_dataflow() # we don't visualize mask stuff df = get_train_dataflow() # we don't visualize mask stuff
df.reset_state() df.reset_state()
...@@ -547,9 +550,9 @@ if __name__ == '__main__': ...@@ -547,9 +550,9 @@ if __name__ == '__main__':
parser.add_argument('--load', help='load model for evaluation or training') parser.add_argument('--load', help='load model for evaluation or training')
parser.add_argument('--logdir', help='log directory', default='train_log/maskrcnn') parser.add_argument('--logdir', help='log directory', default='train_log/maskrcnn')
parser.add_argument('--datadir', help='override config.BASEDIR') parser.add_argument('--datadir', help='override config.BASEDIR')
parser.add_argument('--visualize', action='store_true') parser.add_argument('--visualize', action='store_true', help='visualize intermediate results')
parser.add_argument('--evaluate', help="Run evaluation on COCO. " parser.add_argument('--evaluate', help="Run evaluation on COCO. "
"This option is the path to the output json evaluation file") "This argument is the path to the output json evaluation file")
parser.add_argument('--predict', help="Run prediction on a given image. " parser.add_argument('--predict', help="Run prediction on a given image. "
"This argument is the path to the input image file") "This argument is the path to the input image file")
args = parser.parse_args() args = parser.parse_args()
...@@ -570,6 +573,7 @@ if __name__ == '__main__': ...@@ -570,6 +573,7 @@ if __name__ == '__main__':
config.RESULT_SCORE_THRESH = config.RESULT_SCORE_THRESH_VIS config.RESULT_SCORE_THRESH = config.RESULT_SCORE_THRESH_VIS
if args.visualize: if args.visualize:
assert not config.MODE_FPN, "FPN visualize is not supported!"
visualize(args.load) visualize(args.load)
else: else:
pred = OfflinePredictor(PredictConfig( pred = OfflinePredictor(PredictConfig(
......
...@@ -201,10 +201,10 @@ class ImageNetModel(ModelDesc): ...@@ -201,10 +201,10 @@ class ImageNetModel(ModelDesc):
def get_logits(self, image): def get_logits(self, image):
""" """
Args: Args:
image: 4D tensor of 224x224 in ``self.data_format`` image: 4D tensor of ``self.input_shape`` in ``self.data_format``
Returns: Returns:
Nx1000 logits Nx#class logits
""" """
def optimizer(self): def optimizer(self):
......
...@@ -5,7 +5,6 @@ ...@@ -5,7 +5,6 @@
import argparse import argparse
import os import os
from tensorpack import logger, QueueInput from tensorpack import logger, QueueInput
from tensorpack.models import * from tensorpack.models import *
from tensorpack.callbacks import * from tensorpack.callbacks import *
...@@ -64,7 +63,7 @@ def get_config(model, fake=False): ...@@ -64,7 +63,7 @@ def get_config(model, fake=False):
logger.info("Running on {} towers. Batch size per tower: {}".format(nr_tower, batch)) logger.info("Running on {} towers. Batch size per tower: {}".format(nr_tower, batch))
if batch < 32 or batch > 64: if batch < 32 or batch > 64:
logger.warn("Batch size per tower not in [32, 64]. This may lead to worse accuracy than reported.") logger.warn("Batch size per tower not in [32, 64]. This probably will lead to worse accuracy than reported.")
if fake: if fake:
data = QueueInput(FakeData( data = QueueInput(FakeData(
[[batch, 224, 224, 3], [batch]], 1000, random=False, dtype='uint8')) [[batch, 224, 224, 3], [batch]], 1000, random=False, dtype='uint8'))
......
...@@ -3,11 +3,9 @@ ...@@ -3,11 +3,9 @@
import tensorflow as tf import tensorflow as tf
from tensorpack.tfutils.argscope import argscope, get_arg_scope from tensorpack.tfutils.argscope import argscope, get_arg_scope
from tensorpack.models import ( from tensorpack.models import (
Conv2D, GlobalAvgPooling, BatchNorm, BNReLU, FullyConnected, Conv2D, MaxPooling, GlobalAvgPooling, BatchNorm, BNReLU, FullyConnected)
LinearWrap)
def resnet_shortcut(l, n_out, stride, activation=tf.identity): def resnet_shortcut(l, n_out, stride, activation=tf.identity):
...@@ -54,7 +52,7 @@ def preresnet_bottleneck(l, ch_out, stride, preact): ...@@ -54,7 +52,7 @@ def preresnet_bottleneck(l, ch_out, stride, preact):
return l + resnet_shortcut(shortcut, ch_out * 4, stride) return l + resnet_shortcut(shortcut, ch_out * 4, stride)
def preresnet_group(l, name, block_func, features, count, stride): def preresnet_group(name, l, block_func, features, count, stride):
with tf.variable_scope(name): with tf.variable_scope(name):
for i in range(0, count): for i in range(0, count):
with tf.variable_scope('block{}'.format(i)): with tf.variable_scope('block{}'.format(i)):
...@@ -71,7 +69,8 @@ def resnet_basicblock(l, ch_out, stride): ...@@ -71,7 +69,8 @@ def resnet_basicblock(l, ch_out, stride):
shortcut = l shortcut = l
l = Conv2D('conv1', l, ch_out, 3, strides=stride, activation=BNReLU) l = Conv2D('conv1', l, ch_out, 3, strides=stride, activation=BNReLU)
l = Conv2D('conv2', l, ch_out, 3, activation=get_bn(zero_init=True)) l = Conv2D('conv2', l, ch_out, 3, activation=get_bn(zero_init=True))
return l + resnet_shortcut(shortcut, ch_out, stride, activation=get_bn(zero_init=False)) out = l + resnet_shortcut(shortcut, ch_out, stride, activation=get_bn(zero_init=False))
return tf.nn.relu(out)
def resnet_bottleneck(l, ch_out, stride, stride_first=False): def resnet_bottleneck(l, ch_out, stride, stride_first=False):
...@@ -82,7 +81,8 @@ def resnet_bottleneck(l, ch_out, stride, stride_first=False): ...@@ -82,7 +81,8 @@ def resnet_bottleneck(l, ch_out, stride, stride_first=False):
l = Conv2D('conv1', l, ch_out, 1, strides=stride if stride_first else 1, activation=BNReLU) l = Conv2D('conv1', l, ch_out, 1, strides=stride if stride_first else 1, activation=BNReLU)
l = Conv2D('conv2', l, ch_out, 3, strides=1 if stride_first else stride, activation=BNReLU) l = Conv2D('conv2', l, ch_out, 3, strides=1 if stride_first else stride, activation=BNReLU)
l = Conv2D('conv3', l, ch_out * 4, 1, activation=get_bn(zero_init=True)) l = Conv2D('conv3', l, ch_out * 4, 1, activation=get_bn(zero_init=True))
return l + resnet_shortcut(shortcut, ch_out * 4, stride, activation=get_bn(zero_init=False)) out = l + resnet_shortcut(shortcut, ch_out * 4, stride, activation=get_bn(zero_init=False))
return tf.nn.relu(out)
def se_resnet_bottleneck(l, ch_out, stride): def se_resnet_bottleneck(l, ch_out, stride):
...@@ -99,29 +99,28 @@ def se_resnet_bottleneck(l, ch_out, stride): ...@@ -99,29 +99,28 @@ def se_resnet_bottleneck(l, ch_out, stride):
shape = [-1, 1, 1, 1] shape = [-1, 1, 1, 1]
shape[ch_ax] = ch_out * 4 shape[ch_ax] = ch_out * 4
l = l * tf.reshape(squeeze, shape) l = l * tf.reshape(squeeze, shape)
return l + resnet_shortcut(shortcut, ch_out * 4, stride, activation=get_bn(zero_init=False)) out = l + resnet_shortcut(shortcut, ch_out * 4, stride, activation=get_bn(zero_init=False))
return tf.nn.relu(out)
def resnet_group(l, name, block_func, features, count, stride): def resnet_group(name, l, block_func, features, count, stride):
with tf.variable_scope(name): with tf.variable_scope(name):
for i in range(0, count): for i in range(0, count):
with tf.variable_scope('block{}'.format(i)): with tf.variable_scope('block{}'.format(i)):
l = block_func(l, features, stride if i == 0 else 1) l = block_func(l, features, stride if i == 0 else 1)
# end of each block need an activation
l = tf.nn.relu(l)
return l return l
def resnet_backbone(image, num_blocks, group_func, block_func): def resnet_backbone(image, num_blocks, group_func, block_func):
with argscope(Conv2D, use_bias=False, with argscope(Conv2D, use_bias=False,
kernel_initializer=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')): kernel_initializer=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')):
logits = (LinearWrap(image) l = Conv2D('conv0', image, 64, 7, strides=2, activation=BNReLU)
.Conv2D('conv0', 64, 7, strides=2, activation=BNReLU) l = MaxPooling('pool0', l, pool_size=3, strides=2, padding='SAME')
.MaxPooling('pool0', shape=3, stride=2, padding='SAME') l = group_func('group0', l, block_func, 64, num_blocks[0], 1)
.apply(group_func, 'group0', block_func, 64, num_blocks[0], 1) l = group_func('group1', l, block_func, 128, num_blocks[1], 2)
.apply(group_func, 'group1', block_func, 128, num_blocks[1], 2) l = group_func('group2', l, block_func, 256, num_blocks[2], 2)
.apply(group_func, 'group2', block_func, 256, num_blocks[2], 2) l = group_func('group3', l, block_func, 512, num_blocks[3], 2)
.apply(group_func, 'group3', block_func, 512, num_blocks[3], 2) l = GlobalAvgPooling('gap', l)
.GlobalAvgPooling('gap') logits = FullyConnected('linear', l, 1000,
.FullyConnected('linear', 1000)()) kernel_initializer=tf.random_normal_initializer(stddev=0.01))
return logits return logits
...@@ -51,14 +51,13 @@ class Model(ModelDesc): ...@@ -51,14 +51,13 @@ class Model(ModelDesc):
convmaps = (LinearWrap(image) convmaps = (LinearWrap(image)
.Conv2D('conv0', 64, 7, strides=2, activation=BNReLU) .Conv2D('conv0', 64, 7, strides=2, activation=BNReLU)
.MaxPooling('pool0', 3, strides=2, padding='SAME') .MaxPooling('pool0', 3, strides=2, padding='SAME')
.apply(preresnet_group, 'group0', block_func, 64, defs[0], 1) .apply2(preresnet_group, 'group0', block_func, 64, defs[0], 1)
.apply(preresnet_group, 'group1', block_func, 128, defs[1], 2) .apply2(preresnet_group, 'group1', block_func, 128, defs[1], 2)
.apply(preresnet_group, 'group2', block_func, 256, defs[2], 2) .apply2(preresnet_group, 'group2', block_func, 256, defs[2], 2)
.apply(preresnet_group, 'group3new', block_func, 512, defs[3], 1)()) .apply2(preresnet_group, 'group3new', block_func, 512, defs[3], 1)())
print(convmaps) print(convmaps)
logits = (LinearWrap(convmaps) convmaps = GlobalAvgPooling('gap', convmaps)
.GlobalAvgPooling('gap') logits = FullyConnected('linearnew', convmaps, 1000)
.FullyConnected('linearnew', 1000)())
loss = compute_loss_and_error(logits, label) loss = compute_loss_and_error(logits, label)
wd_cost = regularize_cost('.*/W', l2_regularizer(1e-4), name='l2_regularize_loss') wd_cost = regularize_cost('.*/W', l2_regularizer(1e-4), name='l2_regularize_loss')
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment