refactor resnet group function

7800cf1c · Yuxin Wu · f002bfb9 · 7800cf1c · 7800cf1c · 7800cf1c
Commit 7800cf1c authored May 24, 2018 by Yuxin Wu
11 changed files
--- a/docs/README.md
+++ b/docs/README.md
@@ -2,8 +2,8 @@
 ## Build the docs:
 ### Dependencies:
-1. python3
+1. Python3
-2. `pip install -r requirements.txt`
+2. `pip install -r requirements.txt`. These requirements are different from tensorpack dependencies.
 ### Build HTML docs:
 `make html`

--- a/examples/DoReFa-Net/resnet-dorefa.py
+++ b/examples/DoReFa-Net/resnet-dorefa.py
@@ -91,7 +91,7 @@ class Model(ModelDesc):
                argscope(BatchNorm, decay=0.9, epsilon=1e-4), \
                argscope(Conv2D, use_bias=False, nl=tf.identity):
            logits = (LinearWrap(image)
-                      # use explicit padding here, because our training framework has
+                      # use explicit padding here, because our private training framework has
                      # different padding mechanisms from TensorFlow
                      .tf.pad([[0, 0], [3, 2], [3, 2], [0, 0]])
                      .Conv2D('conv1', 64, 7, stride=2, padding='VALID', use_bias=True)

--- a/examples/FasterRCNN/NOTES.md
+++ b/examples/FasterRCNN/NOTES.md
@@ -5,7 +5,7 @@ This is a minimal implementation that simply contains these files:
 + data.py: prepare data for training
 + common.py: common data preparation utilities
 + basemodel.py: implement resnet
-+ model.py: implement rpn/faster-rcnn/mask-rcnn
++ model.py: implement RPN/Faster-RCNN/Mask-RCNN
 + train.py: main training script
 + utils/: third-party helper functions
 + eval.py: evaluation utilities

--- a/examples/FasterRCNN/README.md
+++ b/examples/FasterRCNN/README.md
@@ -36,8 +36,8 @@ Train:
 ```
 ./train.py --load /path/to/ImageNet-ResNet50.npz
 ```
-The code is only for training with 1, 2, 4 or 8 GPUs.
+The code is only valid for training with 1, 2, 4 or 8 GPUs.
-Otherwise, you probably need different hyperparameters for the same performance.
+Not training with 8 GPUs may result in different performance from the table below.
 Predict on an image (and show output in a window):
 ```
@@ -62,7 +62,7 @@ MaskRCNN results contain both bbox and segm mAP.
 |R50-C4  |512             |(800, 1333)|360k    |36.6           |49h on 8 V100s|
 |R50-FPN |512             |(800, 1333)|360k    |37.5           |28h on 8 V100s|
 |R50-C4  |256             |(800, 1333)|280k    |36.8/32.1      |39h on 8 P100s|
-|R50-C4  |512							|(800, 1333)|360k    |37.8/33.2      |51h on 8 V100s|
+|R50-C4  |512             |(800, 1333)|360k    |37.8/33.1      |51h on 8 V100s|
 |R50-FPN |512             |(800, 1333)|360k    |38.1/34.9      |38h on 8 V100s|
 |R101-C4 |512             |(800, 1333)|280k    |40.1/34.4      |70h on 8 P100s|
 |R101-C4 |512             |(800, 1333)|360k    |40.8/35.1      |63h on 8 V100s|

--- a/examples/FasterRCNN/basemodel.py
+++ b/examples/FasterRCNN/basemodel.py
@@ -81,7 +81,7 @@ def resnet_bottleneck(l, ch_out, stride):
    return l + resnet_shortcut(shortcut, ch_out * 4, stride, activation=get_bn(zero_init=False))
-def resnet_group(l, name, block_func, features, count, stride):
+def resnet_group(name, l, block_func, features, count, stride):
    with tf.variable_scope(name):
        for i in range(0, count):
            with tf.variable_scope('block{}'.format(i)):
@@ -99,12 +99,12 @@ def resnet_c4_backbone(image, num_blocks, freeze_c2=True):
        l = Conv2D('conv0', l, 64, 7, strides=2, activation=BNReLU, padding='VALID')
        l = tf.pad(l, [[0, 0], [0, 0], [0, 1], [0, 1]])
        l = MaxPooling('pool0', l, 3, strides=2, padding='VALID')
-        c2 = resnet_group(l, 'group0', resnet_bottleneck, 64, num_blocks[0], 1)
+        c2 = resnet_group('group0', l, resnet_bottleneck, 64, num_blocks[0], 1)
        # TODO replace var by const to enable optimization
        if freeze_c2:
            c2 = tf.stop_gradient(c2)
-        c3 = resnet_group(c2, 'group1', resnet_bottleneck, 128, num_blocks[1], 2)
+        c3 = resnet_group('group1', c2, resnet_bottleneck, 128, num_blocks[1], 2)
-        c4 = resnet_group(c3, 'group2', resnet_bottleneck, 256, num_blocks[2], 2)
+        c4 = resnet_group('group2', c3, resnet_bottleneck, 256, num_blocks[2], 2)
    # 16x downsampling up to now
    return c4
@@ -112,7 +112,7 @@ def resnet_c4_backbone(image, num_blocks, freeze_c2=True):
 @auto_reuse_variable_scope
 def resnet_conv5(image, num_block):
    with resnet_argscope():
-        l = resnet_group(image, 'group3', resnet_bottleneck, 512, num_block, 2)
+        l = resnet_group('group3', image, resnet_bottleneck, 512, num_block, 2)
        return l
@@ -130,11 +130,11 @@ def resnet_fpn_backbone(image, num_blocks, freeze_c2=True):
        l = Conv2D('conv0', l, 64, 7, strides=2, activation=BNReLU, padding='VALID')
        l = tf.pad(l, [[0, 0], [0, 0], [0, 1], [0, 1]])
        l = MaxPooling('pool0', l, 3, strides=2, padding='VALID')
-        c2 = resnet_group(l, 'group0', resnet_bottleneck, 64, num_blocks[0], 1)
+        c2 = resnet_group('group0', l, resnet_bottleneck, 64, num_blocks[0], 1)
        if freeze_c2:
            c2 = tf.stop_gradient(c2)
-        c3 = resnet_group(c2, 'group1', resnet_bottleneck, 128, num_blocks[1], 2)
+        c3 = resnet_group('group1', c2, resnet_bottleneck, 128, num_blocks[1], 2)
-        c4 = resnet_group(c3, 'group2', resnet_bottleneck, 256, num_blocks[2], 2)
+        c4 = resnet_group('group2', c3, resnet_bottleneck, 256, num_blocks[2], 2)
-        c5 = resnet_group(c4, 'group3', resnet_bottleneck, 512, num_blocks[3], 2)
+        c5 = resnet_group('group3', c4, resnet_bottleneck, 512, num_blocks[3], 2)
    # 32x downsampling up to now
    return c2, c3, c4, c5
--- a/examples/FasterRCNN/coco.py
+++ b/examples/FasterRCNN/coco.py
@@ -164,8 +164,6 @@ class COCODetection(object):
        if add_mask:
            img['segmentation'] = [obj['segmentation'] for obj in valid_objs]
-        del objs
    def print_class_histogram(self, imgs):
        nr_class = len(COCOMeta.class_names)
        hist_bins = np.arange(nr_class + 1)
@@ -200,9 +198,7 @@ class COCODetection(object):
 if __name__ == '__main__':
-    c = COCODetection('/home/wyx/data/coco', 'train2014')
+    c = COCODetection(config.BASEDIR, 'train2014')
    gt_boxes = c.load(add_gt=True, add_mask=True)
-    import IPython as IP
-    IP.embed()
    print("#Images:", len(gt_boxes))
    c.print_class_histogram(gt_boxes)
--- a/examples/FasterRCNN/train.py
+++ b/examples/FasterRCNN/train.py
@@ -444,7 +444,10 @@ class ResNetFPNModel(DetectionModel):
 def visualize(model_path, nr_visualize=50, output_dir='output'):
-    assert not config.MODE_FPN, "FPN visualize is not supported yet!"
+    """
+    Visualize some intermediate results (proposals, raw predictions) inside the pipeline.
+    Does not support FPN.
+    """
    df = get_train_dataflow()   # we don't visualize mask stuff
    df.reset_state()
@@ -547,9 +550,9 @@ if __name__ == '__main__':
    parser.add_argument('--load', help='load model for evaluation or training')
    parser.add_argument('--logdir', help='log directory', default='train_log/maskrcnn')
    parser.add_argument('--datadir', help='override config.BASEDIR')
-    parser.add_argument('--visualize', action='store_true')
+    parser.add_argument('--visualize', action='store_true', help='visualize intermediate results')
    parser.add_argument('--evaluate', help="Run evaluation on COCO. "
-                                           "This option is the path to the output json evaluation file")
+                                           "This argument is the path to the output json evaluation file")
    parser.add_argument('--predict', help="Run prediction on a given image. "
                                          "This argument is the path to the input image file")
    args = parser.parse_args()
@@ -570,6 +573,7 @@ if __name__ == '__main__':
            config.RESULT_SCORE_THRESH = config.RESULT_SCORE_THRESH_VIS
        if args.visualize:
+            assert not config.MODE_FPN, "FPN visualize is not supported!"
            visualize(args.load)
        else:
            pred = OfflinePredictor(PredictConfig(

--- a/examples/ImageNetModels/imagenet_utils.py
+++ b/examples/ImageNetModels/imagenet_utils.py
@@ -201,10 +201,10 @@ class ImageNetModel(ModelDesc):
    def get_logits(self, image):
        """
        Args:
-            image: 4D tensor of 224x224 in ``self.data_format``
+            image: 4D tensor of ``self.input_shape`` in ``self.data_format``
        Returns:
-            Nx1000 logits
+            Nx#class logits
        """
    def optimizer(self):

--- a/examples/ResNet/imagenet-resnet.py
+++ b/examples/ResNet/imagenet-resnet.py
@@ -5,7 +5,6 @@
 import argparse
 import os
 from tensorpack import logger, QueueInput
 from tensorpack.models import *
 from tensorpack.callbacks import *
@@ -64,7 +63,7 @@ def get_config(model, fake=False):
    logger.info("Running on {} towers. Batch size per tower: {}".format(nr_tower, batch))
    if batch < 32 or batch > 64:
-        logger.warn("Batch size per tower not in [32, 64]. This may lead to worse accuracy than reported.")
+        logger.warn("Batch size per tower not in [32, 64]. This probably will lead to worse accuracy than reported.")
    if fake:
        data = QueueInput(FakeData(
            [[batch, 224, 224, 3], [batch]], 1000, random=False, dtype='uint8'))

--- a/examples/ResNet/resnet_model.py
+++ b/examples/ResNet/resnet_model.py
@@ -3,11 +3,9 @@
 import tensorflow as tf
 from tensorpack.tfutils.argscope import argscope, get_arg_scope
 from tensorpack.models import (
-    Conv2D, GlobalAvgPooling, BatchNorm, BNReLU, FullyConnected,
+    Conv2D, MaxPooling, GlobalAvgPooling, BatchNorm, BNReLU, FullyConnected)
-    LinearWrap)
 def resnet_shortcut(l, n_out, stride, activation=tf.identity):
@@ -54,7 +52,7 @@ def preresnet_bottleneck(l, ch_out, stride, preact):
    return l + resnet_shortcut(shortcut, ch_out * 4, stride)
-def preresnet_group(l, name, block_func, features, count, stride):
+def preresnet_group(name, l, block_func, features, count, stride):
    with tf.variable_scope(name):
        for i in range(0, count):
            with tf.variable_scope('block{}'.format(i)):
@@ -71,7 +69,8 @@ def resnet_basicblock(l, ch_out, stride):
    shortcut = l
    l = Conv2D('conv1', l, ch_out, 3, strides=stride, activation=BNReLU)
    l = Conv2D('conv2', l, ch_out, 3, activation=get_bn(zero_init=True))
-    return l + resnet_shortcut(shortcut, ch_out, stride, activation=get_bn(zero_init=False))
+    out = l + resnet_shortcut(shortcut, ch_out, stride, activation=get_bn(zero_init=False))
+    return tf.nn.relu(out)
 def resnet_bottleneck(l, ch_out, stride, stride_first=False):
@@ -82,7 +81,8 @@ def resnet_bottleneck(l, ch_out, stride, stride_first=False):
    l = Conv2D('conv1', l, ch_out, 1, strides=stride if stride_first else 1, activation=BNReLU)
    l = Conv2D('conv2', l, ch_out, 3, strides=1 if stride_first else stride, activation=BNReLU)
    l = Conv2D('conv3', l, ch_out * 4, 1, activation=get_bn(zero_init=True))
-    return l + resnet_shortcut(shortcut, ch_out * 4, stride, activation=get_bn(zero_init=False))
+    out = l + resnet_shortcut(shortcut, ch_out * 4, stride, activation=get_bn(zero_init=False))
+    return tf.nn.relu(out)
 def se_resnet_bottleneck(l, ch_out, stride):
@@ -99,29 +99,28 @@ def se_resnet_bottleneck(l, ch_out, stride):
    shape = [-1, 1, 1, 1]
    shape[ch_ax] = ch_out * 4
    l = l * tf.reshape(squeeze, shape)
-    return l + resnet_shortcut(shortcut, ch_out * 4, stride, activation=get_bn(zero_init=False))
+    out = l + resnet_shortcut(shortcut, ch_out * 4, stride, activation=get_bn(zero_init=False))
+    return tf.nn.relu(out)
-def resnet_group(l, name, block_func, features, count, stride):
+def resnet_group(name, l, block_func, features, count, stride):
    with tf.variable_scope(name):
        for i in range(0, count):
            with tf.variable_scope('block{}'.format(i)):
                l = block_func(l, features, stride if i == 0 else 1)
-                # end of each block need an activation
-                l = tf.nn.relu(l)
    return l
 def resnet_backbone(image, num_blocks, group_func, block_func):
    with argscope(Conv2D, use_bias=False,
                  kernel_initializer=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')):
-        logits = (LinearWrap(image)
+        l = Conv2D('conv0', image, 64, 7, strides=2, activation=BNReLU)
-                  .Conv2D('conv0', 64, 7, strides=2, activation=BNReLU)
+        l = MaxPooling('pool0', l, pool_size=3, strides=2, padding='SAME')
-                  .MaxPooling('pool0', shape=3, stride=2, padding='SAME')
+        l = group_func('group0', l, block_func, 64, num_blocks[0], 1)
-                  .apply(group_func, 'group0', block_func, 64, num_blocks[0], 1)
+        l = group_func('group1', l, block_func, 128, num_blocks[1], 2)
-                  .apply(group_func, 'group1', block_func, 128, num_blocks[1], 2)
+        l = group_func('group2', l, block_func, 256, num_blocks[2], 2)
-                  .apply(group_func, 'group2', block_func, 256, num_blocks[2], 2)
+        l = group_func('group3', l, block_func, 512, num_blocks[3], 2)
-                  .apply(group_func, 'group3', block_func, 512, num_blocks[3], 2)
+        l = GlobalAvgPooling('gap', l)
-                  .GlobalAvgPooling('gap')
+        logits = FullyConnected('linear', l, 1000,
-                  .FullyConnected('linear', 1000)())
+                                kernel_initializer=tf.random_normal_initializer(stddev=0.01))
    return logits
--- a/examples/Saliency/CAM-resnet.py
+++ b/examples/Saliency/CAM-resnet.py
@@ -51,14 +51,13 @@ class Model(ModelDesc):
            convmaps = (LinearWrap(image)
                        .Conv2D('conv0', 64, 7, strides=2, activation=BNReLU)
                        .MaxPooling('pool0', 3, strides=2, padding='SAME')
-                        .apply(preresnet_group, 'group0', block_func, 64, defs[0], 1)
+                        .apply2(preresnet_group, 'group0', block_func, 64, defs[0], 1)
-                        .apply(preresnet_group, 'group1', block_func, 128, defs[1], 2)
+                        .apply2(preresnet_group, 'group1', block_func, 128, defs[1], 2)
-                        .apply(preresnet_group, 'group2', block_func, 256, defs[2], 2)
+                        .apply2(preresnet_group, 'group2', block_func, 256, defs[2], 2)
-                        .apply(preresnet_group, 'group3new', block_func, 512, defs[3], 1)())
+                        .apply2(preresnet_group, 'group3new', block_func, 512, defs[3], 1)())
            print(convmaps)
-            logits = (LinearWrap(convmaps)
+            convmaps = GlobalAvgPooling('gap', convmaps)
-                      .GlobalAvgPooling('gap')
+            logits = FullyConnected('linearnew', convmaps, 1000)
-                      .FullyConnected('linearnew', 1000)())
        loss = compute_loss_and_error(logits, label)
        wd_cost = regularize_cost('.*/W', l2_regularizer(1e-4), name='l2_regularize_loss')