reproduce exactly the same setting of [tensorpack ResNet example](../ResNet) on ImageNet.
It has:
+ ResNet-50 model modified from [keras.applications](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/_impl/keras/applications/resnet50.py).
(We put the stride on the 3x3 conv in each bottleneck, which differs from some other implementations.)
+ Multi-GPU data-parallel __training and validation__ which scales
+ Finished 100 epochs in 19.5 hours on 8 V100s, with >90% GPU utilization.
+ Still slightly slower than native tensorpack examples.
+ Good accuracy (same as [tensorpack ResNet example](../ResNet))
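
The stride-placement detail above can be sketched with plain integer arithmetic (no TensorFlow required; the helper names below are ours, not from the example code). Both placements halve the feature map, but a strided 1x1 conv samples every other pixel without ever convolving over its neighbors, while a strided 3x3 conv still covers the full input:

```python
def conv_out(size, kernel, stride, pad):
    """Spatial output size of a 2D conv along one axis."""
    return (size + 2 * pad - kernel) // stride + 1

def bottleneck(size, stride, stride_on_3x3):
    """Trace the spatial size through a 1x1 -> 3x3 -> 1x1 bottleneck.

    stride_on_3x3=True : stride on the 3x3 conv (as in this example)
    stride_on_3x3=False: stride on the first 1x1 conv (some other
                         implementations, e.g. the original ResNet v1)
    """
    s1 = 1 if stride_on_3x3 else stride
    s3 = stride if stride_on_3x3 else 1
    size = conv_out(size, 1, s1, 0)  # 1x1 reduce
    size = conv_out(size, 3, s3, 1)  # 3x3, SAME padding
    size = conv_out(size, 1, 1, 0)   # 1x1 expand
    return size

# Both variants downsample a 56x56 feature map to 28x28; they differ
# only in which layer does the subsampling.
print(bottleneck(56, 2, stride_on_3x3=True))   # 28
print(bottleneck(56, 2, stride_on_3x3=False))  # 28
```

Since the output shapes are identical either way, checkpoints are not interchangeable across the two variants, but the architecture diagrams look the same.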