update docs

099975c5 · Yuxin Wu · 0ee4d8b0 · 099975c5 · 099975c5 · 099975c5
Commit 099975c5 authored May 29, 2018 by Yuxin Wu
7 changed files
--- a/examples/FasterRCNN/NOTES.md
+++ b/examples/FasterRCNN/NOTES.md
@@ -35,8 +35,8 @@ Model:
 3. We only support single image per GPU.

 4. Because of (3), BatchNorm statistics are not supposed to be updated during fine-tuning.
-	 This specific kind of BatchNorm will need [my kernel](https://github.com/tensorflow/tensorflow/pull/12580)
-	 which is included since TF 1.4. If using an earlier version of TF, it will be either slow or wrong.
+   This specific kind of BatchNorm will need [my kernel](https://github.com/tensorflow/tensorflow/pull/12580)
+   which is included since TF 1.4. If using an earlier version of TF, it will be either slow or wrong.

 Speed:


--- a/examples/FasterRCNN/README.md
+++ b/examples/FasterRCNN/README.md
 # Faster-RCNN / Mask-RCNN on COCO
-This example provides a minimal (only 1.6k lines) but faithful implementation of
-the following papers:
+This example provides a minimal (only 1.6k lines) but faithful implementation of the following papers:

 + [Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks](https://arxiv.org/abs/1506.01497)
 + [Feature Pyramid Networks for Object Detection](https://arxiv.org/abs/1612.03144)
@@ -70,7 +69,7 @@ MaskRCNN results contain both bbox and segm mAP.
 The two R50-C4 360k models have the same configuration __and mAP__
 as the `R50-C4-2x` entries in
 [Detectron Model Zoo](https://github.com/facebookresearch/Detectron/blob/master/MODEL_ZOO.md#end-to-end-faster--mask-r-cnn-baselines).
-So far this is the only public TensorFlow implementation that can reproduce mAP in Detectron.
+<!-- So far this is the only public TensorFlow implementation that can reproduce mAP in Detectron. -->
 The other models listed here do not correspond to any configurations in Detectron.

 ## Notes

--- a/examples/FasterRCNN/data.py
+++ b/examples/FasterRCNN/data.py
@@ -10,6 +10,7 @@ from tensorpack.utils.argtools import memoized, log_once
 from tensorpack.dataflow import (
    imgaug, TestDataSpeed, PrefetchDataZMQ, MultiProcessMapDataZMQ,
    MapDataComponent, DataFromList)
+from tensorpack.utils import logger
 # import tensorpack.utils.viz as tpviz

 from coco import COCODetection
@@ -277,7 +278,10 @@ def get_train_dataflow():

    # Valid training images should have at least one fg box.
    # But this filter shall not be applied for testing.
+    num = len(imgs)
    imgs = list(filter(lambda img: len(img['boxes']) > 0, imgs))    # log invalid training
+    logger.info("Filtered {} images which contain no groudtruth boxes. Total #images for training: {}".format(
+        num - len(imgs), len(imgs)))

    ds = DataFromList(imgs, shuffle=True)


--- a/examples/FasterRCNN/model.py
+++ b/examples/FasterRCNN/model.py
@@ -99,7 +99,7 @@ def rpn_losses(anchor_labels, anchor_boxes, label_logits, box_logits):
        add_moving_summary(*summaries)

    # Per-level loss summaries in FPN may appear lower due to the use of a small placeholder.
-    # But the total loss is still the same.
+    # But the total loss is still the same.  TODO make the summary op smarter
    placeholder = 0.
    label_loss = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=tf.to_float(valid_anchor_labels), logits=valid_label_logits)
@@ -217,7 +217,8 @@ def generate_rpn_proposals(boxes, scores, img_shape,
        (-1, 4), name='nms_input_boxes')
    nms_indices = tf.image.non_max_suppression(
        topk_valid_boxes_y1x1y2x2,
-        topk_valid_scores,
+        # TODO use exp to work around a bug in TF1.9: https://github.com/tensorflow/tensorflow/issues/19578
+        tf.exp(topk_valid_scores),
        max_output_size=post_nms_topk,
        iou_threshold=config.RPN_PROPOSAL_NMS_THRESH)

@@ -608,7 +609,6 @@ def fpn_model(features):

        # tf.image.resize is, again, not aligned.
        # with tf.name_scope(name):
-        #     logger.info("Nearest neighbor")
        #     shape2d = tf.shape(x)[2:]
        #     x = tf.transpose(x, [0, 2, 3, 1])
        #     x = tf.image.resize_nearest_neighbor(x, shape2d * 2, align_corners=True)

--- a/examples/FasterRCNN/train.py
+++ b/examples/FasterRCNN/train.py
@@ -623,5 +623,6 @@ if __name__ == '__main__':
            max_epoch=config.LR_SCHEDULE[-1] * factor // stepnum,
            session_init=get_model_loader(args.load) if args.load else None,
        )
+        # nccl mode gives the best speed
        trainer = SyncMultiGPUTrainerReplicated(get_nr_gpu(), mode='nccl')
        launch_train_with_config(cfg, trainer)
--- a/examples/ImageNetModels/README.md
+++ b/examples/ImageNetModels/README.md
@@ -31,8 +31,8 @@ Evaluate the [pretrained model](http://models.tensorpack.com/ShuffleNet/):
 This AlexNet script is quite close to the setting in its [original
 paper](https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks).
 Trained with 64x2 batch size, the script reaches 58% single-crop validation
-accuracy after 100 epochs. It also generates first-layer filter visualizations
-similar to the paper in tensorboard.
+accuracy after 100 epochs (21 hours on 2 V100s).
+It also writes first-layer filter visualizations, similar to those in the paper, to TensorBoard.

 ### Inception-BN, VGG16


--- a/examples/ResNet/resnet_model.py
+++ b/examples/ResNet/resnet_model.py
@@ -114,6 +114,8 @@ def resnet_group(name, l, block_func, features, count, stride):
 def resnet_backbone(image, num_blocks, group_func, block_func):
    with argscope(Conv2D, use_bias=False,
                  kernel_initializer=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')):
+        # Note that this pads the image by [2, 3] instead of [3, 2].
+        # Similar things happen in later stride=2 layers as well.
        l = Conv2D('conv0', image, 64, 7, strides=2, activation=BNReLU)
        l = MaxPooling('pool0', l, pool_size=3, strides=2, padding='SAME')
        l = group_func('group0', l, block_func, 64, num_blocks[0], 1)