Commit 099975c5 authored by Yuxin Wu

update docs

parent 0ee4d8b0
...@@ -35,8 +35,8 @@ Model: ...@@ -35,8 +35,8 @@ Model:
3. We only support single image per GPU. 3. We only support single image per GPU.
4. Because of (3), BatchNorm statistics are not supposed to be updated during fine-tuning. 4. Because of (3), BatchNorm statistics are not supposed to be updated during fine-tuning.
This specific kind of BatchNorm will need [my kernel](https://github.com/tensorflow/tensorflow/pull/12580) This specific kind of BatchNorm will need [my kernel](https://github.com/tensorflow/tensorflow/pull/12580)
which is included since TF 1.4. If using an earlier version of TF, it will be either slow or wrong. which is included since TF 1.4. If using an earlier version of TF, it will be either slow or wrong.
Speed: Speed:
......
# Faster-RCNN / Mask-RCNN on COCO # Faster-RCNN / Mask-RCNN on COCO
This example provides a minimal (only 1.6k lines) but faithful implementation of This example provides a minimal (only 1.6k lines) but faithful implementation of the following papers:
the following papers:
+ [Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks](https://arxiv.org/abs/1506.01497) + [Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks](https://arxiv.org/abs/1506.01497)
+ [Feature Pyramid Networks for Object Detection](https://arxiv.org/abs/1612.03144) + [Feature Pyramid Networks for Object Detection](https://arxiv.org/abs/1612.03144)
...@@ -70,7 +69,7 @@ MaskRCNN results contain both bbox and segm mAP. ...@@ -70,7 +69,7 @@ MaskRCNN results contain both bbox and segm mAP.
The two R50-C4 360k models have the same configuration __and mAP__ The two R50-C4 360k models have the same configuration __and mAP__
as the `R50-C4-2x` entries in as the `R50-C4-2x` entries in
[Detectron Model Zoo](https://github.com/facebookresearch/Detectron/blob/master/MODEL_ZOO.md#end-to-end-faster--mask-r-cnn-baselines). [Detectron Model Zoo](https://github.com/facebookresearch/Detectron/blob/master/MODEL_ZOO.md#end-to-end-faster--mask-r-cnn-baselines).
So far this is the only public TensorFlow implementation that can reproduce mAP in Detectron. <!-- So far this is the only public TensorFlow implementation that can reproduce mAP in Detectron. -->
The other models listed here do not correspond to any configurations in Detectron. The other models listed here do not correspond to any configurations in Detectron.
## Notes ## Notes
......
...@@ -10,6 +10,7 @@ from tensorpack.utils.argtools import memoized, log_once ...@@ -10,6 +10,7 @@ from tensorpack.utils.argtools import memoized, log_once
from tensorpack.dataflow import ( from tensorpack.dataflow import (
imgaug, TestDataSpeed, PrefetchDataZMQ, MultiProcessMapDataZMQ, imgaug, TestDataSpeed, PrefetchDataZMQ, MultiProcessMapDataZMQ,
MapDataComponent, DataFromList) MapDataComponent, DataFromList)
from tensorpack.utils import logger
# import tensorpack.utils.viz as tpviz # import tensorpack.utils.viz as tpviz
from coco import COCODetection from coco import COCODetection
...@@ -277,7 +278,10 @@ def get_train_dataflow(): ...@@ -277,7 +278,10 @@ def get_train_dataflow():
# Valid training images should have at least one fg box. # Valid training images should have at least one fg box.
# But this filter shall not be applied for testing. # But this filter shall not be applied for testing.
num = len(imgs)
imgs = list(filter(lambda img: len(img['boxes']) > 0, imgs)) # log invalid training imgs = list(filter(lambda img: len(img['boxes']) > 0, imgs)) # log invalid training
logger.info("Filtered {} images which contain no groudtruth boxes. Total #images for training: {}".format(
num - len(imgs), len(imgs)))
ds = DataFromList(imgs, shuffle=True) ds = DataFromList(imgs, shuffle=True)
......
...@@ -99,7 +99,7 @@ def rpn_losses(anchor_labels, anchor_boxes, label_logits, box_logits): ...@@ -99,7 +99,7 @@ def rpn_losses(anchor_labels, anchor_boxes, label_logits, box_logits):
add_moving_summary(*summaries) add_moving_summary(*summaries)
# Per-level loss summaries in FPN may appear lower due to the use of a small placeholder. # Per-level loss summaries in FPN may appear lower due to the use of a small placeholder.
# But the total loss is still the same. # But the total loss is still the same. TODO make the summary op smarter
placeholder = 0. placeholder = 0.
label_loss = tf.nn.sigmoid_cross_entropy_with_logits( label_loss = tf.nn.sigmoid_cross_entropy_with_logits(
labels=tf.to_float(valid_anchor_labels), logits=valid_label_logits) labels=tf.to_float(valid_anchor_labels), logits=valid_label_logits)
...@@ -217,7 +217,8 @@ def generate_rpn_proposals(boxes, scores, img_shape, ...@@ -217,7 +217,8 @@ def generate_rpn_proposals(boxes, scores, img_shape,
(-1, 4), name='nms_input_boxes') (-1, 4), name='nms_input_boxes')
nms_indices = tf.image.non_max_suppression( nms_indices = tf.image.non_max_suppression(
topk_valid_boxes_y1x1y2x2, topk_valid_boxes_y1x1y2x2,
topk_valid_scores, # TODO use exp to work around a bug in TF1.9: https://github.com/tensorflow/tensorflow/issues/19578
tf.exp(topk_valid_scores),
max_output_size=post_nms_topk, max_output_size=post_nms_topk,
iou_threshold=config.RPN_PROPOSAL_NMS_THRESH) iou_threshold=config.RPN_PROPOSAL_NMS_THRESH)
...@@ -608,7 +609,6 @@ def fpn_model(features): ...@@ -608,7 +609,6 @@ def fpn_model(features):
# tf.image.resize is, again, not aligned. # tf.image.resize is, again, not aligned.
# with tf.name_scope(name): # with tf.name_scope(name):
# logger.info("Nearest neighbor")
# shape2d = tf.shape(x)[2:] # shape2d = tf.shape(x)[2:]
# x = tf.transpose(x, [0, 2, 3, 1]) # x = tf.transpose(x, [0, 2, 3, 1])
# x = tf.image.resize_nearest_neighbor(x, shape2d * 2, align_corners=True) # x = tf.image.resize_nearest_neighbor(x, shape2d * 2, align_corners=True)
......
...@@ -623,5 +623,6 @@ if __name__ == '__main__': ...@@ -623,5 +623,6 @@ if __name__ == '__main__':
max_epoch=config.LR_SCHEDULE[-1] * factor // stepnum, max_epoch=config.LR_SCHEDULE[-1] * factor // stepnum,
session_init=get_model_loader(args.load) if args.load else None, session_init=get_model_loader(args.load) if args.load else None,
) )
# nccl mode gives the best speed
trainer = SyncMultiGPUTrainerReplicated(get_nr_gpu(), mode='nccl') trainer = SyncMultiGPUTrainerReplicated(get_nr_gpu(), mode='nccl')
launch_train_with_config(cfg, trainer) launch_train_with_config(cfg, trainer)
...@@ -31,8 +31,8 @@ Evaluate the [pretrained model](http://models.tensorpack.com/ShuffleNet/): ...@@ -31,8 +31,8 @@ Evaluate the [pretrained model](http://models.tensorpack.com/ShuffleNet/):
This AlexNet script is quite close to the setting in its [original This AlexNet script is quite close to the setting in its [original
paper](https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks). paper](https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks).
Trained with 64x2 batch size, the script reaches 58% single-crop validation Trained with 64x2 batch size, the script reaches 58% single-crop validation
accuracy after 100 epochs. It also generates first-layer filter visualizations accuracy after 100 epochs (21 hours on 2 V100s).
similar to the paper in tensorboard. It also puts in tensorboard the first-layer filter visualizations similar to the paper.
### Inception-BN, VGG16 ### Inception-BN, VGG16
......
...@@ -114,6 +114,8 @@ def resnet_group(name, l, block_func, features, count, stride): ...@@ -114,6 +114,8 @@ def resnet_group(name, l, block_func, features, count, stride):
def resnet_backbone(image, num_blocks, group_func, block_func): def resnet_backbone(image, num_blocks, group_func, block_func):
with argscope(Conv2D, use_bias=False, with argscope(Conv2D, use_bias=False,
kernel_initializer=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')): kernel_initializer=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')):
# Note that this pads the image by [2, 3] instead of [3, 2].
# Similar things happen in later stride=2 layers as well.
l = Conv2D('conv0', image, 64, 7, strides=2, activation=BNReLU) l = Conv2D('conv0', image, 64, 7, strides=2, activation=BNReLU)
l = MaxPooling('pool0', l, pool_size=3, strides=2, padding='SAME') l = MaxPooling('pool0', l, pool_size=3, strides=2, padding='SAME')
l = group_func('group0', l, block_func, 64, num_blocks[0], 1) l = group_func('group0', l, block_func, 64, num_blocks[0], 1)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment