Commit 099975c5 authored by Yuxin Wu

update docs

parent 0ee4d8b0
# Faster-RCNN / Mask-RCNN on COCO # Faster-RCNN / Mask-RCNN on COCO
This example provides a minimal (only 1.6k lines) but faithful implementation of This example provides a minimal (only 1.6k lines) but faithful implementation of the following papers:
the following papers:
+ [Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks](https://arxiv.org/abs/1506.01497) + [Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks](https://arxiv.org/abs/1506.01497)
+ [Feature Pyramid Networks for Object Detection](https://arxiv.org/abs/1612.03144) + [Feature Pyramid Networks for Object Detection](https://arxiv.org/abs/1612.03144)
...@@ -70,7 +69,7 @@ MaskRCNN results contain both bbox and segm mAP. ...@@ -70,7 +69,7 @@ MaskRCNN results contain both bbox and segm mAP.
The two R50-C4 360k models have the same configuration __and mAP__ The two R50-C4 360k models have the same configuration __and mAP__
as the `R50-C4-2x` entries in as the `R50-C4-2x` entries in
[Detectron Model Zoo](https://github.com/facebookresearch/Detectron/blob/master/MODEL_ZOO.md#end-to-end-faster--mask-r-cnn-baselines). [Detectron Model Zoo](https://github.com/facebookresearch/Detectron/blob/master/MODEL_ZOO.md#end-to-end-faster--mask-r-cnn-baselines).
So far this is the only public TensorFlow implementation that can reproduce mAP in Detectron. <!-- So far this is the only public TensorFlow implementation that can reproduce mAP in Detectron. -->
The other models listed here do not correspond to any configurations in Detectron. The other models listed here do not correspond to any configurations in Detectron.
## Notes ## Notes
......
...@@ -10,6 +10,7 @@ from tensorpack.utils.argtools import memoized, log_once ...@@ -10,6 +10,7 @@ from tensorpack.utils.argtools import memoized, log_once
from tensorpack.dataflow import ( from tensorpack.dataflow import (
imgaug, TestDataSpeed, PrefetchDataZMQ, MultiProcessMapDataZMQ, imgaug, TestDataSpeed, PrefetchDataZMQ, MultiProcessMapDataZMQ,
MapDataComponent, DataFromList) MapDataComponent, DataFromList)
from tensorpack.utils import logger
# import tensorpack.utils.viz as tpviz # import tensorpack.utils.viz as tpviz
from coco import COCODetection from coco import COCODetection
...@@ -277,7 +278,10 @@ def get_train_dataflow(): ...@@ -277,7 +278,10 @@ def get_train_dataflow():
# Valid training images should have at least one fg box. # Valid training images should have at least one fg box.
# But this filter shall not be applied for testing. # But this filter shall not be applied for testing.
num = len(imgs)
imgs = list(filter(lambda img: len(img['boxes']) > 0, imgs)) # log invalid training imgs = list(filter(lambda img: len(img['boxes']) > 0, imgs)) # log invalid training
logger.info("Filtered {} images which contain no groudtruth boxes. Total #images for training: {}".format(
num - len(imgs), len(imgs)))
ds = DataFromList(imgs, shuffle=True) ds = DataFromList(imgs, shuffle=True)
......
...@@ -99,7 +99,7 @@ def rpn_losses(anchor_labels, anchor_boxes, label_logits, box_logits): ...@@ -99,7 +99,7 @@ def rpn_losses(anchor_labels, anchor_boxes, label_logits, box_logits):
add_moving_summary(*summaries) add_moving_summary(*summaries)
# Per-level loss summaries in FPN may appear lower due to the use of a small placeholder. # Per-level loss summaries in FPN may appear lower due to the use of a small placeholder.
# But the total loss is still the same. # But the total loss is still the same. TODO make the summary op smarter
placeholder = 0. placeholder = 0.
label_loss = tf.nn.sigmoid_cross_entropy_with_logits( label_loss = tf.nn.sigmoid_cross_entropy_with_logits(
labels=tf.to_float(valid_anchor_labels), logits=valid_label_logits) labels=tf.to_float(valid_anchor_labels), logits=valid_label_logits)
...@@ -217,7 +217,8 @@ def generate_rpn_proposals(boxes, scores, img_shape, ...@@ -217,7 +217,8 @@ def generate_rpn_proposals(boxes, scores, img_shape,
(-1, 4), name='nms_input_boxes') (-1, 4), name='nms_input_boxes')
nms_indices = tf.image.non_max_suppression( nms_indices = tf.image.non_max_suppression(
topk_valid_boxes_y1x1y2x2, topk_valid_boxes_y1x1y2x2,
topk_valid_scores, # TODO use exp to work around a bug in TF1.9: https://github.com/tensorflow/tensorflow/issues/19578
tf.exp(topk_valid_scores),
max_output_size=post_nms_topk, max_output_size=post_nms_topk,
iou_threshold=config.RPN_PROPOSAL_NMS_THRESH) iou_threshold=config.RPN_PROPOSAL_NMS_THRESH)
...@@ -608,7 +609,6 @@ def fpn_model(features): ...@@ -608,7 +609,6 @@ def fpn_model(features):
# tf.image.resize is, again, not aligned. # tf.image.resize is, again, not aligned.
# with tf.name_scope(name): # with tf.name_scope(name):
# logger.info("Nearest neighbor")
# shape2d = tf.shape(x)[2:] # shape2d = tf.shape(x)[2:]
# x = tf.transpose(x, [0, 2, 3, 1]) # x = tf.transpose(x, [0, 2, 3, 1])
# x = tf.image.resize_nearest_neighbor(x, shape2d * 2, align_corners=True) # x = tf.image.resize_nearest_neighbor(x, shape2d * 2, align_corners=True)
......
...@@ -623,5 +623,6 @@ if __name__ == '__main__': ...@@ -623,5 +623,6 @@ if __name__ == '__main__':
max_epoch=config.LR_SCHEDULE[-1] * factor // stepnum, max_epoch=config.LR_SCHEDULE[-1] * factor // stepnum,
session_init=get_model_loader(args.load) if args.load else None, session_init=get_model_loader(args.load) if args.load else None,
) )
# nccl mode gives the best speed
trainer = SyncMultiGPUTrainerReplicated(get_nr_gpu(), mode='nccl') trainer = SyncMultiGPUTrainerReplicated(get_nr_gpu(), mode='nccl')
launch_train_with_config(cfg, trainer) launch_train_with_config(cfg, trainer)
...@@ -31,8 +31,8 @@ Evaluate the [pretrained model](http://models.tensorpack.com/ShuffleNet/): ...@@ -31,8 +31,8 @@ Evaluate the [pretrained model](http://models.tensorpack.com/ShuffleNet/):
This AlexNet script is quite close to the setting in its [original This AlexNet script is quite close to the setting in its [original
paper](https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks). paper](https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks).
Trained with 64x2 batch size, the script reaches 58% single-crop validation Trained with 64x2 batch size, the script reaches 58% single-crop validation
accuracy after 100 epochs. It also generates first-layer filter visualizations accuracy after 100 epochs (21 hours on 2 V100s).
similar to the paper in tensorboard. It also puts in tensorboard the first-layer filter visualizations similar to the paper.
### Inception-BN, VGG16 ### Inception-BN, VGG16
......
...@@ -114,6 +114,8 @@ def resnet_group(name, l, block_func, features, count, stride): ...@@ -114,6 +114,8 @@ def resnet_group(name, l, block_func, features, count, stride):
def resnet_backbone(image, num_blocks, group_func, block_func): def resnet_backbone(image, num_blocks, group_func, block_func):
with argscope(Conv2D, use_bias=False, with argscope(Conv2D, use_bias=False,
kernel_initializer=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')): kernel_initializer=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')):
# Note that this pads the image by [2, 3] instead of [3, 2].
# Similar things happen in later stride=2 layers as well.
l = Conv2D('conv0', image, 64, 7, strides=2, activation=BNReLU) l = Conv2D('conv0', image, 64, 7, strides=2, activation=BNReLU)
l = MaxPooling('pool0', l, pool_size=3, strides=2, padding='SAME') l = MaxPooling('pool0', l, pool_size=3, strides=2, padding='SAME')
l = group_func('group0', l, block_func, 64, num_blocks[0], 1) l = group_func('group0', l, block_func, 64, num_blocks[0], 1)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment