Commit a19eb489 authored by Yuxin Wu

Merge branch 'fpn'

parents 804c5ee6 4fe9e5b1
# Faster-RCNN / Mask-RCNN on COCO
This example provides a minimal (only 1.6k lines) but faithful implementation of the
following papers in combination:
+ [Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks](https://arxiv.org/abs/1506.01497)
+ [Feature Pyramid Networks for Object Detection](https://arxiv.org/abs/1612.03144)
+ [Mask R-CNN](https://arxiv.org/abs/1703.06870)
## Dependencies
+ Python 3; TensorFlow >= 1.4.0 (>=1.6.0 recommended due to a TF bug)
+ [pycocotools](https://github.com/pdollar/coco/tree/master/PythonAPI/pycocotools), OpenCV.
+ Pre-trained [ResNet model](http://models.tensorpack.com/ResNet/) from tensorpack model zoo.
+ COCO data. It assumes the following directory structure:
@@ -53,18 +57,21 @@ MaskRCNN results contain both bbox and segm mAP.
|Backbone|`FASTRCNN_BATCH`|resolution |schedule|mAP (bbox/segm)|Time |
| - | - | - | - | - | - |
|R50-C4 |64 |(600, 1024)|280k |33.1 |18h on 8 V100s|
|R50-C4 |512 |(800, 1333)|280k |35.6 |55h on 8 P100s|
|R50-C4 |512 |(800, 1333)|360k |36.6 |49h on 8 V100s|
|R50-FPN |512 |(800, 1333)|360k |37.5 |28h on 8 V100s|
|R50-C4 |256 |(800, 1333)|280k |36.8/32.1 |39h on 8 P100s|
|R50-C4 |512 |(800, 1333)|360k |37.8/33.2 |51h on 8 V100s|
|R50-FPN |512 |(800, 1333)|360k |38.1/34.9 |38h on 8 V100s|
|R101-C4 |512 |(800, 1333)|280k |40.1/34.4 |70h on 8 P100s|
|R101-C4 |512 |(800, 1333)|360k |40.8/35.1 |63h on 8 V100s|
The two R50-C4 360k models have the same configuration __and mAP__
as the `R50-C4-2x` entries in
[Detectron Model Zoo](https://github.com/facebookresearch/Detectron/blob/master/MODEL_ZOO.md#end-to-end-faster--mask-r-cnn-baselines).
So far this is the only TensorFlow implementation that can reproduce mAP in Detectron.
The other models listed here do not correspond to any configurations in Detectron.
## Notes
@@ -92,7 +92,7 @@ def resnet_group(l, name, block_func, features, count, stride):
    return l


def resnet_c4_backbone(image, num_blocks, freeze_c2=True):
    assert len(num_blocks) == 3
    with resnet_argscope():
        l = tf.pad(image, [[0, 0], [0, 0], [2, 3], [2, 3]])
@@ -116,10 +116,19 @@ def resnet_conv5(image, num_block):
    return l


def resnet_fpn_backbone(image, num_blocks, freeze_c2=True):
    shape2d = tf.shape(image)[2:]
    mult = config.FPN_RESOLUTION_REQUIREMENT * 1.
    new_shape2d = tf.to_int32(tf.ceil(tf.to_float(shape2d) / mult) * mult)
    pad_shape2d = new_shape2d - shape2d
    assert len(num_blocks) == 4
    # TODO pad 1 at each stage
    with resnet_argscope():
        chan = image.shape[1]
        l = tf.pad(image,
                   tf.stack([[0, 0], [0, 0],
                             [2, 3 + pad_shape2d[0]], [2, 3 + pad_shape2d[1]]]))
        l.set_shape([None, chan, None, None])
        l = Conv2D('conv0', l, 64, 7, strides=2, activation=BNReLU, padding='VALID')
        l = tf.pad(l, [[0, 0], [0, 0], [0, 1], [0, 1]])
        l = MaxPooling('pool0', l, 3, strides=2, padding='VALID')
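The extra padding above exists because FPN builds feature maps down to stride 32, so the input fed to the backbone must be a multiple of `FPN_RESOLUTION_REQUIREMENT` for all levels to stay aligned. A standalone sketch of the same rounding (pure NumPy, numbers illustrative):

```python
import numpy as np

def fpn_pad_amount(h, w, mult=32):
    # round each spatial dim up to the next multiple of `mult`,
    # mirroring the tf.ceil computation in resnet_fpn_backbone
    new_h = int(np.ceil(h / mult) * mult)
    new_w = int(np.ceil(w / mult) * mult)
    return new_h - h, new_w - w

print(fpn_pad_amount(800, 1333))  # (0, 11): pads (800, 1333) up to (800, 1344)
```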
@@ -5,6 +5,7 @@ import numpy as np
# mode flags ---------------------
MODE_MASK = True
MODE_FPN = False
# dataset -----------------------
BASEDIR = '/path/to/your/COCO/DIR'
@@ -34,6 +35,7 @@ MAX_SIZE = 1333
# anchors -------------------------
ANCHOR_STRIDE = 16
ANCHOR_STRIDES_FPN = (4, 8, 16, 32, 64) # strides for each FPN level. Must be the same length as ANCHOR_SIZES
FPN_RESOLUTION_REQUIREMENT = 32  # image size fed into the backbone has to be a multiple of this number
ANCHOR_SIZES = (32, 64, 128, 256, 512)   # sqrt(area) of the anchor box
ANCHOR_RATIOS = (0.5, 1., 2.)
NUM_ANCHOR = len(ANCHOR_SIZES) * len(ANCHOR_RATIOS)
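Since each anchor size is the sqrt of the anchor area, a (size, ratio) pair fixes the box shape by solving w·h = size² and h/w = ratio; 5 sizes × 3 ratios give the 15 anchors per location used in C4 mode. A small sketch of that geometry only; treat it as an illustration, not the example's actual anchor generator:

```python
import numpy as np

def anchor_wh(size, ratio):
    # size is sqrt(area) and ratio is h/w: solve w * h = size**2, h / w = ratio
    w = size / np.sqrt(ratio)
    h = size * np.sqrt(ratio)
    return w, h

print(anchor_wh(32, 0.5))  # (~45.25, ~22.63): a wide anchor with area 32**2
```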
@@ -48,6 +50,7 @@ RPN_MIN_SIZE = 0
RPN_PROPOSAL_NMS_THRESH = 0.7
TRAIN_PRE_NMS_TOPK = 12000
TRAIN_POST_NMS_TOPK = 2000
TRAIN_FPN_NMS_TOPK = 2000
CROWD_OVERLAP_THRES = 0.7 # boxes overlapping crowd will be ignored.
# fastrcnn training ---------------------
@@ -56,15 +59,16 @@ FASTRCNN_BBOX_REG_WEIGHTS = np.array([10, 10, 5, 5], dtype='float32')
FASTRCNN_FG_THRESH = 0.5
FASTRCNN_FG_RATIO = 0.25 # fg ratio in a ROI batch
# modeling -------------------------
FPN_NUM_CHANNEL = 256
FASTRCNN_FC_HEAD_DIM = 1024
MASKRCNN_HEAD_DIM = 256
# testing -----------------------
TEST_PRE_NMS_TOPK = 6000
TEST_POST_NMS_TOPK = 1000 # if you encounter OOM in inference, set this to a smaller number
TEST_FPN_NMS_TOPK = 1000
FASTRCNN_NMS_THRESH = 0.5
RESULT_SCORE_THRESH = 0.05
RESULT_SCORE_THRESH_VIS = 0.3 # only visualize confident results
RESULTS_PER_IM = 100
@@ -8,7 +8,7 @@ import itertools
from tensorpack.utils.argtools import memoized, log_once
from tensorpack.dataflow import (
    imgaug, TestDataSpeed, PrefetchDataZMQ, MultiProcessMapDataZMQ,
    MapDataComponent, DataFromList)
# import tensorpack.utils.viz as tpviz
@@ -51,7 +51,12 @@ def get_all_anchors(
    # anchors are intbox here.
    # anchors at featuremap [0,0] are centered at fpcoor (8,8) (half of stride)
    max_size = config.MAX_SIZE
    if config.MODE_FPN:
        # TODO setting this in config is perhaps better
        size_mult = config.FPN_RESOLUTION_REQUIREMENT * 1.
        max_size = np.ceil(max_size / size_mult) * size_mult
    field_size = int(np.ceil(max_size / stride))
    shifts = np.arange(0, field_size) * stride
    shift_x, shift_y = np.meshgrid(shifts, shifts)
    shift_x = shift_x.flatten()
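What follows this meshgrid in `get_all_anchors` is a broadcast that stamps every base anchor at every feature-map location. A condensed sketch of that step, where `cell_anchors` is an assumed name for the (A, 4) array of base anchors at the first cell:

```python
import numpy as np

def stamp_anchors(cell_anchors, shift_x, shift_y):
    # shift_x, shift_y: flattened meshgrid offsets from above, K entries each
    shifts = np.vstack((shift_x, shift_y, shift_x, shift_y)).transpose()  # (K, 4)
    K, A = shifts.shape[0], cell_anchors.shape[0]
    # (1, A, 4) + (K, 1, 4) broadcasts to (K, A, 4): every anchor at every cell
    return cell_anchors.reshape((1, A, 4)) + shifts.reshape((K, 1, 4))
```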
@@ -136,17 +141,19 @@ def get_anchor_labels(anchors, gt_boxes, crowd_boxes):
    overlap_with_crowd = cand_inds[ious.max(axis=1) > config.CROWD_OVERLAP_THRES]
    anchor_labels[overlap_with_crowd] = -1

    # Subsample fg labels: ignore some fg if fg is too many
    target_num_fg = int(config.RPN_BATCH_PER_IM * config.RPN_FG_RATIO)
    fg_inds = filter_box_label(anchor_labels, 1, target_num_fg)
    if len(fg_inds) == 0:
        raise MalformedData("No valid foreground for RPN!")
    # Note that fg could be fewer than the target number

    # Subsample bg labels. num_bg is not allowed to be too many
    old_num_bg = np.sum(anchor_labels == 0)
    if old_num_bg == 0:
        # No valid bg in this image, skip.
        # This can happen if, e.g., the image has a large crowd region.
        raise MalformedData("No valid background for RPN!")
    target_num_bg = config.RPN_BATCH_PER_IM - len(fg_inds)
    filter_box_label(anchor_labels, 0, target_num_bg)   # ignore return values
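`filter_box_label` is defined earlier in data.py; a minimal sketch consistent with its use here would randomly flip excess labels of a given value to -1 ("ignore") and return the surviving indices:

```python
import numpy as np

def filter_box_label(labels, value, max_num):
    curr_inds = np.where(labels == value)[0]
    if len(curr_inds) > max_num:
        disable_inds = np.random.choice(
            curr_inds, size=(len(curr_inds) - max_num), replace=False)
        labels[disable_inds] = -1    # -1 anchors are ignored by the RPN loss
        curr_inds = np.where(labels == value)[0]
    return curr_inds
```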
@@ -336,8 +343,7 @@ def get_train_dataflow(add_mask=False):
        # tpviz.interactive_imshow(viz)
        return ret

    ds = MultiProcessMapDataZMQ(ds, 10, preprocess)
    return ds
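`MultiProcessMapDataZMQ(ds, 10, preprocess)` runs `preprocess` in 10 worker processes and gathers results over ZMQ, replacing the old single-process `MapData` + `PrefetchDataZMQ(ds, 1)` pair; note it does not guarantee datapoint order. A single-process fallback, handy when debugging `preprocess` (names as above, the helper itself is hypothetical):

```python
from tensorpack.dataflow import MapData, PrefetchDataZMQ

def debug_train_dataflow(ds, preprocess):
    # single-process equivalent of MultiProcessMapDataZMQ(ds, 10, preprocess)
    ds = MapData(ds, preprocess)   # map in the main process, easier to debug
    ds = PrefetchDataZMQ(ds, 1)    # one background process for prefetching
    return ds
```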
@@ -359,7 +365,6 @@ if __name__ == '__main__':
    import os
    from tensorpack.dataflow import PrintData
    config.BASEDIR = os.path.expanduser('~/data/coco')
    ds = get_train_dataflow(add_mask=config.MODE_MASK)
    ds = PrintData(ds, 100)
    TestDataSpeed(ds, 50000).start()
@@ -141,12 +141,15 @@ def print_evaluation_scores(json_file):
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()
    fields = ['IoU=0.5:0.95', 'IoU=0.5', 'IoU=0.75', 'small', 'medium', 'large']
    for k in range(6):
        ret['mAP(bbox)/' + fields[k]] = cocoEval.stats[k]

    if config.MODE_MASK:
        cocoEval = COCOeval(coco, cocoDt, 'segm')
        cocoEval.evaluate()
        cocoEval.accumulate()
        cocoEval.summarize()
        for k in range(6):
            ret['mAP(segm)/' + fields[k]] = cocoEval.stats[k]
    return ret
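The first six entries of `cocoEval.stats` are, in order, AP at IoU=0.5:0.95, IoU=0.5, IoU=0.75, and AP on small/medium/large objects, which is exactly what `fields` spells out. A hedged usage sketch (the json path and the numbers are illustrative only):

```python
# hypothetical usage of the function above
ret = print_evaluation_scores('/path/to/detections.json')
print(ret['mAP(bbox)/IoU=0.5:0.95'])   # e.g. ~0.378 for the R50-C4 360k model
if config.MODE_MASK:
    print(ret['mAP(segm)/IoU=0.5:0.95'])   # e.g. ~0.332
```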