Commit e9fa7eb6 authored by Yuxin Wu

[MaskRCNN] Slice FPN p2-4; Set config from cmdline

parent c0ae913e
...@@ -57,7 +57,6 @@ MaskRCNN results contain both bbox and segm mAP. ...@@ -57,7 +57,6 @@ MaskRCNN results contain both bbox and segm mAP.
|Backbone|`FASTRCNN_BATCH`|resolution |schedule|mAP (bbox/segm)|Time | |Backbone|`FASTRCNN_BATCH`|resolution |schedule|mAP (bbox/segm)|Time |
| - | - | - | - | - | - | | - | - | - | - | - | - |
|R50-C4 |64 |(600, 1024)|280k |33.1 |18h on 8 V100s| |R50-C4 |64 |(600, 1024)|280k |33.1 |18h on 8 V100s|
|R50-C4 |512 |(800, 1333)|280k |35.6 |55h on 8 P100s|
|R50-C4 |512 |(800, 1333)|360k |36.6 |49h on 8 V100s| |R50-C4 |512 |(800, 1333)|360k |36.6 |49h on 8 V100s|
|R50-FPN |512 |(800, 1333)|360k |37.5 |28h on 8 V100s| |R50-FPN |512 |(800, 1333)|360k |37.5 |28h on 8 V100s|
|R50-C4 |256 |(800, 1333)|280k |36.8/32.1 |39h on 8 P100s| |R50-C4 |256 |(800, 1333)|280k |36.8/32.1 |39h on 8 P100s|
...@@ -69,7 +68,6 @@ MaskRCNN results contain both bbox and segm mAP. ...@@ -69,7 +68,6 @@ MaskRCNN results contain both bbox and segm mAP.
The two R50-C4 360k models have the same configuration __and mAP__ The two R50-C4 360k models have the same configuration __and mAP__
as the `R50-C4-2x` entries in as the `R50-C4-2x` entries in
[Detectron Model Zoo](https://github.com/facebookresearch/Detectron/blob/master/MODEL_ZOO.md#end-to-end-faster--mask-r-cnn-baselines). [Detectron Model Zoo](https://github.com/facebookresearch/Detectron/blob/master/MODEL_ZOO.md#end-to-end-faster--mask-r-cnn-baselines).
<!-- So far this is the only public TensorFlow implementation that can reproduce mAP in Detectron. -->
The other models listed here do not correspond to any configurations in Detectron. The other models listed here do not correspond to any configurations in Detectron.
## Notes ## Notes
......
...@@ -146,4 +146,5 @@ def resnet_fpn_backbone(image, num_blocks, freeze_c2=True): ...@@ -146,4 +146,5 @@ def resnet_fpn_backbone(image, num_blocks, freeze_c2=True):
c4 = resnet_group('group2', c3, resnet_bottleneck, 256, num_blocks[2], 2) c4 = resnet_group('group2', c3, resnet_bottleneck, 256, num_blocks[2], 2)
c5 = resnet_group('group3', c4, resnet_bottleneck, 512, num_blocks[3], 2) c5 = resnet_group('group3', c4, resnet_bottleneck, 512, num_blocks[3], 2)
# 32x downsampling up to now # 32x downsampling up to now
# size of c5: ceil(input/32)
return c2, c3, c4, c5 return c2, c3, c4, c5
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
# File: common.py # File: common.py
import numpy as np import numpy as np
import six
import cv2 import cv2
from tensorpack.dataflow import RNGDataFlow from tensorpack.dataflow import RNGDataFlow
...@@ -139,6 +140,16 @@ def filter_boxes_inside_shape(boxes, shape): ...@@ -139,6 +140,16 @@ def filter_boxes_inside_shape(boxes, shape):
return indices, boxes[indices, :] return indices, boxes[indices, :]
def write_config_from_args(configs):
    """Overwrite attributes of the ``config`` module from "KEY=VALUE" strings.

    Each item is split at its first '='. KEY must already exist on ``config``.
    If the current value of KEY is a string, VALUE is stored verbatim;
    otherwise VALUE is evaluated as a Python expression before being stored.

    Args:
        configs: iterable of "KEY=VALUE" strings, or None / empty when no
            overrides were given (argparse yields None for an omitted
            ``nargs='+'`` option).

    Raises:
        ValueError: an item contains no '='.
        AssertionError: KEY is not an existing attribute of ``config``.
    """
    # Nothing to override; also guards against iterating over None.
    if not configs:
        return
    for cfg in configs:
        # partition() splits at the first '=' only and, unlike
        # split(..., maxsplit=1), also works on Python 2.
        k, sep, v = cfg.partition('=')
        if not sep:
            raise ValueError(
                "Config must be given as KEY=VALUE, got: {}".format(cfg))
        assert k in dir(config), "Unknown config key: {}".format(k)
        oldv = getattr(config, k)
        # string_types rather than text_type, so that Python 2 byte-string
        # defaults are also recognized as strings and kept verbatim.
        if not isinstance(oldv, six.string_types):
            # NOTE(review): eval() runs arbitrary Python taken from the
            # command line. Only pass trusted input here.
            v = eval(v)
        setattr(config, k, v)
def print_config(): def print_config():
logger.info("Config: ------------------------------------------") logger.info("Config: ------------------------------------------")
for k in dir(config): for k in dir(config):
......
...@@ -20,9 +20,11 @@ CLASS_NAMES = [] # NUM_CLASS strings. Needs to be populated later by data loade ...@@ -20,9 +20,11 @@ CLASS_NAMES = [] # NUM_CLASS strings. Needs to be populated later by data loade
RESNET_NUM_BLOCK = [3, 4, 6, 3] # for resnet50 RESNET_NUM_BLOCK = [3, 4, 6, 3] # for resnet50
# RESNET_NUM_BLOCK = [3, 4, 23, 3] # for resnet101 # RESNET_NUM_BLOCK = [3, 4, 23, 3] # for resnet101
FREEZE_AFFINE = False # do not train affine parameters inside BN FREEZE_AFFINE = False # do not train affine parameters inside BN
# Use a base model with TF-preferred pad mode
# Use a base model with TF-preferred pad mode,
# which may pad more pixels on right/bottom than top/left. # which may pad more pixels on right/bottom than top/left.
# This is probably not good for alignment but we'll have to live with it. # TF_PAD_MODE=False is better for performance but will require a different base model.
# See https://github.com/tensorflow/tensorflow/issues/18213
TF_PAD_MODE = True TF_PAD_MODE = True
# schedule ----------------------- # schedule -----------------------
...@@ -41,7 +43,7 @@ MAX_SIZE = 1333 ...@@ -41,7 +43,7 @@ MAX_SIZE = 1333
# anchors ------------------------- # anchors -------------------------
ANCHOR_STRIDE = 16 ANCHOR_STRIDE = 16
ANCHOR_STRIDES_FPN = (4, 8, 16, 32, 64) # strides for each FPN level. Must be the same length as ANCHOR_SIZES ANCHOR_STRIDES_FPN = (4, 8, 16, 32, 64) # strides for each FPN level. Must be the same length as ANCHOR_SIZES
FPN_RESOLUTION_REQUIREMENT = 32 # image size into the backbone has to be multiple of this number FPN_RESOLUTION_REQUIREMENT = 32 # image size into the backbone has to be multiple of this number
ANCHOR_SIZES = (32, 64, 128, 256, 512) # sqrtarea of the anchor box ANCHOR_SIZES = (32, 64, 128, 256, 512) # sqrtarea of the anchor box
ANCHOR_RATIOS = (0.5, 1., 2.) ANCHOR_RATIOS = (0.5, 1., 2.)
NUM_ANCHOR = len(ANCHOR_SIZES) * len(ANCHOR_RATIOS) NUM_ANCHOR = len(ANCHOR_SIZES) * len(ANCHOR_RATIOS)
......
...@@ -45,7 +45,7 @@ from data import ( ...@@ -45,7 +45,7 @@ from data import (
from viz import ( from viz import (
draw_annotation, draw_proposal_recall, draw_annotation, draw_proposal_recall,
draw_predictions, draw_final_outputs) draw_predictions, draw_final_outputs)
from common import print_config from common import print_config, write_config_from_args
from eval import ( from eval import (
eval_coco, detect_one_image, print_evaluation_scores, DetectionResult) eval_coco, detect_one_image, print_evaluation_scores, DetectionResult)
import config import config
...@@ -332,6 +332,14 @@ class ResNetFPNModel(DetectionModel): ...@@ -332,6 +332,14 @@ class ResNetFPNModel(DetectionModel):
c2345 = resnet_fpn_backbone(image, config.RESNET_NUM_BLOCK) c2345 = resnet_fpn_backbone(image, config.RESNET_NUM_BLOCK)
p23456 = fpn_model('fpn', c2345) p23456 = fpn_model('fpn', c2345)
# Images are padded for p5, which leaves p2-p4 larger than needed; slice each one to ceil(image_shape / stride).
for i, stride in enumerate(config.ANCHOR_STRIDES_FPN[:3]):
pi = p23456[i]
target_shape = tf.to_int32(tf.ceil(tf.to_float(image_shape2d) * (1.0 / stride)))
p23456[i] = tf.slice(pi, [0, 0, 0, 0],
tf.concat([[-1, -1], target_shape], axis=0))
p23456[i].set_shape([1, pi.shape[1], None, None])
# Multi-Level RPN Proposals # Multi-Level RPN Proposals
multilevel_proposals = [] multilevel_proposals = []
rpn_loss_collection = [] rpn_loss_collection = []
...@@ -429,8 +437,7 @@ class ResNetFPNModel(DetectionModel): ...@@ -429,8 +437,7 @@ class ResNetFPNModel(DetectionModel):
image_shape2d, rcnn_boxes, fastrcnn_label_logits, fastrcnn_box_logits) image_shape2d, rcnn_boxes, fastrcnn_label_logits, fastrcnn_box_logits)
if config.MODE_MASK: if config.MODE_MASK:
# Cascade inference needs roi transform with refined boxes. # Cascade inference needs roi transform with refined boxes.
roi_feature_maskrcnn = multilevel_roi_align( roi_feature_maskrcnn = multilevel_roi_align(p23456[:4], final_boxes, 14)
p23456[:4], final_boxes, 14)
mask_logits = maskrcnn_upXconv_head( mask_logits = maskrcnn_upXconv_head(
'maskrcnn', roi_feature_maskrcnn, config.NUM_CLASS, 4) # #fg x #cat x 28 x 28 'maskrcnn', roi_feature_maskrcnn, config.NUM_CLASS, 4) # #fg x #cat x 28 x 28
indices = tf.stack([tf.range(tf.size(final_labels)), tf.to_int32(final_labels) - 1], axis=1) indices = tf.stack([tf.range(tf.size(final_labels)), tf.to_int32(final_labels) - 1], axis=1)
...@@ -560,15 +567,16 @@ if __name__ == '__main__': ...@@ -560,15 +567,16 @@ if __name__ == '__main__':
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('--load', help='load a model for evaluation or training') parser.add_argument('--load', help='load a model for evaluation or training')
parser.add_argument('--logdir', help='log directory', default='train_log/maskrcnn') parser.add_argument('--logdir', help='log directory', default='train_log/maskrcnn')
parser.add_argument('--datadir', help='override config.BASEDIR')
parser.add_argument('--visualize', action='store_true', help='visualize intermediate results') parser.add_argument('--visualize', action='store_true', help='visualize intermediate results')
parser.add_argument('--evaluate', help="Run evaluation on COCO. " parser.add_argument('--evaluate', help="Run evaluation on COCO. "
"This argument is the path to the output json evaluation file") "This argument is the path to the output json evaluation file")
parser.add_argument('--predict', help="Run prediction on a given image. " parser.add_argument('--predict', help="Run prediction on a given image. "
"This argument is the path to the input image file") "This argument is the path to the input image file")
parser.add_argument('--config', help="A list of key=value to overwrite those defined in config.py",
nargs='+')
args = parser.parse_args() args = parser.parse_args()
if args.datadir: write_config_from_args(args.config)
config.BASEDIR = args.datadir
if args.visualize or args.evaluate or args.predict: if args.visualize or args.evaluate or args.predict:
# autotune is too slow for inference # autotune is too slow for inference
...@@ -619,8 +627,8 @@ if __name__ == '__main__': ...@@ -619,8 +627,8 @@ if __name__ == '__main__':
mult = 0.1 ** (idx + 1) mult = 0.1 ** (idx + 1)
lr_schedule.append( lr_schedule.append(
(steps * factor // stepnum, config.BASE_LR * mult)) (steps * factor // stepnum, config.BASE_LR * mult))
logger.info("Warmup Up Schedule: " + str(warmup_schedule)) logger.info("Warm Up Schedule (steps, value): " + str(warmup_schedule))
logger.info("LR Schedule: " + str(lr_schedule)) logger.info("LR Schedule (epochs, value): " + str(lr_schedule))
callbacks = [ callbacks = [
PeriodicCallback( PeriodicCallback(
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment