Commit 141ab53c authored by Yuxin Wu's avatar Yuxin Wu

[MaskRCNN] use un-quantized anchors; use better postprocessing; use 1x schedule

parent ae7b0774
......@@ -54,11 +54,8 @@ Model:
4. Because of (3), BatchNorm statistics are supposed to be freezed during fine-tuning.
5. An alternative to freezing BatchNorm is to sync BatchNorm statistics across
GPUs (the `BACKBONE.NORM=SyncBN` option). This would require [my bugfix](https://github.com/tensorflow/tensorflow/pull/20360)
which is available since TF 1.10. You can manually apply the patch to use it.
For now the total batch size is at most 8, so this option does not improve the model by much.
6. Another alternative to BatchNorm is GroupNorm (`BACKBONE.NORM=GN`) which has better performance.
GPUs (the `BACKBONE.NORM=SyncBN` option).
Another alternative to BatchNorm is GroupNorm (`BACKBONE.NORM=GN`) which has better performance.
Efficiency:
......@@ -82,6 +79,14 @@ Efficiency:
1. Inference is unoptimized. Tensorpack is a training interface, therefore it
does not help you on optimized inference.
1. To reduce RAM usage on host: (1) make sure you're using the "spawn" method as
set in `train.py`; (2) reduce `buffer_size` or `NUM_WORKERS` in `data.py`
(which may negatively impact your throughput). The training needs <10G RAM if `NUM_WORKERS=0`.
1. Inference is unoptimized. Tensorpack is a training interface, therefore it
does not help you on optimized inference. In fact, the current implementation
uses some slow numpy operations in inference (in `eval.py:_paste_mask`).
Possible Future Enhancements:
1. Define a better interface to load different datasets.
......
This diff is collapsed.
......@@ -130,8 +130,8 @@ _C.TRAIN.STARTING_EPOCH = 1 # the first epoch to start with, useful to continue
# the base learning rate are computed from BASE_LR and LR_SCHEDULE.
# Therefore, there is *no need* to modify the config if you only change the number of GPUs.
# _C.TRAIN.LR_SCHEDULE = [120000, 160000, 180000] # "1x" schedule in detectron
_C.TRAIN.LR_SCHEDULE = [240000, 320000, 360000] # "2x" schedule in detectron
_C.TRAIN.LR_SCHEDULE = [120000, 160000, 180000] # "1x" schedule in detectron
# _C.TRAIN.LR_SCHEDULE = [240000, 320000, 360000] # "2x" schedule in detectron
# Longer schedules for from-scratch training (https://arxiv.org/abs/1811.08883):
# _C.TRAIN.LR_SCHEDULE = [960000, 1040000, 1080000] # "6x" schedule in detectron
# _C.TRAIN.LR_SCHEDULE = [1500000, 1580000, 1620000] # "9x" schedule in detectron
......
This diff is collapsed.
......@@ -13,6 +13,7 @@ from contextlib import ExitStack
import cv2
import pycocotools.mask as cocomask
import tqdm
from scipy import interpolate
from tensorpack.callbacks import Callback
from tensorpack.tfutils.common import get_tf_version_tuple
......@@ -41,6 +42,23 @@ mask: None, or a binary image of the original image shape
"""
def _scale_box(box, scale):
w_half = (box[2] - box[0]) * 0.5
h_half = (box[3] - box[1]) * 0.5
x_c = (box[2] + box[0]) * 0.5
y_c = (box[3] + box[1]) * 0.5
w_half *= scale
h_half *= scale
scaled_box = np.zeros_like(box)
scaled_box[0] = x_c - w_half
scaled_box[2] = x_c + w_half
scaled_box[1] = y_c - h_half
scaled_box[3] = y_c + h_half
return scaled_box
def _paste_mask(box, mask, shape):
"""
Args:
......@@ -50,6 +68,25 @@ def _paste_mask(box, mask, shape):
Returns:
A uint8 binary image of hxw.
"""
assert mask.shape[0] == mask.shape[1], mask.shape
if True:
# This method is accurate but much slower.
mask = np.pad(mask, [(1, 1), (1, 1)], mode='constant')
box = _scale_box(box, float(mask.shape[0]) / (mask.shape[0] - 2))
mask_pixels = np.arange(0.0, mask.shape[0]) + 0.5
mask_continuous = interpolate.interp2d(mask_pixels, mask_pixels, mask, fill_value=0.0)
h, w = shape
ys = np.arange(0.0, h) + 0.5
xs = np.arange(0.0, w) + 0.5
ys = (ys - box[1]) / (box[3] - box[1]) * mask.shape[0]
xs = (xs - box[0]) / (box[2] - box[0]) * mask.shape[1]
res = mask_continuous(xs, ys)
return (res >= 0.5).astype('uint8')
else:
# This method (inspired by Detectron) is less accurate but fast.
# int() is floor
# box fpcoor=0.0 -> intcoor=0.0
x0, y0 = list(map(int, box[:2] + 0.5))
......@@ -82,7 +119,6 @@ def predict_image(img, model_func):
Returns:
[DetectionResult]
"""
orig_shape = img.shape[:2]
resizer = CustomResize(cfg.PREPROC.TEST_SHORT_EDGE_SIZE, cfg.PREPROC.MAX_SIZE)
resized_img = resizer.augment(img)
......
# Some third-party helper functions
+ generate_anchors.py: copied from [py-faster-rcnn](https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/rpn/generate_anchors.py).
+ box_ops.py: modified from [TF object detection API](https://github.com/tensorflow/models/blob/master/research/object_detection/core/box_list_ops.py).
+ np_box_ops.py: copied from [TF object detection API](https://github.com/tensorflow/models/blob/master/research/object_detection/utils/np_box_ops.py).
# https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/rpn/generate_anchors.py
# --------------------------------------------------------
# Faster R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick and Sean Bell
# --------------------------------------------------------
import numpy as np
from six.moves import range
# Verify that we compute the same anchors as Shaoqing's matlab implementation:
#
# >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat
# >> anchors
#
# anchors =
#
# -83 -39 100 56
# -175 -87 192 104
# -359 -183 376 200
# -55 -55 72 72
# -119 -119 136 136
# -247 -247 264 264
# -35 -79 52 96
# -79 -167 96 184
# -167 -343 184 360
# array([[ -83., -39., 100., 56.],
# [-175., -87., 192., 104.],
# [-359., -183., 376., 200.],
# [ -55., -55., 72., 72.],
# [-119., -119., 136., 136.],
# [-247., -247., 264., 264.],
# [ -35., -79., 52., 96.],
# [ -79., -167., 96., 184.],
# [-167., -343., 184., 360.]])
def generate_anchors(base_size=16, ratios=[0.5, 1, 2],
scales=2**np.arange(3, 6)):
"""
Generate anchor (reference) windows by enumerating aspect ratios X
scales wrt a reference (0, 0, 15, 15) window.
"""
base_anchor = np.array([1, 1, base_size, base_size], dtype='float32') - 1
ratio_anchors = _ratio_enum(base_anchor, ratios)
anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales)
for i in range(ratio_anchors.shape[0])])
return anchors
def _whctrs(anchor):
"""
Return width, height, x center, and y center for an anchor (window).
"""
w = anchor[2] - anchor[0] + 1
h = anchor[3] - anchor[1] + 1
x_ctr = anchor[0] + 0.5 * (w - 1)
y_ctr = anchor[1] + 0.5 * (h - 1)
return w, h, x_ctr, y_ctr
def _mkanchors(ws, hs, x_ctr, y_ctr):
"""
Given a vector of widths (ws) and heights (hs) around a center
(x_ctr, y_ctr), output a set of anchors (windows).
"""
ws = ws[:, np.newaxis]
hs = hs[:, np.newaxis]
anchors = np.hstack((x_ctr - 0.5 * (ws - 1),
y_ctr - 0.5 * (hs - 1),
x_ctr + 0.5 * (ws - 1),
y_ctr + 0.5 * (hs - 1)))
return anchors
def _ratio_enum(anchor, ratios):
"""
Enumerate a set of anchors for each aspect ratio wrt an anchor.
"""
w, h, x_ctr, y_ctr = _whctrs(anchor)
size = w * h
size_ratios = size / ratios
ws = np.round(np.sqrt(size_ratios))
hs = np.round(ws * ratios)
anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
return anchors
def _scale_enum(anchor, scales):
"""
Enumerate a set of anchors for each scale wrt an anchor.
"""
w, h, x_ctr, y_ctr = _whctrs(anchor)
ws = w * scales
hs = h * scales
anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
return anchors
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment