Commit 759f54b4 authored by Yuxin Wu

[MaskRCNN] add GN

parent ed32de25
@@ -9,7 +9,7 @@ from tensorpack.utils.argtools import graph_memoized
@graph_memoized
def get_dorefa(bitW, bitA, bitG):
    """
-    return the three quantization functions fw, fa, fg, for weights, activations and gradients respectively
+    Return the three quantization functions fw, fa, fg, for weights, activations and gradients, respectively.
    It's unsafe to call this function multiple times with different parameters.
    """
    def quantize(x, k):
......
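For context (not part of this diff): `get_dorefa` is the memoized factory behind the DoReFa-Net quantization helpers. A hedged usage sketch, with illustrative bit widths:

```python
import tensorflow as tf

# Sketch only: 1-bit weights, 2-bit activations, 6-bit gradients
# (the classic DoReFa-Net setting; the values here are illustrative).
fw, fa, fg = get_dorefa(1, 2, 6)

def quantized_nonlin(x):
    # DoReFa quantizes activations in [0, 1], so clip first.
    return fa(tf.clip_by_value(x, 0.0, 1.0))
```

Because the factory is `@graph_memoized`, calling it again with different bit widths in the same graph would hand back the first set of functions, which is why the docstring calls that unsafe.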
@@ -69,10 +69,12 @@ MaskRCNN results contain both bbox and segm mAP.
| R50-FPN | 37.5 | 37.9<sup>[1](#ft1)</sup> | 28h on 8 V100s | <details><summary>standard</summary>`MODE_MASK=False MODE_FPN=True` </details> |
| R50-C4 | 36.8/32.1 | | 39h on 8 P100s | <details><summary>quick</summary>`MODE_MASK=True FRCNN.BATCH_PER_IM=256`<br/>`TRAIN.LR_SCHEDULE=[150000,230000,280000]` </details> |
| R50-C4 | 37.8/33.1 | 37.8/32.8 | 51h on 8 V100s | <details><summary>standard</summary>`MODE_MASK=True` </details> |
-| R50-FPN | 38.1/34.9 | 38.6/34.5<sup>[1](#ft1)</sup> | 38h on 8 V100s | <details><summary>standard</summary>`MODE_MASK=True MODE_FPN=True` </details> |
+| R50-FPN | 38.1/34.9 | 38.6/34.5<sup>[1](#ft1)</sup> | 32h on 8 V100s | <details><summary>standard</summary>`MODE_MASK=True MODE_FPN=True` </details> |
+| R50-FPN | 38.5/34.8 | 38.6/34.2<sup>[2](#ft2)</sup> | 34h on 8 V100s | <details><summary>standard+convhead</summary>`MODE_MASK=True MODE_FPN=True`<br/>`FPN.FRCNN_HEAD_FUNC=fastrcnn_4conv1fc_head` </details> |
| R101-C4 | 40.8/35.1 | | 63h on 8 V100s | <details><summary>standard</summary>`MODE_MASK=True `<br/>`BACKBONE.RESNET_NUM_BLOCK=[3,4,23,3]` </details> |
<a id="ft1">1</a>: Slightly different configurations.
+<a id="ft2">2</a>: Numbers taken from the [Group Normalization](https://arxiv.org/abs/1803.08494) paper.
The two R50-C4 360k models have the same configuration __and mAP__
as the `R50-C4-2x` entries in
......
@@ -7,11 +7,39 @@ from tensorpack.tfutils.argscope import argscope, get_arg_scope
from tensorpack.tfutils.scope_utils import auto_reuse_variable_scope
from tensorpack.tfutils.varreplace import custom_getter_scope
from tensorpack.models import (
-    Conv2D, MaxPooling, BatchNorm, BNReLU)
+    Conv2D, MaxPooling, BatchNorm, BNReLU, layer_register)

from config import config as cfg

+@layer_register(log_shape=True)
+def GroupNorm(x, group=32, gamma_initializer=tf.constant_initializer(1.)):
+    shape = x.get_shape().as_list()
+    ndims = len(shape)
+    assert ndims == 4, shape
+    chan = shape[1]
+    assert chan % group == 0, chan
+    group_size = chan // group
+
+    orig_shape = tf.shape(x)
+    h, w = orig_shape[2], orig_shape[3]
+
+    # split the channel axis into (group, group_size) and compute moments
+    # per sample, per group, over (group_size, H, W)
+    x = tf.reshape(x, tf.stack([-1, group, group_size, h, w]))
+    mean, var = tf.nn.moments(x, [2, 3, 4], keep_dims=True)
+
+    # per-channel affine parameters, reshaped to broadcast over the groups
+    new_shape = [1, group, group_size, 1, 1]
+    beta = tf.get_variable('beta', [chan], initializer=tf.constant_initializer())
+    beta = tf.reshape(beta, new_shape)
+    gamma = tf.get_variable('gamma', [chan], initializer=gamma_initializer)
+    gamma = tf.reshape(gamma, new_shape)
+
+    # normalize, then restore the original NCHW shape; the inner op is left
+    # unnamed so that the returned tensor actually gets the name 'output'
+    out = tf.nn.batch_normalization(x, mean, var, beta, gamma, 1e-5)
+    return tf.reshape(out, orig_shape, name='output')
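`GroupNorm` is registered as a tensorpack layer, so it is called with a variable-scope name as the first argument and expects an NCHW tensor whose channel count is divisible by `group`. A minimal sketch (shapes and names here are illustrative, not from this diff):

```python
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 64, 14, 14])  # NCHW, 64 channels
y = GroupNorm('gn', x)  # default group=32 -> 32 groups of 2 channels each
# Statistics are computed per sample over (channels-in-group, H, W),
# so unlike BatchNorm the result does not depend on the batch size.
```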

def maybe_freeze_affine(getter, *args, **kwargs):
    # custom getter to freeze the affine params (beta/gamma) inside BatchNorm
    name = args[0] if len(args) else kwargs.get('name')
......
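For reference, a custom getter like this is installed with `custom_getter_scope` (imported at the top of this file). A hedged sketch of the intended use, not taken from the visible part of the diff:

```python
import tensorflow as tf

l = tf.placeholder(tf.float32, [None, 64])  # an illustrative feature
# Variables created inside this scope are requested through
# maybe_freeze_affine, which can mark beta/gamma as non-trainable.
with custom_getter_scope(maybe_freeze_affine):
    l = BatchNorm('bn', l)
```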
@@ -165,6 +165,7 @@ def finalize_configs(is_training):
        size_mult = _C.FPN.RESOLUTION_REQUIREMENT * 1.
        _C.PREPROC.MAX_SIZE = np.ceil(_C.PREPROC.MAX_SIZE / size_mult) * size_mult
        assert _C.FPN.PROPOSAL_MODE in ['Level', 'Joint']
+        assert _C.FPN.FRCNN_HEAD_FUNC.endswith('_head')

    if is_training:
        os.environ['TF_AUTOTUNE_THRESHOLD'] = '1'
......
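The new assert guards `FPN.FRCNN_HEAD_FUNC`, a string naming one of the `fastrcnn_*_head` functions. Presumably it is resolved by name at model-build time, roughly as in this sketch (the module and variable names are assumptions, not shown in this diff):

```python
import tensorflow as tf
import model  # hypothetical: the module defining the fastrcnn_*_head layers

roi_feature = tf.placeholder(tf.float32, [None, 256, 7, 7])  # assumed pooled ROIs
head_func = getattr(model, cfg.FPN.FRCNN_HEAD_FUNC)  # e.g. 'fastrcnn_2fc_head'
cls_logits, reg_logits = head_func('fastrcnn', roi_feature, cfg.DATA.NUM_CLASS)
```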
@@ -9,6 +9,7 @@ from tensorpack.tfutils.scope_utils import under_name_scope
from tensorpack.models import (
    Conv2D, FullyConnected, layer_register)

+from basemodel import GroupNorm
from utils.box_ops import pairwise_iou
from config import config as cfg
@@ -116,50 +117,6 @@ def fastrcnn_outputs(feature, num_classes):
    return classification, box_regression


-@layer_register(log_shape=True)
-def fastrcnn_2fc_head(feature, num_classes):
-    """
-    Args:
-        feature (any shape):
-        num_classes (int): num_category + 1
-
-    Returns:
-        cls_logits (N x num_class), reg_logits (N x num_class-1 x 4)
-    """
-    dim = cfg.FPN.FRCNN_FC_HEAD_DIM
-    init = tf.variance_scaling_initializer()
-    hidden = FullyConnected('fc6', feature, dim, kernel_initializer=init, activation=tf.nn.relu)
-    hidden = FullyConnected('fc7', hidden, dim, kernel_initializer=init, activation=tf.nn.relu)
-    return fastrcnn_outputs('outputs', hidden, num_classes)
-
-
-@layer_register(log_shape=True)
-def fastrcnn_Xconv1fc_head(feature, num_classes, num_convs):
-    """
-    Args:
-        feature (any shape):
-        num_classes (int): num_category + 1
-        num_convs (int): number of conv layers
-
-    Returns:
-        cls_logits (N x num_class), reg_logits (N x num_class-1 x 4)
-    """
-    l = feature
-    with argscope(Conv2D, data_format='channels_first',
-                  kernel_initializer=tf.variance_scaling_initializer(
-                      scale=2.0, mode='fan_out', distribution='normal')):
-        for k in range(num_convs):
-            l = Conv2D('conv{}'.format(k), l, cfg.FPN.FRCNN_CONV_HEAD_DIM, 3, activation=tf.nn.relu)
-        l = FullyConnected('fc', l, cfg.FPN.FRCNN_FC_HEAD_DIM,
-                           kernel_initializer=tf.variance_scaling_initializer(), activation=tf.nn.relu)
-    return fastrcnn_outputs('outputs', l, num_classes)
-
-
-def fastrcnn_4conv1fc_head(*args, **kwargs):
-    # This head was used in the Group Normalization paper
-    return fastrcnn_Xconv1fc_head(*args, num_convs=4, **kwargs)


@under_name_scope()
def fastrcnn_losses(labels, label_logits, fg_boxes, fg_box_logits):
    """
@@ -254,3 +211,58 @@ def fastrcnn_predictions(boxes, probs):
    filtered_selection = tf.gather(selected_indices, topk_indices)
    filtered_selection = tf.reverse(filtered_selection, axis=[1], name='filtered_indices')
    return filtered_selection, topk_probs
"""
FC Heads:
"""
@layer_register(log_shape=True)
def fastrcnn_2fc_head(feature, num_classes):
"""
Args:
feature (any shape):
num_classes(int): num_category + 1
Returns:
cls_logits (Nxnum_class), reg_logits (Nx num_class-1 x 4)
"""
dim = cfg.FPN.FRCNN_FC_HEAD_DIM
init = tf.variance_scaling_initializer()
hidden = FullyConnected('fc6', feature, dim, kernel_initializer=init, activation=tf.nn.relu)
hidden = FullyConnected('fc7', hidden, dim, kernel_initializer=init, activation=tf.nn.relu)
return fastrcnn_outputs('outputs', hidden, num_classes)
@layer_register(log_shape=True)
def fastrcnn_Xconv1fc_head(feature, num_classes, num_convs, norm=None):
"""
Args:
feature (any shape):
num_classes(int): num_category + 1
num_convs (int): number of conv layers
norm (str or None): either None or 'GN'
Returns:
cls_logits (Nxnum_class), reg_logits (Nx num_class-1 x 4)
"""
l = feature
with argscope(Conv2D, data_format='channels_first',
kernel_initializer=tf.variance_scaling_initializer(
scale=2.0, mode='fan_out', distribution='normal')):
for k in range(num_convs):
l = Conv2D('conv{}'.format(k), l, cfg.FPN.FRCNN_CONV_HEAD_DIM, 3, activation=tf.nn.relu)
if norm is not None:
l = GroupNorm('gn{}'.format(k), l)
l = FullyConnected('fc', l, cfg.FPN.FRCNN_FC_HEAD_DIM,
kernel_initializer=tf.variance_scaling_initializer(), activation=tf.nn.relu)
return fastrcnn_outputs('outputs', l, num_classes)
def fastrcnn_4conv1fc_head(*args, **kwargs):
return fastrcnn_Xconv1fc_head(*args, num_convs=4, **kwargs)
def fastrcnn_4conv1fc_gn_head(*args, **kwargs):
return fastrcnn_Xconv1fc_head(*args, num_convs=4, norm='GN', **kwargs)
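With these names registered, the conv head with GroupNorm can be enabled purely through `cfg.FPN.FRCNN_HEAD_FUNC`. A hedged direct call for illustration (`roi_feature` is an assumed pooled ROI tensor, e.g. from ROIAlign; in training the function is selected via the config string instead):

```python
import tensorflow as tf

roi_feature = tf.placeholder(tf.float32, [None, 256, 7, 7])  # assumed NCHW ROIs
# 80 COCO categories + background -> num_classes = 81
cls_logits, reg_logits = fastrcnn_4conv1fc_gn_head(
    'fastrcnn', roi_feature, num_classes=81)
# cls_logits: (N, 81); reg_logits: (N, 80, 4), per the docstring above
```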