Commit e44e9c04 authored by Yuxin Wu

migrate all examples to use tf.layers naming convention (#627)

parent fe33c833
@@ -13,7 +13,7 @@ It's Yet Another TF wrapper, but different in:
 1. Focus on __training speed__.
   + Speed comes for free with tensorpack -- it uses TensorFlow in the __efficient way__ with no extra overhead.
-    On different CNNs, it runs [1.1~3.5x faster](https://github.com/tensorpack/benchmarks/tree/master/other-wrappers) than the equivalent Keras code.
+    On different CNNs, it runs [1.2~4x faster](https://github.com/tensorpack/benchmarks/tree/master/other-wrappers) than the equivalent Keras code.
   + Data-parallel multi-GPU training is off-the-shelf to use. It scales as well as Google's [official benchmark](https://www.tensorflow.org/performance/benchmarks).
...
@@ -24,9 +24,9 @@ def maybe_freeze_affine(getter, *args, **kwargs):
 @contextmanager
 def resnet_argscope():
-    with argscope([Conv2D, MaxPooling, BatchNorm], data_format='NCHW'), \
+    with argscope([Conv2D, MaxPooling, BatchNorm], data_format='channels_first'), \
             argscope(Conv2D, use_bias=False), \
-            argscope(BatchNorm, use_local_stat=False), \
+            argscope(BatchNorm, training=False), \
             custom_getter_scope(maybe_freeze_affine):
         yield
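For reference, the renames this commit applies everywhere follow the tf.layers convention. A minimal sketch of a new-style call; the tensor `l` and the concrete numbers are illustrative, not taken from the diff:

```python
# tensorpack argument renames in this commit (old -> tf.layers style):
#   kernel_shape -> kernel_size        stride   -> strides
#   nl           -> activation         W_init   -> kernel_initializer
#   b_init       -> bias_initializer   Deconv2D -> Conv2DTranspose
#   BatchNorm: use_local_stat -> training
#   data_format: 'NCHW'/'NHWC' -> 'channels_first'/'channels_last'
l = Conv2D('conv1', l, 64, kernel_size=3, strides=1,
           activation=tf.nn.relu,
           kernel_initializer=tf.variance_scaling_initializer(scale=2.0))
```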
@@ -50,36 +50,36 @@ def image_preprocess(image, bgr=True):
 def get_bn(zero_init=False):
     if zero_init:
-        return lambda x, name: BatchNorm('bn', x, gamma_init=tf.zeros_initializer())
+        return lambda x, name=None: BatchNorm('bn', x, gamma_init=tf.zeros_initializer())
     else:
-        return lambda x, name: BatchNorm('bn', x)
+        return lambda x, name=None: BatchNorm('bn', x)

-def resnet_shortcut(l, n_out, stride, nl=tf.identity):
+def resnet_shortcut(l, n_out, stride, activation=tf.identity):
     data_format = get_arg_scope()['Conv2D']['data_format']
-    n_in = l.get_shape().as_list()[1 if data_format == 'NCHW' else 3]
+    n_in = l.get_shape().as_list()[1 if data_format in ['NCHW', 'channels_first'] else 3]
     if n_in != n_out:   # change dimension when channel is not the same
         if stride == 2:
             l = l[:, :, :-1, :-1]
             return Conv2D('convshortcut', l, n_out, 1,
-                          stride=stride, padding='VALID', nl=nl)
+                          strides=stride, padding='VALID', activation=activation)
         else:
             return Conv2D('convshortcut', l, n_out, 1,
-                          stride=stride, nl=nl)
+                          strides=stride, activation=activation)
     else:
         return l

 def resnet_bottleneck(l, ch_out, stride):
     l, shortcut = l, l
-    l = Conv2D('conv1', l, ch_out, 1, nl=BNReLU)
+    l = Conv2D('conv1', l, ch_out, 1, activation=BNReLU)
     if stride == 2:
         l = tf.pad(l, [[0, 0], [0, 0], [0, 1], [0, 1]])
-        l = Conv2D('conv2', l, ch_out, 3, stride=2, nl=BNReLU, padding='VALID')
+        l = Conv2D('conv2', l, ch_out, 3, strides=2, activation=BNReLU, padding='VALID')
     else:
-        l = Conv2D('conv2', l, ch_out, 3, stride=stride, nl=BNReLU)
+        l = Conv2D('conv2', l, ch_out, 3, strides=stride, activation=BNReLU)
-    l = Conv2D('conv3', l, ch_out * 4, 1, nl=get_bn(zero_init=True))
+    l = Conv2D('conv3', l, ch_out * 4, 1, activation=get_bn(zero_init=True))
-    return l + resnet_shortcut(shortcut, ch_out * 4, stride, nl=get_bn(zero_init=False))
+    return l + resnet_shortcut(shortcut, ch_out * 4, stride, activation=get_bn(zero_init=False))

 def resnet_group(l, name, block_func, features, count, stride):
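The pad-then-VALID pattern in `resnet_bottleneck` (and in `pretrained_resnet_conv4` below) reproduces Caffe2's alignment for stride-2 convolutions instead of TensorFlow's SAME padding. A quick arithmetic check, not part of the commit (sizes illustrative):

```python
# VALID conv output size is (n - k) // s + 1.  Pad one pixel on the
# right/bottom, then take a 3x3, stride-2 VALID conv of a 56-pixel axis:
n, k, s = 56, 3, 2
out = (n + 1 - k) // s + 1      # -> 28, same size as SAME padding,
assert out == n // 2            # but sampled on Caffe2's grid
```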
@@ -97,9 +97,9 @@ def pretrained_resnet_conv4(image, num_blocks, freeze_c2=True):
     assert len(num_blocks) == 3
     with resnet_argscope():
         l = tf.pad(image, [[0, 0], [0, 0], [2, 3], [2, 3]])
-        l = Conv2D('conv0', l, 64, 7, stride=2, nl=BNReLU, padding='VALID')
+        l = Conv2D('conv0', l, 64, 7, strides=2, activation=BNReLU, padding='VALID')
         l = tf.pad(l, [[0, 0], [0, 0], [0, 1], [0, 1]])
-        l = MaxPooling('pool0', l, shape=3, stride=2, padding='VALID')
+        l = MaxPooling('pool0', l, 3, strides=2, padding='VALID')
         c2 = resnet_group(l, 'group0', resnet_bottleneck, 64, num_blocks[0], 1)
         # TODO replace var by const to enable optimization
         if freeze_c2:
...
@@ -8,7 +8,7 @@ from tensorpack.tfutils.summary import add_moving_summary
 from tensorpack.tfutils.argscope import argscope
 from tensorpack.tfutils.scope_utils import under_name_scope
 from tensorpack.models import (
-    Conv2D, FullyConnected, GlobalAvgPooling, layer_register, Deconv2D)
+    Conv2D, FullyConnected, GlobalAvgPooling, layer_register, Conv2DTranspose)
 from utils.box_ops import pairwise_iou
 import config
@@ -34,9 +34,9 @@ def rpn_head(featuremap, channel, num_anchors):
         label_logits: fHxfWxNA
         box_logits: fHxfWxNAx4
     """
-    with argscope(Conv2D, data_format='NCHW',
-                  W_init=tf.random_normal_initializer(stddev=0.01)):
+    with argscope(Conv2D, data_format='channels_first',
+                  kernel_initializer=tf.random_normal_initializer(stddev=0.01)):
-        hidden = Conv2D('conv0', featuremap, channel, 3, nl=tf.nn.relu)
+        hidden = Conv2D('conv0', featuremap, channel, 3, activation=tf.nn.relu)
         label_logits = Conv2D('class', hidden, num_anchors, 1)
         box_logits = Conv2D('box', hidden, 4 * num_anchors, 1)
@@ -384,13 +384,13 @@ def fastrcnn_head(feature, num_classes):
     Returns:
         cls_logits (Nxnum_class), reg_logits (Nx num_class-1 x 4)
     """
-    feature = GlobalAvgPooling('gap', feature, data_format='NCHW')
+    feature = GlobalAvgPooling('gap', feature, data_format='channels_first')
     classification = FullyConnected(
         'class', feature, num_classes,
-        W_init=tf.random_normal_initializer(stddev=0.01))
+        kernel_initializer=tf.random_normal_initializer(stddev=0.01))
     box_regression = FullyConnected(
         'box', feature, (num_classes - 1) * 4,
-        W_init=tf.random_normal_initializer(stddev=0.001))
+        kernel_initializer=tf.random_normal_initializer(stddev=0.001))
     box_regression = tf.reshape(box_regression, (-1, num_classes - 1, 4))
     return classification, box_regression
@@ -501,11 +501,11 @@ def maskrcnn_head(feature, num_class):
     Returns:
         mask_logits (N x num_category x 14 x 14):
     """
-    with argscope([Conv2D, Deconv2D], data_format='NCHW',
-                  W_init=tf.variance_scaling_initializer(
+    with argscope([Conv2D, Conv2DTranspose], data_format='channels_first',
+                  kernel_initializer=tf.variance_scaling_initializer(
                       scale=2.0, mode='fan_out', distribution='normal')):
         # c2's MSRAFill is fan_out
-        l = Deconv2D('deconv', feature, 256, 2, stride=2, nl=tf.nn.relu)
+        l = Conv2DTranspose('deconv', feature, 256, 2, strides=2, activation=tf.nn.relu)
         l = Conv2D('conv', l, num_class - 1, 1)
         return l
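The stride-2 `Conv2DTranspose` doubles the mask resolution. A shape check under the docstring's 14x14 output, which implies a 7x7 input feature; these sizes are inferred, not stated in the diff:

```python
in_size, k, s = 7, 2, 2
out_size = (in_size - 1) * s + k     # transposed-conv (VALID) arithmetic
assert out_size == 14                # 2x2 kernel with strides=2: 7x7 -> 14x14
```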
...
@@ -10,12 +10,9 @@ import cv2
 import tensorflow as tf

-from tensorpack import logger, QueueInput, InputDesc, PlaceholderInput, TowerContext
-from tensorpack.models import *
-from tensorpack.callbacks import *
-from tensorpack.train import *
+from tensorpack import *
 from tensorpack.dataflow import imgaug
-from tensorpack.tfutils import argscope, get_model_loader
+from tensorpack.tfutils import argscope, get_model_loader, model_utils
 from tensorpack.tfutils.scope_utils import under_name_scope
 from tensorpack.utils.gpu import get_nr_gpu
@@ -55,7 +52,7 @@ def channel_shuffle(l, group):
     return l

-def BN(x, name):
+def BN(x, name=None):
     return BatchNorm('bn', x)
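The `name=None` default matters because tf.layers-style layers invoke their `activation` with a single tensor argument, so the old two-argument `def BN(x, name)` form would raise a TypeError there; this reading of the change is ours, not stated in the diff. With the default in place, `BN` can be passed directly (a hypothetical call, for illustration):

```python
# BN now works wherever an activation callable is expected:
l = Conv2D('conv_dw', l, 64, 3, activation=BN)   # invoked internally as BN(output)
```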
@@ -206,6 +203,7 @@ if __name__ == '__main__':
     input.setup(input_desc)
     with TowerContext('', is_training=True):
         model.build_graph(*input.get_input_tensors())
+        model_utils.describe_trainable_vars()
     tf.profiler.profile(
         tf.get_default_graph(),
...
@@ -74,7 +74,7 @@ class Model(ModelDesc):
         embeddingW = tf.get_variable('embedding', [VOCAB_SIZE, HIDDEN_SIZE], initializer=initializer)
         input_feature = tf.nn.embedding_lookup(embeddingW, input)  # B x seqlen x hiddensize
-        input_feature = Dropout(input_feature, DROPOUT)
+        input_feature = Dropout(input_feature, rate=DROPOUT)
         with tf.variable_scope('LSTM', initializer=initializer):
             input_list = tf.unstack(input_feature, num=SEQ_LEN, axis=1)  # seqlen x (Bxhidden)
@@ -89,7 +89,9 @@ class Model(ModelDesc):
         # seqlen x (Bxrnnsize)
         output = tf.reshape(tf.concat(outputs, 1), [-1, HIDDEN_SIZE])  # (Bxseqlen) x hidden
-        logits = FullyConnected('fc', output, VOCAB_SIZE, nl=tf.identity, W_init=initializer, b_init=initializer)
+        logits = FullyConnected('fc', output, VOCAB_SIZE,
+                                activation=tf.identity, kernel_initializer=initializer,
+                                bias_initializer=initializer)
         xent_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
             logits=logits, labels=tf.reshape(nextinput, [-1]))
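The explicit `rate=` keyword is worth calling out: tf.layers dropout takes the probability of *dropping* a unit, while the old positional tensorpack argument was a keep probability, so naming the argument avoids silently inverted semantics (this reading of the rename is ours, not stated in the diff). A minimal TF1-style illustration:

```python
import tensorflow as tf

x = tf.ones([8, 16])
# rate=0.4 drops ~40% of units; a keep_prob of 0.4 would have kept 40%.
y = tf.layers.dropout(x, rate=0.4, training=True)
```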
...
@@ -27,14 +27,13 @@ def preactivation_block(input, num_filters, stride=1):
     # residual
     net = BNReLU(input)
-    residual = Conv2D('conv1', net, num_filters, kernel_shape=3, stride=stride, use_bias=False, nl=BNReLU)
+    residual = Conv2D('conv1', net, num_filters, kernel_size=3, strides=stride, use_bias=False, activation=BNReLU)
-    residual = Conv2D('conv2', residual, num_filters, kernel_shape=3, stride=1, use_bias=False, nl=tf.identity)
+    residual = Conv2D('conv2', residual, num_filters, kernel_size=3, strides=1, use_bias=False)
     # identity
     shortcut = input
     if stride != 1 or num_filters_in != num_filters:
-        shortcut = Conv2D('shortcut', net, num_filters, kernel_shape=1, stride=stride, use_bias=False,
-                          nl=tf.identity)
+        shortcut = Conv2D('shortcut', net, num_filters, kernel_size=1, strides=stride, use_bias=False)
     return shortcut + residual
@@ -54,17 +53,17 @@ class ResNet_Cifar(ModelDesc):
         image = tf.transpose(image, [0, 3, 1, 2])

         pytorch_default_init = tf.variance_scaling_initializer(scale=1.0 / 3, mode='fan_in', distribution='uniform')
-        with argscope([Conv2D, BatchNorm, GlobalAvgPooling], data_format='NCHW'), \
-                argscope(Conv2D, W_init=pytorch_default_init):
+        with argscope([Conv2D, BatchNorm, GlobalAvgPooling], data_format='channels_first'), \
+                argscope(Conv2D, kernel_initializer=pytorch_default_init):
-            net = Conv2D('conv0', image, 64, kernel_shape=3, stride=1, use_bias=False)
+            net = Conv2D('conv0', image, 64, kernel_size=3, strides=1, use_bias=False)
             for i, blocks_in_module in enumerate(MODULE_SIZES):
                 for j in range(blocks_in_module):
                     stride = 2 if j == 0 and i > 0 else 1
                     with tf.variable_scope("res%d.%d" % (i, j)):
                         net = preactivation_block(net, FILTER_SIZES[i], stride)
             net = GlobalAvgPooling('gap', net)
-            logits = FullyConnected('linear', net, out_dim=CLASS_NUM,
-                                    nl=tf.identity, W_init=tf.random_normal_initializer(stddev=1e-3))
+            logits = FullyConnected('linear', net, CLASS_NUM,
+                                    kernel_initializer=tf.random_normal_initializer(stddev=1e-3))
         ce_cost = tf.nn.softmax_cross_entropy_with_logits(labels=label, logits=logits)
         ce_cost = tf.reduce_mean(ce_cost, name='cross_entropy_loss')
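`pytorch_default_init` reproduces PyTorch's default uniform init, U(-1/sqrt(fan_in), 1/sqrt(fan_in)): a uniform variance-scaling distribution uses the bound sqrt(3 * scale / fan_in), so scale = 1/3 lands exactly on PyTorch's bound. A quick check with an illustrative fan-in:

```python
import math

scale, fan_in = 1.0 / 3, 3 * 3 * 64        # fan_in of a 3x3, 64-channel conv (illustrative)
bound = math.sqrt(3.0 * scale / fan_in)    # uniform limit used by variance_scaling
assert math.isclose(bound, 1.0 / math.sqrt(fan_in))
```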
...
@@ -63,7 +63,7 @@ class Model(ModelDesc):
             with tf.variable_scope(name):
                 b1 = l if first else BNReLU(l)
-                c1 = Conv2D('conv1', b1, out_channel, stride=stride1, nl=BNReLU)
+                c1 = Conv2D('conv1', b1, out_channel, strides=stride1, activation=BNReLU)
                 c2 = Conv2D('conv2', c1, out_channel)
                 if increase_dim:
                     l = AvgPooling('pool', l, 2)
@@ -72,10 +72,10 @@ class Model(ModelDesc):
                 l = c2 + l
                 return l

-        with argscope([Conv2D, AvgPooling, BatchNorm, GlobalAvgPooling], data_format='NCHW'), \
-                argscope(Conv2D, nl=tf.identity, use_bias=False, kernel_shape=3,
-                         W_init=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')):
+        with argscope([Conv2D, AvgPooling, BatchNorm, GlobalAvgPooling], data_format='channels_first'), \
+                argscope(Conv2D, use_bias=False, kernel_size=3,
+                         kernel_initializer=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')):
-            l = Conv2D('conv0', image, 16, nl=BNReLU)
+            l = Conv2D('conv0', image, 16, activation=BNReLU)
             l = residual('res1.0', l, first=True)
             for k in range(1, self.n):
                 l = residual('res1.{}'.format(k), l)
@@ -93,7 +93,7 @@ class Model(ModelDesc):
             # 8,c=64
             l = GlobalAvgPooling('gap', l)
-            logits = FullyConnected('linear', l, out_dim=10, nl=tf.identity)
+            logits = FullyConnected('linear', l, 10)
             tf.nn.softmax(logits, name='output')
         cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
...
@@ -43,17 +43,17 @@ class Model(ModelDesc):
         image = tf.pad(image, [[0, 0], [3, 3], [3, 3], [0, 0]])
         image = tf.transpose(image, [0, 3, 1, 2])
         with argscope([Conv2D, MaxPooling, GlobalAvgPooling, BatchNorm],
-                      data_format='NCHW'), \
-                argscope(Conv2D, nl=tf.identity, use_bias=False):
+                      data_format='channels_first'), \
+                argscope(Conv2D, use_bias=False):
             logits = (LinearWrap(image)
-                      .Conv2D('conv0', 64, 7, stride=2, nl=BNReLU, padding='VALID')
-                      .MaxPooling('pool0', shape=3, stride=2, padding='SAME')
+                      .Conv2D('conv0', 64, 7, strides=2, activation=BNReLU, padding='VALID')
+                      .MaxPooling('pool0', 3, strides=2, padding='SAME')
                       .apply(resnet_group, 'group0', bottleneck, 64, blocks[0], 1)
                       .apply(resnet_group, 'group1', bottleneck, 128, blocks[1], 2)
                       .apply(resnet_group, 'group2', bottleneck, 256, blocks[2], 2)
                       .apply(resnet_group, 'group3', bottleneck, 512, blocks[3], 2)
                       .GlobalAvgPooling('gap')
-                      .FullyConnected('linear', 1000, nl=tf.identity)())
+                      .FullyConnected('linear', 1000)())
         tf.nn.softmax(logits, name='prob')
         ImageNetModel.compute_loss_and_error(logits, label)
...
@@ -11,11 +11,11 @@ from tensorpack.models import (
     LinearWrap)

-def resnet_shortcut(l, n_out, stride, nl=tf.identity):
+def resnet_shortcut(l, n_out, stride, activation=tf.identity):
     data_format = get_arg_scope()['Conv2D']['data_format']
-    n_in = l.get_shape().as_list()[1 if data_format == 'NCHW' else 3]
+    n_in = l.get_shape().as_list()[1 if data_format in ['NCHW', 'channels_first'] else 3]
     if n_in != n_out:   # change dimension when channel is not the same
-        return Conv2D('convshortcut', l, n_out, 1, stride=stride, nl=nl)
+        return Conv2D('convshortcut', l, n_out, 1, strides=stride, activation=activation)
     else:
         return l
@@ -34,14 +34,14 @@ def get_bn(zero_init=False):
     Zero init gamma is good for resnet. See https://arxiv.org/abs/1706.02677.
     """
     if zero_init:
-        return lambda x, name: BatchNorm('bn', x, gamma_init=tf.zeros_initializer())
+        return lambda x, name=None: BatchNorm('bn', x, gamma_initializer=tf.zeros_initializer())
     else:
-        return lambda x, name: BatchNorm('bn', x)
+        return lambda x, name=None: BatchNorm('bn', x)

 def preresnet_basicblock(l, ch_out, stride, preact):
     l, shortcut = apply_preactivation(l, preact)
-    l = Conv2D('conv1', l, ch_out, 3, stride=stride, nl=BNReLU)
+    l = Conv2D('conv1', l, ch_out, 3, strides=stride, activation=BNReLU)
     l = Conv2D('conv2', l, ch_out, 3)
     return l + resnet_shortcut(shortcut, ch_out, stride)
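As the docstring notes, zero-initializing the last BN's gamma makes each residual block an identity function at the start of training: the normalized branch is multiplied by zero, so only the shortcut passes through. A numpy sketch of the effect (shapes illustrative):

```python
import numpy as np

branch = np.random.randn(4, 64)        # residual-branch output entering the BN
gamma, beta = 0.0, 0.0                 # zero-init gamma, default zero beta
bn = gamma * (branch - branch.mean(0)) / branch.std(0) + beta
assert np.allclose(bn, 0)              # block output == shortcut at init
```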
@@ -49,8 +49,8 @@ def preresnet_basicblock(l, ch_out, stride, preact):
 def preresnet_bottleneck(l, ch_out, stride, preact):
     # stride is applied on the second conv, following fb.resnet.torch
     l, shortcut = apply_preactivation(l, preact)
-    l = Conv2D('conv1', l, ch_out, 1, nl=BNReLU)
-    l = Conv2D('conv2', l, ch_out, 3, stride=stride, nl=BNReLU)
+    l = Conv2D('conv1', l, ch_out, 1, activation=BNReLU)
+    l = Conv2D('conv2', l, ch_out, 3, strides=stride, activation=BNReLU)
     l = Conv2D('conv3', l, ch_out * 4, 1)
     return l + resnet_shortcut(shortcut, ch_out * 4, stride)
@@ -70,9 +70,9 @@ def preresnet_group(l, name, block_func, features, count, stride):
 def resnet_basicblock(l, ch_out, stride):
     shortcut = l
-    l = Conv2D('conv1', l, ch_out, 3, stride=stride, nl=BNReLU)
-    l = Conv2D('conv2', l, ch_out, 3, nl=get_bn(zero_init=True))
-    return l + resnet_shortcut(shortcut, ch_out, stride, nl=get_bn(zero_init=False))
+    l = Conv2D('conv1', l, ch_out, 3, strides=stride, activation=BNReLU)
+    l = Conv2D('conv2', l, ch_out, 3, activation=get_bn(zero_init=True))
+    return l + resnet_shortcut(shortcut, ch_out, stride, activation=get_bn(zero_init=False))

 def resnet_bottleneck(l, ch_out, stride, stride_first=False):
@@ -80,27 +80,27 @@ def resnet_bottleneck(l, ch_out, stride, stride_first=False):
         stride_first: original resnet put stride on first conv. fb.resnet.torch put stride on second conv.
     """
     shortcut = l
-    l = Conv2D('conv1', l, ch_out, 1, stride=stride if stride_first else 1, nl=BNReLU)
-    l = Conv2D('conv2', l, ch_out, 3, stride=1 if stride_first else stride, nl=BNReLU)
-    l = Conv2D('conv3', l, ch_out * 4, 1, nl=get_bn(zero_init=True))
-    return l + resnet_shortcut(shortcut, ch_out * 4, stride, nl=get_bn(zero_init=False))
+    l = Conv2D('conv1', l, ch_out, 1, strides=stride if stride_first else 1, activation=BNReLU)
+    l = Conv2D('conv2', l, ch_out, 3, strides=1 if stride_first else stride, activation=BNReLU)
+    l = Conv2D('conv3', l, ch_out * 4, 1, activation=get_bn(zero_init=True))
+    return l + resnet_shortcut(shortcut, ch_out * 4, stride, activation=get_bn(zero_init=False))

 def se_resnet_bottleneck(l, ch_out, stride):
     shortcut = l
-    l = Conv2D('conv1', l, ch_out, 1, nl=BNReLU)
-    l = Conv2D('conv2', l, ch_out, 3, stride=stride, nl=BNReLU)
-    l = Conv2D('conv3', l, ch_out * 4, 1, nl=get_bn(zero_init=True))
+    l = Conv2D('conv1', l, ch_out, 1, activation=BNReLU)
+    l = Conv2D('conv2', l, ch_out, 3, strides=stride, activation=BNReLU)
+    l = Conv2D('conv3', l, ch_out * 4, 1, activation=get_bn(zero_init=True))
     squeeze = GlobalAvgPooling('gap', l)
-    squeeze = FullyConnected('fc1', squeeze, ch_out // 4, nl=tf.nn.relu)
-    squeeze = FullyConnected('fc2', squeeze, ch_out * 4, nl=tf.nn.sigmoid)
+    squeeze = FullyConnected('fc1', squeeze, ch_out // 4, activation=tf.nn.relu)
+    squeeze = FullyConnected('fc2', squeeze, ch_out * 4, activation=tf.nn.sigmoid)
     data_format = get_arg_scope()['Conv2D']['data_format']
-    ch_ax = 1 if data_format == 'NCHW' else 3
+    ch_ax = 1 if data_format in ['NCHW', 'channels_first'] else 3
     shape = [-1, 1, 1, 1]
     shape[ch_ax] = ch_out * 4
     l = l * tf.reshape(squeeze, shape)
-    return l + resnet_shortcut(shortcut, ch_out * 4, stride, nl=get_bn(zero_init=False))
+    return l + resnet_shortcut(shortcut, ch_out * 4, stride, activation=get_bn(zero_init=False))

 def resnet_group(l, name, block_func, features, count, stride):
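In `se_resnet_bottleneck`, the squeeze vector has shape N x (4*ch_out) and must gate an N x C x H x W feature map, hence the reshape to put channels on the channel axis before the multiply. A numpy sketch of that broadcast (sizes illustrative):

```python
import numpy as np

n, c, h, w = 2, 8, 4, 4
feat = np.random.randn(n, c, h, w)        # channels_first feature map
gate = np.random.rand(n, c)               # sigmoid output of 'fc2'
scaled = feat * gate.reshape(n, c, 1, 1)  # per-channel scaling via broadcasting
assert scaled.shape == feat.shape
```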
@@ -114,15 +114,15 @@ def resnet_group(l, name, block_func, features, count, stride):
 def resnet_backbone(image, num_blocks, group_func, block_func):
-    with argscope(Conv2D, nl=tf.identity, use_bias=False,
-                  W_init=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')):
+    with argscope(Conv2D, use_bias=False,
+                  kernel_initializer=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')):
         logits = (LinearWrap(image)
-                  .Conv2D('conv0', 64, 7, stride=2, nl=BNReLU)
+                  .Conv2D('conv0', 64, 7, strides=2, activation=BNReLU)
                   .MaxPooling('pool0', shape=3, stride=2, padding='SAME')
                   .apply(group_func, 'group0', block_func, 64, num_blocks[0], 1)
                   .apply(group_func, 'group1', block_func, 128, num_blocks[1], 2)
                   .apply(group_func, 'group2', block_func, 256, num_blocks[2], 2)
                   .apply(group_func, 'group3', block_func, 512, num_blocks[3], 2)
                   .GlobalAvgPooling('gap')
-                  .FullyConnected('linear', 1000, nl=tf.identity)())
+                  .FullyConnected('linear', 1000)())
     return logits
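`LinearWrap` exposes every registered tensorpack layer as a chainable method, `.apply` splices in a plain function, and the trailing `()` returns the underlying tensor. The chain above is roughly equivalent to the following nested calls, written here with the new argument names (a sketch, not part of the diff):

```python
l = Conv2D('conv0', image, 64, 7, strides=2, activation=BNReLU)
l = MaxPooling('pool0', l, 3, strides=2, padding='SAME')
l = group_func(l, 'group0', block_func, 64, num_blocks[0], 1)
l = group_func(l, 'group1', block_func, 128, num_blocks[1], 2)
l = group_func(l, 'group2', block_func, 256, num_blocks[2], 2)
l = group_func(l, 'group3', block_func, 512, num_blocks[3], 2)
l = GlobalAvgPooling('gap', l)
logits = FullyConnected('linear', l, 1000)
```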
@@ -46,12 +46,12 @@ class Model(ModelDesc):
         }
         defs, block_func = cfg[DEPTH]
-        with argscope(Conv2D, nl=tf.identity, use_bias=False,
-                      W_init=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')), \
-                argscope([Conv2D, MaxPooling, GlobalAvgPooling, BatchNorm], data_format='NCHW'):
+        with argscope(Conv2D, use_bias=False,
+                      kernel_initializer=tf.variance_scaling_initializer(scale=2.0, mode='fan_out')), \
+                argscope([Conv2D, MaxPooling, GlobalAvgPooling, BatchNorm], data_format='channels_first'):
             convmaps = (LinearWrap(image)
-                        .Conv2D('conv0', 64, 7, stride=2, nl=BNReLU)
-                        .MaxPooling('pool0', shape=3, stride=2, padding='SAME')
+                        .Conv2D('conv0', 64, 7, strides=2, activation=BNReLU)
+                        .MaxPooling('pool0', 3, strides=2, padding='SAME')
                         .apply(preresnet_group, 'group0', block_func, 64, defs[0], 1)
                         .apply(preresnet_group, 'group1', block_func, 128, defs[1], 2)
                         .apply(preresnet_group, 'group2', block_func, 256, defs[2], 2)
@@ -59,7 +59,7 @@ class Model(ModelDesc):
         print(convmaps)
         logits = (LinearWrap(convmaps)
                   .GlobalAvgPooling('gap')
-                  .FullyConnected('linearnew', 1000, nl=tf.identity)())
+                  .FullyConnected('linearnew', 1000)())
         loss = compute_loss_and_error(logits, label)
         wd_cost = regularize_cost('.*/W', l2_regularizer(1e-4), name='l2_regularize_loss')
...
-../ResNet/imagenet_utils.py
+../ImageNetModels/imagenet_utils.py
\ No newline at end of file
@@ -39,10 +39,10 @@ class Model(ModelDesc):
                    .Conv2D('conv0', 20, 5, padding='VALID')
                    .MaxPooling('pool0', 2)
                    .Conv2D('conv1', 20, 5, padding='VALID')
-                   .FullyConnected('fc1', out_dim=32)
-                   .FullyConnected('fct', out_dim=6, nl=tf.identity,
-                                   W_init=tf.constant_initializer(),
-                                   b_init=tf.constant_initializer([1, 0, HALF_DIFF, 0, 1, HALF_DIFF]))())
+                   .FullyConnected('fc1', 32)
+                   .FullyConnected('fct', 6, activation=tf.identity,
+                                   kernel_initializer=tf.constant_initializer(),
+                                   bias_initializer=tf.constant_initializer([1, 0, HALF_DIFF, 0, 1, HALF_DIFF]))())
         # output 6 parameters for affine transformation
         stn = tf.reshape(stn, [-1, 2, 3], name='affine')  # bx2x3
         stn = tf.reshape(tf.transpose(stn, [2, 0, 1]), [3, -1])  # 3 x (bx2)
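With zero weights and that bias, 'fct' initially emits the same six numbers for every input; reshaped to 2x3 they form an affine matrix that only translates by HALF_DIFF, so the spatial transformer starts from a well-defined crop rather than a random warp. A small check (the HALF_DIFF value is illustrative):

```python
import numpy as np

HALF_DIFF = 10.0                                    # illustrative value
theta = np.array([1, 0, HALF_DIFF,
                  0, 1, HALF_DIFF]).reshape(2, 3)   # the bias, as a 2x3 affine map
pt = np.array([5.0, 7.0, 1.0])                      # homogeneous pixel coordinate
print(theta @ pt)                                   # [15. 17.]: pure translation
```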
@@ -52,7 +52,7 @@ class Model(ModelDesc):
             sampled = ImageSample('warp', [image, coor], borderMode='constant')
             return sampled

-        with argscope([Conv2D, FullyConnected], nl=tf.nn.relu):
+        with argscope([Conv2D, FullyConnected], activation=tf.nn.relu):
             with tf.variable_scope('STN1'):
                 sampled1 = get_stn(image)
             with tf.variable_scope('STN2'):
@@ -71,9 +71,9 @@ class Model(ModelDesc):
         sampled = tf.concat([sampled1, sampled2], 3, 'sampled_concat')

         logits = (LinearWrap(sampled)
-                  .FullyConnected('fc1', out_dim=256, nl=tf.nn.relu)
-                  .FullyConnected('fc2', out_dim=128, nl=tf.nn.relu)
-                  .FullyConnected('fct', out_dim=19, nl=tf.identity)())
+                  .FullyConnected('fc1', 256, activation=tf.nn.relu)
+                  .FullyConnected('fc2', 128, activation=tf.nn.relu)
+                  .FullyConnected('fct', 19, activation=tf.identity)())
         tf.nn.softmax(logits, name='prob')
         cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
...
@@ -63,8 +63,8 @@ class Model(GANModelDesc):
         def resnet_block(x, name):
             with tf.variable_scope(name):
-                y = Conv2D('conv0', x, NF, nl=tf.nn.relu)
-                y = Conv2D('conv1', y, NF, nl=tf.identity)
+                y = Conv2D('conv0', x, NF, activation=tf.nn.relu)
+                y = Conv2D('conv1', y, NF, activation=tf.identity)
                 return x + y

         def upsample(x, factor=2):
@@ -74,7 +74,7 @@ class Model(GANModelDesc):
         def generator(x, Ibicubic):
             x = x - VGG_MEAN_TENSOR / 255.0
-            with argscope(Conv2D, kernel_shape=3, stride=1, nl=tf.nn.relu):
+            with argscope(Conv2D, kernel_size=3, activation=tf.nn.relu):
                 x = Conv2D('conv1', x, NF)
                 for i in range(10):
                     x = resnet_block(x, 'block_%i' % i)
@@ -83,27 +83,27 @@ class Model(GANModelDesc):
                 x = upsample(x)
                 x = Conv2D('conv_post_2', x, NF)
                 x = Conv2D('conv_post_3', x, NF)
-                Ires = Conv2D('conv_post_4', x, 3, nl=tf.identity)
+                Ires = Conv2D('conv_post_4', x, 3, activation=tf.identity)
                 Iest = tf.add(Ibicubic, Ires, name='Iest')
                 return Iest  # [0,1]

         @auto_reuse_variable_scope
         def discriminator(x):
             x = x - VGG_MEAN_TENSOR / 255.0
-            with argscope(Conv2D, kernel_shape=3, stride=1, nl=tf.nn.leaky_relu):
+            with argscope(Conv2D, kernel_size=3, activation=tf.nn.leaky_relu):
                 x = Conv2D('conv0', x, 32)
-                x = Conv2D('conv0b', x, 32, stride=2)
+                x = Conv2D('conv0b', x, 32, strides=2)
                 x = Conv2D('conv1', x, 64)
-                x = Conv2D('conv1b', x, 64, stride=2)
+                x = Conv2D('conv1b', x, 64, strides=2)
                 x = Conv2D('conv2', x, 128)
-                x = Conv2D('conv2b', x, 128, stride=2)
+                x = Conv2D('conv2b', x, 128, strides=2)
                 x = Conv2D('conv3', x, 256)
-                x = Conv2D('conv3b', x, 256, stride=2)
+                x = Conv2D('conv3b', x, 256, strides=2)
                 x = Conv2D('conv4', x, 512)
-                x = Conv2D('conv4b', x, 512, stride=2)
-                x = FullyConnected('fc0', x, 1024, nl=tf.nn.leaky_relu)
-                x = FullyConnected('fc1', x, 1, nl=tf.identity)
+                x = Conv2D('conv4b', x, 512, strides=2)
+                x = FullyConnected('fc0', x, 1024, activation=tf.nn.leaky_relu)
+                x = FullyConnected('fc1', x, 1, activation=tf.identity)
             return x

         def additional_losses(a, b):
@@ -113,7 +113,7 @@ class Model(GANModelDesc):
             x = x - VGG_MEAN_TENSOR
             # VGG 19
             with varreplace.freeze_variables():
-                with argscope(Conv2D, kernel_shape=3, nl=tf.nn.relu):
+                with argscope(Conv2D, kernel_size=3, activation=tf.nn.relu):
                     conv1_1 = Conv2D('conv1_1', x, 64)
                     conv1_2 = Conv2D('conv1_2', conv1_1, 64)
                     pool1 = MaxPooling('pool1', conv1_2, 2)  # 64
...
@@ -98,18 +98,21 @@ class MinSaver(Callback):
             reverse (bool): if True, will save the maximum.
             filename (str): the name for the saved model.
                 Defaults to ``min-{monitor_stat}.tfmodel``.

         Example:
             Save the model with minimum validation error to
             "min-val-error.tfmodel":

             .. code-block:: python

                 MinSaver('val-error')

-        Note:
+        Notes:
             It assumes that :class:`ModelSaver` is used with
             the same ``checkpoint_dir``. And it will save
             the model to that directory as well.

             The default for both :class:`ModelSaver` and :class:`MinSaver`
             is ``checkpoint_dir=logger.get_logger_dir()``
         """
         self.monitor_stat = monitor_stat
         self.reverse = reverse
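In practice the two callbacks are registered together so that MinSaver can find the checkpoints ModelSaver writes; a hedged usage sketch (the surrounding trainer config is elided):

```python
callbacks = [
    ModelSaver(),           # checkpoints into logger.get_logger_dir()
    MinSaver('val-error'),  # re-saves the best one as min-val-error.tfmodel
]
```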
...
@@ -517,8 +517,7 @@ class StagingInput(FeedfreeInput):
         Args:
             input (FeedfreeInput):
             nr_stage: number of elements to prefetch into each StagingArea, at the beginning.
-                Since enqueue and dequeue are synchronized, prefetching 1
-                element should be sufficient.
+                Since enqueue and dequeue are synchronized, prefetching 1 element should be sufficient.
             towers: deprecated
             device (str or None): if not None, place the StagingArea on a specific device. e.g., '/cpu:0'.
                 Otherwise, they are placed under where `get_inputs_tensors`
...
@@ -80,11 +80,13 @@ def set_logger_dir(dirname, action=None):
     Args:
         dirname(str): log directory
-        action(str): an action of ("k","d","q") to be performed
+        action(str): an action of ["k","d","q"] to be performed
            when the directory exists. Will ask user by default.

            "d": delete the directory. Note that the deletion may fail when
                the directory is used by tensorboard.

            "k": keep the directory. This is useful when you resume from a
                previous training and want the directory to look as if the
                training was not interrupted.
                Note that this option does not load old models or any other
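A hedged usage sketch of the "k" action when resuming a run (the directory name is illustrative):

```python
from tensorpack import logger

# Keep the existing log directory so the resumed run appends to it:
logger.set_logger_dir('train_log/resnet-run1', action='k')
```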
...