Commit 53291500 authored by Yuxin Wu

docs and misc small fixes

parent bb9b43b6
@@ -14,7 +14,7 @@ See some interesting [examples](examples) to learn about the framework:
Focus on modularity. You just have to define the following three components to start training:
1. The model, or the graph. Define the graph as well as its inputs and outputs. `models/` has some scoped abstraction of common models.
1. The model, or the graph. `models/` has some scoped abstraction of common models.
`LinearWrap` and `argscope` make large models look simpler.
2. The data. tensorpack allows and encourages complex data processing.
......
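For readers unfamiliar with the two helpers mentioned above, here is a rough sketch of the style they enable. It is illustrative only and not taken from this commit; the exact layer arguments and import path are assumptions.

```python
import tensorflow as tf
from tensorpack import *  # assumed to provide LinearWrap, argscope, Conv2D, MaxPooling, FullyConnected

def build_graph(image):
    # argscope sets default arguments for the listed layers inside the block;
    # LinearWrap chains layers so a deep model reads as a single expression.
    with argscope(Conv2D, kernel_shape=3, nl=tf.nn.relu):
        logits = (LinearWrap(image)
                  .Conv2D('conv0', 32)        # kernel_shape and nl come from argscope
                  .MaxPooling('pool0', 2)
                  .Conv2D('conv1', 64)
                  .FullyConnected('fc0', 512)
                  .FullyConnected('fc1', 10, nl=tf.identity)())  # final () unwraps the tensor
    return logits
```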
@@ -33,6 +33,8 @@ Accuracy:
BATCH_SIZE * NUM_GPU. With a different number of GPUs in use, things might
be a bit different, especially for learning rate.
With (W,A,G)=(32,32,32), 43.3% error.
Speed:
About 3.5 iterations/s on 4 Tesla M40 GPUs. (Each epoch is set to 10000 iterations)
......
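As a concrete illustration of the batch-size caveat above (the linear scaling of the learning rate is a common heuristic, not something stated in the source):

```python
# Hypothetical adjustment when changing the number of GPUs: the effective batch
# size is BATCH_SIZE * NUM_GPU, so hyperparameters tuned for one setting may not
# transfer directly. One common heuristic is to scale the learning rate with it.
BATCH_SIZE, TUNED_NUM_GPU, NUM_GPU = 64, 4, 2
base_lr = 1e-4                          # value tuned for BATCH_SIZE * TUNED_NUM_GPU
lr = base_lr * NUM_GPU / TUNED_NUM_GPU  # rough starting point, not a rule
```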
@@ -31,7 +31,6 @@ def layer_register(summary_activation=False, log_shape=True):
:param log_shape: log input/output shape of this layer
"""
def wrapper(func):
@wraps(func)
def wrapped_func(name, inputs, *args, **kwargs):
@@ -91,5 +90,5 @@ def shape2d(a):
raise RuntimeError("Illegal shape: {}".format(a))
def shape4d(a):
# for use with tensorflow
# for use with tensorflow NHWC ops
return [1] + shape2d(a) + [1]
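For reference, a small sketch of what these helpers produce, assuming shape2d maps an int a to [a, a] and a 2-element tuple/list to a plain list (which is what the surrounding code suggests):

```python
# Assumed behaviour of shape2d / shape4d (illustrative, matching the snippet above):
shape4d(3)       # -> [1, 3, 3, 1], suitable as ksize/strides for NHWC tf.nn ops
shape4d((3, 5))  # -> [1, 3, 5, 1]
```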
@@ -22,7 +22,7 @@ def BatchNorm(x, use_local_stat=True, decay=0.9, epsilon=1e-5):
Batch normalization layer as described in:
`Batch Normalization: Accelerating Deep Network Training by
Reducing Internal Covariate Shift <http://arxiv.org/abs/1502.03167>`_.
Reducing Internal Covariance Shift <http://arxiv.org/abs/1502.03167>`_.
Notes:
@@ -52,10 +52,9 @@ def BatchNorm(x, use_local_stat=True, decay=0.9, epsilon=1e-5):
batch_mean = tf.identity(batch_mean, 'mean')
batch_var = tf.identity(batch_var, 'variance')
# XXX hack....
# XXX a hack to handle training tower & prediction tower together....
emaname = 'EMA'
in_main_tower = not batch_mean.name.startswith('towerp')
if in_main_tower:
if not batch_mean.name.startswith('towerp'):
with tf.name_scope(None): # https://github.com/tensorflow/tensorflow/issues/2740
ema = tf.train.ExponentialMovingAverage(decay=decay, name=emaname)
ema_apply_op = ema.apply([batch_mean, batch_var])
......
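For context, the moving-average bookkeeping in this hunk follows the standard TensorFlow pattern sketched below. This is a simplified, self-contained sketch (TF 1.x-style API), not the layer's exact code.

```python
import tensorflow as tf

# Simplified sketch of the EMA pattern used by BatchNorm above (not the exact code).
x = tf.placeholder(tf.float32, [None, 32, 32, 64])
beta, gamma = tf.Variable(tf.zeros([64])), tf.Variable(tf.ones([64]))
batch_mean, batch_var = tf.nn.moments(x, [0, 1, 2])

ema = tf.train.ExponentialMovingAverage(decay=0.9, name='EMA')
ema_apply_op = ema.apply([batch_mean, batch_var])   # creates shadow variables + update op
with tf.control_dependencies([ema_apply_op]):
    # training: normalize with batch statistics, forcing the EMA update to run
    train_output = tf.nn.batch_normalization(x, batch_mean, batch_var, beta, gamma, 1e-5)
# inference: read the accumulated statistics instead of the per-batch ones
infer_output = tf.nn.batch_normalization(
    x, ema.average(batch_mean), ema.average(batch_var), beta, gamma, 1e-5)
```

The `startswith('towerp')` check above appears to ensure that only the training tower creates and updates these shadow variables, as the new comment in the diff suggests.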
@@ -23,7 +23,7 @@ def Conv2D(x, out_channel, kernel_shape,
:param kernel_shape: (h, w) or an int
:param stride: (h, w) or an int. default to 1
:param padding: 'valid' or 'same'. default to 'same'
:param split: split channels as used in Alexnet. a int default to 1
:param split: split channels as used in Alexnet. Default to 1 (no split)
:param W_init: initializer for W. default to `xavier_initializer_conv2d`.
:param b_init: initializer for b. default to zero initializer.
:param nl: nonlinearity. default to `relu`.
......
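To make the new wording concrete: split is the grouped convolution used in AlexNet, where the input channels are divided into split groups that are convolved independently. A hedged usage fragment (the layer name and the name-first call convention from layer_register are assumptions):

```python
# Hypothetical usage: AlexNet-style grouped convolution (2 groups); l is an existing NHWC tensor.
l = Conv2D('conv2', l, out_channel=256, kernel_shape=5, split=2)
# split=1 (the default) is an ordinary convolution.
```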
@@ -9,7 +9,7 @@ from copy import copy
from ._common import *
from .batch_norm import BatchNorm
__all__ = ['Maxout', 'PReLU', 'LeakyReLU', 'BNReLU', 'NonLinearity']
__all__ = ['Maxout', 'PReLU', 'LeakyReLU', 'BNReLU']
@layer_register(log_shape=False)
def Maxout(x, num_unit):
@@ -44,9 +44,8 @@ def PReLU(x, init=tf.constant_initializer(0.001), name=None):
alpha = tf.get_variable('alpha', [], initializer=init)
x = ((1 + alpha) * x + (1 - alpha) * tf.abs(x))
if name is None:
return x * 0.5
else:
return tf.mul(x, 0.5, name=name)
name = 'output'
return tf.mul(x, 0.5, name=name)
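The expression 0.5 * ((1 + alpha) * x + (1 - alpha) * |x|) used here (and in LeakyReLU below) is just an algebraic rewrite of the usual PReLU definition; a quick numpy check of that identity, for illustration only:

```python
import numpy as np

x = np.linspace(-3.0, 3.0, 13)
alpha = 0.25
rewritten = 0.5 * ((1 + alpha) * x + (1 - alpha) * np.abs(x))
standard = np.where(x > 0, x, alpha * x)   # PReLU: x if x > 0 else alpha * x
assert np.allclose(rewritten, standard)
```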
@layer_register(log_shape=False)
def LeakyReLU(x, alpha, name=None):
@@ -61,11 +60,10 @@ def LeakyReLU(x, alpha, name=None):
alpha = float(alpha)
x = ((1 + alpha) * x + (1 - alpha) * tf.abs(x))
if name is None:
return x * 0.5
else:
return tf.mul(x, 0.5, name=name)
name = 'output'
return tf.mul(x, 0.5, name=name)
# I'm not a layer, but I return a layer.
# I'm not a layer, but I return a nonlinearity.
def BNReLU(is_training, **kwargs):
"""
:param is_training: boolean
@@ -77,11 +75,3 @@ def BNReLU(is_training, **kwargs):
x = tf.nn.relu(x, name=name)
return x
return BNReLU
@layer_register(log_shape=False)
def NonLinearity(x, nl):
"""
:param input: any tensor.
:param nl: any Tensorflow Operation
"""
return nl(x, name='output')
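Because BNReLU(is_training) above returns a nonlinearity function rather than acting as a layer itself, it is meant to be passed as the nl argument of another layer. A hedged usage fragment (the surrounding tensor and layer arguments are assumptions):

```python
# Hypothetical usage: BN followed by ReLU as the nonlinearity of a conv layer.
l = Conv2D('conv1', l, out_channel=64, kernel_shape=3, nl=BNReLU(is_training))
```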
@@ -68,9 +68,12 @@ def print_stat(x, message=None):
"""
if message is None:
message = x.op.name
return tf.Print(x, [tf.reduce_mean(x), x], summarize=20, message=message)
return tf.Print(x, [tf.reduce_mean(x), x], summarize=20,
message=message, name='print_' + x.op.name)
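Since print_stat is built on tf.Print, the printing only happens when the returned tensor is actually consumed by the graph; a hedged usage fragment:

```python
# Hypothetical usage: keep the returned tensor in the data flow, or nothing is printed.
activations = print_stat(activations, message='conv1 activations')
```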
def rms(x, name=None):
if name is None:
name = x.op.name + '/rms'
with tf.name_scope(None): # name already contains the scope
return tf.sqrt(tf.reduce_mean(tf.square(x)), name=name)
return tf.sqrt(tf.reduce_mean(tf.square(x)), name=name)