Commit 53291500 authored by Yuxin Wu

docs and misc small fixes

parent bb9b43b6
@@ -14,7 +14,7 @@ See some interesting [examples](examples) to learn about the framework:
 Focus on modularity. You just have to define the following three components to start a training:
 
-1. The model, or the graph. Define the graph as well as its inputs and outputs. `models/` has some scoped abstraction of common models.
+1. The model, or the graph. `models/` has some scoped abstraction of common models.
 
    `LinearWrap` and `argscope` makes large models look simpler.
 
 2. The data. tensorpack allows and encourages complex data processing.
@@ -33,6 +33,8 @@ Accuracy:
     BATCH_SIZE * NUM_GPU. With a different number of GPUs in use, things might
     be a bit different, especially for learning rate.
 
+    With (W,A,G)=(32,32,32), 43.3% error.
+
 Speed:
     About 3.5 iteration/s on 4 Tesla M40. (Each epoch is set to 10000 iterations)
@@ -31,7 +31,6 @@ def layer_register(summary_activation=False, log_shape=True):
     :param log_shape: log input/output shape of this layer
     """
     def wrapper(func):
         @wraps(func)
         def wrapped_func(name, inputs, *args, **kwargs):
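
For readers unfamiliar with the decorator: registered layers take a mandatory `name` as their first positional argument, which `layer_register` injects before opening a variable scope and (optionally) logging the input/output shapes. A minimal sketch of defining and calling a custom layer; the `ScaleShift` layer, its arguments, and the import path are illustrative assumptions, not part of this commit:

```python
import tensorflow as tf
from tensorpack.models import layer_register  # import path assumed

@layer_register(log_shape=True)
def ScaleShift(x, scale=2.0, shift=0.0):
    # hypothetical custom layer: `name` is not declared here -- the decorator
    # adds it, opens a variable scope, and logs the input/output shapes
    return tf.identity(x * scale + shift, name='output')

# usage: the layer name comes first, then the input tensor
image = tf.placeholder(tf.float32, [None, 28, 28, 3])
out = ScaleShift('preprocess', image, scale=1 / 255.0)
```
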
@@ -91,5 +90,5 @@ def shape2d(a):
     raise RuntimeError("Illegal shape: {}".format(a))
 
 def shape4d(a):
-    # for use with tensorflow
+    # for use with tensorflow NHWC ops
     return [1] + shape2d(a) + [1]
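
For reference, `shape2d` normalizes an int or an `(h, w)` pair into a `[h, w]` list, and `shape4d` pads it into the `[1, h, w, 1]` form that NHWC ops such as `tf.nn.max_pool` expect for their `ksize`/`strides` arguments. A standalone sketch of the intended behavior, reconstructed around the lines shown above rather than copied from the file:

```python
def shape2d(a):
    """Normalize an int or an (h, w) pair into a [h, w] list."""
    if isinstance(a, int):
        return [a, a]
    if isinstance(a, (list, tuple)):
        assert len(a) == 2
        return list(a)
    raise RuntimeError("Illegal shape: {}".format(a))

def shape4d(a):
    # for use with tensorflow NHWC ops, e.g. ksize/strides of tf.nn.max_pool
    return [1] + shape2d(a) + [1]

assert shape4d(3) == [1, 3, 3, 1]
assert shape4d((2, 3)) == [1, 2, 3, 1]
```
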
@@ -22,7 +22,7 @@ def BatchNorm(x, use_local_stat=True, decay=0.9, epsilon=1e-5):
     Batch normalization layer as described in:
     `Batch Normalization: Accelerating Deep Network Training by
-    Reducing Internal Covariate Shift <http://arxiv.org/abs/1502.03167>`_.
+    Reducing Internal Covariance Shift <http://arxiv.org/abs/1502.03167>`_.
 
     Notes:
@@ -52,10 +52,9 @@ def BatchNorm(x, use_local_stat=True, decay=0.9, epsilon=1e-5):
     batch_mean = tf.identity(batch_mean, 'mean')
     batch_var = tf.identity(batch_var, 'variance')
 
-    # XXX hack....
+    # XXX a hack to handle training tower & prediction tower together....
     emaname = 'EMA'
-    in_main_tower = not batch_mean.name.startswith('towerp')
-    if in_main_tower:
+    if not batch_mean.name.startswith('towerp'):
         with tf.name_scope(None):  # https://github.com/tensorflow/tensorflow/issues/2740
             ema = tf.train.ExponentialMovingAverage(decay=decay, name=emaname)
             ema_apply_op = ema.apply([batch_mean, batch_var])
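
The simplified condition keeps the moving-average update out of the prediction tower, whose tensor names start with 'towerp': only training towers apply the EMA, and the name scope is cleared first as a workaround for the linked TensorFlow issue. A condensed sketch of the pattern as a hypothetical helper (the prediction-tower branch, which reuses the training towers' averages, is not shown in this hunk and is omitted here):

```python
import tensorflow as tf

def update_bn_ema(batch_mean, batch_var, decay=0.9):
    # hypothetical helper mirroring the pattern in BatchNorm above:
    # only training towers (names not starting with 'towerp') maintain the EMA
    if batch_mean.name.startswith('towerp'):
        return None  # prediction tower: reuse the training towers' averages
    # clear the name scope before creating the EMA, see
    # https://github.com/tensorflow/tensorflow/issues/2740
    with tf.name_scope(None):
        ema = tf.train.ExponentialMovingAverage(decay=decay, name='EMA')
        return ema.apply([batch_mean, batch_var])
```
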
@@ -23,7 +23,7 @@ def Conv2D(x, out_channel, kernel_shape,
     :param kernel_shape: (h, w) or a int
     :param stride: (h, w) or a int. default to 1
     :param padding: 'valid' or 'same'. default to 'same'
-    :param split: split channels as used in Alexnet. a int default to 1
+    :param split: split channels as used in Alexnet. Default to 1 (no split)
     :param W_init: initializer for W. default to `xavier_initializer_conv2d`.
     :param b_init: initializer for b. default to zero initializer.
     :param nl: nonlinearity. default to `relu`.
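
As a usage note for the clarified `split` parameter: `split=2` divides the input and output channels into two groups and convolves each group independently, as in the original AlexNet. A hedged sketch (layer names, shapes, and the exact import path are illustrative):

```python
import tensorflow as tf
from tensorpack.models import Conv2D  # import path assumed

x = tf.placeholder(tf.float32, [None, 32, 32, 64])

# ordinary convolution: every output channel sees all 64 input channels
l = Conv2D('conv_full', x, out_channel=128, kernel_shape=3)

# grouped convolution as in AlexNet: two independent 32->64-channel
# convolutions whose outputs are concatenated along the channel axis
l = Conv2D('conv_split', x, out_channel=128, kernel_shape=3, split=2)
```
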
@@ -9,7 +9,7 @@ from copy import copy
 from ._common import *
 from .batch_norm import BatchNorm
 
-__all__ = ['Maxout', 'PReLU', 'LeakyReLU', 'BNReLU', 'NonLinearity']
+__all__ = ['Maxout', 'PReLU', 'LeakyReLU', 'BNReLU']
 
 @layer_register(log_shape=False)
 def Maxout(x, num_unit):
@@ -44,9 +44,8 @@ def PReLU(x, init=tf.constant_initializer(0.001), name=None):
     alpha = tf.get_variable('alpha', [], initializer=init)
     x = ((1 + alpha) * x + (1 - alpha) * tf.abs(x))
     if name is None:
-        return x * 0.5
-    else:
-        return tf.mul(x, 0.5, name=name)
+        name = 'output'
+    return tf.mul(x, 0.5, name=name)
 
 @layer_register(log_shape=False)
 def LeakyReLU(x, alpha, name=None):
@@ -61,11 +60,10 @@ def LeakyReLU(x, alpha, name=None):
     alpha = float(alpha)
     x = ((1 + alpha) * x + (1 - alpha) * tf.abs(x))
     if name is None:
-        return x * 0.5
-    else:
-        return tf.mul(x, 0.5, name=name)
+        name = 'output'
+    return tf.mul(x, 0.5, name=name)
 
-# I'm not a layer, but I return a layer.
+# I'm not a layer, but I return a nonlinearity.
 def BNReLU(is_training, **kwargs):
     """
     :param is_traning: boolean
@@ -77,11 +75,3 @@ def BNReLU(is_training, **kwargs):
         x = tf.nn.relu(x, name=name)
         return x
     return BNReLU
-
-@layer_register(log_shape=False)
-def NonLinearity(x, nl):
-    """
-    :param input: any tensor.
-    :param nl: any Tensorflow Operation
-    """
-    return nl(x, name='output')
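
With the standalone `NonLinearity` layer removed, an activation is passed directly as the `nl` argument of a layer, and `BNReLU(is_training)` still returns such a callable; the registered activations can also be called like any other layer, now producing an op named 'output' by default. A hedged usage sketch (layer names, shapes, and import paths are illustrative):

```python
import tensorflow as tf
from tensorpack.models import Conv2D, BNReLU, LeakyReLU  # import paths assumed

is_training = True
x = tf.placeholder(tf.float32, [None, 32, 32, 3])

# a plain TF activation passed as the `nl` argument
l = Conv2D('conv0', x, out_channel=32, kernel_shape=3, nl=tf.nn.relu)

# BNReLU is a factory: BNReLU(is_training) returns a batchnorm + relu nonlinearity
l = Conv2D('conv1', l, out_channel=32, kernel_shape=3, nl=BNReLU(is_training))

# registered activations are themselves layers; after this change the output
# op is named 'output' inside the 'lrelu' scope
l = LeakyReLU('lrelu', l, alpha=0.1)
```
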
@@ -68,9 +68,12 @@ def print_stat(x, message=None):
     """
     if message is None:
         message = x.op.name
-    return tf.Print(x, [tf.reduce_mean(x), x], summarize=20, message=message)
+    return tf.Print(x, [tf.reduce_mean(x), x], summarize=20,
+                    message=message, name='print_' + x.op.name)
 
 def rms(x, name=None):
     if name is None:
         name = x.op.name + '/rms'
+        with tf.name_scope(None):   # name already contains the scope
+            return tf.sqrt(tf.reduce_mean(tf.square(x)), name=name)
     return tf.sqrt(tf.reduce_mean(tf.square(x)), name=name)
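
The new branch in `rms` relies on the fact that the default name `x.op.name + '/rms'` already contains the full scope path; creating the op inside the enclosing scope would prepend that scope a second time, so the scope is cleared with `tf.name_scope(None)` first. A small illustration of the effect (tensor names are made up):

```python
import tensorflow as tf

with tf.name_scope('tower0'):
    x = tf.placeholder(tf.float32, [None, 10], name='activations')
    name = x.op.name + '/rms'            # 'tower0/activations/rms'

    # naive: the enclosing scope is prepended again
    bad = tf.sqrt(tf.reduce_mean(tf.square(x)), name=name)

    # the fix: clear the name scope, since `name` already contains it
    with tf.name_scope(None):
        good = tf.sqrt(tf.reduce_mean(tf.square(x)), name=name)

print(bad.op.name)    # roughly 'tower0/tower0/activations/rms'
print(good.op.name)   # 'tower0/activations/rms'
```
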