Commit 53291500 authored by Yuxin Wu

docs and misc small fixes

parent bb9b43b6
@@ -14,7 +14,7 @@ See some interesting [examples](examples) to learn about the framework:
 Focus on modularity. You just have to define the following three components to start a training:
 
-1. The model, or the graph. Define the graph as well as its inputs and outputs. `models/` has some scoped abstraction of common models.
+1. The model, or the graph. `models/` has some scoped abstraction of common models.
 
    `LinearWrap` and `argscope` makes large models look simpler.
 
 2. The data. tensorpack allows and encourages complex data processing.
@@ -33,6 +33,8 @@ Accuracy:
     BATCH_SIZE * NUM_GPU. With a different number of GPUs in use, things might
     be a bit different, especially for learning rate.
 
+    With (W,A,G)=(32,32,32), 43.3% error.
+
 Speed:
     About 3.5 iteration/s on 4 Tesla M40. (Each epoch is set to 10000 iterations)
@@ -31,7 +31,6 @@ def layer_register(summary_activation=False, log_shape=True):
     :param log_shape: log input/output shape of this layer
     """
     def wrapper(func):
         @wraps(func)
         def wrapped_func(name, inputs, *args, **kwargs):
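
For readers unfamiliar with the decorator: registered layers take a mandatory `name` as their first positional argument, which `layer_register` injects before opening a variable scope and (optionally) logging the input/output shapes. A minimal sketch of defining and calling a custom layer; the `ScaleShift` layer, its arguments, and the import path are illustrative assumptions, not part of this commit:

```python
import tensorflow as tf
from tensorpack.models import layer_register  # import path assumed

@layer_register(log_shape=True)
def ScaleShift(x, scale=2.0, shift=0.0):
    # hypothetical custom layer: `name` is not declared here -- the decorator
    # adds it, opens a variable scope, and logs the input/output shapes
    return tf.identity(x * scale + shift, name='output')

# usage: the layer name comes first, then the input tensor
image = tf.placeholder(tf.float32, [None, 28, 28, 3])
out = ScaleShift('preprocess', image, scale=1 / 255.0)
```
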
@@ -91,5 +90,5 @@ def shape2d(a):
     raise RuntimeError("Illegal shape: {}".format(a))
 
 def shape4d(a):
-    # for use with tensorflow
+    # for use with tensorflow NHWC ops
     return [1] + shape2d(a) + [1]
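
For reference, `shape2d` normalizes an int or an `(h, w)` pair into a `[h, w]` list, and `shape4d` pads it into the `[1, h, w, 1]` form that NHWC ops such as `tf.nn.max_pool` expect for their `ksize`/`strides` arguments. A standalone sketch of the intended behavior, reconstructed around the lines shown above rather than copied from the file:

```python
def shape2d(a):
    """Normalize an int or an (h, w) pair into a [h, w] list."""
    if isinstance(a, int):
        return [a, a]
    if isinstance(a, (list, tuple)):
        assert len(a) == 2
        return list(a)
    raise RuntimeError("Illegal shape: {}".format(a))

def shape4d(a):
    # for use with tensorflow NHWC ops, e.g. ksize/strides of tf.nn.max_pool
    return [1] + shape2d(a) + [1]

assert shape4d(3) == [1, 3, 3, 1]
assert shape4d((2, 3)) == [1, 2, 3, 1]
```
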
@@ -22,7 +22,7 @@ def BatchNorm(x, use_local_stat=True, decay=0.9, epsilon=1e-5):
     Batch normalization layer as described in:
     `Batch Normalization: Accelerating Deep Network Training by
-    Reducing Internal Covariate Shift <http://arxiv.org/abs/1502.03167>`_.
+    Reducing Internal Covariance Shift <http://arxiv.org/abs/1502.03167>`_.
 
     Notes:
@@ -52,10 +52,9 @@ def BatchNorm(x, use_local_stat=True, decay=0.9, epsilon=1e-5):
     batch_mean = tf.identity(batch_mean, 'mean')
     batch_var = tf.identity(batch_var, 'variance')
 
-    # XXX hack....
+    # XXX a hack to handle training tower & prediction tower together....
     emaname = 'EMA'
-    in_main_tower = not batch_mean.name.startswith('towerp')
-    if in_main_tower:
+    if not batch_mean.name.startswith('towerp'):
         with tf.name_scope(None):  # https://github.com/tensorflow/tensorflow/issues/2740
             ema = tf.train.ExponentialMovingAverage(decay=decay, name=emaname)
             ema_apply_op = ema.apply([batch_mean, batch_var])
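
The simplified condition keeps the moving-average update out of the prediction tower, whose tensor names start with 'towerp': only training towers apply the EMA, and the name scope is cleared first as a workaround for the linked TensorFlow issue. A condensed sketch of the pattern as a hypothetical helper (the prediction-tower branch, which reuses the training towers' averages, is not shown in this hunk and is omitted here):

```python
import tensorflow as tf

def update_bn_ema(batch_mean, batch_var, decay=0.9):
    # hypothetical helper mirroring the pattern in BatchNorm above:
    # only training towers (names not starting with 'towerp') maintain the EMA
    if batch_mean.name.startswith('towerp'):
        return None  # prediction tower: reuse the training towers' averages
    # clear the name scope before creating the EMA, see
    # https://github.com/tensorflow/tensorflow/issues/2740
    with tf.name_scope(None):
        ema = tf.train.ExponentialMovingAverage(decay=decay, name='EMA')
        return ema.apply([batch_mean, batch_var])
```
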
@@ -23,7 +23,7 @@ def Conv2D(x, out_channel, kernel_shape,
     :param kernel_shape: (h, w) or a int
     :param stride: (h, w) or a int. default to 1
     :param padding: 'valid' or 'same'. default to 'same'
-    :param split: split channels as used in Alexnet. a int default to 1
+    :param split: split channels as used in Alexnet. Default to 1 (no split)
     :param W_init: initializer for W. default to `xavier_initializer_conv2d`.
     :param b_init: initializer for b. default to zero initializer.
     :param nl: nonlinearity. default to `relu`.
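
As a usage note for the clarified `split` parameter: `split=2` divides the input and output channels into two groups and convolves each group independently, as in the original AlexNet. A hedged sketch (layer names, shapes, and the exact import path are illustrative):

```python
import tensorflow as tf
from tensorpack.models import Conv2D  # import path assumed

x = tf.placeholder(tf.float32, [None, 32, 32, 64])

# ordinary convolution: every output channel sees all 64 input channels
l = Conv2D('conv_full', x, out_channel=128, kernel_shape=3)

# grouped convolution as in AlexNet: two independent 32->64-channel
# convolutions whose outputs are concatenated along the channel axis
l = Conv2D('conv_split', x, out_channel=128, kernel_shape=3, split=2)
```
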
@@ -9,7 +9,7 @@ from copy import copy
 from ._common import *
 from .batch_norm import BatchNorm
 
-__all__ = ['Maxout', 'PReLU', 'LeakyReLU', 'BNReLU', 'NonLinearity']
+__all__ = ['Maxout', 'PReLU', 'LeakyReLU', 'BNReLU']
 
 @layer_register(log_shape=False)
 def Maxout(x, num_unit):
@@ -44,9 +44,8 @@ def PReLU(x, init=tf.constant_initializer(0.001), name=None):
     alpha = tf.get_variable('alpha', [], initializer=init)
     x = ((1 + alpha) * x + (1 - alpha) * tf.abs(x))
     if name is None:
-        return x * 0.5
-    else:
-        return tf.mul(x, 0.5, name=name)
+        name = 'output'
+    return tf.mul(x, 0.5, name=name)
 
 @layer_register(log_shape=False)
 def LeakyReLU(x, alpha, name=None):
@@ -61,11 +60,10 @@ def LeakyReLU(x, alpha, name=None):
     alpha = float(alpha)
     x = ((1 + alpha) * x + (1 - alpha) * tf.abs(x))
     if name is None:
-        return x * 0.5
-    else:
-        return tf.mul(x, 0.5, name=name)
+        name = 'output'
+    return tf.mul(x, 0.5, name=name)
 
-# I'm not a layer, but I return a layer.
+# I'm not a layer, but I return a nonlinearity.
 def BNReLU(is_training, **kwargs):
     """
     :param is_traning: boolean
@@ -77,11 +75,3 @@ def BNReLU(is_training, **kwargs):
         x = tf.nn.relu(x, name=name)
         return x
     return BNReLU
-
-@layer_register(log_shape=False)
-def NonLinearity(x, nl):
-    """
-    :param input: any tensor.
-    :param nl: any Tensorflow Operation
-    """
-    return nl(x, name='output')
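
With the standalone `NonLinearity` layer removed, an activation is passed directly as the `nl` argument of a layer, and `BNReLU(is_training)` still returns such a callable; the registered activations can also be called like any other layer, now producing an op named 'output' by default. A hedged usage sketch (layer names, shapes, and import paths are illustrative):

```python
import tensorflow as tf
from tensorpack.models import Conv2D, BNReLU, LeakyReLU  # import paths assumed

is_training = True
x = tf.placeholder(tf.float32, [None, 32, 32, 3])

# a plain TF activation passed as the `nl` argument
l = Conv2D('conv0', x, out_channel=32, kernel_shape=3, nl=tf.nn.relu)

# BNReLU is a factory: BNReLU(is_training) returns a batchnorm + relu nonlinearity
l = Conv2D('conv1', l, out_channel=32, kernel_shape=3, nl=BNReLU(is_training))

# registered activations are themselves layers; after this change the output
# op is named 'output' inside the 'lrelu' scope
l = LeakyReLU('lrelu', l, alpha=0.1)
```
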
@@ -68,9 +68,12 @@ def print_stat(x, message=None):
     """
     if message is None:
         message = x.op.name
-    return tf.Print(x, [tf.reduce_mean(x), x], summarize=20, message=message)
+    return tf.Print(x, [tf.reduce_mean(x), x], summarize=20,
+                    message=message, name='print_' + x.op.name)
 
 def rms(x, name=None):
     if name is None:
         name = x.op.name + '/rms'
+        with tf.name_scope(None):   # name already contains the scope
+            return tf.sqrt(tf.reduce_mean(tf.square(x)), name=name)
     return tf.sqrt(tf.reduce_mean(tf.square(x)), name=name)
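
The new branch in `rms` relies on the fact that the default name `x.op.name + '/rms'` already contains the full scope path; creating the op inside the enclosing scope would prepend that scope a second time, so the scope is cleared with `tf.name_scope(None)` first. A small illustration of the effect (tensor names are made up):

```python
import tensorflow as tf

with tf.name_scope('tower0'):
    x = tf.placeholder(tf.float32, [None, 10], name='activations')
    name = x.op.name + '/rms'            # 'tower0/activations/rms'

    # naive: the enclosing scope is prepended again
    bad = tf.sqrt(tf.reduce_mean(tf.square(x)), name=name)

    # the fix: clear the name scope, since `name` already contains it
    with tf.name_scope(None):
        good = tf.sqrt(tf.reduce_mean(tf.square(x)), name=name)

print(bad.op.name)    # roughly 'tower0/tower0/activations/rms'
print(good.op.name)   # 'tower0/activations/rms'
```
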