Commit 4916f703 authored by Yuxin Wu

prelu as a layer and nonlin

parent a97508f8
@@ -21,7 +21,7 @@ from tensorpack.dataflow import *
 """
 MNIST ConvNet example.
-about 0.55% validation error after 50 epochs.
+about 0.6% validation error after 50 epochs.
 """
 BATCH_SIZE = 128
@@ -40,7 +40,7 @@ class Model(ModelDesc):
         image, label = input_vars
         image = tf.expand_dims(image, 3)    # add a single channel
-        nl = prelu
+        nl = PReLU.f
         image = image * 2 - 1
         l = Conv2D('conv0', image, out_channel=32, kernel_shape=3, nl=nl,
                    padding='VALID')
...
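Note: with this commit PReLU is registered as a layer, so the example passes the undecorated function PReLU.f as the nl argument, while calling the registered name would open its own variable scope. A minimal usage sketch (not part of the commit; the layer name 'prelu0' is hypothetical):

    # Two ways the registered PReLU can now be used:
    l = Conv2D('conv0', image, out_channel=32, kernel_shape=3,
               nl=PReLU.f)          # raw function, its 'alpha' lives in conv0's scope
    l = PReLU('prelu0', l)          # standalone layer with its own variable scope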
@@ -23,32 +23,38 @@ def layer_register(summary_activation=False):
     Can be overriden when creating the layer.
     """
     def wrapper(func):
-        @wraps(func)
-        def inner(*args, **kwargs):
-            name = args[0]
-            assert isinstance(name, basestring)
-            args = args[1:]
-            do_summary = kwargs.pop(
-                'summary_activation', summary_activation)
-            inputs = args[0]
-            with tf.variable_scope(name) as scope:
-                outputs = func(*args, **kwargs)
-                if name not in _layer_logged:
-                    # log shape info and add activation
-                    logger.info("{} input: {}".format(
-                        name, get_shape_str(inputs)))
-                    logger.info("{} output: {}".format(
-                        name, get_shape_str(outputs)))
-                    if do_summary:
-                        if isinstance(outputs, list):
-                            for x in outputs:
-                                add_activation_summary(x, scope.name)
-                        else:
-                            add_activation_summary(outputs, scope.name)
-                    _layer_logged.add(name)
-                return outputs
-        return inner
+        class WrapedObject(object):
+            def __init__(self, func):
+                self.f = func
+
+            @wraps(func)
+            def __call__(self, *args, **kwargs):
+                name = args[0]
+                assert isinstance(name, basestring), \
+                    'name must be either the first argument. Args: {}'.format(str(args))
+                args = args[1:]
+
+                do_summary = kwargs.pop(
+                    'summary_activation', summary_activation)
+                inputs = args[0]
+                with tf.variable_scope(name) as scope:
+                    outputs = self.f(*args, **kwargs)
+                    if name not in _layer_logged:
+                        # log shape info and add activation
+                        logger.info("{} input: {}".format(
+                            name, get_shape_str(inputs)))
+                        logger.info("{} output: {}".format(
+                            name, get_shape_str(outputs)))
+                        if do_summary:
+                            if isinstance(outputs, list):
+                                for x in outputs:
+                                    add_activation_summary(x, scope.name)
+                            else:
+                                add_activation_summary(outputs, scope.name)
+                        _layer_logged.add(name)
+                    return outputs
+        return WrapedObject(func)
     return wrapper
 def shape2d(a):
...
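Stripped to its essentials, the hunk above replaces the closure-based decorator with a callable object: calling the decorated layer with a name keeps the scoped/logged behaviour, while the original function stays reachable as .f. A reduced sketch of just that pattern (logging and summaries omitted, names are illustrative):

    import functools
    import tensorflow as tf

    def layer_register_sketch():
        def wrapper(func):
            class Wrapped(object):
                def __init__(self, func):
                    self.f = func                    # undecorated function, e.g. PReLU.f

                @functools.wraps(func)
                def __call__(self, name, *args, **kwargs):
                    with tf.variable_scope(name):    # variables/outputs live under `name`
                        return self.f(*args, **kwargs)
            return Wrapped(func)
        return wrapper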
@@ -46,6 +46,6 @@ def Conv2D(x, out_channel, kernel_shape,
         outputs = [tf.nn.conv2d(i, k, stride, padding)
                    for i, k in zip(inputs, kernels)]
         conv = tf.concat(3, outputs)
-    return nl(tf.nn.bias_add(conv, b))
+    return nl(tf.nn.bias_add(conv, b), name='output')
@@ -29,4 +29,4 @@ def FullyConnected(x, out_dim,
     if use_bias:
         b = tf.get_variable('b', [out_dim], initializer=b_init)
     prod = tf.nn.xw_plus_b(x, W, b) if use_bias else tf.matmul(x, W)
-    return nl(prod, name=tf.get_variable_scope().name + '_output')
+    return nl(prod, name='output')
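Both return statements now pass name='output' to the nonlinearity, which assumes every nl accepts a name keyword the way the TF builtins do; that is also why PReLU below gains a name=None parameter. A generic illustration (not from the commit):

    # tf.nn.relu already takes an optional name, so nl=tf.nn.relu keeps working:
    out = tf.nn.relu(tf.nn.bias_add(conv, b), name='output')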
@@ -8,7 +8,7 @@ from copy import copy
 from ._common import *
 
-__all__ = ['Maxout', 'prelu']
+__all__ = ['Maxout', 'PReLU']
 
 @layer_register()
 def Maxout(x, num_unit):
@@ -19,6 +19,12 @@ def Maxout(x, num_unit):
     x = tf.reshape(x, [-1, input_shape[1], input_shape[2], ch / 3, 3])
     return tf.reduce_max(x, 4, name='output')
 
-def PReLU(x, init=tf.constant_initializer(0.001)):
-    alpha = tf.get_variable('alpha', [], initializer=init)
-    return ((1 + alpha) * x + (1 - alpha) * tf.abs(x)) * 0.5
+@layer_register()
+def PReLU(x, init=tf.constant_initializer(0.001), name=None):
+    """ allow name to be compatible to other builtin nonlinearity function"""
+    alpha = tf.get_variable('alpha', [], initializer=init)
+    x = ((1 + alpha) * x + (1 - alpha) * tf.abs(x))
+    if name is None:
+        return x * 0.5
+    else:
+        return tf.mul(x, 0.5, name=name)
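The branch-free expression above is the usual PReLU, i.e. x for x >= 0 and alpha*x for x < 0. A quick sanity check of that algebra (not part of the commit):

    import numpy as np

    alpha = 0.001
    x = np.array([-2.0, -0.5, 0.0, 1.5])
    branch_free = ((1 + alpha) * x + (1 - alpha) * np.abs(x)) * 0.5
    reference = np.where(x >= 0, x, alpha * x)   # textbook PReLU
    assert np.allclose(branch_free, reference)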