Commit 4916f703 authored by Yuxin Wu

prelu as a layer and nonlin

parent a97508f8
@@ -21,7 +21,7 @@ from tensorpack.dataflow import *
 """
 MNIST ConvNet example.
-about 0.55% validation error after 50 epochs.
+about 0.6% validation error after 50 epochs.
 """
 BATCH_SIZE = 128
@@ -40,7 +40,7 @@ class Model(ModelDesc):
         image, label = input_vars
         image = tf.expand_dims(image, 3)    # add a single channel
-        nl = prelu
+        nl = PReLU.f
         image = image * 2 - 1
         l = Conv2D('conv0', image, out_channel=32, kernel_shape=3, nl=nl,
                    padding='VALID')
...
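Note: with this commit PReLU is registered as a layer, so the example passes the undecorated function PReLU.f as the nl argument, while calling the registered name would open its own variable scope. A minimal usage sketch (not part of the commit; the layer name 'prelu0' is hypothetical):

    # Two ways the registered PReLU can now be used:
    l = Conv2D('conv0', image, out_channel=32, kernel_shape=3,
               nl=PReLU.f)          # raw function, its 'alpha' lives in conv0's scope
    l = PReLU('prelu0', l)          # standalone layer with its own variable scope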
@@ -23,32 +23,38 @@ def layer_register(summary_activation=False):
     Can be overriden when creating the layer.
     """
     def wrapper(func):
-        @wraps(func)
-        def inner(*args, **kwargs):
-            name = args[0]
-            assert isinstance(name, basestring)
-            args = args[1:]
-            do_summary = kwargs.pop(
-                'summary_activation', summary_activation)
-            inputs = args[0]
-            with tf.variable_scope(name) as scope:
-                outputs = func(*args, **kwargs)
-                if name not in _layer_logged:
-                    # log shape info and add activation
-                    logger.info("{} input: {}".format(
-                        name, get_shape_str(inputs)))
-                    logger.info("{} output: {}".format(
-                        name, get_shape_str(outputs)))
-                    if do_summary:
-                        if isinstance(outputs, list):
-                            for x in outputs:
-                                add_activation_summary(x, scope.name)
-                        else:
-                            add_activation_summary(outputs, scope.name)
-                    _layer_logged.add(name)
-                return outputs
-        return inner
+        class WrapedObject(object):
+            def __init__(self, func):
+                self.f = func
+
+            @wraps(func)
+            def __call__(self, *args, **kwargs):
+                name = args[0]
+                assert isinstance(name, basestring), \
+                    'name must be either the first argument. Args: {}'.format(str(args))
+                args = args[1:]
+
+                do_summary = kwargs.pop(
+                    'summary_activation', summary_activation)
+                inputs = args[0]
+                with tf.variable_scope(name) as scope:
+                    outputs = self.f(*args, **kwargs)
+                    if name not in _layer_logged:
+                        # log shape info and add activation
+                        logger.info("{} input: {}".format(
+                            name, get_shape_str(inputs)))
+                        logger.info("{} output: {}".format(
+                            name, get_shape_str(outputs)))
+                        if do_summary:
+                            if isinstance(outputs, list):
+                                for x in outputs:
+                                    add_activation_summary(x, scope.name)
+                            else:
+                                add_activation_summary(outputs, scope.name)
+                        _layer_logged.add(name)
+                    return outputs
+        return WrapedObject(func)
     return wrapper
 def shape2d(a):
...
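Stripped to its essentials, the hunk above replaces the closure-based decorator with a callable object: calling the decorated layer with a name keeps the scoped/logged behaviour, while the original function stays reachable as .f. A reduced sketch of just that pattern (logging and summaries omitted, names are illustrative):

    import functools
    import tensorflow as tf

    def layer_register_sketch():
        def wrapper(func):
            class Wrapped(object):
                def __init__(self, func):
                    self.f = func                    # undecorated function, e.g. PReLU.f

                @functools.wraps(func)
                def __call__(self, name, *args, **kwargs):
                    with tf.variable_scope(name):    # variables/outputs live under `name`
                        return self.f(*args, **kwargs)
            return Wrapped(func)
        return wrapper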
@@ -46,6 +46,6 @@ def Conv2D(x, out_channel, kernel_shape,
         outputs = [tf.nn.conv2d(i, k, stride, padding)
                    for i, k in zip(inputs, kernels)]
         conv = tf.concat(3, outputs)
-    return nl(tf.nn.bias_add(conv, b))
+    return nl(tf.nn.bias_add(conv, b), name='output')
@@ -29,4 +29,4 @@ def FullyConnected(x, out_dim,
     if use_bias:
         b = tf.get_variable('b', [out_dim], initializer=b_init)
     prod = tf.nn.xw_plus_b(x, W, b) if use_bias else tf.matmul(x, W)
-    return nl(prod, name=tf.get_variable_scope().name + '_output')
+    return nl(prod, name='output')
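Both return statements now pass name='output' to the nonlinearity, which assumes every nl accepts a name keyword the way the TF builtins do; that is also why PReLU below gains a name=None parameter. A generic illustration (not from the commit):

    # tf.nn.relu already takes an optional name, so nl=tf.nn.relu keeps working:
    out = tf.nn.relu(tf.nn.bias_add(conv, b), name='output')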
@@ -8,7 +8,7 @@ from copy import copy
 from ._common import *
 
-__all__ = ['Maxout', 'prelu']
+__all__ = ['Maxout', 'PReLU']
 
 @layer_register()
 def Maxout(x, num_unit):
@@ -19,6 +19,12 @@ def Maxout(x, num_unit):
     x = tf.reshape(x, [-1, input_shape[1], input_shape[2], ch / 3, 3])
     return tf.reduce_max(x, 4, name='output')
 
-def PReLU(x, init=tf.constant_initializer(0.001)):
-    alpha = tf.get_variable('alpha', [], initializer=init)
-    return ((1 + alpha) * x + (1 - alpha) * tf.abs(x)) * 0.5
+@layer_register()
+def PReLU(x, init=tf.constant_initializer(0.001), name=None):
+    """ allow name to be compatible to other builtin nonlinearity function"""
+    alpha = tf.get_variable('alpha', [], initializer=init)
+    x = ((1 + alpha) * x + (1 - alpha) * tf.abs(x))
+    if name is None:
+        return x * 0.5
+    else:
+        return tf.mul(x, 0.5, name=name)
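The branch-free expression above is the usual PReLU, i.e. x for x >= 0 and alpha*x for x < 0. A quick sanity check of that algebra (not part of the commit):

    import numpy as np

    alpha = 0.001
    x = np.array([-2.0, -0.5, 0.0, 1.5])
    branch_free = ((1 + alpha) * x + (1 - alpha) * np.abs(x)) * 0.5
    reference = np.where(x >= 0, x, alpha * x)   # textbook PReLU
    assert np.allclose(branch_free, reference)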