Commit 4916f703 authored by Yuxin Wu

prelu as a layer and nonlin

parent a97508f8
@@ -21,7 +21,7 @@ from tensorpack.dataflow import *
 """
 MNIST ConvNet example.
-about 0.55% validation error after 50 epochs.
+about 0.6% validation error after 50 epochs.
 """
 BATCH_SIZE = 128
@@ -40,7 +40,7 @@ class Model(ModelDesc):
         image, label = input_vars
         image = tf.expand_dims(image, 3)    # add a single channel
-        nl = prelu
+        nl = PReLU.f
         image = image * 2 - 1
         l = Conv2D('conv0', image, out_channel=32, kernel_shape=3, nl=nl,
                    padding='VALID')
@@ -23,16 +23,22 @@ def layer_register(summary_activation=False):
         Can be overriden when creating the layer.
     """
     def wrapper(func):
+        class WrapedObject(object):
+            def __init__(self, func):
+                self.f = func
-        @wraps(func)
-        def inner(*args, **kwargs):
+            def __call__(self, *args, **kwargs):
                 name = args[0]
-                assert isinstance(name, basestring)
+                assert isinstance(name, basestring), \
+                    'name must be either the first argument. Args: {}'.format(str(args))
                 args = args[1:]
                 do_summary = kwargs.pop(
                     'summary_activation', summary_activation)
                 inputs = args[0]
                 with tf.variable_scope(name) as scope:
-                    outputs = func(*args, **kwargs)
+                    outputs = self.f(*args, **kwargs)
                 if name not in _layer_logged:
                     # log shape info and add activation
                     logger.info("{} input: {}".format(
@@ -48,7 +54,7 @@ def layer_register(summary_activation=False):
                     add_activation_summary(outputs, scope.name)
                     _layer_logged.add(name)
                 return outputs
-        return inner
+        return WrapedObject(func)
     return wrapper

def shape2d(a):
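The effect of this change, in a condensed sketch (not part of the commit): a registered layer is now a callable object whose __call__ handles the variable scope, logging and activation summaries, while the attribute f keeps the undecorated function so it can be passed around as an ordinary nonlinearity. The layer name and placeholder below are made up for illustration, and it assumes the layer_register decorator above is in scope.

import tensorflow as tf

# Hypothetical layer, only to show the two ways of using a registered layer.
@layer_register()
def MyLayer(x):
    return tf.identity(x, name='output')

inp = tf.placeholder(tf.float32, [None, 8])
out = MyLayer('mylayer', inp)   # wrapped call: built inside variable scope 'mylayer'
raw_fn = MyLayer.f              # the raw function, usable as a plain nonlinearity (nl=MyLayer.f)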
@@ -46,6 +46,6 @@ def Conv2D(x, out_channel, kernel_shape,
         outputs = [tf.nn.conv2d(i, k, stride, padding)
                    for i, k in zip(inputs, kernels)]
         conv = tf.concat(3, outputs)
-    return nl(tf.nn.bias_add(conv, b))
+    return nl(tf.nn.bias_add(conv, b), name='output')
@@ -29,4 +29,4 @@ def FullyConnected(x, out_dim,
     if use_bias:
         b = tf.get_variable('b', [out_dim], initializer=b_init)
     prod = tf.nn.xw_plus_b(x, W, b) if use_bias else tf.matmul(x, W)
-    return nl(prod, name=tf.get_variable_scope().name + '_output')
+    return nl(prod, name='output')
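Why a plain 'output' is enough in Conv2D and FullyConnected: layer_register runs the layer body inside tf.variable_scope(name), so the nl op is created under the layer's own scope. A minimal sketch of that naming behaviour (scope name is made up, not from the commit):

import tensorflow as tf

# Minimal sketch: an op named 'output' created inside a variable scope
# typically gets a fully qualified name like '<scope>/output:0'.
x = tf.placeholder(tf.float32, [None, 4])
with tf.variable_scope('fc0'):
    y = tf.identity(x, name='output')
print(y.name)   # typically 'fc0/output:0'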
@@ -8,7 +8,7 @@ from copy import copy
 from ._common import *

-__all__ = ['Maxout', 'prelu']
+__all__ = ['Maxout', 'PReLU']

 @layer_register()
 def Maxout(x, num_unit):
@@ -19,6 +19,12 @@ def Maxout(x, num_unit):
     x = tf.reshape(x, [-1, input_shape[1], input_shape[2], ch / 3, 3])
     return tf.reduce_max(x, 4, name='output')

-def PReLU(x, init=tf.constant_initializer(0.001)):
+@layer_register()
+def PReLU(x, init=tf.constant_initializer(0.001), name=None):
+    """ allow name to be compatible to other builtin nonlinearity function"""
     alpha = tf.get_variable('alpha', [], initializer=init)
-    return ((1 + alpha) * x + (1 - alpha) * tf.abs(x)) * 0.5
+    x = ((1 + alpha) * x + (1 - alpha) * tf.abs(x))
+    if name is None:
+        return x * 0.5
+    else:
+        return tf.mul(x, 0.5, name=name)
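After this commit PReLU works both ways, mirroring the MNIST example above: as a registered layer in its own right, or via PReLU.f as an nl argument to another layer. A short usage sketch; the layer names and placeholder are illustrative, not from the commit:

import tensorflow as tf

image = tf.placeholder(tf.float32, [None, 28, 28, 1])
l = PReLU('prelu0', image)                 # as a layer: creates variable 'prelu0/alpha'
l = Conv2D('conv1', l, out_channel=32,
           kernel_shape=3, nl=PReLU.f)     # as a nonlinearity inside another layer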