Commit 59b16fa9 authored by Yuxin Wu

maxout/prelu

parent 0d894877
@@ -21,7 +21,7 @@ from tensorpack.dataflow import *
"""
MNIST ConvNet example.
99.25% validation accuracy after 50 epochs.
about 0.55% validation error after 50 epochs.
"""
BATCH_SIZE = 128
@@ -40,7 +40,7 @@ class Model(ModelDesc):
         image, label = input_vars
         image = tf.expand_dims(image, 3)    # add a single channel
-        nl = tf.nn.relu
+        nl = prelu
         image = image * 2 - 1
         l = Conv2D('conv0', image, out_channel=32, kernel_shape=3, nl=nl,
                    padding='VALID')
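For context, the `nl` argument is the nonlinearity a layer applies to its output, so this hunk only swaps the activation function. A rough conceptual sketch (not tensorpack's actual Conv2D implementation):

# Conceptual sketch only: `nl` is applied to the layer output, so passing
# prelu instead of tf.nn.relu changes the activation, not the convolution.
import tensorflow as tf

def conv_then_nl(x, W, nl=tf.nn.relu):
    out = tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='VALID')
    return nl(out)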
@@ -72,4 +72,4 @@ class ModelDesc(object):
     def get_gradient_processor(self):
         """ Return a list of GradientProcessor. They will be executed in order"""
-        return [SummaryGradient(), CheckGradient()]
+        return [CheckGradient(), SummaryGradient()]
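This reordering makes CheckGradient run before SummaryGradient. As a hypothetical sketch of the contract (the `process` method name is an assumption, not tensorpack's actual API), the trainer would apply the returned processors to the gradient list in order:

# Hypothetical sketch; `process` is an assumed interface, not tensorpack's API.
def apply_gradient_processors(grads, processors):
    for proc in processors:
        grads = proc.process(grads)   # each processor sees the previous one's output
    return grads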
#!/usr/bin/env python2
# -*- coding: UTF-8 -*-
# File: nonl.py
# Author: Yuxin Wu <ppwwyyxx@gmail.com>
import tensorflow as tf
from copy import copy
from ._common import *   # provides layer_register

__all__ = ['Maxout', 'prelu']


@layer_register()
def Maxout(x, num_unit):
    """ Maxout nonlinearity: split the channels into groups of `num_unit`
        and take the maximum of each group. """
    input_shape = x.get_shape().as_list()
    assert len(input_shape) == 4
    ch = input_shape[3]
    assert ch % num_unit == 0
    # NHWC -> NHW x (ch / num_unit) x num_unit, then take the max over the last axis
    x = tf.reshape(x, [-1, input_shape[1], input_shape[2], ch // num_unit, num_unit])
    return tf.reduce_max(x, 4, name='output')


def prelu(x, init=tf.constant_initializer(0.001)):
    """ Parametric ReLU with a single learnable slope `alpha` on the negative side. """
    alpha = tf.get_variable('alpha', [], initializer=init)
    # equals x for x > 0 and alpha * x for x < 0
    return ((1 + alpha) * x + (1 - alpha) * tf.abs(x)) * 0.5
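A brief usage sketch, assuming (as with other tensorpack layers) that layer_register() prepends a variable-scope name argument; shapes and values are illustrative only:

# Illustrative usage; assumes layer_register() adds a leading name argument.
import tensorflow as tf
x = tf.placeholder(tf.float32, [None, 28, 28, 64])
y = Maxout('maxout', x, num_unit=2)     # 64 channels -> 32, max over pairs of channels
z = prelu(tf.constant([-2.0, 3.0]))     # -> [-0.002, 3.0] with the default alpha = 0.001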