Commit 6eb0bebe authored by Yuxin Wu

warning about default relu

parent dc59ad5f
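
This commit stops relying on the implicit `tf.nn.relu` default in `Conv2D` and `FullyConnected`: both layers now take `nl=None`, log a `[DEPRECATED]` warning when no nonlinearity is given, and callers are expected to state `nl` explicitly, typically once per block via `argscope`. A minimal sketch of the migration, assuming the usual `from tensorpack import *` and an `image` tensor as in the examples below:

    # Before: relies on the implicit default nl=tf.nn.relu
    # (after this commit, every such call logs a [DEPRECATED] warning).
    l = Conv2D('conv1', image, out_channel=64, kernel_shape=3)

    # After: declare the nonlinearity once for the whole block via argscope.
    with argscope(Conv2D, kernel_shape=3, nl=tf.nn.relu):
        l = Conv2D('conv1', image, 64)  # inherits nl=tf.nn.relu from the scope
        l = Conv2D('conv2', l, 64)
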
@@ -29,13 +29,13 @@ class Model(mnist_example.Model):
         image, label = input_vars
         image = tf.expand_dims(image, 3)
-        with argscope(Conv2D, kernel_shape=5):
+        with argscope(Conv2D, kernel_shape=5, nl=tf.nn.relu):
             logits = (LinearWrap(image)  # the starting brace is only for line-breaking
                       .Conv2D('conv0', out_channel=32, padding='VALID')
                       .MaxPooling('pool0', 2)
                       .Conv2D('conv1', out_channel=64, padding='VALID')
                       .MaxPooling('pool1', 2)
-                      .FullyConnected('fc0', 512)
+                      .FullyConnected('fc0', 512, nl=tf.nn.relu)
                       .FullyConnected('fc1', out_dim=10, nl=tf.identity)())
         prob = tf.nn.softmax(logits, name='prob')
...
@@ -34,7 +34,7 @@ class Model(ModelDesc):
                 up = up / 2
             return l

-        with argscope(Conv2D, kernel_shape=3):
+        with argscope(Conv2D, kernel_shape=3, nl=tf.nn.relu):
             l = Conv2D('conv1_1', image, 64)
             l = Conv2D('conv1_2', l, 64)
             b1 = branch('branch1', l, 1)
...
@@ -71,7 +71,7 @@ class Model(ModelDesc):
         l = inception('incep3c', l, 0, 128, 160, 64, 96, 0, 'max')

         br1 = Conv2D('loss1conv', l, 128, 1)
-        br1 = FullyConnected('loss1fc', br1, 1024)
+        br1 = FullyConnected('loss1fc', br1, 1024, nl=tf.nn.relu)
         br1 = FullyConnected('loss1logit', br1, 1000, nl=tf.identity)
         loss1 = tf.nn.sparse_softmax_cross_entropy_with_logits(br1, label)
         loss1 = tf.reduce_mean(loss1, name='loss1')
@@ -84,7 +84,7 @@ class Model(ModelDesc):
         l = inception('incep4e', l, 0, 128, 192, 192, 256, 0, 'max')

         br2 = Conv2D('loss2conv', l, 128, 1)
-        br2 = FullyConnected('loss2fc', br2, 1024)
+        br2 = FullyConnected('loss2fc', br2, 1024, nl=tf.nn.relu)
         br2 = FullyConnected('loss2logit', br2, 1000, nl=tf.identity)
         loss2 = tf.nn.sparse_softmax_cross_entropy_with_logits(br2, label)
         loss2 = tf.reduce_mean(loss2, name='loss2')
...
@@ -51,10 +51,10 @@ class Model(ModelDesc):
                 .MaxPooling('pool2', 3, stride=2, padding='SAME') \
                 .Conv2D('conv3.1', out_channel=128, padding='VALID') \
                 .Conv2D('conv3.2', out_channel=128, padding='VALID') \
-                .FullyConnected('fc0', 1024 + 512,
+                .FullyConnected('fc0', 1024 + 512, nl=tf.nn.relu,
                                 b_init=tf.constant_initializer(0.1)) \
                 .tf.nn.dropout(keep_prob) \
-                .FullyConnected('fc1', 512,
+                .FullyConnected('fc1', 512, nl=tf.nn.relu,
                                 b_init=tf.constant_initializer(0.1)) \
                 .FullyConnected('linear', out_dim=self.cifar_classnum, nl=tf.identity)()
...
@@ -29,21 +29,22 @@ class Model(ModelDesc):
         image, label = inputs

-        l = Conv2D('conv1', image, out_channel=96, kernel_shape=11, stride=4, padding='VALID')
-        l = tf.nn.lrn(l, 2, bias=1.0, alpha=2e-5, beta=0.75, name='norm1')
-        l = MaxPooling('pool1', l, 3, stride=2, padding='VALID')
-        l = Conv2D('conv2', l, out_channel=256, kernel_shape=5, split=2)
-        l = tf.nn.lrn(l, 2, bias=1.0, alpha=2e-5, beta=0.75, name='norm2')
-        l = MaxPooling('pool2', l, 3, stride=2, padding='VALID')
-        l = Conv2D('conv3', l, out_channel=384, kernel_shape=3)
-        l = Conv2D('conv4', l, out_channel=384, kernel_shape=3, split=2)
-        l = Conv2D('conv5', l, out_channel=256, kernel_shape=3, split=2)
-        l = MaxPooling('pool3', l, 3, stride=2, padding='VALID')
-        l = FullyConnected('fc6', l, 4096)
-        l = FullyConnected('fc7', l, out_dim=4096)
-        # fc will have activation summary by default. disable this for the output layer
-        logits = FullyConnected('fc8', l, out_dim=1000, nl=tf.identity)
+        with argscope([Conv2D, FullyConnected], nl=tf.nn.relu):
+            l = Conv2D('conv1', image, out_channel=96, kernel_shape=11, stride=4, padding='VALID')
+            l = tf.nn.lrn(l, 2, bias=1.0, alpha=2e-5, beta=0.75, name='norm1')
+            l = MaxPooling('pool1', l, 3, stride=2, padding='VALID')
+            l = Conv2D('conv2', l, out_channel=256, kernel_shape=5, split=2)
+            l = tf.nn.lrn(l, 2, bias=1.0, alpha=2e-5, beta=0.75, name='norm2')
+            l = MaxPooling('pool2', l, 3, stride=2, padding='VALID')
+            l = Conv2D('conv3', l, out_channel=384, kernel_shape=3)
+            l = Conv2D('conv4', l, out_channel=384, kernel_shape=3, split=2)
+            l = Conv2D('conv5', l, out_channel=256, kernel_shape=3, split=2)
+            l = MaxPooling('pool3', l, 3, stride=2, padding='VALID')
+            l = FullyConnected('fc6', l, 4096)
+            l = FullyConnected('fc7', l, out_dim=4096)
+            # fc will have activation summary by default. disable this for the output layer
+            logits = FullyConnected('fc8', l, out_dim=1000, nl=tf.identity)
         prob = tf.nn.softmax(logits, name='output')
...
@@ -36,7 +36,7 @@ class Model(ModelDesc):
         image, label = inputs

-        with argscope(Conv2D, kernel_shape=3):
+        with argscope(Conv2D, kernel_shape=3, nl=tf.nn.relu):
             # 224
             logits = (LinearWrap(image)
                       .Conv2D('conv1_1', 64)
@@ -62,10 +62,9 @@ class Model(ModelDesc):
                       .Conv2D('conv5_3', 512)
                       .MaxPooling('pool5', 2)
                       # 7
-                      .FullyConnected('fc6', 4096)
+                      .FullyConnected('fc6', 4096, nl=tf.nn.relu)
                       .Dropout('drop0', 0.5)
-                      .print_tensor()
-                      .FullyConnected('fc7', 4096)
+                      .FullyConnected('fc7', 4096, nl=tf.nn.relu)
                       .Dropout('drop1', 0.5)
                       .FullyConnected('fc8', out_dim=1000, nl=tf.identity)())
         prob = tf.nn.softmax(logits, name='output')
...
@@ -54,7 +54,7 @@ class Model(ModelDesc):
                       .Conv2D('conv2')
                       .MaxPooling('pool1', 2)
                       .Conv2D('conv3')
-                      .FullyConnected('fc0', 512)
+                      .FullyConnected('fc0', 512, nl=tf.nn.relu)
                       .Dropout('dropout', 0.5)
                       .FullyConnected('fc1', out_dim=10, nl=tf.identity)())
         prob = tf.nn.softmax(logits, name='prob')  # a Bx10 with probabilities
...
@@ -29,17 +29,18 @@ class Model(ModelDesc):
         image = image / 128.0 - 1

-        logits = (LinearWrap(image)
-                  .Conv2D('conv1', 24, 5, padding='VALID')
-                  .MaxPooling('pool1', 2, padding='SAME')
-                  .Conv2D('conv2', 32, 3, padding='VALID')
-                  .Conv2D('conv3', 32, 3, padding='VALID')
-                  .MaxPooling('pool2', 2, padding='SAME')
-                  .Conv2D('conv4', 64, 3, padding='VALID')
-                  .Dropout('drop', 0.5)
-                  .FullyConnected('fc0', 512,
-                                  b_init=tf.constant_initializer(0.1))
-                  .FullyConnected('linear', out_dim=10, nl=tf.identity)())
+        with argscope(Conv2D, nl=tf.nn.relu):
+            logits = (LinearWrap(image)
+                      .Conv2D('conv1', 24, 5, padding='VALID')
+                      .MaxPooling('pool1', 2, padding='SAME')
+                      .Conv2D('conv2', 32, 3, padding='VALID')
+                      .Conv2D('conv3', 32, 3, padding='VALID')
+                      .MaxPooling('pool2', 2, padding='SAME')
+                      .Conv2D('conv4', 64, 3, padding='VALID')
+                      .Dropout('drop', 0.5)
+                      .FullyConnected('fc0', 512,
+                                      b_init=tf.constant_initializer(0.1), nl=tf.nn.relu)
+                      .FullyConnected('linear', out_dim=10, nl=tf.identity)())
         prob = tf.nn.softmax(logits, name='output')

         # compute the number of failed samples, for ClassificationError to use at test time
...
@@ -7,7 +7,7 @@ import numpy as np
 import tensorflow as tf
 import math
 from ._common import *
-from ..utils import map_arg
+from ..utils import map_arg, logger

 __all__ = ['Conv2D']
@@ -15,7 +15,7 @@ __all__ = ['Conv2D']
 def Conv2D(x, out_channel, kernel_shape,
            padding='SAME', stride=1,
            W_init=None, b_init=None,
-           nl=tf.nn.relu, split=1, use_bias=True):
+           nl=None, split=1, use_bias=True):
     """
     2D convolution on 4D inputs.
@@ -59,5 +59,9 @@ def Conv2D(x, out_channel, kernel_shape,
         outputs = [tf.nn.conv2d(i, k, stride, padding)
                    for i, k in zip(inputs, kernels)]
         conv = tf.concat(3, outputs)
+    if nl is None:
+        logger.warn("[DEPRECATED] Default nonlinearity for Conv2D and FullyConnected will be deprecated.")
+        logger.warn("[DEPRECATED] Please use argscope instead.")
+        nl = tf.nn.relu
     return nl(tf.nn.bias_add(conv, b) if use_bias else conv, name='output')
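
The change above uses the common `None`-sentinel idiom: the function cannot tell "caller passed `tf.nn.relu` explicitly" apart from "caller relied on the default", so the default becomes `None`, the old behavior is kept as a fallback, and a warning marks every call that still depends on it. A standalone sketch of the idiom, with hypothetical names and a toy stand-in for relu:

    import warnings

    def layer_sketch(x, nl=None):
        # None means "the caller did not choose a nonlinearity": warn,
        # then fall back to the historical default so old code still runs.
        if nl is None:
            warnings.warn("default nonlinearity is deprecated; pass nl "
                          "explicitly (e.g. via argscope)", DeprecationWarning)
            nl = lambda v: max(v, 0.0)  # toy stand-in for tf.nn.relu

        return nl(x)

    print(layer_sketch(-3.0))                  # warns, fallback returns 0.0
    print(layer_sketch(-3.0, nl=lambda v: v))  # explicit nl, no warning
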
@@ -14,7 +14,7 @@ __all__ = ['FullyConnected']
 @layer_register()
 def FullyConnected(x, out_dim,
                    W_init=None, b_init=None,
-                   nl=tf.nn.relu, use_bias=True):
+                   nl=None, use_bias=True):
     """
     Fully-Connected layer.
@@ -39,4 +39,8 @@ def FullyConnected(x, out_dim,
     if use_bias:
         b = tf.get_variable('b', [out_dim], initializer=b_init)
     prod = tf.nn.xw_plus_b(x, W, b) if use_bias else tf.matmul(x, W)
+    if nl is None:
+        logger.warn("[DEPRECATED] Default nonlinearity for Conv2D and FullyConnected will be deprecated.")
+        logger.warn("[DEPRECATED] Please use argscope instead.")
+        nl = tf.nn.relu
     return nl(prod, name='output')
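
The migration path this warning points at is `argscope`, which lets one `with` block supply default keyword arguments to every layer call inside it. A minimal, self-contained sketch of the idea (not tensorpack's actual implementation; all names here are hypothetical):

    import contextlib

    _SCOPE = {}  # hypothetical global: layer function -> default kwargs

    @contextlib.contextmanager
    def argscope_sketch(layer_fn, **defaults):
        # Push defaults for layer_fn, restore the previous ones on exit.
        old = _SCOPE.get(layer_fn)
        _SCOPE[layer_fn] = dict(defaults)
        try:
            yield
        finally:
            if old is None:
                _SCOPE.pop(layer_fn, None)
            else:
                _SCOPE[layer_fn] = old

    def conv2d_sketch(name, **kwargs):
        # Start from the active scope's defaults; explicit kwargs win.
        merged = dict(_SCOPE.get(conv2d_sketch, {}))
        merged.update(kwargs)
        return merged  # a real layer would build the op here

    with argscope_sketch(conv2d_sketch, kernel_shape=3, nl='relu'):
        print(conv2d_sketch('conv1', out_channel=64))
        # -> kernel_shape=3 and nl='relu' inherited, out_channel=64 explicit
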
@@ -133,7 +133,7 @@ class ModelDesc(object):
         :returns: the cost to minimize. a scalar variable
         """
         if len(inspect.getargspec(self._build_graph).args) == 3:
-            logger.warn("_build_graph(self, input_vars, is_training) is deprecated! \
+            logger.warn("[DEPRECATED] _build_graph(self, input_vars, is_training) is deprecated! \
 Use _build_graph(self, input_vars) and get_current_tower_context().is_training instead.")
             self._build_graph(model_inputs, get_current_tower_context().is_training)
         else:
...
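
For this `ModelDesc` warning, the migration it describes looks like the following sketch, assuming a `ModelDesc` subclass and `get_current_tower_context` imported as the warning text names it:

    # Old, deprecated signature: the framework passes is_training explicitly.
    def _build_graph(self, input_vars, is_training):
        ...

    # New signature: read the training flag from the active tower context.
    def _build_graph(self, input_vars):
        is_training = get_current_tower_context().is_training
        ...
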