Commit 7da83a51 authored by Yuxin Wu

Deprecate LeakyReLU in favor of tf.nn.leaky_relu

parent 8411d8cd
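
This commit replaces tensorpack's LeakyReLU layer (and the argscope(LeakyReLU, alpha=...) blocks that configured it) with tf.nn.leaky_relu, available since TF 1.4. A minimal migration sketch, assuming TF >= 1.4; the placeholder x is only for illustration:

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 64], name='x')

# Before (tensorpack layer, now deprecated), typically configured via argscope:
#     with argscope(LeakyReLU, alpha=0.2):
#         y = LeakyReLU(x)
# After: the plain TF op. alpha is passed explicitly and already defaults to 0.2,
# which is why most argscope(LeakyReLU, alpha=0.2) blocks in this diff simply disappear.
y = tf.nn.leaky_relu(x, alpha=0.2, name='lrelu')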
......@@ -374,6 +374,7 @@ def autodoc_skip_member(app, what, name, obj, skip, options):
'StepTensorPrinter',
'set_tower_func',
'TryResumeTraining',
'LeakyReLU',
'guided_relu', 'saliency_map', 'get_scalar_var',
'prediction_incorrect', 'huber_loss',
......
......@@ -23,10 +23,10 @@ The concept of tower is used mainly to support:
2. Automatically building the graph for inference, where a replicate is built under inference mode.
You'll specify a tower function when you use `TowerTrainer`.
If you use `ModelDesc`, the `build_graph` method is the tower function.
If you use `ModelDesc`, the `build_graph` method will be the tower function.
The function needs to follow some conventions:
1. It will always be called under a :class:`TowerContext`.
1. It will always be called under a `TowerContext`.
which will contain information about reuse, training/inference, scope name, etc.
2. It might get called multiple times for data-parallel training or inference.
3. To respect variable reuse, use `tf.get_variable` instead of
......
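
The tower-function conventions above (always called under a TowerContext, possibly called several times, variables created with tf.get_variable) are what make weight sharing across data-parallel replicas possible. A minimal, tensorpack-free sketch of the idea, not how the trainer literally builds the graph; shapes and placeholders are made up for illustration:

import tensorflow as tf

def tower_func(image):
    # A tower function builds one replicate of the graph for its input.
    # Use tf.get_variable (not tf.Variable) so that, when called once per tower
    # under a reusing variable scope, all replicates share the same weights.
    w = tf.get_variable('w', [784, 10], initializer=tf.random_normal_initializer(stddev=0.01))
    b = tf.get_variable('b', [10], initializer=tf.zeros_initializer())
    return tf.matmul(image, w) + b

inputs = [tf.placeholder(tf.float32, [None, 784]) for _ in range(2)]
for i, img in enumerate(inputs):
    with tf.variable_scope('model', reuse=(i > 0)):
        logits = tower_func(img)   # the second call reuses the first call's variables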
......@@ -54,8 +54,7 @@ class Model(DQNModel):
def _get_DQN_prediction(self, image):
""" image: [0,255]"""
image = image / 255.0
with argscope(Conv2D, nl=PReLU.symbolic_function, use_bias=True), \
argscope(LeakyReLU, alpha=0.01):
with argscope(Conv2D, nl=PReLU.symbolic_function, use_bias=True):
l = (LinearWrap(image)
# Nature architecture
.Conv2D('conv0', out_channel=32, kernel_shape=8, stride=4)
......@@ -71,7 +70,8 @@ class Model(DQNModel):
# .MaxPooling('pool2', 2)
# .Conv2D('conv3', out_channel=64, kernel_shape=3)
.FullyConnected('fc0', 512, nl=LeakyReLU)())
.FullyConnected('fc0', 512)
.tf.nn.leaky_relu(alpha=0.01)())
if self.method != 'Dueling':
Q = FullyConnected('fct', l, self.num_actions, nl=tf.identity)
else:
......
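
In the DQN hunk above, nl=LeakyReLU on the fc0 layer is replaced by a chained .tf.nn.leaky_relu(alpha=0.01) call. LinearWrap's .tf accessor (used by the new code itself) forwards to the corresponding TensorFlow function with the wrapped tensor as its first argument; a short sketch under that assumption, with a made-up input shape:

import tensorflow as tf
from tensorpack import LinearWrap

image = tf.placeholder(tf.float32, [None, 84, 84, 4])
l = (LinearWrap(image)
     .FullyConnected('fc0', 512)        # tensorpack layer; flattens the input itself
     .tf.nn.leaky_relu(alpha=0.01)())   # same as tf.nn.leaky_relu(current_tensor, alpha=0.01)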
......@@ -39,6 +39,8 @@ Accuracy:
With (W,A,G)=(1,2,6), 47.6% error
With (W,A,G)=(1,2,4), 58.4% error
Don't train with >4 GPUs because the batch size will be different.
Speed:
About 11 iteration/s on 4 P100s. (Each epoch is set to 10000 iterations)
Note that this code was written early without using NCHW format. You
......
......@@ -111,8 +111,7 @@ class Model(ModelDesc):
Returns:
learned filter as [B, k, k, 1]
"""
with argscope(LeakyReLU, alpha=0.2), \
argscope(FullyConnected, nl=LeakyReLU):
with argscope(FullyConnected, nl=tf.nn.leaky_relu):
net = FullyConnected('fc1', theta, 64)
net = FullyConnected('fc2', net, 128)
......
......@@ -54,21 +54,22 @@ class Model(GANModelDesc):
""" return a (b, 1) logits"""
yv = y
y = tf.reshape(y, [-1, 1, 1, 10])
with argscope(Conv2D, nl=tf.identity, kernel_shape=5, stride=2), \
argscope(LeakyReLU, alpha=0.2):
with argscope(Conv2D, nl=tf.identity, kernel_shape=5, stride=2):
l = (LinearWrap(imgs)
.ConcatWith(tf.tile(y, [1, 28, 28, 1]), 3)
.Conv2D('conv0', 11)
.LeakyReLU()
.tf.nn.leaky_relu()
.ConcatWith(tf.tile(y, [1, 14, 14, 1]), 3)
.Conv2D('conv1', 74)
.BatchNorm('bn1').LeakyReLU()
.BatchNorm('bn1')
.tf.nn.leaky_relu()
.apply(symbf.batch_flatten)
.ConcatWith(yv, 1)
.FullyConnected('fc1', 1024, nl=tf.identity)
.BatchNorm('bn2').LeakyReLU()
.BatchNorm('bn2')
.tf.nn.leaky_relu()
.ConcatWith(yv, 1)
.FullyConnected('fct', 1, nl=tf.identity)())
......
......@@ -37,7 +37,7 @@ def INReLU(x, name=None):
def INLReLU(x, name=None):
x = InstanceNorm('inorm', x)
return LeakyReLU(x, name=name)
return tf.nn.leaky_relu(x, alpha=0.2, name=name)
class Model(GANModelDesc):
......@@ -78,7 +78,7 @@ class Model(GANModelDesc):
def discriminator(self, img):
with argscope(Conv2D, nl=INLReLU, kernel_shape=4, stride=2):
l = (LinearWrap(img)
.Conv2D('conv0', NF, nl=LeakyReLU)
.Conv2D('conv0', NF, nl=tf.nn.leaky_relu)
.Conv2D('conv1', NF * 2)
.Conv2D('conv2', NF * 4)
.Conv2D('conv3', NF * 8, stride=1)
......@@ -103,8 +103,7 @@ class Model(GANModelDesc):
# use the initializers from torch
with argscope([Conv2D, Deconv2D], use_bias=False,
W_init=tf.random_normal_initializer(stddev=0.02)), \
argscope([Conv2D, Deconv2D, InstanceNorm], data_format='NCHW'), \
argscope(LeakyReLU, alpha=0.2):
argscope([Conv2D, Deconv2D, InstanceNorm], data_format='NCHW'):
with tf.variable_scope('gen'):
with tf.variable_scope('B'):
AB = self.generator(A)
......
......@@ -62,16 +62,18 @@ class Model(GANModelDesc):
def discriminator(self, imgs):
""" return a (b, 1) logits"""
nf = 64
with argscope(Conv2D, nl=tf.identity, kernel_shape=4, stride=2), \
argscope(LeakyReLU, alpha=0.2):
with argscope(Conv2D, nl=tf.identity, kernel_shape=4, stride=2):
l = (LinearWrap(imgs)
.Conv2D('conv0', nf, nl=LeakyReLU)
.Conv2D('conv0', nf, nl=tf.nn.leaky_relu)
.Conv2D('conv1', nf * 2)
.BatchNorm('bn1').LeakyReLU()
.BatchNorm('bn1')
.tf.nn.leaky_relu()
.Conv2D('conv2', nf * 4)
.BatchNorm('bn2').LeakyReLU()
.BatchNorm('bn2')
.tf.nn.leaky_relu()
.Conv2D('conv3', nf * 8)
.BatchNorm('bn3').LeakyReLU()
.BatchNorm('bn3')
.tf.nn.leaky_relu()
.FullyConnected('fct', 1, nl=tf.identity)())
return l
......
......@@ -37,7 +37,7 @@ NF = 64 # channel size
def BNLReLU(x, name=None):
x = BatchNorm('bn', x)
return LeakyReLU(x, name=name)
return tf.nn.leaky_relu(x, alpha=0.2, name=name)
class Model(GANModelDesc):
......@@ -52,7 +52,7 @@ class Model(GANModelDesc):
nl=BNLReLU, kernel_shape=4, stride=2), \
argscope(Deconv2D, nl=BNReLU):
l = (LinearWrap(img)
.Conv2D('conv0', NF, nl=LeakyReLU)
.Conv2D('conv0', NF, nl=tf.nn.leaky_relu)
.Conv2D('conv1', NF * 2)
.Conv2D('conv2', NF * 4)
.Conv2D('conv3', NF * 8)
......@@ -66,7 +66,7 @@ class Model(GANModelDesc):
@auto_reuse_variable_scope
def discriminator(self, img):
with argscope(Conv2D, nl=BNLReLU, kernel_shape=4, stride=2):
l = Conv2D('conv0', img, NF, nl=LeakyReLU)
l = Conv2D('conv0', img, NF, nl=tf.nn.leaky_relu)
relu1 = Conv2D('conv1', l, NF * 2)
relu2 = Conv2D('conv2', relu1, NF * 4)
relu3 = Conv2D('conv3', relu2, NF * 8)
......@@ -95,8 +95,7 @@ class Model(GANModelDesc):
W_init=tf.contrib.layers.variance_scaling_initializer(factor=0.333, uniform=True),
use_bias=False), \
argscope(BatchNorm, gamma_init=tf.random_uniform_initializer()), \
argscope([Conv2D, Deconv2D, BatchNorm], data_format='NCHW'), \
argscope(LeakyReLU, alpha=0.2):
argscope([Conv2D, Deconv2D, BatchNorm], data_format='NCHW'):
with tf.variable_scope('gen'):
with tf.variable_scope('B'):
AB = self.generator(A)
......
......@@ -42,7 +42,7 @@ NF = 64 # number of filter
def BNLReLU(x, name=None):
x = BatchNorm('bn', x)
return LeakyReLU(x, name=name)
return tf.nn.leaky_relu(x, alpha=0.2, name=name)
class Model(GANModelDesc):
......@@ -58,7 +58,7 @@ class Model(GANModelDesc):
argscope(Dropout, is_training=True):
# always use local stat for BN, and apply dropout even in testing
with argscope(Conv2D, kernel_shape=4, stride=2, nl=BNLReLU):
e1 = Conv2D('conv1', imgs, NF, nl=LeakyReLU)
e1 = Conv2D('conv1', imgs, NF, nl=tf.nn.leaky_relu)
e2 = Conv2D('conv2', e1, NF * 2)
e3 = Conv2D('conv3', e2, NF * 4)
e4 = Conv2D('conv4', e3, NF * 8)
......@@ -93,7 +93,7 @@ class Model(GANModelDesc):
l = tf.concat([inputs, outputs], 3)
with argscope(Conv2D, kernel_shape=4, stride=2, nl=BNLReLU):
l = (LinearWrap(l)
.Conv2D('conv0', NF, nl=LeakyReLU)
.Conv2D('conv0', NF, nl=tf.nn.leaky_relu)
.Conv2D('conv1', NF * 2)
.Conv2D('conv2', NF * 4)
.Conv2D('conv3', NF * 8, stride=1, padding='VALID')
......@@ -104,9 +104,7 @@ class Model(GANModelDesc):
input, output = inputs
input, output = input / 128.0 - 1, output / 128.0 - 1
with argscope([Conv2D, Deconv2D],
W_init=tf.truncated_normal_initializer(stddev=0.02)), \
argscope(LeakyReLU, alpha=0.2):
with argscope([Conv2D, Deconv2D], W_init=tf.truncated_normal_initializer(stddev=0.02)):
with tf.variable_scope('gen'):
fake_output = self.generator(input)
with tf.variable_scope('discrim'):
......
......@@ -28,16 +28,18 @@ class Model(DCGAN.Model):
@auto_reuse_variable_scope
def discriminator(self, imgs):
nf = 64
with argscope(Conv2D, nl=tf.identity, kernel_shape=4, stride=2), \
argscope(LeakyReLU, alpha=0.2):
with argscope(Conv2D, nl=tf.identity, kernel_shape=4, stride=2):
l = (LinearWrap(imgs)
.Conv2D('conv0', nf, nl=LeakyReLU)
.Conv2D('conv0', nf, nl=tf.nn.leaky_relu)
.Conv2D('conv1', nf * 2)
.LayerNorm('ln1').LeakyReLU()
.LayerNorm('ln1')
.tf.nn.leaky_relu()
.Conv2D('conv2', nf * 4)
.LayerNorm('ln2').LeakyReLU()
.LayerNorm('ln2')
.tf.nn.leaky_relu()
.Conv2D('conv3', nf * 8)
.LayerNorm('ln3').LeakyReLU()
.LayerNorm('ln3')
.tf.nn.leaky_relu()
.FullyConnected('fct', 1, nl=tf.identity)())
return tf.reshape(l, [-1])
......
......@@ -91,20 +91,22 @@ class Model(GANModelDesc):
@auto_reuse_variable_scope
def discriminator(self, imgs):
with argscope(Conv2D, nl=tf.identity, kernel_shape=4, stride=2), \
argscope(LeakyReLU, alpha=0.2):
with argscope(Conv2D, nl=tf.identity, kernel_shape=4, stride=2):
l = (LinearWrap(imgs)
.Conv2D('conv0', 64)
.LeakyReLU()
.tf.nn.leaky_relu()
.Conv2D('conv1', 128)
.BatchNorm('bn1').LeakyReLU()
.BatchNorm('bn1')
.tf.nn.leaky_relu()
.FullyConnected('fc1', 1024, nl=tf.identity)
.BatchNorm('bn2').LeakyReLU()())
.BatchNorm('bn2')
.tf.nn.leaky_relu()())
logits = FullyConnected('fct', l, 1, nl=tf.identity)
encoder = (LinearWrap(l)
.FullyConnected('fce1', 128, nl=tf.identity)
.BatchNorm('bne').LeakyReLU()
.BatchNorm('bne')
.tf.nn.leaky_relu()
.FullyConnected('fce-out', DIST_PARAM_DIM, nl=tf.identity)())
return logits, encoder
......
......@@ -85,21 +85,20 @@ class Model(GANModelDesc):
@auto_reuse_variable_scope
def discriminator(x):
with argscope(LeakyReLU, alpha=0.2):
with argscope(Conv2D, kernel_shape=3, stride=1, nl=LeakyReLU):
x = Conv2D('conv0', x, 32)
x = Conv2D('conv0b', x, 32, stride=2)
x = Conv2D('conv1', x, 64)
x = Conv2D('conv1b', x, 64, stride=2)
x = Conv2D('conv2', x, 128)
x = Conv2D('conv2b', x, 128, stride=2)
x = Conv2D('conv3', x, 256)
x = Conv2D('conv3b', x, 256, stride=2)
x = Conv2D('conv4', x, 512)
x = Conv2D('conv4b', x, 512, stride=2)
x = FullyConnected('fc0', x, 1024, nl=LeakyReLU)
x = FullyConnected('fc1', x, 1, nl=tf.identity)
with argscope(Conv2D, kernel_shape=3, stride=1, nl=tf.nn.leaky_relu):
x = Conv2D('conv0', x, 32)
x = Conv2D('conv0b', x, 32, stride=2)
x = Conv2D('conv1', x, 64)
x = Conv2D('conv1b', x, 64, stride=2)
x = Conv2D('conv2', x, 128)
x = Conv2D('conv2b', x, 128, stride=2)
x = Conv2D('conv3', x, 256)
x = Conv2D('conv3b', x, 256, stride=2)
x = Conv2D('conv4', x, 512)
x = Conv2D('conv4b', x, 512, stride=2)
x = FullyConnected('fc0', x, 1024, nl=tf.nn.leaky_relu)
x = FullyConnected('fc1', x, 1, nl=tf.identity)
return x
def additional_losses(a, b):
......
......@@ -7,8 +7,7 @@ import tensorflow as tf
from .common import layer_register, VariableHolder
from .batch_norm import BatchNorm
from ..tfutils.common import get_tf_version_number
from ..utils import logger
from ..utils.develop import deprecated
__all__ = ['Maxout', 'PReLU', 'LeakyReLU', 'BNReLU']
......@@ -63,6 +62,7 @@ def PReLU(x, init=0.001, name='output'):
@layer_register(use_scope=None)
@deprecated("Use tf.nn.leaky_relu in TF 1.4 instead!", "2018-03-30")
def LeakyReLU(x, alpha, name='output'):
"""
Leaky ReLU as in paper `Rectifier Nonlinearities Improve Neural Network Acoustic
......@@ -73,9 +73,6 @@ def LeakyReLU(x, alpha, name='output'):
x (tf.Tensor): input
alpha (float): the slope.
"""
# TODO
if get_tf_version_number() >= 1.4:
logger.warn("You are recommended to use tf.nn.leaky_relu available since TF 1.4 rather than models.LeakyReLU.")
return tf.maximum(x, alpha * x, name=name)
......
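
The final hunk keeps LeakyReLU but marks it with @deprecated("Use tf.nn.leaky_relu in TF 1.4 instead!", "2018-03-30") and drops the hand-rolled version check. I have not verified tensorpack's actual utils.develop.deprecated, so the following is only a hypothetical sketch of what such a decorator typically does (warn with the suggested replacement and removal date, then call through):

import functools
import warnings

def deprecated(text='', eos=''):
    # Hypothetical stand-in, not tensorpack's implementation.
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            msg = '{} is deprecated. {}'.format(func.__name__, text)
            if eos:
                msg += ' It will be removed after {}.'.format(eos)
            warnings.warn(msg, DeprecationWarning, stacklevel=2)
            return func(*args, **kwargs)
        return wrapper
    return decorator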