Commit 7da83a51 authored by Yuxin Wu

Deprecate LeakyReLU in favor of tf.nn.leaky_relu

parent 8411d8cd
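
This commit replaces tensorpack's LeakyReLU layer (and the argscope(LeakyReLU, alpha=...) blocks that configured it) with tf.nn.leaky_relu, available since TF 1.4. A minimal migration sketch, assuming TF >= 1.4; the placeholder x is only for illustration:

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 64], name='x')

# Before (tensorpack layer, now deprecated), typically configured via argscope:
#     with argscope(LeakyReLU, alpha=0.2):
#         y = LeakyReLU(x)
# After: the plain TF op. alpha is passed explicitly and already defaults to 0.2,
# which is why most argscope(LeakyReLU, alpha=0.2) blocks in this diff simply disappear.
y = tf.nn.leaky_relu(x, alpha=0.2, name='lrelu')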
......@@ -374,6 +374,7 @@ def autodoc_skip_member(app, what, name, obj, skip, options):
'StepTensorPrinter',
'set_tower_func',
'TryResumeTraining',
'LeakyReLU',
'guided_relu', 'saliency_map', 'get_scalar_var',
'prediction_incorrect', 'huber_loss',
......
......@@ -23,10 +23,10 @@ The concept of tower is used mainly to support:
2. Automatically building the graph for inference, where a replicate is built under inference mode.
You'll specify a tower function when you use `TowerTrainer`.
If you use `ModelDesc`, the `build_graph` method is the tower function.
If you use `ModelDesc`, the `build_graph` method will be the tower function.
The function needs to follow some conventions:
1. It will always be called under a :class:`TowerContext`.
1. It will always be called under a `TowerContext`.
which will contain information about reuse, training/inference, scope name, etc.
2. It might get called multiple times for data-parallel training or inference.
3. To respect variable reuse, use `tf.get_variable` instead of
......
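
The tower-function conventions above (always called under a TowerContext, possibly called several times, variables created with tf.get_variable) are what make weight sharing across data-parallel replicas possible. A minimal, tensorpack-free sketch of the idea, not how the trainer literally builds the graph; shapes and placeholders are made up for illustration:

import tensorflow as tf

def tower_func(image):
    # A tower function builds one replicate of the graph for its input.
    # Use tf.get_variable (not tf.Variable) so that, when called once per tower
    # under a reusing variable scope, all replicates share the same weights.
    w = tf.get_variable('w', [784, 10], initializer=tf.random_normal_initializer(stddev=0.01))
    b = tf.get_variable('b', [10], initializer=tf.zeros_initializer())
    return tf.matmul(image, w) + b

inputs = [tf.placeholder(tf.float32, [None, 784]) for _ in range(2)]
for i, img in enumerate(inputs):
    with tf.variable_scope('model', reuse=(i > 0)):
        logits = tower_func(img)   # the second call reuses the first call's variables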
......@@ -54,8 +54,7 @@ class Model(DQNModel):
def _get_DQN_prediction(self, image):
""" image: [0,255]"""
image = image / 255.0
with argscope(Conv2D, nl=PReLU.symbolic_function, use_bias=True), \
argscope(LeakyReLU, alpha=0.01):
with argscope(Conv2D, nl=PReLU.symbolic_function, use_bias=True):
l = (LinearWrap(image)
# Nature architecture
.Conv2D('conv0', out_channel=32, kernel_shape=8, stride=4)
......@@ -71,7 +70,8 @@ class Model(DQNModel):
# .MaxPooling('pool2', 2)
# .Conv2D('conv3', out_channel=64, kernel_shape=3)
.FullyConnected('fc0', 512, nl=LeakyReLU)())
.FullyConnected('fc0', 512)
.tf.nn.leaky_relu(alpha=0.01)())
if self.method != 'Dueling':
Q = FullyConnected('fct', l, self.num_actions, nl=tf.identity)
else:
......
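
In the DQN hunk above, nl=LeakyReLU on the fc0 layer is replaced by a chained .tf.nn.leaky_relu(alpha=0.01) call. LinearWrap's .tf accessor (used by the new code itself) forwards to the corresponding TensorFlow function with the wrapped tensor as its first argument; a short sketch under that assumption, with a made-up input shape:

import tensorflow as tf
from tensorpack import LinearWrap

image = tf.placeholder(tf.float32, [None, 84, 84, 4])
l = (LinearWrap(image)
     .FullyConnected('fc0', 512)        # tensorpack layer; flattens the input itself
     .tf.nn.leaky_relu(alpha=0.01)())   # same as tf.nn.leaky_relu(current_tensor, alpha=0.01)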
......@@ -39,6 +39,8 @@ Accuracy:
With (W,A,G)=(1,2,6), 47.6% error
With (W,A,G)=(1,2,4), 58.4% error
Don't train with >4 GPUs because the batch size will be different.
Speed:
About 11 iteration/s on 4 P100s. (Each epoch is set to 10000 iterations)
Note that this code was written early without using NCHW format. You
......
......@@ -111,8 +111,7 @@ class Model(ModelDesc):
Returns:
learned filter as [B, k, k, 1]
"""
with argscope(LeakyReLU, alpha=0.2), \
argscope(FullyConnected, nl=LeakyReLU):
with argscope(FullyConnected, nl=tf.nn.leaky_relu):
net = FullyConnected('fc1', theta, 64)
net = FullyConnected('fc2', net, 128)
......
......@@ -54,21 +54,22 @@ class Model(GANModelDesc):
""" return a (b, 1) logits"""
yv = y
y = tf.reshape(y, [-1, 1, 1, 10])
with argscope(Conv2D, nl=tf.identity, kernel_shape=5, stride=2), \
argscope(LeakyReLU, alpha=0.2):
with argscope(Conv2D, nl=tf.identity, kernel_shape=5, stride=2):
l = (LinearWrap(imgs)
.ConcatWith(tf.tile(y, [1, 28, 28, 1]), 3)
.Conv2D('conv0', 11)
.LeakyReLU()
.tf.nn.leaky_relu()
.ConcatWith(tf.tile(y, [1, 14, 14, 1]), 3)
.Conv2D('conv1', 74)
.BatchNorm('bn1').LeakyReLU()
.BatchNorm('bn1')
.tf.nn.leaky_relu()
.apply(symbf.batch_flatten)
.ConcatWith(yv, 1)
.FullyConnected('fc1', 1024, nl=tf.identity)
.BatchNorm('bn2').LeakyReLU()
.BatchNorm('bn2')
.tf.nn.leaky_relu()
.ConcatWith(yv, 1)
.FullyConnected('fct', 1, nl=tf.identity)())
......
......@@ -37,7 +37,7 @@ def INReLU(x, name=None):
def INLReLU(x, name=None):
x = InstanceNorm('inorm', x)
return LeakyReLU(x, name=name)
return tf.nn.leaky_relu(x, alpha=0.2, name=name)
class Model(GANModelDesc):
......@@ -78,7 +78,7 @@ class Model(GANModelDesc):
def discriminator(self, img):
with argscope(Conv2D, nl=INLReLU, kernel_shape=4, stride=2):
l = (LinearWrap(img)
.Conv2D('conv0', NF, nl=LeakyReLU)
.Conv2D('conv0', NF, nl=tf.nn.leaky_relu)
.Conv2D('conv1', NF * 2)
.Conv2D('conv2', NF * 4)
.Conv2D('conv3', NF * 8, stride=1)
......@@ -103,8 +103,7 @@ class Model(GANModelDesc):
# use the initializers from torch
with argscope([Conv2D, Deconv2D], use_bias=False,
W_init=tf.random_normal_initializer(stddev=0.02)), \
argscope([Conv2D, Deconv2D, InstanceNorm], data_format='NCHW'), \
argscope(LeakyReLU, alpha=0.2):
argscope([Conv2D, Deconv2D, InstanceNorm], data_format='NCHW'):
with tf.variable_scope('gen'):
with tf.variable_scope('B'):
AB = self.generator(A)
......
......@@ -62,16 +62,18 @@ class Model(GANModelDesc):
def discriminator(self, imgs):
""" return a (b, 1) logits"""
nf = 64
with argscope(Conv2D, nl=tf.identity, kernel_shape=4, stride=2), \
argscope(LeakyReLU, alpha=0.2):
with argscope(Conv2D, nl=tf.identity, kernel_shape=4, stride=2):
l = (LinearWrap(imgs)
.Conv2D('conv0', nf, nl=LeakyReLU)
.Conv2D('conv0', nf, nl=tf.nn.leaky_relu)
.Conv2D('conv1', nf * 2)
.BatchNorm('bn1').LeakyReLU()
.BatchNorm('bn1')
.tf.nn.leaky_relu()
.Conv2D('conv2', nf * 4)
.BatchNorm('bn2').LeakyReLU()
.BatchNorm('bn2')
.tf.nn.leaky_relu()
.Conv2D('conv3', nf * 8)
.BatchNorm('bn3').LeakyReLU()
.BatchNorm('bn3')
.tf.nn.leaky_relu()
.FullyConnected('fct', 1, nl=tf.identity)())
return l
......
......@@ -37,7 +37,7 @@ NF = 64 # channel size
def BNLReLU(x, name=None):
x = BatchNorm('bn', x)
return LeakyReLU(x, name=name)
return tf.nn.leaky_relu(x, alpha=0.2, name=name)
class Model(GANModelDesc):
......@@ -52,7 +52,7 @@ class Model(GANModelDesc):
nl=BNLReLU, kernel_shape=4, stride=2), \
argscope(Deconv2D, nl=BNReLU):
l = (LinearWrap(img)
.Conv2D('conv0', NF, nl=LeakyReLU)
.Conv2D('conv0', NF, nl=tf.nn.leaky_relu)
.Conv2D('conv1', NF * 2)
.Conv2D('conv2', NF * 4)
.Conv2D('conv3', NF * 8)
......@@ -66,7 +66,7 @@ class Model(GANModelDesc):
@auto_reuse_variable_scope
def discriminator(self, img):
with argscope(Conv2D, nl=BNLReLU, kernel_shape=4, stride=2):
l = Conv2D('conv0', img, NF, nl=LeakyReLU)
l = Conv2D('conv0', img, NF, nl=tf.nn.leaky_relu)
relu1 = Conv2D('conv1', l, NF * 2)
relu2 = Conv2D('conv2', relu1, NF * 4)
relu3 = Conv2D('conv3', relu2, NF * 8)
......@@ -95,8 +95,7 @@ class Model(GANModelDesc):
W_init=tf.contrib.layers.variance_scaling_initializer(factor=0.333, uniform=True),
use_bias=False), \
argscope(BatchNorm, gamma_init=tf.random_uniform_initializer()), \
argscope([Conv2D, Deconv2D, BatchNorm], data_format='NCHW'), \
argscope(LeakyReLU, alpha=0.2):
argscope([Conv2D, Deconv2D, BatchNorm], data_format='NCHW'):
with tf.variable_scope('gen'):
with tf.variable_scope('B'):
AB = self.generator(A)
......
......@@ -42,7 +42,7 @@ NF = 64 # number of filter
def BNLReLU(x, name=None):
x = BatchNorm('bn', x)
return LeakyReLU(x, name=name)
return tf.nn.leaky_relu(x, alpha=0.2, name=name)
class Model(GANModelDesc):
......@@ -58,7 +58,7 @@ class Model(GANModelDesc):
argscope(Dropout, is_training=True):
# always use local stat for BN, and apply dropout even in testing
with argscope(Conv2D, kernel_shape=4, stride=2, nl=BNLReLU):
e1 = Conv2D('conv1', imgs, NF, nl=LeakyReLU)
e1 = Conv2D('conv1', imgs, NF, nl=tf.nn.leaky_relu)
e2 = Conv2D('conv2', e1, NF * 2)
e3 = Conv2D('conv3', e2, NF * 4)
e4 = Conv2D('conv4', e3, NF * 8)
......@@ -93,7 +93,7 @@ class Model(GANModelDesc):
l = tf.concat([inputs, outputs], 3)
with argscope(Conv2D, kernel_shape=4, stride=2, nl=BNLReLU):
l = (LinearWrap(l)
.Conv2D('conv0', NF, nl=LeakyReLU)
.Conv2D('conv0', NF, nl=tf.nn.leaky_relu)
.Conv2D('conv1', NF * 2)
.Conv2D('conv2', NF * 4)
.Conv2D('conv3', NF * 8, stride=1, padding='VALID')
......@@ -104,9 +104,7 @@ class Model(GANModelDesc):
input, output = inputs
input, output = input / 128.0 - 1, output / 128.0 - 1
with argscope([Conv2D, Deconv2D],
W_init=tf.truncated_normal_initializer(stddev=0.02)), \
argscope(LeakyReLU, alpha=0.2):
with argscope([Conv2D, Deconv2D], W_init=tf.truncated_normal_initializer(stddev=0.02)):
with tf.variable_scope('gen'):
fake_output = self.generator(input)
with tf.variable_scope('discrim'):
......
......@@ -28,16 +28,18 @@ class Model(DCGAN.Model):
@auto_reuse_variable_scope
def discriminator(self, imgs):
nf = 64
with argscope(Conv2D, nl=tf.identity, kernel_shape=4, stride=2), \
argscope(LeakyReLU, alpha=0.2):
with argscope(Conv2D, nl=tf.identity, kernel_shape=4, stride=2):
l = (LinearWrap(imgs)
.Conv2D('conv0', nf, nl=LeakyReLU)
.Conv2D('conv0', nf, nl=tf.nn.leaky_relu)
.Conv2D('conv1', nf * 2)
.LayerNorm('ln1').LeakyReLU()
.LayerNorm('ln1')
.tf.nn.leaky_relu()
.Conv2D('conv2', nf * 4)
.LayerNorm('ln2').LeakyReLU()
.LayerNorm('ln2')
.tf.nn.leaky_relu()
.Conv2D('conv3', nf * 8)
.LayerNorm('ln3').LeakyReLU()
.LayerNorm('ln3')
.tf.nn.leaky_relu()
.FullyConnected('fct', 1, nl=tf.identity)())
return tf.reshape(l, [-1])
......
......@@ -91,20 +91,22 @@ class Model(GANModelDesc):
@auto_reuse_variable_scope
def discriminator(self, imgs):
with argscope(Conv2D, nl=tf.identity, kernel_shape=4, stride=2), \
argscope(LeakyReLU, alpha=0.2):
with argscope(Conv2D, nl=tf.identity, kernel_shape=4, stride=2):
l = (LinearWrap(imgs)
.Conv2D('conv0', 64)
.LeakyReLU()
.tf.nn.leaky_relu()
.Conv2D('conv1', 128)
.BatchNorm('bn1').LeakyReLU()
.BatchNorm('bn1')
.tf.nn.leaky_relu()
.FullyConnected('fc1', 1024, nl=tf.identity)
.BatchNorm('bn2').LeakyReLU()())
.BatchNorm('bn2')
.tf.nn.leaky_relu()())
logits = FullyConnected('fct', l, 1, nl=tf.identity)
encoder = (LinearWrap(l)
.FullyConnected('fce1', 128, nl=tf.identity)
.BatchNorm('bne').LeakyReLU()
.BatchNorm('bne')
.tf.nn.leaky_relu()
.FullyConnected('fce-out', DIST_PARAM_DIM, nl=tf.identity)())
return logits, encoder
......
......@@ -85,21 +85,20 @@ class Model(GANModelDesc):
@auto_reuse_variable_scope
def discriminator(x):
with argscope(LeakyReLU, alpha=0.2):
with argscope(Conv2D, kernel_shape=3, stride=1, nl=LeakyReLU):
x = Conv2D('conv0', x, 32)
x = Conv2D('conv0b', x, 32, stride=2)
x = Conv2D('conv1', x, 64)
x = Conv2D('conv1b', x, 64, stride=2)
x = Conv2D('conv2', x, 128)
x = Conv2D('conv2b', x, 128, stride=2)
x = Conv2D('conv3', x, 256)
x = Conv2D('conv3b', x, 256, stride=2)
x = Conv2D('conv4', x, 512)
x = Conv2D('conv4b', x, 512, stride=2)
x = FullyConnected('fc0', x, 1024, nl=LeakyReLU)
x = FullyConnected('fc1', x, 1, nl=tf.identity)
with argscope(Conv2D, kernel_shape=3, stride=1, nl=tf.nn.leaky_relu):
x = Conv2D('conv0', x, 32)
x = Conv2D('conv0b', x, 32, stride=2)
x = Conv2D('conv1', x, 64)
x = Conv2D('conv1b', x, 64, stride=2)
x = Conv2D('conv2', x, 128)
x = Conv2D('conv2b', x, 128, stride=2)
x = Conv2D('conv3', x, 256)
x = Conv2D('conv3b', x, 256, stride=2)
x = Conv2D('conv4', x, 512)
x = Conv2D('conv4b', x, 512, stride=2)
x = FullyConnected('fc0', x, 1024, nl=tf.nn.leaky_relu)
x = FullyConnected('fc1', x, 1, nl=tf.identity)
return x
def additional_losses(a, b):
......
......@@ -7,8 +7,7 @@ import tensorflow as tf
from .common import layer_register, VariableHolder
from .batch_norm import BatchNorm
from ..tfutils.common import get_tf_version_number
from ..utils import logger
from ..utils.develop import deprecated
__all__ = ['Maxout', 'PReLU', 'LeakyReLU', 'BNReLU']
......@@ -63,6 +62,7 @@ def PReLU(x, init=0.001, name='output'):
@layer_register(use_scope=None)
@deprecated("Use tf.nn.leaky_relu in TF 1.4 instead!", "2018-03-30")
def LeakyReLU(x, alpha, name='output'):
"""
Leaky ReLU as in paper `Rectifier Nonlinearities Improve Neural Network Acoustic
......@@ -73,9 +73,6 @@ def LeakyReLU(x, alpha, name='output'):
x (tf.Tensor): input
alpha (float): the slope.
"""
# TODO
if get_tf_version_number() >= 1.4:
logger.warn("You are recommended to use tf.nn.leaky_relu available since TF 1.4 rather than models.LeakyReLU.")
return tf.maximum(x, alpha * x, name=name)
......
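
The final hunk keeps LeakyReLU but marks it with @deprecated("Use tf.nn.leaky_relu in TF 1.4 instead!", "2018-03-30") and drops the hand-rolled version check. I have not verified tensorpack's actual utils.develop.deprecated, so the following is only a hypothetical sketch of what such a decorator typically does (warn with the suggested replacement and removal date, then call through):

import functools
import warnings

def deprecated(text='', eos=''):
    # Hypothetical stand-in, not tensorpack's implementation.
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            msg = '{} is deprecated. {}'.format(func.__name__, text)
            if eos:
                msg += ' It will be removed after {}.'.format(eos)
            warnings.warn(msg, DeprecationWarning, stacklevel=2)
            return func(*args, **kwargs)
        return wrapper
    return decorator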