Commit 7da83a51 authored by Yuxin Wu

Deprecate LeakyReLU in favor of tf.nn.leaky_relu

parent 8411d8cd
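At a glance, the change is mechanical: wherever the examples used the tensorpack `LeakyReLU` layer (with its slope configured through `argscope`), they now call `tf.nn.leaky_relu`, which TensorFlow provides since 1.4. A minimal before/after sketch (illustrative, not taken verbatim from any one file in this commit):

```python
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 64])

# Before (tensorpack layer, now deprecated):
#   y = LeakyReLU(x, alpha=0.2)
# After (built into TensorFlow since 1.4; alpha defaults to 0.2):
y = tf.nn.leaky_relu(x, alpha=0.2)
```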
@@ -374,6 +374,7 @@ def autodoc_skip_member(app, what, name, obj, skip, options):
         'StepTensorPrinter',
         'set_tower_func',
         'TryResumeTraining',
+        'LeakyReLU',
         'guided_relu', 'saliency_map', 'get_scalar_var',
         'prediction_incorrect', 'huber_loss',
...
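For context, adding a name to this list makes the Sphinx `autodoc-skip-member` handler drop it from the generated docs; the hook is wired up roughly like this (a sketch of the standard Sphinx pattern, not the repository's exact conf.py):

```python
# conf.py (sketch)
_SKIPPED_NAMES = {'LeakyReLU', 'TryResumeTraining'}

def autodoc_skip_member(app, what, name, obj, skip, options):
    # Returning True tells autodoc to skip documenting this member.
    if name in _SKIPPED_NAMES:
        return True
    return skip

def setup(app):
    app.connect('autodoc-skip-member', autodoc_skip_member)
```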
@@ -23,10 +23,10 @@ The concept of tower is used mainly to support:
 2. Automatically building the graph for inference, where a replicate is built under inference mode.
 You'll specify a tower function when you use `TowerTrainer`.
-If you use `ModelDesc`, the `build_graph` method is the tower function.
+If you use `ModelDesc`, the `build_graph` method will be the tower function.
 The function needs to follow some conventions:
-1. It will always be called under a :class:`TowerContext`.
+1. It will always be called under a `TowerContext`.
    which will contain information about reuse, training/inference, scope name, etc.
 2. It might get called multiple times for data-parallel training or inference.
 3. To respect variable reuse, use `tf.get_variable` instead of
...
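For reference, a hedged sketch of a function following these conventions (the layer names and shapes here are illustrative only, not from the repository):

```python
import tensorflow as tf

def tower_func(image, label):
    # May be called several times (once per GPU tower and again for
    # inference), always under a TowerContext, so create variables with
    # tf.get_variable to let the context handle reuse.
    W = tf.get_variable('fc/W', [784, 10],
                        initializer=tf.random_normal_initializer(stddev=0.01))
    b = tf.get_variable('fc/b', [10], initializer=tf.zeros_initializer())
    logits = tf.matmul(image, W) + b
    cost = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(labels=label, logits=logits),
        name='cost')
    return cost
```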
@@ -54,8 +54,7 @@ class Model(DQNModel):
     def _get_DQN_prediction(self, image):
         """ image: [0,255]"""
         image = image / 255.0
-        with argscope(Conv2D, nl=PReLU.symbolic_function, use_bias=True), \
-                argscope(LeakyReLU, alpha=0.01):
+        with argscope(Conv2D, nl=PReLU.symbolic_function, use_bias=True):
             l = (LinearWrap(image)
                  # Nature architecture
                  .Conv2D('conv0', out_channel=32, kernel_shape=8, stride=4)
@@ -71,7 +70,8 @@ class Model(DQNModel):
                  # .MaxPooling('pool2', 2)
                  # .Conv2D('conv3', out_channel=64, kernel_shape=3)
-                 .FullyConnected('fc0', 512, nl=LeakyReLU)())
+                 .FullyConnected('fc0', 512)
+                 .tf.nn.leaky_relu(alpha=0.01)())
         if self.method != 'Dueling':
             Q = FullyConnected('fct', l, self.num_actions, nl=tf.identity)
         else:
...
@@ -39,6 +39,8 @@ Accuracy:
     With (W,A,G)=(1,2,6), 47.6% error
     With (W,A,G)=(1,2,4), 58.4% error
+    Don't train with >4 GPUs because the batch size will be different.
 Speed:
     About 11 iteration/s on 4 P100s. (Each epoch is set to 10000 iterations)
     Note that this code was written early without using NCHW format. You
...
@@ -111,8 +111,7 @@ class Model(ModelDesc):
         Returns:
             learned filter as [B, k, k, 1]
         """
-        with argscope(LeakyReLU, alpha=0.2), \
-                argscope(FullyConnected, nl=LeakyReLU):
+        with argscope(FullyConnected, nl=tf.nn.leaky_relu):
             net = FullyConnected('fc1', theta, 64)
             net = FullyConnected('fc2', net, 128)
...
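A side note on hunks like the one above: `tf.nn.leaky_relu` defaults to `alpha=0.2`, so dropping the explicit `argscope(LeakyReLU, alpha=0.2)` keeps the same slope. If a different slope is needed where a plain callable is expected (e.g. as an `nl=` argument), a partial application works; a small sketch under that assumption:

```python
import functools
import tensorflow as tf

# A leaky ReLU with a non-default slope, usable as a plain `nl=` callable.
leaky_relu_01 = functools.partial(tf.nn.leaky_relu, alpha=0.1)

x = tf.placeholder(tf.float32, [None, 128])
y = leaky_relu_01(x)  # same as tf.nn.leaky_relu(x, alpha=0.1)
```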
@@ -54,21 +54,22 @@ class Model(GANModelDesc):
         """ return a (b, 1) logits"""
         yv = y
         y = tf.reshape(y, [-1, 1, 1, 10])
-        with argscope(Conv2D, nl=tf.identity, kernel_shape=5, stride=2), \
-                argscope(LeakyReLU, alpha=0.2):
+        with argscope(Conv2D, nl=tf.identity, kernel_shape=5, stride=2):
             l = (LinearWrap(imgs)
                  .ConcatWith(tf.tile(y, [1, 28, 28, 1]), 3)
                  .Conv2D('conv0', 11)
-                 .LeakyReLU()
+                 .tf.nn.leaky_relu()
                  .ConcatWith(tf.tile(y, [1, 14, 14, 1]), 3)
                  .Conv2D('conv1', 74)
-                 .BatchNorm('bn1').LeakyReLU()
+                 .BatchNorm('bn1')
+                 .tf.nn.leaky_relu()
                  .apply(symbf.batch_flatten)
                  .ConcatWith(yv, 1)
                  .FullyConnected('fc1', 1024, nl=tf.identity)
-                 .BatchNorm('bn2').LeakyReLU()
+                 .BatchNorm('bn2')
+                 .tf.nn.leaky_relu()
                  .ConcatWith(yv, 1)
                  .FullyConnected('fct', 1, nl=tf.identity)())
...
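The chained `.tf.nn.leaky_relu()` calls above rely on `LinearWrap` forwarding attribute access into the `tf` namespace; the same hunk also uses `.apply(...)`, which is the more explicit spelling. A short sketch of the equivalence (illustrative only; it assumes the tensorpack version and `nl=`/`kernel_shape=` API used at the time of this commit):

```python
import tensorflow as tf
from tensorpack import LinearWrap, Conv2D, BatchNorm

def discriminator_head(imgs):
    # Lines (a) and (b) apply the same op to the running tensor:
    # (a) attribute-style forwarding into the tf namespace,
    # (b) explicit .apply(), which passes the tensor as the first argument.
    return (LinearWrap(imgs)
            .Conv2D('conv0', 64, kernel_shape=4, stride=2, nl=tf.identity)
            .tf.nn.leaky_relu()                      # (a)
            .Conv2D('conv1', 128, kernel_shape=4, stride=2, nl=tf.identity)
            .BatchNorm('bn1')
            .apply(tf.nn.leaky_relu, alpha=0.2)())   # (b)
```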
@@ -37,7 +37,7 @@ def INReLU(x, name=None):
 def INLReLU(x, name=None):
     x = InstanceNorm('inorm', x)
-    return LeakyReLU(x, name=name)
+    return tf.nn.leaky_relu(x, alpha=0.2, name=name)

 class Model(GANModelDesc):
@@ -78,7 +78,7 @@ class Model(GANModelDesc):
     def discriminator(self, img):
         with argscope(Conv2D, nl=INLReLU, kernel_shape=4, stride=2):
             l = (LinearWrap(img)
-                 .Conv2D('conv0', NF, nl=LeakyReLU)
+                 .Conv2D('conv0', NF, nl=tf.nn.leaky_relu)
                  .Conv2D('conv1', NF * 2)
                  .Conv2D('conv2', NF * 4)
                  .Conv2D('conv3', NF * 8, stride=1)
@@ -103,8 +103,7 @@ class Model(GANModelDesc):
         # use the initializers from torch
         with argscope([Conv2D, Deconv2D], use_bias=False,
                       W_init=tf.random_normal_initializer(stddev=0.02)), \
-                argscope([Conv2D, Deconv2D, InstanceNorm], data_format='NCHW'), \
-                argscope(LeakyReLU, alpha=0.2):
+                argscope([Conv2D, Deconv2D, InstanceNorm], data_format='NCHW'):
             with tf.variable_scope('gen'):
                 with tf.variable_scope('B'):
                     AB = self.generator(A)
...
@@ -62,16 +62,18 @@ class Model(GANModelDesc):
     def discriminator(self, imgs):
         """ return a (b, 1) logits"""
         nf = 64
-        with argscope(Conv2D, nl=tf.identity, kernel_shape=4, stride=2), \
-                argscope(LeakyReLU, alpha=0.2):
+        with argscope(Conv2D, nl=tf.identity, kernel_shape=4, stride=2):
             l = (LinearWrap(imgs)
-                 .Conv2D('conv0', nf, nl=LeakyReLU)
+                 .Conv2D('conv0', nf, nl=tf.nn.leaky_relu)
                  .Conv2D('conv1', nf * 2)
-                 .BatchNorm('bn1').LeakyReLU()
+                 .BatchNorm('bn1')
+                 .tf.nn.leaky_relu()
                  .Conv2D('conv2', nf * 4)
-                 .BatchNorm('bn2').LeakyReLU()
+                 .BatchNorm('bn2')
+                 .tf.nn.leaky_relu()
                  .Conv2D('conv3', nf * 8)
-                 .BatchNorm('bn3').LeakyReLU()
+                 .BatchNorm('bn3')
+                 .tf.nn.leaky_relu()
                  .FullyConnected('fct', 1, nl=tf.identity)())
         return l
...
@@ -37,7 +37,7 @@ NF = 64  # channel size
 def BNLReLU(x, name=None):
     x = BatchNorm('bn', x)
-    return LeakyReLU(x, name=name)
+    return tf.nn.leaky_relu(x, alpha=0.2, name=name)

 class Model(GANModelDesc):
@@ -52,7 +52,7 @@ class Model(GANModelDesc):
                      nl=BNLReLU, kernel_shape=4, stride=2), \
                 argscope(Deconv2D, nl=BNReLU):
             l = (LinearWrap(img)
-                 .Conv2D('conv0', NF, nl=LeakyReLU)
+                 .Conv2D('conv0', NF, nl=tf.nn.leaky_relu)
                  .Conv2D('conv1', NF * 2)
                  .Conv2D('conv2', NF * 4)
                  .Conv2D('conv3', NF * 8)
@@ -66,7 +66,7 @@ class Model(GANModelDesc):
     @auto_reuse_variable_scope
     def discriminator(self, img):
         with argscope(Conv2D, nl=BNLReLU, kernel_shape=4, stride=2):
-            l = Conv2D('conv0', img, NF, nl=LeakyReLU)
+            l = Conv2D('conv0', img, NF, nl=tf.nn.leaky_relu)
             relu1 = Conv2D('conv1', l, NF * 2)
             relu2 = Conv2D('conv2', relu1, NF * 4)
             relu3 = Conv2D('conv3', relu2, NF * 8)
@@ -95,8 +95,7 @@ class Model(GANModelDesc):
                      W_init=tf.contrib.layers.variance_scaling_initializer(factor=0.333, uniform=True),
                      use_bias=False), \
                 argscope(BatchNorm, gamma_init=tf.random_uniform_initializer()), \
-                argscope([Conv2D, Deconv2D, BatchNorm], data_format='NCHW'), \
-                argscope(LeakyReLU, alpha=0.2):
+                argscope([Conv2D, Deconv2D, BatchNorm], data_format='NCHW'):
             with tf.variable_scope('gen'):
                 with tf.variable_scope('B'):
                     AB = self.generator(A)
...
@@ -42,7 +42,7 @@ NF = 64  # number of filter
 def BNLReLU(x, name=None):
     x = BatchNorm('bn', x)
-    return LeakyReLU(x, name=name)
+    return tf.nn.leaky_relu(x, alpha=0.2, name=name)

 class Model(GANModelDesc):
@@ -58,7 +58,7 @@ class Model(GANModelDesc):
                 argscope(Dropout, is_training=True):
             # always use local stat for BN, and apply dropout even in testing
             with argscope(Conv2D, kernel_shape=4, stride=2, nl=BNLReLU):
-                e1 = Conv2D('conv1', imgs, NF, nl=LeakyReLU)
+                e1 = Conv2D('conv1', imgs, NF, nl=tf.nn.leaky_relu)
                 e2 = Conv2D('conv2', e1, NF * 2)
                 e3 = Conv2D('conv3', e2, NF * 4)
                 e4 = Conv2D('conv4', e3, NF * 8)
@@ -93,7 +93,7 @@ class Model(GANModelDesc):
         l = tf.concat([inputs, outputs], 3)
         with argscope(Conv2D, kernel_shape=4, stride=2, nl=BNLReLU):
             l = (LinearWrap(l)
-                 .Conv2D('conv0', NF, nl=LeakyReLU)
+                 .Conv2D('conv0', NF, nl=tf.nn.leaky_relu)
                  .Conv2D('conv1', NF * 2)
                  .Conv2D('conv2', NF * 4)
                  .Conv2D('conv3', NF * 8, stride=1, padding='VALID')
@@ -104,9 +104,7 @@ class Model(GANModelDesc):
         input, output = inputs
         input, output = input / 128.0 - 1, output / 128.0 - 1
-        with argscope([Conv2D, Deconv2D],
-                      W_init=tf.truncated_normal_initializer(stddev=0.02)), \
-                argscope(LeakyReLU, alpha=0.2):
+        with argscope([Conv2D, Deconv2D], W_init=tf.truncated_normal_initializer(stddev=0.02)):
             with tf.variable_scope('gen'):
                 fake_output = self.generator(input)
             with tf.variable_scope('discrim'):
...
@@ -28,16 +28,18 @@ class Model(DCGAN.Model):
     @auto_reuse_variable_scope
     def discriminator(self, imgs):
         nf = 64
-        with argscope(Conv2D, nl=tf.identity, kernel_shape=4, stride=2), \
-                argscope(LeakyReLU, alpha=0.2):
+        with argscope(Conv2D, nl=tf.identity, kernel_shape=4, stride=2):
             l = (LinearWrap(imgs)
-                 .Conv2D('conv0', nf, nl=LeakyReLU)
+                 .Conv2D('conv0', nf, nl=tf.nn.leaky_relu)
                  .Conv2D('conv1', nf * 2)
-                 .LayerNorm('ln1').LeakyReLU()
+                 .LayerNorm('ln1')
+                 .tf.nn.leaky_relu()
                  .Conv2D('conv2', nf * 4)
-                 .LayerNorm('ln2').LeakyReLU()
+                 .LayerNorm('ln2')
+                 .tf.nn.leaky_relu()
                  .Conv2D('conv3', nf * 8)
-                 .LayerNorm('ln3').LeakyReLU()
+                 .LayerNorm('ln3')
+                 .tf.nn.leaky_relu()
                  .FullyConnected('fct', 1, nl=tf.identity)())
         return tf.reshape(l, [-1])
...
@@ -91,20 +91,22 @@ class Model(GANModelDesc):
     @auto_reuse_variable_scope
     def discriminator(self, imgs):
-        with argscope(Conv2D, nl=tf.identity, kernel_shape=4, stride=2), \
-                argscope(LeakyReLU, alpha=0.2):
+        with argscope(Conv2D, nl=tf.identity, kernel_shape=4, stride=2):
             l = (LinearWrap(imgs)
                  .Conv2D('conv0', 64)
-                 .LeakyReLU()
+                 .tf.nn.leaky_relu()
                  .Conv2D('conv1', 128)
-                 .BatchNorm('bn1').LeakyReLU()
+                 .BatchNorm('bn1')
+                 .tf.nn.leaky_relu()
                  .FullyConnected('fc1', 1024, nl=tf.identity)
-                 .BatchNorm('bn2').LeakyReLU()())
+                 .BatchNorm('bn2')
+                 .tf.nn.leaky_relu()())
             logits = FullyConnected('fct', l, 1, nl=tf.identity)
             encoder = (LinearWrap(l)
                        .FullyConnected('fce1', 128, nl=tf.identity)
-                       .BatchNorm('bne').LeakyReLU()
+                       .BatchNorm('bne')
+                       .tf.nn.leaky_relu()
                        .FullyConnected('fce-out', DIST_PARAM_DIM, nl=tf.identity)())
         return logits, encoder
...
@@ -85,8 +85,7 @@ class Model(GANModelDesc):
     @auto_reuse_variable_scope
     def discriminator(x):
-        with argscope(LeakyReLU, alpha=0.2):
-            with argscope(Conv2D, kernel_shape=3, stride=1, nl=LeakyReLU):
+        with argscope(Conv2D, kernel_shape=3, stride=1, nl=tf.nn.leaky_relu):
             x = Conv2D('conv0', x, 32)
             x = Conv2D('conv0b', x, 32, stride=2)
             x = Conv2D('conv1', x, 64)
@@ -98,7 +97,7 @@ class Model(GANModelDesc):
             x = Conv2D('conv4', x, 512)
             x = Conv2D('conv4b', x, 512, stride=2)
-            x = FullyConnected('fc0', x, 1024, nl=LeakyReLU)
+            x = FullyConnected('fc0', x, 1024, nl=tf.nn.leaky_relu)
             x = FullyConnected('fc1', x, 1, nl=tf.identity)
             return x
...
@@ -7,8 +7,7 @@ import tensorflow as tf
 from .common import layer_register, VariableHolder
 from .batch_norm import BatchNorm
-from ..tfutils.common import get_tf_version_number
-from ..utils import logger
+from ..utils.develop import deprecated

 __all__ = ['Maxout', 'PReLU', 'LeakyReLU', 'BNReLU']
@@ -63,6 +62,7 @@ def PReLU(x, init=0.001, name='output'):
 @layer_register(use_scope=None)
+@deprecated("Use tf.nn.leaky_relu in TF 1.4 instead!", "2018-03-30")
 def LeakyReLU(x, alpha, name='output'):
     """
     Leaky ReLU as in paper `Rectifier Nonlinearities Improve Neural Network Acoustic
@@ -73,9 +73,6 @@ def LeakyReLU(x, alpha, name='output'):
         x (tf.Tensor): input
         alpha (float): the slope.
     """
-    # TODO
-    if get_tf_version_number() >= 1.4:
-        logger.warn("You are recommended to use tf.nn.leaky_relu available since TF 1.4 rather than models.LeakyReLU.")
     return tf.maximum(x, alpha * x, name=name)
...
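The `deprecated(...)` decorator imported here from `..utils.develop` takes a message and a deprecation date and warns when the wrapped function is used. A minimal sketch of that behaviour (an illustration of the pattern, not tensorpack's actual implementation):

```python
import functools
import warnings

def deprecated(text='', eos=''):
    """Mark a function as deprecated and warn whenever it is called."""
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            msg = '{} is deprecated. {}'.format(func.__name__, text)
            if eos:
                msg += ' It will be removed after {}.'.format(eos)
            warnings.warn(msg, DeprecationWarning, stacklevel=2)
            return func(*args, **kwargs)
        return wrapper
    return decorator

@deprecated("Use tf.nn.leaky_relu in TF 1.4 instead!", "2018-03-30")
def my_old_fn(x):
    return x
```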