Commit 6eb0bebe authored by Yuxin Wu

warning about default relu

parent dc59ad5f
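
This commit stops relying on the implicit `tf.nn.relu` default in `Conv2D` and `FullyConnected`: both layers now take `nl=None`, log a `[DEPRECATED]` warning when no nonlinearity is given, and callers are expected to state `nl` explicitly, typically once per block via `argscope`. A minimal sketch of the migration, assuming the usual `from tensorpack import *` and an `image` tensor as in the examples below:

    # Before: relies on the implicit default nl=tf.nn.relu
    # (after this commit, every such call logs a [DEPRECATED] warning).
    l = Conv2D('conv1', image, out_channel=64, kernel_shape=3)

    # After: declare the nonlinearity once for the whole block via argscope.
    with argscope(Conv2D, kernel_shape=3, nl=tf.nn.relu):
        l = Conv2D('conv1', image, 64)  # inherits nl=tf.nn.relu from the scope
        l = Conv2D('conv2', l, 64)
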
@@ -29,13 +29,13 @@ class Model(mnist_example.Model):
         image, label = input_vars
         image = tf.expand_dims(image, 3)
-        with argscope(Conv2D, kernel_shape=5):
+        with argscope(Conv2D, kernel_shape=5, nl=tf.nn.relu):
             logits = (LinearWrap(image)  # the starting brace is only for line-breaking
                       .Conv2D('conv0', out_channel=32, padding='VALID')
                       .MaxPooling('pool0', 2)
                       .Conv2D('conv1', out_channel=64, padding='VALID')
                       .MaxPooling('pool1', 2)
-                      .FullyConnected('fc0', 512)
+                      .FullyConnected('fc0', 512, nl=tf.nn.relu)
                       .FullyConnected('fc1', out_dim=10, nl=tf.identity)())
         prob = tf.nn.softmax(logits, name='prob')
...
@@ -34,7 +34,7 @@ class Model(ModelDesc):
                 up = up / 2
             return l

-        with argscope(Conv2D, kernel_shape=3):
+        with argscope(Conv2D, kernel_shape=3, nl=tf.nn.relu):
             l = Conv2D('conv1_1', image, 64)
             l = Conv2D('conv1_2', l, 64)
             b1 = branch('branch1', l, 1)
...
@@ -71,7 +71,7 @@ class Model(ModelDesc):
         l = inception('incep3c', l, 0, 128, 160, 64, 96, 0, 'max')

         br1 = Conv2D('loss1conv', l, 128, 1)
-        br1 = FullyConnected('loss1fc', br1, 1024)
+        br1 = FullyConnected('loss1fc', br1, 1024, nl=tf.nn.relu)
         br1 = FullyConnected('loss1logit', br1, 1000, nl=tf.identity)
         loss1 = tf.nn.sparse_softmax_cross_entropy_with_logits(br1, label)
         loss1 = tf.reduce_mean(loss1, name='loss1')
@@ -84,7 +84,7 @@ class Model(ModelDesc):
         l = inception('incep4e', l, 0, 128, 192, 192, 256, 0, 'max')

         br2 = Conv2D('loss2conv', l, 128, 1)
-        br2 = FullyConnected('loss2fc', br2, 1024)
+        br2 = FullyConnected('loss2fc', br2, 1024, nl=tf.nn.relu)
         br2 = FullyConnected('loss2logit', br2, 1000, nl=tf.identity)
         loss2 = tf.nn.sparse_softmax_cross_entropy_with_logits(br2, label)
         loss2 = tf.reduce_mean(loss2, name='loss2')
...
@@ -51,10 +51,10 @@ class Model(ModelDesc):
                 .MaxPooling('pool2', 3, stride=2, padding='SAME') \
                 .Conv2D('conv3.1', out_channel=128, padding='VALID') \
                 .Conv2D('conv3.2', out_channel=128, padding='VALID') \
-                .FullyConnected('fc0', 1024 + 512,
+                .FullyConnected('fc0', 1024 + 512, nl=tf.nn.relu,
                                 b_init=tf.constant_initializer(0.1)) \
                 .tf.nn.dropout(keep_prob) \
-                .FullyConnected('fc1', 512,
+                .FullyConnected('fc1', 512, nl=tf.nn.relu,
                                 b_init=tf.constant_initializer(0.1)) \
                 .FullyConnected('linear', out_dim=self.cifar_classnum, nl=tf.identity)()
...
@@ -29,21 +29,22 @@ class Model(ModelDesc):
         image, label = inputs

-        l = Conv2D('conv1', image, out_channel=96, kernel_shape=11, stride=4, padding='VALID')
-        l = tf.nn.lrn(l, 2, bias=1.0, alpha=2e-5, beta=0.75, name='norm1')
-        l = MaxPooling('pool1', l, 3, stride=2, padding='VALID')
-        l = Conv2D('conv2', l, out_channel=256, kernel_shape=5, split=2)
-        l = tf.nn.lrn(l, 2, bias=1.0, alpha=2e-5, beta=0.75, name='norm2')
-        l = MaxPooling('pool2', l, 3, stride=2, padding='VALID')
-        l = Conv2D('conv3', l, out_channel=384, kernel_shape=3)
-        l = Conv2D('conv4', l, out_channel=384, kernel_shape=3, split=2)
-        l = Conv2D('conv5', l, out_channel=256, kernel_shape=3, split=2)
-        l = MaxPooling('pool3', l, 3, stride=2, padding='VALID')
-        l = FullyConnected('fc6', l, 4096)
-        l = FullyConnected('fc7', l, out_dim=4096)
-        # fc will have activation summary by default. disable this for the output layer
-        logits = FullyConnected('fc8', l, out_dim=1000, nl=tf.identity)
+        with argscope([Conv2D, FullyConnected], nl=tf.nn.relu):
+            l = Conv2D('conv1', image, out_channel=96, kernel_shape=11, stride=4, padding='VALID')
+            l = tf.nn.lrn(l, 2, bias=1.0, alpha=2e-5, beta=0.75, name='norm1')
+            l = MaxPooling('pool1', l, 3, stride=2, padding='VALID')
+            l = Conv2D('conv2', l, out_channel=256, kernel_shape=5, split=2)
+            l = tf.nn.lrn(l, 2, bias=1.0, alpha=2e-5, beta=0.75, name='norm2')
+            l = MaxPooling('pool2', l, 3, stride=2, padding='VALID')
+            l = Conv2D('conv3', l, out_channel=384, kernel_shape=3)
+            l = Conv2D('conv4', l, out_channel=384, kernel_shape=3, split=2)
+            l = Conv2D('conv5', l, out_channel=256, kernel_shape=3, split=2)
+            l = MaxPooling('pool3', l, 3, stride=2, padding='VALID')
+            l = FullyConnected('fc6', l, 4096)
+            l = FullyConnected('fc7', l, out_dim=4096)
+            # fc will have activation summary by default. disable this for the output layer
+            logits = FullyConnected('fc8', l, out_dim=1000, nl=tf.identity)
         prob = tf.nn.softmax(logits, name='output')
...
@@ -36,7 +36,7 @@ class Model(ModelDesc):
         image, label = inputs

-        with argscope(Conv2D, kernel_shape=3):
+        with argscope(Conv2D, kernel_shape=3, nl=tf.nn.relu):
             # 224
             logits = (LinearWrap(image)
                       .Conv2D('conv1_1', 64)
@@ -62,10 +62,9 @@ class Model(ModelDesc):
                       .Conv2D('conv5_3', 512)
                       .MaxPooling('pool5', 2)
                       # 7
-                      .FullyConnected('fc6', 4096)
+                      .FullyConnected('fc6', 4096, nl=tf.nn.relu)
                       .Dropout('drop0', 0.5)
-                      .print_tensor()
-                      .FullyConnected('fc7', 4096)
+                      .FullyConnected('fc7', 4096, nl=tf.nn.relu)
                       .Dropout('drop1', 0.5)
                       .FullyConnected('fc8', out_dim=1000, nl=tf.identity)())
         prob = tf.nn.softmax(logits, name='output')
...
@@ -54,7 +54,7 @@ class Model(ModelDesc):
                       .Conv2D('conv2')
                       .MaxPooling('pool1', 2)
                       .Conv2D('conv3')
-                      .FullyConnected('fc0', 512)
+                      .FullyConnected('fc0', 512, nl=tf.nn.relu)
                       .Dropout('dropout', 0.5)
                       .FullyConnected('fc1', out_dim=10, nl=tf.identity)())
         prob = tf.nn.softmax(logits, name='prob')  # a Bx10 with probabilities
...
@@ -29,17 +29,18 @@ class Model(ModelDesc):
         image = image / 128.0 - 1

-        logits = (LinearWrap(image)
-                  .Conv2D('conv1', 24, 5, padding='VALID')
-                  .MaxPooling('pool1', 2, padding='SAME')
-                  .Conv2D('conv2', 32, 3, padding='VALID')
-                  .Conv2D('conv3', 32, 3, padding='VALID')
-                  .MaxPooling('pool2', 2, padding='SAME')
-                  .Conv2D('conv4', 64, 3, padding='VALID')
-                  .Dropout('drop', 0.5)
-                  .FullyConnected('fc0', 512,
-                                  b_init=tf.constant_initializer(0.1))
-                  .FullyConnected('linear', out_dim=10, nl=tf.identity)())
+        with argscope(Conv2D, nl=tf.nn.relu):
+            logits = (LinearWrap(image)
+                      .Conv2D('conv1', 24, 5, padding='VALID')
+                      .MaxPooling('pool1', 2, padding='SAME')
+                      .Conv2D('conv2', 32, 3, padding='VALID')
+                      .Conv2D('conv3', 32, 3, padding='VALID')
+                      .MaxPooling('pool2', 2, padding='SAME')
+                      .Conv2D('conv4', 64, 3, padding='VALID')
+                      .Dropout('drop', 0.5)
+                      .FullyConnected('fc0', 512,
+                                      b_init=tf.constant_initializer(0.1), nl=tf.nn.relu)
+                      .FullyConnected('linear', out_dim=10, nl=tf.identity)())
         prob = tf.nn.softmax(logits, name='output')

         # compute the number of failed samples, for ClassificationError to use at test time
...
@@ -7,7 +7,7 @@ import numpy as np
 import tensorflow as tf
 import math
 from ._common import *
-from ..utils import map_arg
+from ..utils import map_arg, logger

 __all__ = ['Conv2D']
@@ -15,7 +15,7 @@ __all__ = ['Conv2D']
 def Conv2D(x, out_channel, kernel_shape,
            padding='SAME', stride=1,
            W_init=None, b_init=None,
-           nl=tf.nn.relu, split=1, use_bias=True):
+           nl=None, split=1, use_bias=True):
     """
     2D convolution on 4D inputs.
@@ -59,5 +59,9 @@ def Conv2D(x, out_channel, kernel_shape,
         outputs = [tf.nn.conv2d(i, k, stride, padding)
                    for i, k in zip(inputs, kernels)]
         conv = tf.concat(3, outputs)
+    if nl is None:
+        logger.warn("[DEPRECATED] Default nonlinearity for Conv2D and FullyConnected will be deprecated.")
+        logger.warn("[DEPRECATED] Please use argscope instead.")
+        nl = tf.nn.relu
     return nl(tf.nn.bias_add(conv, b) if use_bias else conv, name='output')
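
The change above uses the common `None`-sentinel idiom: the function cannot tell "caller passed `tf.nn.relu` explicitly" apart from "caller relied on the default", so the default becomes `None`, the old behavior is kept as a fallback, and a warning marks every call that still depends on it. A standalone sketch of the idiom, with hypothetical names and a toy stand-in for relu:

    import warnings

    def layer_sketch(x, nl=None):
        # None means "the caller did not choose a nonlinearity": warn,
        # then fall back to the historical default so old code still runs.
        if nl is None:
            warnings.warn("default nonlinearity is deprecated; pass nl "
                          "explicitly (e.g. via argscope)", DeprecationWarning)
            nl = lambda v: max(v, 0.0)  # toy stand-in for tf.nn.relu

        return nl(x)

    print(layer_sketch(-3.0))                  # warns, fallback returns 0.0
    print(layer_sketch(-3.0, nl=lambda v: v))  # explicit nl, no warning
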
@@ -14,7 +14,7 @@ __all__ = ['FullyConnected']
 @layer_register()
 def FullyConnected(x, out_dim,
                    W_init=None, b_init=None,
-                   nl=tf.nn.relu, use_bias=True):
+                   nl=None, use_bias=True):
     """
     Fully-Connected layer.
@@ -39,4 +39,8 @@ def FullyConnected(x, out_dim,
     if use_bias:
         b = tf.get_variable('b', [out_dim], initializer=b_init)
     prod = tf.nn.xw_plus_b(x, W, b) if use_bias else tf.matmul(x, W)
+    if nl is None:
+        logger.warn("[DEPRECATED] Default nonlinearity for Conv2D and FullyConnected will be deprecated.")
+        logger.warn("[DEPRECATED] Please use argscope instead.")
+        nl = tf.nn.relu
     return nl(prod, name='output')
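
The migration path this warning points at is `argscope`, which lets one `with` block supply default keyword arguments to every layer call inside it. A minimal, self-contained sketch of the idea (not tensorpack's actual implementation; all names here are hypothetical):

    import contextlib

    _SCOPE = {}  # hypothetical global: layer function -> default kwargs

    @contextlib.contextmanager
    def argscope_sketch(layer_fn, **defaults):
        # Push defaults for layer_fn, restore the previous ones on exit.
        old = _SCOPE.get(layer_fn)
        _SCOPE[layer_fn] = dict(defaults)
        try:
            yield
        finally:
            if old is None:
                _SCOPE.pop(layer_fn, None)
            else:
                _SCOPE[layer_fn] = old

    def conv2d_sketch(name, **kwargs):
        # Start from the active scope's defaults; explicit kwargs win.
        merged = dict(_SCOPE.get(conv2d_sketch, {}))
        merged.update(kwargs)
        return merged  # a real layer would build the op here

    with argscope_sketch(conv2d_sketch, kernel_shape=3, nl='relu'):
        print(conv2d_sketch('conv1', out_channel=64))
        # -> kernel_shape=3 and nl='relu' inherited, out_channel=64 explicit
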
@@ -133,7 +133,7 @@ class ModelDesc(object):
         :returns: the cost to minimize. a scalar variable
         """
         if len(inspect.getargspec(self._build_graph).args) == 3:
-            logger.warn("_build_graph(self, input_vars, is_training) is deprecated! \
+            logger.warn("[DEPRECATED] _build_graph(self, input_vars, is_training) is deprecated! \
 Use _build_graph(self, input_vars) and get_current_tower_context().is_training instead.")
             self._build_graph(model_inputs, get_current_tower_context().is_training)
         else:
...
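
For this `ModelDesc` warning, the migration it describes looks like the following sketch, assuming a `ModelDesc` subclass and `get_current_tower_context` imported as the warning text names it:

    # Old, deprecated signature: the framework passes is_training explicitly.
    def _build_graph(self, input_vars, is_training):
        ...

    # New signature: read the training flag from the active tower context.
    def _build_graph(self, input_vars):
        is_training = get_current_tower_context().is_training
        ...
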