Commit edecca96 authored by Yuxin Wu

sphinx docs for models/

parent b335a7ba
......@@ -36,6 +36,7 @@ import mock
MOCK_MODULES = ['scipy',
'tensorflow', 'tensorflow.contrib',
'tensorflow.python.ops',
'tensorflow.contrib.framework',
'tensorflow.models',
'tensorflow.models.rnn',
......@@ -64,10 +65,15 @@ from tensorpack.models import *
# ones.
extensions = [
'sphinx.ext.autodoc',
'sphinx.ext.coverage',
'sphinx.ext.mathjax',
'sphinx.ext.napoleon',
#'sphinx.ext.coverage',
#'sphinx.ext.mathjax',
'sphinx.ext.mathbase',
'sphinx.ext.viewcode',
]
napoleon_google_docstring = True
napoleon_numpy_docstring = False
napoleon_use_rtype = False
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
......
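For context, the napoleon settings above make autodoc parse Google-style docstrings, which is the format the rest of this commit converts the models to. A minimal sketch of the docstring layout napoleon expects (the function itself is made up for illustration):

def scale(x, factor=1.0):
    """
    Multiply a tensor by a scalar.

    Args:
        x (tf.Tensor): the input tensor.
        factor (float): the multiplier.

    Returns:
        tf.Tensor: ``x * factor``.
    """
    return x * factor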
......@@ -2,6 +2,11 @@
# File: update.sh
# Author: Yuxin Wu <ppwwyyxxc@gmail.com>
PROG_NAME=`readlink -f $0`
PROG_DIR=`dirname "$PROG_NAME"`
cd "$PROG_DIR"
make clean
#sphinx-apidoc -o modules ../tensorpack -f -d 10
make html
......@@ -69,7 +69,7 @@ BITW = 1
BITA = 2
BITG = 6
TOTAL_BATCH_SIZE = 128
BATCH_SIZE = 64
BATCH_SIZE = None
class Model(ModelDesc):
......
......@@ -7,7 +7,7 @@ from types import ModuleType
import six
import os
import os.path
# this line is necessary for TFModuleFunc to work
# this line is necessary for _TFModuleFunc to work
import tensorflow as tf # noqa: F401
from ..utils import logger
......@@ -34,8 +34,7 @@ class LinearWrap(object):
consisting of layers / symbolic functions with only one input & output.
"""
class TFModuleFunc(object):
class _TFModuleFunc(object):
def __init__(self, mod, tensor):
self._mod = mod
self._t = tensor
......@@ -43,7 +42,7 @@ class LinearWrap(object):
def __getattr__(self, name):
ret = getattr(self._mod, name)
if isinstance(ret, ModuleType):
return LinearWrap.TFModuleFunc(ret, self._t)
return LinearWrap._TFModuleFunc(ret, self._t)
else:
# assume to be a tf function
def f(*args, **kwargs):
......@@ -52,6 +51,10 @@ class LinearWrap(object):
return f
def __init__(self, tensor):
"""
Args:
tensor (tf.Tensor): the tensor to wrap
"""
self._t = tensor
def __getattr__(self, layer_name):
......@@ -76,10 +79,15 @@ class LinearWrap(object):
if layer_name != 'tf':
logger.warn("You're calling LinearWrap.__getattr__ with something neither a layer nor 'tf'!")
assert isinstance(layer, ModuleType)
return LinearWrap.TFModuleFunc(layer, self._t)
return LinearWrap._TFModuleFunc(layer, self._t)
def apply(self, func, *args, **kwargs):
""" send tensor to the first argument of a simple func"""
"""
Apply a function on the wrapped tensor.
Returns:
LinearWrap: ``LinearWrap(func(self.tensor(), *args, **kwargs))``.
"""
ret = func(self._t, *args, **kwargs)
return LinearWrap(ret)
......@@ -87,8 +95,20 @@ class LinearWrap(object):
return self._t
def tensor(self):
"""
Equivalent to ``self.__call__()``.
Returns:
tf.Tensor: the underlying wrapped tensor.
"""
return self._t
def print_tensor(self):
"""
Print the underlying tensor and return self. Can be useful to get the
name of tensors inside :class:`LinearWrap`.
Returns:
self
"""
print(self._t)
return self
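Putting the pieces above together, a hedged sketch of how LinearWrap chains registered layers, ``apply`` and ``print_tensor`` (layer names and shapes are illustrative, not from this commit):

import tensorflow as tf
from tensorpack.models import *  # noqa, as in docs/conf.py above

image = tf.placeholder(tf.float32, [None, 28, 28, 3])
logits = (LinearWrap(image)
          .Conv2D('conv0', 32, 3)          # registered layers take the scope name first;
          .MaxPooling('pool0', 2)          # the wrapped tensor is inserted automatically
          .apply(tf.nn.relu)               # send the tensor to any plain function
          .print_tensor()                  # prints the tensor, returns the wrap itself
          .FullyConnected('fc0', 10)())    # __call__ / .tensor() unwraps to a tf.Tensor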
......@@ -20,21 +20,6 @@ __all__ = ['BatchNorm', 'BatchNormV1', 'BatchNormV2']
@layer_register(log_shape=False)
def BatchNormV1(x, use_local_stat=None, decay=0.9, epsilon=1e-5):
"""
Batch normalization layer as described in:
`Batch Normalization: Accelerating Deep Network Training by
Reducing Internal Covariance Shift <http://arxiv.org/abs/1502.03167>`_.
:param input: a NHWC or NC tensor
:param use_local_stat: bool. whether to use mean/var of this batch or the moving average.
Default to True in training and False in inference.
:param decay: decay rate. default to 0.9.
:param epsilon: default to 1e-5.
Note that only the first training tower maintains a moving average.
"""
shape = x.get_shape().as_list()
assert len(shape) in [2, 4]
......@@ -114,18 +99,8 @@ def BatchNormV1(x, use_local_stat=None, decay=0.9, epsilon=1e-5):
@layer_register(log_shape=False)
def BatchNormV2(x, use_local_stat=None, decay=0.9, epsilon=1e-5):
"""
Batch normalization layer as described in:
`Batch Normalization: Accelerating Deep Network Training by
Reducing Internal Covariance Shift <http://arxiv.org/abs/1502.03167>`_.
:param input: a NHWC or NC tensor
:param use_local_stat: bool. whether to use mean/var of this batch or the moving average.
Default to True in training and False in inference.
:param decay: decay rate. default to 0.9.
:param epsilon: default to 1e-5.
Note that only the first training tower maintains a moving average.
A slightly faster but equivalent version of BatchNormV1, which uses
``fused_batch_norm`` in training.
"""
shape = x.get_shape().as_list()
assert len(shape) in [2, 4]
......@@ -185,8 +160,27 @@ def BatchNormV2(x, use_local_stat=None, decay=0.9, epsilon=1e-5):
return tf.identity(xn, name='output')
if get_tf_version() >= 12:
BatchNorm = BatchNormV2
else:
logger.warn("BatchNorm might be faster if you update TensorFlow")
BatchNorm = BatchNormV1
def BatchNorm(*args, **kwargs):
"""
Batch normalization layer, as described in the paper:
`Batch Normalization: Accelerating Deep Network Training by
Reducing Internal Covariate Shift <http://arxiv.org/abs/1502.03167>`_.
Args:
x (tf.Tensor): a NHWC or NC tensor.
use_local_stat (bool): whether to use mean/var of the current batch or the moving average.
Defaults to True in training and False in inference.
decay (float): decay rate of moving average.
epsilon (float): epsilon to avoid divide-by-zero.
Note:
* In multi-tower training, only the first training tower maintains a moving average.
* It automatically selects :meth:`BatchNormV1` or :meth:`BatchNormV2`
according to availability.
"""
if get_tf_version() >= 12:
return BatchNormV2(*args, **kwargs)
else:
logger.warn("BatchNorm might be faster if you update TensorFlow")
return BatchNormV1(*args, **kwargs)
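A hedged usage sketch of the dispatching ``BatchNorm`` above (scope names and shapes are illustrative; these calls would normally live inside a training tower, e.g. in ``ModelDesc._build_graph``):

import tensorflow as tf
from tensorpack.models import BatchNorm

x = tf.placeholder(tf.float32, [None, 32, 32, 64])
x = BatchNorm('bn', x)                               # defaults: decay=0.9, epsilon=1e-5
x = BatchNorm('bn_frozen', x, use_local_stat=False)  # always use the moving average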
......@@ -5,7 +5,6 @@
import tensorflow as tf
from ._common import layer_register, shape2d, shape4d
from ..utils import logger
__all__ = ['Conv2D', 'Deconv2D']
......@@ -14,21 +13,22 @@ __all__ = ['Conv2D', 'Deconv2D']
def Conv2D(x, out_channel, kernel_shape,
padding='SAME', stride=1,
W_init=None, b_init=None,
nl=None, split=1, use_bias=True):
nl=tf.identity, split=1, use_bias=True):
"""
2D convolution on 4D inputs.
:param input: a tensor of shape NHWC
:param out_channel: number of output channel
:param kernel_shape: (h, w) or a int
:param stride: (h, w) or a int. default to 1
:param padding: 'valid' or 'same'. default to 'same'
:param split: split channels as used in Alexnet. Default to 1 (no split)
:param W_init: initializer for W. default to `xavier_initializer_conv2d`.
:param b_init: initializer for b. default to zero initializer.
:param nl: nonlinearity
:param use_bias: whether to use bias. a boolean default to True
:returns: a NHWC tensor
Args:
x (tf.Tensor): a tensor of shape NHWC.
Must have a known number of channels, but can have other unknown dimensions.
out_channel (int): number of output channels.
kernel_shape: (h, w) tuple or an int.
stride: (h, w) tuple or an int.
padding (str): 'valid' or 'same'. Case insensitive.
split (int): Split channels as used in Alexnet. Defaults to 1 (no split).
W_init: initializer for W. Defaults to `variance_scaling_initializer`.
b_init: initializer for b. Defaults to zero.
nl: a nonlinearity function.
use_bias (bool): whether to use bias.
"""
in_shape = x.get_shape().as_list()
in_channel = in_shape[-1]
......@@ -53,22 +53,15 @@ def Conv2D(x, out_channel, kernel_shape,
if split == 1:
conv = tf.nn.conv2d(x, W, stride, padding)
else:
# TODO rename to split later
inputs = tf.split(x, split, 3)
kernels = tf.split(W, split, 3)
outputs = [tf.nn.conv2d(i, k, stride, padding)
for i, k in zip(inputs, kernels)]
conv = tf.concat_v2(outputs, 3)
if nl is None:
logger.warn(
"[DEPRECATED] Default ReLU nonlinearity for Conv2D and FullyConnected will be deprecated. "
"Please use argscope instead.")
nl = tf.nn.relu
return nl(tf.nn.bias_add(conv, b) if use_bias else conv, name='output')
class StaticDynamicShape(object):
def __init__(self, static, dynamic):
self.static = static
self.dynamic = dynamic
......@@ -89,17 +82,18 @@ def Deconv2D(x, out_shape, kernel_shape,
"""
2D deconvolution on 4D inputs.
:param input: a tensor of shape NHWC
:param out_shape: either (h, w, channel), or just channel,
then h, w will calculated by input_shape * stride
:param kernel_shape: (h, w) or a int
:param stride: (h, w) or a int
:param padding: 'valid' or 'same'. default to 'same'
:param W_init: initializer for W. default to `xavier_initializer_conv2d`.
:param b_init: initializer for b. default to zero initializer.
:param nl: nonlinearity.
:param use_bias: whether to use bias. a boolean default to True
:returns: a NHWC tensor
Args:
x (tf.Tensor): a tensor of shape NHWC.
Must have a known number of channels, but can have other unknown dimensions.
out_shape: (h, w, channel) tuple, or just an integer channel,
in which case (h, w) will be calculated as input_shape * stride.
kernel_shape: (h, w) tuple or an int.
stride: (h, w) tuple or an int.
padding (str): 'valid' or 'same'. Case insensitive.
W_init: initializer for W. Defaults to `variance_scaling_initializer`.
b_init: initializer for b. Defaults to zero.
nl: a nonlinearity function.
use_bias (bool): whether to use bias.
"""
in_shape = x.get_shape().as_list()[1:]
in_channel = in_shape[-1]
......
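For reference, a hedged sketch of calling the two layers documented above (scope names, channel counts and the input shape are illustrative; the scope name comes first because of ``layer_register``):

import tensorflow as tf
from tensorpack.models import Conv2D, Deconv2D

image = tf.placeholder(tf.float32, [None, 32, 32, 3])
l = Conv2D('conv0', image, out_channel=64, kernel_shape=3, stride=1, nl=tf.nn.relu)
l = Conv2D('conv1', l, 64, (3, 3), split=2)                  # AlexNet-style grouped convolution
l = Deconv2D('deconv0', l, out_shape=32, kernel_shape=4, stride=2)  # (h, w) inferred from stride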
......@@ -7,7 +7,6 @@ import tensorflow as tf
from ._common import layer_register
from ..tfutils import symbolic_functions as symbf
from ..utils import logger
__all__ = ['FullyConnected']
......@@ -15,17 +14,17 @@ __all__ = ['FullyConnected']
@layer_register()
def FullyConnected(x, out_dim,
W_init=None, b_init=None,
nl=None, use_bias=True):
nl=tf.identity, use_bias=True):
"""
Fully-Connected layer.
:param input: a tensor to be flattened except the first dimension.
:param out_dim: output dimension
:param W_init: initializer for W. default to `xavier_initializer_conv2d`.
:param b_init: initializer for b. default to zero initializer.
:param nl: nonlinearity
:param use_bias: whether to use bias. a boolean default to True
:returns: a 2D tensor
Fully-Connected layer. Takes an N>1D tensor and returns a 2D tensor.
Args:
x (tf.Tensor): a tensor to be flattened except for the first dimension.
out_dim (int): output dimension
W_init: initializer for W. Defaults to `variance_scaling_initializer`.
b_init: initializer for b. Defaults to zero.
nl: a nonlinearity function
use_bias (bool): whether to use bias.
"""
x = symbf.batch_flatten(x)
in_dim = x.get_shape().as_list()[1]
......@@ -39,9 +38,4 @@ def FullyConnected(x, out_dim,
if use_bias:
b = tf.get_variable('b', [out_dim], initializer=b_init)
prod = tf.nn.xw_plus_b(x, W, b) if use_bias else tf.matmul(x, W)
if nl is None:
logger.warn(
"[DEPRECATED] Default ReLU nonlinearity for Conv2D and FullyConnected will be deprecated."
" Please use argscope instead.")
nl = tf.nn.relu
return nl(prod, name='output')
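A corresponding hedged sketch for ``FullyConnected`` (names and dimensions are illustrative; note the new default ``nl=tf.identity``, so a nonlinearity must be passed explicitly or via argscope):

import tensorflow as tf
from tensorpack.models import FullyConnected

l = tf.placeholder(tf.float32, [None, 7, 7, 64])      # any N>1D tensor; it will be flattened
l = FullyConnected('fc0', l, out_dim=512, nl=tf.nn.relu)
logits = FullyConnected('fc1', l, out_dim=10)          # linear output by default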
......@@ -48,15 +48,18 @@ def sample(img, coords):
@layer_register()
def ImageSample(inputs, borderMode='repeat'):
"""
Sample the template image, using the given coordinate, by bilinear interpolation.
It mimics the same behavior described in:
Sample the template image using the given coordinate, by bilinear interpolation.
This was described in the paper:
`Spatial Transformer Networks <http://arxiv.org/abs/1506.02025>`_.
:param input: [template, mapping]. template of shape NHWC.
mapping of shape NHW2, where each pair of the last dimension is a (y, x) real-value
coordinate.
:param borderMode: either 'repeat' or 'constant' (0)
:returns: a NHWC output tensor.
Args:
inputs (list): [template, coords]. template has shape NHWC.
coords has shape (N,H',W',2), where each pair in the last dimension is a (y, x) real-valued
coordinate.
borderMode: either "repeat" or "constant" (zero-filled)
Returns:
a (N,H',W',C) tensor.
"""
# TODO borderValue
template, mapping = inputs
......
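A hedged sketch of ``ImageSample``: given a template batch and per-pixel (y, x) coordinates, it bilinearly samples the template (shapes and names are illustrative):

import tensorflow as tf
from tensorpack.models import ImageSample

template = tf.placeholder(tf.float32, [None, 64, 64, 3])   # NHWC
coords = tf.placeholder(tf.float32, [None, 32, 32, 2])     # (y, x) for every output pixel
warped = ImageSample('sample', [template, coords], borderMode='repeat')   # (N, 32, 32, 3)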
......@@ -17,7 +17,15 @@ __all__ = ['ModelDesc', 'InputVar', 'ModelFromMetaGraph']
class InputVar(object):
""" Store metadata about input placeholders. """
def __init__(self, type, shape, name, sparse=False):
"""
Args:
type: tf type of the tensor.
shape (list): shape of the placeholder.
name (str): name of the placeholder.
sparse (bool): whether to use ``tf.sparse_placeholder``.
"""
self.type = type
self.shape = shape
self.name = name
......@@ -39,7 +47,8 @@ class ModelDesc(object):
"""
Create or return (if already created) raw input TF placeholder vars in the graph.
:returns: the list of raw input vars in the graph
Returns:
list[tf.Tensor]: the list of input placeholders in the graph.
"""
if hasattr(self, 'reuse_input_vars'):
return self.reuse_input_vars
......@@ -51,7 +60,12 @@ class ModelDesc(object):
get_reuse_placehdrs = get_input_vars
def build_placeholders(self, prefix=''):
""" build placeholders with optional prefix, for each InputVar
"""
For each InputVar, create new placeholders with optional prefix and
return them. Useful when building new towers.
Returns:
list[tf.Tensor]: the list of built placeholders.
"""
input_vars = self._get_input_vars()
for v in input_vars:
......@@ -65,20 +79,25 @@ class ModelDesc(object):
return ret
def get_input_vars_desc(self):
""" return a list of `InputVar` instance"""
"""
Returns:
list[:class:`InputVar`]: list of the underlying :class:`InputVar`.
"""
return self._get_input_vars()
@abstractmethod
def _get_input_vars(self):
""":returns: a list of InputVar """
"""
:returns: a list of InputVar
"""
def build_graph(self, model_inputs):
"""
Setup the whole graph.
Build the whole symbolic graph.
:param model_inputs: a list of input variable in the graph.
:param is_training: a boolean
:returns: the cost to minimize. a scalar variable
Args:
model_inputs (list[tf.Tensor]): a list of inputs, corresponding to
InputVars of this model.
"""
if len(inspect.getargspec(self._build_graph).args) == 3:
logger.warn("[DEPRECATED] _build_graph(self, input_vars, is_training) is deprecated! \
......@@ -92,13 +111,19 @@ Use _build_graph(self, input_vars) and get_current_tower_context().is_training i
pass
def get_cost(self):
"""
Return the cost tensor in the graph. Called by some of the :class:`tensorpack.train.Trainer` which
assumes single-cost models.
"""
return self._get_cost()
def _get_cost(self, *args):
return self.cost
def get_gradient_processor(self):
""" Return a list of GradientProcessor. They will be executed in order"""
""" Return a list of :class:`tensorpack.tfutils.GradientProcessor`.
They will be executed by the trainer in the given order.
"""
return [ # SummaryGradient(),
CheckGradient()
]
......@@ -106,11 +131,15 @@ Use _build_graph(self, input_vars) and get_current_tower_context().is_training i
class ModelFromMetaGraph(ModelDesc):
"""
Load the whole exact TF graph from a saved meta_graph.
Load the exact TF graph from a saved meta_graph.
Only useful for inference.
"""
def __init__(self, filename):
"""
Args:
filename(str): file name of the saved meta graph.
"""
tf.train.import_meta_graph(filename)
all_coll = tf.get_default_graph().get_all_collection_keys()
for k in [INPUT_VARS_KEY, tf.GraphKeys.TRAINABLE_VARIABLES,
......
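To tie the ModelDesc / InputVar docstrings above together, a hedged sketch of a minimal subclass (the dataset shapes, layer name and cost are illustrative, not part of this commit):

import tensorflow as tf
from tensorpack.models import ModelDesc, InputVar, FullyConnected

class MyModel(ModelDesc):
    def _get_input_vars(self):
        return [InputVar(tf.float32, [None, 28, 28, 1], 'input'),
                InputVar(tf.int32, [None], 'label')]

    def _build_graph(self, input_vars):
        image, label = input_vars
        logits = FullyConnected('fc0', image, out_dim=10)   # input is flattened automatically
        # single-cost trainers read self.cost through get_cost()
        self.cost = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label),
            name='cost')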
......@@ -14,11 +14,14 @@ __all__ = ['Maxout', 'PReLU', 'LeakyReLU', 'BNReLU']
@layer_register()
def Maxout(x, num_unit):
"""
Maxout as in `Maxout Networks <http://arxiv.org/abs/1302.4389>`_.
Maxout as in the paper `Maxout Networks <http://arxiv.org/abs/1302.4389>`_.
:param input: a NHWC or NC tensor.
:param num_unit: a int. must be divisible by C.
:returns: a NHW(C/num_unit) tensor
Args:
x (tf.Tensor): a NHWC or NC tensor. The channel dimension must be known.
num_unit (int): must be divisible by C.
Returns:
tf.Tensor: of shape NHW(C/num_unit).
"""
input_shape = x.get_shape().as_list()
ndim = len(input_shape)
......@@ -33,42 +36,42 @@ def Maxout(x, num_unit):
@layer_register(log_shape=False)
def PReLU(x, init=tf.constant_initializer(0.001), name=None):
def PReLU(x, init=0.001, name='output'):
"""
Parameterized relu as in `Delving Deep into Rectifiers: Surpassing
Parameterized ReLU as in the paper `Delving Deep into Rectifiers: Surpassing
Human-Level Performance on ImageNet Classification
<http://arxiv.org/abs/1502.01852>`_.
:param input: any tensor.
:param init: initializer for the p. default to 0.001.
Args:
x (tf.Tensor): input
init (float): initial value for the learnable slope.
name (str): name of the output.
"""
init = tf.constant_initializer(init)
alpha = tf.get_variable('alpha', [], initializer=init)
x = ((1 + alpha) * x + (1 - alpha) * tf.abs(x))
if name is None:
name = 'output'
return tf.mul(x, 0.5, name=name)
@layer_register(use_scope=False, log_shape=False)
def LeakyReLU(x, alpha, name=None):
def LeakyReLU(x, alpha, name='output'):
"""
Leaky relu as in `Rectifier Nonlinearities Improve Neural Network Acoustic
Leaky ReLU as in the paper `Rectifier Nonlinearities Improve Neural Network Acoustic
Models
<http://ai.stanford.edu/~amaas/papers/relu_hybrid_icml2013_final.pdf>`_.
:param input: any tensor.
:param alpha: the negative slope.
Args:
x (tf.Tensor): input
alpha (float): the slope for negative input.
"""
if name is None:
name = 'output'
return tf.maximum(x, alpha * x, name=name)
# alpha = float(alpha)
# x = ((1 + alpha) * x + (1 - alpha) * tf.abs(x))
# return tf.mul(x, 0.5, name=name)
@layer_register(log_shape=False, use_scope=False)
def BNReLU(x, name=None):
"""
A shorthand of BatchNormalization + ReLU.
"""
x = BatchNorm('bn', x)
x = tf.nn.relu(x, name=name)
return x
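A quick hedged sketch of the four nonlinearities above (tensor shape and scope names are illustrative; calls assumed to run inside a training tower since ``BNReLU`` uses BatchNorm, and ``LeakyReLU`` / ``BNReLU`` are registered with ``use_scope=False``, so they take no scope name):

import tensorflow as tf
from tensorpack.models import Maxout, PReLU, LeakyReLU, BNReLU

l = tf.placeholder(tf.float32, [None, 16, 16, 32])
l = Maxout('maxout', l, num_unit=2)    # channels: 32 -> 16
l = PReLU('prelu', l, init=0.001)      # one learnable slope
l = LeakyReLU(l, alpha=0.1)
l = BNReLU(l)                          # BatchNorm('bn', ...) followed by ReLU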
......@@ -18,13 +18,13 @@ __all__ = ['MaxPooling', 'FixedUnPooling', 'AvgPooling', 'GlobalAvgPooling',
@layer_register()
def MaxPooling(x, shape, stride=None, padding='VALID'):
"""
MaxPooling on images.
Max Pooling on 4D tensors.
:param input: NHWC tensor.
:param shape: int or [h, w]
:param stride: int or [h, w]. default to be shape.
:param padding: 'valid' or 'same'. default to 'valid'
:returns: NHWC tensor.
Args:
x (tf.Tensor): a NHWC tensor.
shape: int or (h, w) tuple
stride: int or (h, w) tuple. Defaults to the same as shape.
padding (str): 'valid' or 'same'.
"""
padding = padding.upper()
shape = shape4d(shape)
......@@ -39,13 +39,13 @@ def MaxPooling(x, shape, stride=None, padding='VALID'):
@layer_register()
def AvgPooling(x, shape, stride=None, padding='VALID'):
"""
Average pooling on images.
Average Pooling on 4D tensors.
:param input: NHWC tensor.
:param shape: int or [h, w]
:param stride: int or [h, w]. default to be shape.
:param padding: 'valid' or 'same'. default to 'valid'
:returns: NHWC tensor.
Args:
x (tf.Tensor): a NHWC tensor.
shape: int or (h, w) tuple
stride: int or (h, w) tuple. Defaults to the same as shape.
padding (str): 'valid' or 'same'.
"""
padding = padding.upper()
shape = shape4d(shape)
......@@ -60,19 +60,20 @@ def AvgPooling(x, shape, stride=None, padding='VALID'):
@layer_register()
def GlobalAvgPooling(x):
"""
Global average pooling as in `Network In Network
Global average pooling as in the paper `Network In Network
<http://arxiv.org/abs/1312.4400>`_.
:param input: NHWC tensor.
:returns: NC tensor.
Args:
x (tf.Tensor): a NHWC tensor.
Returns:
tf.Tensor: a NC tensor.
"""
assert x.get_shape().ndims == 4
return tf.reduce_mean(x, [1, 2])
# https://github.com/tensorflow/tensorflow/issues/2169
def UnPooling2x2ZeroFilled(x):
# https://github.com/tensorflow/tensorflow/issues/2169
out = tf.concat_v2([x, tf.zeros_like(x)], 3)
out = tf.concat_v2([out, tf.zeros_like(out)], 2)
......@@ -90,13 +91,13 @@ def UnPooling2x2ZeroFilled(x):
@layer_register()
def FixedUnPooling(x, shape, unpool_mat=None):
"""
Unpool the input with a fixed mat to perform kronecker product with.
Unpool the input by performing a Kronecker product with a fixed matrix.
:param input: NHWC tensor
:param shape: int or [h, w]
:param unpool_mat: a tf/np matrix with size=shape. If None, will use a mat
with 1 at top-left corner.
:returns: NHWC tensor
Args:
x (tf.Tensor): a NHWC tensor
shape: int or (h, w) tuple
unpool_mat: a tf.Tensor or np.ndarray 2D matrix with size=shape.
If None, will use a matrix with 1 at the top-left corner.
"""
shape = shape2d(shape)
......@@ -129,9 +130,11 @@ def FixedUnPooling(x, shape, unpool_mat=None):
@layer_register()
def BilinearUpSample(x, shape):
"""
Deterministic bilinear upsample the input images.
:param x: input NHWC tensor
:param shape: an integer, the upsample factor
Deterministic bilinear upsampling of the input images.
Args:
x (tf.Tensor): a NHWC tensor
shape (int): the upsample factor
"""
# inp_shape = tf.shape(x)
# return tf.image.resize_bilinear(x,
......
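A hedged sketch exercising the pooling layers documented above (the input shape and scope names are illustrative; the shape comments assume the defaults shown):

import tensorflow as tf
from tensorpack.models import (MaxPooling, AvgPooling, GlobalAvgPooling,
                               FixedUnPooling, BilinearUpSample)

l = tf.placeholder(tf.float32, [None, 32, 32, 16])
l = MaxPooling('pool0', l, shape=2)                              # stride defaults to shape -> 16x16
l = AvgPooling('pool1', l, shape=3, stride=2, padding='SAME')    # -> 8x8
l = FixedUnPooling('unpool0', l, shape=2)                        # zero-filled unpooling -> 16x16
l = BilinearUpSample('upsample0', l, shape=2)                    # deterministic 2x upsample -> 32x32
feat = GlobalAvgPooling('gap', l)                                # NHWC -> NC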
......@@ -22,11 +22,21 @@ l2_regularizer = tf.contrib.layers.l2_regularizer
l1_regularizer = tf.contrib.layers.l1_regularizer
def regularize_cost(regex, func, name=None):
def regularize_cost(regex, func, name='regularize_cost'):
"""
Apply a regularizer on every trainable variable matching the regex.
:param func: a function that takes a tensor and return a scalar.
Args:
regex (str): a regex to match variable names, e.g. "conv.*/W"
func: the regularization function, which takes a tensor and returns a scalar tensor.
Returns:
tf.Tensor: the total regularization cost.
Example:
.. code-block:: python
cost = cost + regularize_cost("fc.*/W", l2_regularizer(1e-5))
"""
G = tf.get_default_graph()
params = G.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
......@@ -45,7 +55,14 @@ def regularize_cost(regex, func, name=None):
@layer_register(log_shape=False, use_scope=False)
def Dropout(x, keep_prob=0.5, is_training=None):
"""
:param is_training: if None, will use the current context by default.
Dropout layer as in the paper `Dropout: a Simple Way to Prevent
Neural Networks from Overfitting <http://dl.acm.org/citation.cfm?id=2670313>`_.
Args:
keep_prob: the probability that each element is kept. It is only used
when is_training=True.
is_training: If None, will use the current :class:`tensorpack.tfutils.TowerContext`
to determine it.
"""
if is_training is None:
is_training = get_current_tower_context().is_training
......
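A short hedged sketch combining the two utilities above (the regex, shapes and keep probability are illustrative; assumed to run inside a training tower so the TowerContext is available):

import tensorflow as tf
from tensorpack.models import FullyConnected, Dropout, regularize_cost

l = tf.placeholder(tf.float32, [None, 512])
l = FullyConnected('fc0', l, out_dim=256, nl=tf.nn.relu)
l = Dropout(l, keep_prob=0.5)        # is_training resolved from the current TowerContext
wd_cost = regularize_cost("fc.*/W", tf.contrib.layers.l2_regularizer(1e-5))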
......@@ -12,12 +12,15 @@ __all__ = ['ConcatWith']
@layer_register(use_scope=False, log_shape=False)
def ConcatWith(x, dim, tensor):
"""
A wrapper around `tf.concat_v2` to support `LinearWrap`
:param x: the input tensor
:param dim: the dimension along which to concatenate
:param tensor: a tensor or list of tensor to concatenate with x.
x will be at the beginning
:return: tf.concat_v2([x] + [tensor], dim)
A wrapper around ``tf.concat`` to cooperate with :class:`LinearWrap`.
Args:
x (tf.Tensor): input
dim (int): the dimension along which to concatenate
tensor (list[tf.Tensor]): a tensor or list of tensors to concatenate with x.
x will be at the beginning
Returns:
tf.Tensor: ``tf.concat_v2([x] + tensor, dim)``
"""
if type(tensor) != list:
tensor = [tensor]
......
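A hedged sketch of ``ConcatWith`` inside a LinearWrap chain, e.g. for a skip connection (layer names and channel counts are illustrative):

import tensorflow as tf
from tensorpack.models import *  # noqa

image = tf.placeholder(tf.float32, [None, 32, 32, 16])
shortcut = Conv2D('conv_side', image, 16, 1)
l = (LinearWrap(image)
     .Conv2D('conv_a', 16, 3)
     .ConcatWith(3, shortcut)        # concatenate along the channel axis, x first
     .Conv2D('conv_b', 32, 3)())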
......@@ -12,8 +12,14 @@ __all__ = ['SoftMax']
@layer_register()
def SoftMax(x, use_temperature=False, temperature_init=1.0):
"""
A SoftMax layer (no linear projection) with optional temperature
:param x: a 2D tensor
A SoftMax layer (w/o linear projection) with optional temperature, as
defined in the paper `Distilling the Knowledge in a Neural Network
<https://arxiv.org/abs/1503.02531>`_.
Args:
x (tf.Tensor): input
use_temperature (bool): use a learnable temperature or not.
temperature_init (float): initial value of the temperature.
"""
if use_temperature:
t = tf.get_variable('invtemp', [],
......
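Finally, a hedged sketch of the ``SoftMax`` layer above (scope names and the logits shape are illustrative):

import tensorflow as tf
from tensorpack.models import SoftMax

logits = tf.placeholder(tf.float32, [None, 10])
prob = SoftMax('softmax', logits)                    # plain softmax, no projection
prob_T = SoftMax('softmax_T', logits,
                 use_temperature=True,
                 temperature_init=5.0)               # learnable temperature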