Commit 562f0906 authored by Yuxin Wu

add variable names to docs of models/

parent 4b99af0a
@@ -11,13 +11,16 @@ Pretrained model for (1,4,32)-ResNet18 and (1,2,6)-AlexNet are available at
 They're provided in the format of numpy dictionary, so it should be very easy to port into other applications.
 The __binary-weight 4-bit-activation ResNet-18__ model has 59.2% top-1 validation accuracy.

+Note that when (W,A,G) is set to (1,32,32), this code is also an implementation of [Binary Weight Network](https://arxiv.org/abs/1511.00363).
+But with (W,A,G) set to (1,1,32), it is not equivalent to [XNOR-Net](https://arxiv.org/abs/1603.05279), although it won't be hard to implement it.

 Alternative link to this page: [http://dorefa.net](http://dorefa.net)

 ## Preparation:
 To use the script, you'll need:
-+ TensorFlow >= 0.12
++ TensorFlow >= 0.12.1
 + OpenCV bindings for Python
...
@@ -7,17 +7,17 @@ import tensorflow as tf
 from tensorflow.contrib.framework import add_model_variable
 from tensorflow.python.training import moving_averages
-from ..tfutils.common import get_tf_version
 from ..tfutils.tower import get_current_tower_context
-from ..utils import logger, building_rtfd
+from ..utils import logger
 from .common import layer_register

-__all__ = ['BatchNorm', 'BatchNormV1', 'BatchNormV2']
+__all__ = ['BatchNorm']

 # decay: being too close to 1 leads to slow start-up. torch use 0.9.
 # eps: torch: 1e-5. Lasagne: 1e-4

+# Deprecated. Only kept for future reference.
 @layer_register(log_shape=False)
 def BatchNormV1(x, use_local_stat=None, decay=0.9, epsilon=1e-5):
     shape = x.get_shape().as_list()
@@ -110,6 +110,16 @@ def BatchNormV2(x, use_local_stat=None, decay=0.9, epsilon=1e-5):
         decay (float): decay rate of moving average.
         epsilon (float): epsilon to avoid divide-by-zero.

+    Returns:
+        tf.Tensor: a tensor named ``output`` with the same shape as x.
+
+    Variable Names:
+
+    * ``beta``: the bias term.
+    * ``gamma``: the scale term. Input will be transformed by ``x * gamma + beta``.
+    * ``mean/EMA``: the moving average of mean.
+    * ``variance/EMA``: the moving average of variance.

     Note:
         * In multi-tower training, only the first training tower maintains a moving average.
           This is consistent with most frameworks.
@@ -171,6 +181,7 @@ def BatchNormV2(x, use_local_stat=None, decay=0.9, epsilon=1e-5):
             x, moving_mean, moving_var, beta, gamma, epsilon)

     # TODO for other towers, maybe can make it depend some op later
+    # TODO update it later (similar to slim) might be faster?
     if ctx.is_main_training_tower:
         with tf.control_dependencies([update_op1, update_op2]):
             return tf.identity(xn, name='output')
@@ -178,8 +189,4 @@ def BatchNormV2(x, use_local_stat=None, decay=0.9, epsilon=1e-5):
         return tf.identity(xn, name='output')

-if building_rtfd() or get_tf_version() >= 12:
-    BatchNorm = BatchNormV2
-else:
-    logger.warn("BatchNorm might be faster if you update TensorFlow")
-    BatchNorm = BatchNormV1
+BatchNorm = BatchNormV2
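
For readers skimming this diff, a minimal sketch of how the newly documented variable names can be used. The ``bn`` prefix and the helper function are hypothetical, and the EMA variables may live in the ``MODEL_VARIABLES`` collection rather than the trainable set:

```python
import tensorflow as tf

# Suppose a model built with tensorpack contains:
#     l = BatchNorm('bn', l)
# Per the docstring above, the graph then holds variables
# bn/beta, bn/gamma, bn/mean/EMA and bn/variance/EMA.

def find_bn_vars(prefix='bn'):
    """Look up BatchNorm variables of a built graph by their documented names."""
    wanted = {'beta', 'gamma', 'mean/EMA', 'variance/EMA'}
    return {v.op.name: v for v in tf.global_variables()
            if v.op.name.startswith(prefix + '/') and
            v.op.name[len(prefix) + 1:] in wanted}
```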
@@ -30,6 +30,14 @@ def Conv2D(x, out_channel, kernel_shape,
         b_init: initializer for b. Defaults to zero.
         nl: a nonlinearity function.
         use_bias (bool): whether to use bias.

+    Returns:
+        tf.Tensor: a NHWC tensor named ``output``.
+
+    Variable Names:
+
+    * ``W``: weights
+    * ``b``: bias
     """
     in_shape = x.get_shape().as_list()
     in_channel = in_shape[-1]
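
The same ``W``/``b`` naming is added to ``Deconv2D`` and ``FullyConnected`` below. A hedged sketch of why the names matter, e.g. for exporting the numpy-dictionary checkpoints mentioned in the README (layer names ``conv0``/``fc0`` and the helper are made up):

```python
import tensorflow as tf

# Inside a model:
#     l = Conv2D('conv0', image, 32, 3)   # creates conv0/W, conv0/b
#     l = FullyConnected('fc0', l, 10)    # creates fc0/W, fc0/b

def dump_weights(sess):
    """Export all trainable weights keyed by the documented names."""
    return {v.op.name: sess.run(v) for v in tf.trainable_variables()}
    # keys would look like: 'conv0/W', 'conv0/b', 'fc0/W', 'fc0/b', ...
```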
@@ -95,6 +103,14 @@ def Deconv2D(x, out_shape, kernel_shape,
         b_init: initializer for b. Defaults to zero.
         nl: a nonlinearity function.
         use_bias (bool): whether to use bias.

+    Returns:
+        tf.Tensor: a NHWC tensor named ``output``.
+
+    Variable Names:
+
+    * ``W``: weights
+    * ``b``: bias
     """
     in_shape = x.get_shape().as_list()[1:]
     in_channel = in_shape[-1]
...
@@ -25,6 +25,14 @@ def FullyConnected(x, out_dim,
         b_init: initializer for b. Defaults to zero.
         nl: a nonlinearity function
         use_bias (bool): whether to use bias.

+    Returns:
+        tf.Tensor: a NC tensor named ``output``.
+
+    Variable Names:
+
+    * ``W``: weights
+    * ``b``: bias
     """
     x = symbf.batch_flatten(x)
     in_dim = x.get_shape().as_list()[1]
...
@@ -59,7 +59,7 @@ def ImageSample(inputs, borderMode='repeat'):
         borderMode: either "repeat" or "constant" (zero-filled)

     Returns:
-        a (N,H',W',C) tensor.
+        tf.Tensor: a tensor named ``output`` of shape (N,H',W',C).
     """
     # TODO borderValue
     template, mapping = inputs
@@ -102,7 +102,7 @@ def ImageSample(inputs, borderMode='repeat'):
         mask = tf.reduce_all(mask, [3])  # bxh2xw2 boolean
         mask = tf.expand_dims(mask, 3)
         ret = ret * tf.cast(mask, tf.float32)
-    return ret
+    return tf.identity(ret, name='output')

 class TestSample(TestModel):
...
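
A hedged usage sketch for ``ImageSample``. Shapes are made up; ``mapping`` holds one real-valued (y, x) coordinate into ``template`` per output pixel, matching the (N,H',W',C) return shape documented above:

```python
import tensorflow as tf
from tensorpack import ImageSample  # assuming the usual top-level export

template = tf.placeholder(tf.float32, [None, 64, 64, 3])  # images to sample from
mapping = tf.placeholder(tf.float32, [None, 32, 32, 2])   # (y, x) per output pixel

out = ImageSample('sample', [template, mapping], borderMode='repeat')
# out has shape (N, 32, 32, 3); after this commit it is also addressable
# by name, e.g. tf.get_default_graph().get_tensor_by_name('sample/output:0')
```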
@@ -21,7 +21,7 @@ def Maxout(x, num_unit):
         num_unit (int): an int. Must be divisible by C.

     Returns:
-        tf.Tensor: of shape NHW(C/num_unit).
+        tf.Tensor: of shape NHW(C/num_unit) named ``output``.
     """
     input_shape = x.get_shape().as_list()
     ndim = len(input_shape)
@@ -46,6 +46,10 @@ def PReLU(x, init=0.001, name='output'):
         x (tf.Tensor): input
         init (float): initial value for the learnable slope.
         name (str): name of the output.

+    Variable Names:
+
+    * ``alpha``: learnable slope.
     """
     init = tf.constant_initializer(init)
     alpha = tf.get_variable('alpha', [], initializer=init)
...
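
A short hedged sketch of the documented ``alpha`` variable. The layer name ``prelu`` is made up; note the ``[]`` shape above means a single slope shared across all channels:

```python
import tensorflow as tf
from tensorpack import PReLU  # assuming the usual top-level export

x = tf.placeholder(tf.float32, [None, 32, 32, 16])
y = PReLU('prelu', x, init=0.001)

# The learnable slope is the scalar variable 'prelu/alpha':
alphas = [v for v in tf.trainable_variables() if v.op.name == 'prelu/alpha']
```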
@@ -25,6 +25,9 @@ def MaxPooling(x, shape, stride=None, padding='VALID'):
         shape: int or (h, w) tuple
         stride: int or (h, w) tuple. Defaults to be the same as shape.
         padding (str): 'valid' or 'same'.

+    Returns:
+        tf.Tensor: a NHWC tensor named ``output``.
     """
     padding = padding.upper()
     shape = shape4d(shape)
@@ -48,6 +51,9 @@ def AvgPooling(x, shape, stride=None, padding='VALID'):
         shape: int or (h, w) tuple
         stride: int or (h, w) tuple. Defaults to be the same as shape.
         padding (str): 'valid' or 'same'.

+    Returns:
+        tf.Tensor: a NHWC tensor named ``output``.
     """
     padding = padding.upper()
     shape = shape4d(shape)
@@ -69,7 +75,7 @@ def GlobalAvgPooling(x):
     Args:
         x (tf.Tensor): a NHWC tensor.

     Returns:
-        tf.Tensor: a NC tensor.
+        tf.Tensor: a NC tensor named ``output``.
     """
     assert x.get_shape().ndims == 4
     return tf.reduce_mean(x, [1, 2], name='output')
@@ -101,6 +107,9 @@ def FixedUnPooling(x, shape, unpool_mat=None):
         shape: int or (h, w) tuple
         unpool_mat: a tf.Tensor or np.ndarray 2D matrix with size=shape.
             If None, will use a matrix with 1 at the top-left corner.

+    Returns:
+        tf.Tensor: a NHWC tensor.
     """
     shape = shape2d(shape)
@@ -138,6 +147,9 @@ def BilinearUpSample(x, shape):
     Args:
         x (tf.Tensor): a NHWC tensor
         shape (int): the upsample factor

+    Returns:
+        tf.Tensor: a NHWC tensor.
     """
     # inp_shape = tf.shape(x)
     # return tf.image.resize_bilinear(x,
...
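
To make the default ``unpool_mat`` above concrete: with ``shape=2`` and no matrix given, each input pixel lands at the top-left of a 2x2 block and the rest is zero. A NumPy sketch that mirrors this behavior (an illustration only, not the library's code path):

```python
import numpy as np

def fixed_unpool_2x2(x):
    """x: (N, H, W, C) -> (N, 2H, 2W, C), value at top-left of each 2x2 block."""
    n, h, w, c = x.shape
    out = np.zeros((n, 2 * h, 2 * w, c), dtype=x.dtype)
    out[:, ::2, ::2, :] = x  # the '1 at the top-left corner' of the default unpool_mat
    return out

x = np.arange(4, dtype=np.float32).reshape(1, 2, 2, 1)
print(fixed_unpool_2x2(x)[0, :, :, 0])
# [[0. 0. 1. 0.]
#  [0. 0. 0. 0.]
#  [2. 0. 3. 0.]
#  [0. 0. 0. 0.]]
```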
@@ -19,6 +19,7 @@ def ConcatWith(x, dim, tensor):
         dim (int): the dimension along which to concatenate
         tensor (list[tf.Tensor]): a tensor or list of tensors to concatenate with x.
             x will be at the beginning

     Returns:
         tf.Tensor: ``tf.concat_v2([x] + tensor, dim)``
     """
...
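
For reference, ``ConcatWith`` is a thin wrapper around the documented expression; an equivalent hedged one-liner (``tf.concat_v2`` is the TF 0.12-era spelling, renamed ``tf.concat`` in TF 1.0):

```python
import tensorflow as tf

x = tf.zeros([4, 8, 8, 16])
y = tf.zeros([4, 8, 8, 32])
z = tf.zeros([4, 8, 8, 8])

# ConcatWith(x, 3, [y, z]) is documented to be equivalent to:
out = tf.concat_v2([x, y, z], 3)  # shape (4, 8, 8, 56)
```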
 #!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# File: softmax.py
+# -*- coding: utf-8 -*- File: softmax.py
 # Author: Yuxin Wu <ppwwyyxxc@gmail.com>

 import tensorflow as tf
@@ -17,9 +16,17 @@ def SoftMax(x, use_temperature=False, temperature_init=1.0):
     <https://arxiv.org/abs/1503.02531>`_.

     Args:
-        x (tf.Tensor): input
+        x (tf.Tensor): input of any dimension. Softmax will be performed on
+            the last dimension.
         use_temperature (bool): use a learnable temperature or not.
         temperature_init (float): initial value of the temperature.

+    Returns:
+        tf.Tensor: a tensor of the same shape named ``output``.
+
+    Variable Names:
+
+    * ``invtemp``: 1.0/temperature.
     """
     if use_temperature:
         t = tf.get_variable('invtemp', [],
...
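
To make the temperature behavior concrete, a NumPy sketch of what the docstring describes. This mirrors, rather than reuses, the layer's internals: ``invtemp = 1/temperature`` scales the logits before the softmax on the last dimension:

```python
import numpy as np

def softmax_with_temperature(logits, temperature=1.0):
    """Softmax over the last dimension. temperature > 1 softens the
    distribution (useful for distillation); temperature < 1 sharpens it."""
    invtemp = 1.0 / temperature
    z = logits * invtemp
    z = z - z.max(axis=-1, keepdims=True)  # subtract max for numerical stability
    e = np.exp(z)
    return e / e.sum(axis=-1, keepdims=True)

print(softmax_with_temperature(np.array([1.0, 2.0, 3.0]), temperature=2.0))
```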
@@ -182,7 +182,7 @@ def build_patch_list(patch_list,
 def dump_dataflow_images(df, index=0, batched=True,
                          number=1000, output_dir=None,
                          scale=1, resize=None, viz=None,
-                         flipRGB=False, exit_after=True):
+                         flipRGB=False):
     """
     Dump or visualize images of a :class:`DataFlow`.
@@ -199,7 +199,6 @@ def dump_dataflow_images(df, index=0, batched=True,
             with :func:`build_patch_list` for visualization. No visualization will happen by
             default.
         flipRGB (bool): apply a RGB<->BGR conversion or not.
-        exit_after (bool): ``sys.exit()`` after this function.
     """
     if output_dir:
         mkdir_p(output_dir)
@@ -221,10 +220,7 @@ def dump_dataflow_images(df, index=0, batched=True,
         for img in imgbatch:
             cnt += 1
             if cnt == number:
-                if exit_after:
-                    sys.exit()
-                else:
-                    return
+                return
             if scale != 1:
                 img = img * scale
             if resize is not None:
...
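
A hedged note on the behavior change above: with ``exit_after`` gone, the function now returns instead of killing the process, so callers regain control. A sketch (the dataflow below is a made-up example):

```python
from tensorpack.dataflow import dataset        # any DataFlow would do
from tensorpack.utils.viz import dump_dataflow_images

df = dataset.Mnist('train')                    # made-up example dataflow
dump_dataflow_images(df, batched=False, number=100, output_dir='/tmp/mnist-dump')
# Before this commit the call would sys.exit() by default once `number`
# images were dumped; now it simply returns and execution continues here.
print("done dumping")
```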