Commit 562f0906 authored by Yuxin Wu

add variable names to docs of models/

parent 4b99af0a
@@ -11,13 +11,16 @@ Pretrained model for (1,4,32)-ResNet18 and (1,2,6)-AlexNet are available at
 They're provided in the format of numpy dictionary, so it should be very easy to port into other applications.
 The __binary-weight 4-bit-activation ResNet-18__ model has 59.2% top-1 validation accuracy.

+Note that when (W,A,G) is set to (1,32,32), this code is also an implementation of [Binary Weight Network](https://arxiv.org/abs/1511.00363).
+But with (W,A,G) set to (1,1,32), it is not equivalent to [XNOR-Net](https://arxiv.org/abs/1603.05279), although it won't be hard to implement it.

 Alternative link to this page: [http://dorefa.net](http://dorefa.net)

 ## Preparation:
 To use the script, you'll need:
-+ TensorFlow >= 0.12
++ TensorFlow >= 0.12.1
 + OpenCV bindings for Python
...
@@ -7,17 +7,17 @@ import tensorflow as tf
 from tensorflow.contrib.framework import add_model_variable
 from tensorflow.python.training import moving_averages
-from ..tfutils.common import get_tf_version
 from ..tfutils.tower import get_current_tower_context
-from ..utils import logger, building_rtfd
+from ..utils import logger
 from .common import layer_register

-__all__ = ['BatchNorm', 'BatchNormV1', 'BatchNormV2']
+__all__ = ['BatchNorm']

 # decay: being too close to 1 leads to slow start-up. torch use 0.9.
 # eps: torch: 1e-5. Lasagne: 1e-4

+# Deprecated. Only kept for future reference.
 @layer_register(log_shape=False)
 def BatchNormV1(x, use_local_stat=None, decay=0.9, epsilon=1e-5):
     shape = x.get_shape().as_list()
@@ -110,6 +110,16 @@ def BatchNormV2(x, use_local_stat=None, decay=0.9, epsilon=1e-5):
         decay (float): decay rate of moving average.
         epsilon (float): epsilon to avoid divide-by-zero.

+    Returns:
+        tf.Tensor: a tensor named ``output`` with the same shape as x.
+
+    Variable Names:
+
+    * ``beta``: the bias term.
+    * ``gamma``: the scale term. Input will be transformed by ``x * gamma + beta``.
+    * ``mean/EMA``: the moving average of mean.
+    * ``variance/EMA``: the moving average of variance.

     Note:
         * In multi-tower training, only the first training tower maintains a moving average.
           This is consistent with most frameworks.
@@ -171,6 +181,7 @@ def BatchNormV2(x, use_local_stat=None, decay=0.9, epsilon=1e-5):
             x, moving_mean, moving_var, beta, gamma, epsilon)

     # TODO for other towers, maybe can make it depend some op later
+    # TODO update it later (similar to slim) might be faster?
     if ctx.is_main_training_tower:
         with tf.control_dependencies([update_op1, update_op2]):
             return tf.identity(xn, name='output')
@@ -178,8 +189,4 @@ def BatchNormV2(x, use_local_stat=None, decay=0.9, epsilon=1e-5):
         return tf.identity(xn, name='output')

-if building_rtfd() or get_tf_version() >= 12:
-    BatchNorm = BatchNormV2
-else:
-    logger.warn("BatchNorm might be faster if you update TensorFlow")
-    BatchNorm = BatchNormV1
+BatchNorm = BatchNormV2
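
For readers skimming this diff, a minimal sketch of how the newly documented variable names can be used. The ``bn`` prefix and the helper function are hypothetical, and the EMA variables may live in the ``MODEL_VARIABLES`` collection rather than the trainable set:

```python
import tensorflow as tf

# Suppose a model built with tensorpack contains:
#     l = BatchNorm('bn', l)
# Per the docstring above, the graph then holds variables
# bn/beta, bn/gamma, bn/mean/EMA and bn/variance/EMA.

def find_bn_vars(prefix='bn'):
    """Look up BatchNorm variables of a built graph by their documented names."""
    wanted = {'beta', 'gamma', 'mean/EMA', 'variance/EMA'}
    return {v.op.name: v for v in tf.global_variables()
            if v.op.name.startswith(prefix + '/') and
            v.op.name[len(prefix) + 1:] in wanted}
```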
@@ -30,6 +30,14 @@ def Conv2D(x, out_channel, kernel_shape,
         b_init: initializer for b. Defaults to zero.
         nl: a nonlinearity function.
         use_bias (bool): whether to use bias.

+    Returns:
+        tf.Tensor: a NHWC tensor named ``output``.
+
+    Variable Names:
+
+    * ``W``: weights
+    * ``b``: bias
     """
     in_shape = x.get_shape().as_list()
     in_channel = in_shape[-1]
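
The same ``W``/``b`` naming is added to ``Deconv2D`` and ``FullyConnected`` below. A hedged sketch of why the names matter, e.g. for exporting the numpy-dictionary checkpoints mentioned in the README (layer names ``conv0``/``fc0`` and the helper are made up):

```python
import tensorflow as tf

# Inside a model:
#     l = Conv2D('conv0', image, 32, 3)   # creates conv0/W, conv0/b
#     l = FullyConnected('fc0', l, 10)    # creates fc0/W, fc0/b

def dump_weights(sess):
    """Export all trainable weights keyed by the documented names."""
    return {v.op.name: sess.run(v) for v in tf.trainable_variables()}
    # keys would look like: 'conv0/W', 'conv0/b', 'fc0/W', 'fc0/b', ...
```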
@@ -95,6 +103,14 @@ def Deconv2D(x, out_shape, kernel_shape,
         b_init: initializer for b. Defaults to zero.
         nl: a nonlinearity function.
         use_bias (bool): whether to use bias.

+    Returns:
+        tf.Tensor: a NHWC tensor named ``output``.
+
+    Variable Names:
+
+    * ``W``: weights
+    * ``b``: bias
     """
     in_shape = x.get_shape().as_list()[1:]
     in_channel = in_shape[-1]
...
@@ -25,6 +25,14 @@ def FullyConnected(x, out_dim,
         b_init: initializer for b. Defaults to zero.
         nl: a nonlinearity function
         use_bias (bool): whether to use bias.

+    Returns:
+        tf.Tensor: a NC tensor named ``output``.
+
+    Variable Names:
+
+    * ``W``: weights
+    * ``b``: bias
     """
     x = symbf.batch_flatten(x)
     in_dim = x.get_shape().as_list()[1]
...
@@ -59,7 +59,7 @@ def ImageSample(inputs, borderMode='repeat'):
         borderMode: either "repeat" or "constant" (zero-filled)

     Returns:
-        a (N,H',W',C) tensor.
+        tf.Tensor: a tensor named ``output`` of shape (N,H',W',C).
     """
     # TODO borderValue
     template, mapping = inputs
@@ -102,7 +102,7 @@ def ImageSample(inputs, borderMode='repeat'):
         mask = tf.reduce_all(mask, [3])  # bxh2xw2 boolean
         mask = tf.expand_dims(mask, 3)
         ret = ret * tf.cast(mask, tf.float32)
-    return ret
+    return tf.identity(ret, name='output')

 class TestSample(TestModel):
...
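
A hedged usage sketch for ``ImageSample``. Shapes are made up; ``mapping`` holds one real-valued (y, x) coordinate into ``template`` per output pixel, matching the (N,H',W',C) return shape documented above:

```python
import tensorflow as tf
from tensorpack import ImageSample  # assuming the usual top-level export

template = tf.placeholder(tf.float32, [None, 64, 64, 3])  # images to sample from
mapping = tf.placeholder(tf.float32, [None, 32, 32, 2])   # (y, x) per output pixel

out = ImageSample('sample', [template, mapping], borderMode='repeat')
# out has shape (N, 32, 32, 3); after this commit it is also addressable
# by name, e.g. tf.get_default_graph().get_tensor_by_name('sample/output:0')
```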
@@ -21,7 +21,7 @@ def Maxout(x, num_unit):
         num_unit (int): an int. Must be divisible by C.

     Returns:
-        tf.Tensor: of shape NHW(C/num_unit).
+        tf.Tensor: of shape NHW(C/num_unit) named ``output``.
     """
     input_shape = x.get_shape().as_list()
     ndim = len(input_shape)
@@ -46,6 +46,10 @@ def PReLU(x, init=0.001, name='output'):
         x (tf.Tensor): input
         init (float): initial value for the learnable slope.
         name (str): name of the output.

+    Variable Names:
+
+    * ``alpha``: learnable slope.
     """
     init = tf.constant_initializer(init)
     alpha = tf.get_variable('alpha', [], initializer=init)
...
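
A short hedged sketch of the documented ``alpha`` variable. The layer name ``prelu`` is made up; note the ``[]`` shape above means a single slope shared across all channels:

```python
import tensorflow as tf
from tensorpack import PReLU  # assuming the usual top-level export

x = tf.placeholder(tf.float32, [None, 32, 32, 16])
y = PReLU('prelu', x, init=0.001)

# The learnable slope is the scalar variable 'prelu/alpha':
alphas = [v for v in tf.trainable_variables() if v.op.name == 'prelu/alpha']
```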
@@ -25,6 +25,9 @@ def MaxPooling(x, shape, stride=None, padding='VALID'):
         shape: int or (h, w) tuple
         stride: int or (h, w) tuple. Defaults to be the same as shape.
         padding (str): 'valid' or 'same'.

+    Returns:
+        tf.Tensor: a NHWC tensor named ``output``.
     """
     padding = padding.upper()
     shape = shape4d(shape)
@@ -48,6 +51,9 @@ def AvgPooling(x, shape, stride=None, padding='VALID'):
         shape: int or (h, w) tuple
         stride: int or (h, w) tuple. Defaults to be the same as shape.
         padding (str): 'valid' or 'same'.

+    Returns:
+        tf.Tensor: a NHWC tensor named ``output``.
     """
     padding = padding.upper()
     shape = shape4d(shape)
@@ -69,7 +75,7 @@ def GlobalAvgPooling(x):
     Args:
         x (tf.Tensor): a NHWC tensor.

     Returns:
-        tf.Tensor: a NC tensor.
+        tf.Tensor: a NC tensor named ``output``.
     """
     assert x.get_shape().ndims == 4
     return tf.reduce_mean(x, [1, 2], name='output')
@@ -101,6 +107,9 @@ def FixedUnPooling(x, shape, unpool_mat=None):
         shape: int or (h, w) tuple
         unpool_mat: a tf.Tensor or np.ndarray 2D matrix with size=shape.
             If None, will use a matrix with 1 at the top-left corner.

+    Returns:
+        tf.Tensor: a NHWC tensor.
     """
     shape = shape2d(shape)
@@ -138,6 +147,9 @@ def BilinearUpSample(x, shape):
     Args:
         x (tf.Tensor): a NHWC tensor
         shape (int): the upsample factor

+    Returns:
+        tf.Tensor: a NHWC tensor.
     """
     # inp_shape = tf.shape(x)
     # return tf.image.resize_bilinear(x,
...
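
To make the default ``unpool_mat`` above concrete: with ``shape=2`` and no matrix given, each input pixel lands at the top-left of a 2x2 block and the rest is zero. A NumPy sketch that mirrors this behavior (an illustration only, not the library's code path):

```python
import numpy as np

def fixed_unpool_2x2(x):
    """x: (N, H, W, C) -> (N, 2H, 2W, C), value at top-left of each 2x2 block."""
    n, h, w, c = x.shape
    out = np.zeros((n, 2 * h, 2 * w, c), dtype=x.dtype)
    out[:, ::2, ::2, :] = x  # the '1 at the top-left corner' of the default unpool_mat
    return out

x = np.arange(4, dtype=np.float32).reshape(1, 2, 2, 1)
print(fixed_unpool_2x2(x)[0, :, :, 0])
# [[0. 0. 1. 0.]
#  [0. 0. 0. 0.]
#  [2. 0. 3. 0.]
#  [0. 0. 0. 0.]]
```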
@@ -19,6 +19,7 @@ def ConcatWith(x, dim, tensor):
         dim (int): the dimension along which to concatenate
         tensor (list[tf.Tensor]): a tensor or list of tensors to concatenate with x.
             x will be at the beginning

     Returns:
         tf.Tensor: ``tf.concat_v2([x] + tensor, dim)``
     """
...
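
For reference, ``ConcatWith`` is a thin wrapper around the documented expression; an equivalent hedged one-liner (``tf.concat_v2`` is the TF 0.12-era spelling, renamed ``tf.concat`` in TF 1.0):

```python
import tensorflow as tf

x = tf.zeros([4, 8, 8, 16])
y = tf.zeros([4, 8, 8, 32])
z = tf.zeros([4, 8, 8, 8])

# ConcatWith(x, 3, [y, z]) is documented to be equivalent to:
out = tf.concat_v2([x, y, z], 3)  # shape (4, 8, 8, 56)
```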
 #!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# File: softmax.py
+# -*- coding: utf-8 -*- File: softmax.py
 # Author: Yuxin Wu <ppwwyyxxc@gmail.com>

 import tensorflow as tf
@@ -17,9 +16,17 @@ def SoftMax(x, use_temperature=False, temperature_init=1.0):
     <https://arxiv.org/abs/1503.02531>`_.

     Args:
-        x (tf.Tensor): input
+        x (tf.Tensor): input of any dimension. Softmax will be performed on
+            the last dimension.
         use_temperature (bool): use a learnable temperature or not.
         temperature_init (float): initial value of the temperature.

+    Returns:
+        tf.Tensor: a tensor of the same shape named ``output``.
+
+    Variable Names:
+
+    * ``invtemp``: 1.0/temperature.
     """
     if use_temperature:
         t = tf.get_variable('invtemp', [],
...
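
To make the temperature behavior concrete, a NumPy sketch of what the docstring describes. This mirrors, rather than reuses, the layer's internals: ``invtemp = 1/temperature`` scales the logits before the softmax on the last dimension:

```python
import numpy as np

def softmax_with_temperature(logits, temperature=1.0):
    """Softmax over the last dimension. temperature > 1 softens the
    distribution (useful for distillation); temperature < 1 sharpens it."""
    invtemp = 1.0 / temperature
    z = logits * invtemp
    z = z - z.max(axis=-1, keepdims=True)  # subtract max for numerical stability
    e = np.exp(z)
    return e / e.sum(axis=-1, keepdims=True)

print(softmax_with_temperature(np.array([1.0, 2.0, 3.0]), temperature=2.0))
```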
@@ -182,7 +182,7 @@ def build_patch_list(patch_list,
 def dump_dataflow_images(df, index=0, batched=True,
                          number=1000, output_dir=None,
                          scale=1, resize=None, viz=None,
-                         flipRGB=False, exit_after=True):
+                         flipRGB=False):
     """
     Dump or visualize images of a :class:`DataFlow`.
@@ -199,7 +199,6 @@ def dump_dataflow_images(df, index=0, batched=True,
             with :func:`build_patch_list` for visualization. No visualization will happen by
             default.
         flipRGB (bool): apply a RGB<->BGR conversion or not.
-        exit_after (bool): ``sys.exit()`` after this function.
     """
     if output_dir:
         mkdir_p(output_dir)
@@ -221,10 +220,7 @@ def dump_dataflow_images(df, index=0, batched=True,
         for img in imgbatch:
             cnt += 1
             if cnt == number:
-                if exit_after:
-                    sys.exit()
-                else:
-                    return
+                return
             if scale != 1:
                 img = img * scale
             if resize is not None:
...
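
A hedged note on the behavior change above: with ``exit_after`` gone, the function now returns instead of killing the process, so callers regain control. A sketch (the dataflow below is a made-up example):

```python
from tensorpack.dataflow import dataset        # any DataFlow would do
from tensorpack.utils.viz import dump_dataflow_images

df = dataset.Mnist('train')                    # made-up example dataflow
dump_dataflow_images(df, batched=False, number=100, output_dir='/tmp/mnist-dump')
# Before this commit the call would sys.exit() by default once `number`
# images were dumped; now it simply returns and execution continues here.
print("done dumping")
```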