Commit 117fb29f authored by Yuxin Wu

misc updates

parent 6b18f4c5
@@ -135,7 +135,6 @@ class Model(ModelDesc):
             SummaryGradient()]

     def predictor(self, state):
-        # TODO use multitower predictor to speed up training
        return self.predict_value.eval(feed_dict={'state:0': [state]})[0]

 def get_config():
...
@@ -24,9 +24,9 @@ pip install --user -r tensorpack/requirements.txt
 export PYTHONPATH=$PYTHONPATH:`readlink -f tensorpack`
 ```
-+ To perform training, you'll also need [pyzmq](https://github.com/zeromq/pyzmq):
++ To perform training, you'll also need [pyzmq](https://github.com/zeromq/pyzmq) and [scipy](https://www.scipy.org/):
 ```
-pip install --user pyzmq
+pip install --user pyzmq scipy
 ```
 + Pretrained model is hosted at [google drive](https://drive.google.com/open?id=0B308TeQzmFDLa0xOeVQwcXg1ZjQ)
@@ -56,8 +56,8 @@ To eval on ILSVRC12, `path/to/ILSVRC12` must have a subdirectory named 'val' con
 Please use [github issues](https://github.com/ppwwyyxx/tensorpack/issues) for any issues related to the code.
 Send email to the authors for other questions related to the paper.

-Note that although the model uses low bitwidth weights, activations and gradients, those numbers in
-this script are still represented in `tf.float32`. We're not releasing our run-time kernel to speed up.
+Note that although it uses low bitwidth weights, activations and gradients, these values
+here are still represented in `tf.float32`, since TensorFlow doesn't natively support low bitwidth computation.

 ## Citation
...
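To make the note above concrete, here is a minimal sketch (not the authors' run-time kernel) of what k-bit quantization looks like when every tensor stays in `tf.float32`: the values are restricted to a uniform grid of 2^k - 1 steps, but the dtype never changes. The function name is illustrative.

```
import tensorflow as tf

def quantize_float32(x, k):
    # x is assumed to lie in [0, 1]; the output is still a float32 tensor,
    # merely restricted to 2^k - 1 uniformly spaced values.
    n = float(2 ** k - 1)
    return tf.round(x * n) / n
```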
@@ -21,7 +21,8 @@ The original experiments are performed on a proprietary framework.
 This is our attempt to reproduce it on tensorpack.
 This config, with (W,A,G)=(1,1,4), can reach 3.1~3.2% error after 150 epochs.
-With the GaussianDeform augmentor, it will reach 2.8~2.9%.
+With the GaussianDeform augmentor, it will reach 2.8~2.9%
+(we are not using this augmentor in the paper).
 """

 BITW = 1
@@ -65,13 +66,13 @@ def get_dorefa(bitW, bitA, bitG):
         x = tf.clip_by_value(x, 0.0, 1.0)
         x = quantize(x, bitG) - 0.5
         return x * maxx * 2
-    GRAD_DEFINED = True

     def fg(x):
         if bitG == 32:
             return x
         with G.gradient_override_map({"Identity": "FGGrad"}):
             return tf.identity(x)
+    GRAD_DEFINED = True
     return fw, fa, fg

 class Model(ModelDesc):
...
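For readers unfamiliar with the `gradient_override_map` pattern used in `fg` above, here is a hedged sketch under TF 1.x graph-mode assumptions: a gradient function is registered under a custom name, then an `Identity` op is routed through it, so the forward pass is unchanged while backprop is replaced. The `FGGradSketch` name and the pass-through gradient are illustrative, not the actual `FGGrad` implementation.

```
import tensorflow as tf

@tf.RegisterGradient("FGGradSketch")
def _fg_grad_sketch(op, grad):
    # The forward op is an identity; this hook is where DoReFa-style
    # gradient quantization would go. Here the gradient passes through.
    return grad

def fg_sketch(x):
    g = tf.get_default_graph()
    # Identity ops created under this scope use FGGradSketch during backprop.
    with g.gradient_override_map({"Identity": "FGGradSketch"}):
        return tf.identity(x)
```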
@@ -39,17 +39,13 @@ class Model(ModelDesc):
         l = tf.nn.lrn(l, 2, bias=1.0, alpha=2e-5, beta=0.75, name='norm1')
         l = MaxPooling('pool1', l, 3, stride=2, padding='VALID')

-        l = Conv2D('conv2', l, out_channel=256, kernel_shape=5,
-                   padding='SAME', split=2)
+        l = Conv2D('conv2', l, out_channel=256, kernel_shape=5, split=2)
         l = tf.nn.lrn(l, 2, bias=1.0, alpha=2e-5, beta=0.75, name='norm2')
         l = MaxPooling('pool2', l, 3, stride=2, padding='VALID')

-        l = Conv2D('conv3', l, out_channel=384, kernel_shape=3,
-                   padding='SAME')
-        l = Conv2D('conv4', l, out_channel=384, kernel_shape=3,
-                   padding='SAME', split=2)
-        l = Conv2D('conv5', l, out_channel=256, kernel_shape=3,
-                   padding='SAME', split=2)
+        l = Conv2D('conv3', l, out_channel=384, kernel_shape=3)
+        l = Conv2D('conv4', l, out_channel=384, kernel_shape=3, split=2)
+        l = Conv2D('conv5', l, out_channel=256, kernel_shape=3, split=2)
         l = MaxPooling('pool3', l, 3, stride=2, padding='VALID')
         l = FullyConnected('fc6', l, 4096)
...
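Dropping the explicit `padding='SAME'` works because that is `Conv2D`'s default; the `split=2` argument that remains is tensorpack's grouped convolution (AlexNet's original two-tower trick). A rough sketch of the idea in plain TensorFlow, assuming a TF 1.x-style `tf.split` signature; `grouped_conv_sketch` is an illustrative helper, not tensorpack's implementation:

```
import tensorflow as tf

def grouped_conv_sketch(x, kernels):
    # x: NHWC tensor; kernels: one conv filter per channel group.
    groups = tf.split(x, len(kernels), axis=3)       # split the C axis
    outs = [tf.nn.conv2d(g, k, strides=[1, 1, 1, 1], padding='SAME')
            for g, k in zip(groups, kernels)]
    return tf.concat(outs, axis=3)                   # stitch groups back
```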
@@ -28,17 +28,17 @@ class Model(ModelDesc):
         image = image / 128.0 - 1

-        logits = LinearWrap(image) \
-            .Conv2D('conv1', 24, 5, padding='VALID') \
-            .MaxPooling('pool1', 2, padding='SAME') \
-            .Conv2D('conv2', 32, 3, padding='VALID') \
-            .Conv2D('conv3', 32, 3, padding='VALID') \
-            .MaxPooling('pool2', 2, padding='SAME') \
-            .Conv2D('conv4', 64, 3, padding='VALID') \
-            .tf.nn.dropout(keep_prob) \
-            .FullyConnected('fc0', 512,
-                            b_init=tf.constant_initializer(0.1)) \
-            .FullyConnected('linear', out_dim=10, nl=tf.identity)()
+        logits = (LinearWrap(image)
+                  .Conv2D('conv1', 24, 5, padding='VALID')
+                  .MaxPooling('pool1', 2, padding='SAME')
+                  .Conv2D('conv2', 32, 3, padding='VALID')
+                  .Conv2D('conv3', 32, 3, padding='VALID')
+                  .MaxPooling('pool2', 2, padding='SAME')
+                  .Conv2D('conv4', 64, 3, padding='VALID')
+                  .tf.nn.dropout(keep_prob)
+                  .FullyConnected('fc0', 512,
+                                  b_init=tf.constant_initializer(0.1))
+                  .FullyConnected('linear', out_dim=10, nl=tf.identity)())
         prob = tf.nn.softmax(logits, name='output')
         cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, label)
...
@@ -30,6 +30,7 @@ class LinearWrap(object):
     def __init__(self, mod, tensor):
         self._mod = mod
         self._t = tensor
+
     def __getattr__(self, name):
         ret = getattr(self._mod, name)
         if isinstance(ret, ModuleType):
@@ -66,4 +67,7 @@ class LinearWrap(object):

     def __call__(self):
         return self._t
+
+    def tensor(self):
+        return self._t
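For context, the chaining that the last three hunks rely on can be sketched as follows: `__getattr__` fetches a layer function from the wrapped module and returns a closure that applies it to the stored tensor and re-wraps the result, so calls chain; `__call__` (and the newly added `tensor()`) unwraps. This is a simplified model, not the real class, which also handles submodule access like `.tf.nn.dropout`:

```
class LinearWrapSketch(object):
    def __init__(self, mod, tensor):
        self._mod = mod      # module providing layer functions (Conv2D, ...)
        self._t = tensor     # the tensor threaded through the chain

    def __getattr__(self, name):
        layer = getattr(self._mod, name)
        def apply(layer_name, *args, **kwargs):
            # tensorpack layers take (name, input, ...); re-wrap the output
            # so further .Layer(...) calls can be chained.
            return LinearWrapSketch(
                self._mod, layer(layer_name, self._t, *args, **kwargs))
        return apply

    def __call__(self):      # terminal unwrap, as in .FullyConnected(...)()
        return self._t

    def tensor(self):        # the accessor added in this commit
        return self._t
```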
@@ -16,16 +16,20 @@ def Maxout(x, num_unit):
     """
     Maxout networks as in `Maxout Networks <http://arxiv.org/abs/1302.4389>`_.

-    :param input: a NHWC tensor.
+    :param input: a NHWC or NC tensor.
     :param num_unit: an int. must be divisible by C.
     :returns: a NHW(C/num_unit) tensor
     """
     input_shape = x.get_shape().as_list()
-    assert len(input_shape) == 4
-    ch = input_shape[3]
-    assert ch % num_unit == 0
-    x = tf.reshape(x, [-1, input_shape[1], input_shape[2], ch / num_unit, num_unit])
-    return tf.reduce_max(x, 4, name='output')
+    ndim = len(input_shape)
+    assert ndim == 4 or ndim == 2
+    ch = input_shape[-1]
+    assert ch is not None and ch % num_unit == 0
+    if ndim == 4:
+        x = tf.reshape(x, [-1, input_shape[1], input_shape[2], ch / num_unit, num_unit])
+    else:
+        x = tf.reshape(x, [-1, ch / num_unit, num_unit])
+    return tf.reduce_max(x, ndim, name='output')

 @layer_register(log_shape=False)
 def PReLU(x, init=tf.constant_initializer(0.001), name=None):
...
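To see what the new NC branch computes, here is a self-contained sketch (illustrative name, Python 3 integer division): each group of `num_unit` consecutive channels of a `[N, C]` input collapses to its maximum, giving `[N, C/num_unit]`.

```
import tensorflow as tf

def maxout_nc_sketch(x, num_unit):
    # x: [N, C] tensor with C divisible by num_unit.
    ch = x.get_shape().as_list()[-1]
    assert ch is not None and ch % num_unit == 0
    x = tf.reshape(x, [-1, ch // num_unit, num_unit])
    return tf.reduce_max(x, 2)   # max over each group of num_unit channels
```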
@@ -117,7 +117,7 @@ def FixedUnPooling(x, shape, unpool_mat=None):
 @layer_register()
 def BilinearUpSample(x, shape):
     """
-    Bilinear upsample the input images.
+    Non-parametric bilinear upsampling of the input images.
     :param x: input NHWC tensor
     :param shape: an integer
     """
...
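"Non-parametric" here means the interpolation kernel is fixed rather than learned. A minimal functional equivalent, assuming the TF 1.x `tf.image.resize_images(images, size)` signature (the layer itself may be implemented differently, e.g. as a fixed-weight deconvolution):

```
import tensorflow as tf

def bilinear_upsample_sketch(x, shape):
    # x: NHWC tensor; shape: integer upsampling factor.
    h, w = x.get_shape().as_list()[1:3]
    # The default resize method is bilinear; no variables are created.
    return tf.image.resize_images(x, [h * shape, w * shape])
```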