Commit 117fb29f authored by Yuxin Wu

misc updates

parent 6b18f4c5
@@ -135,7 +135,6 @@ class Model(ModelDesc):
             SummaryGradient()]

     def predictor(self, state):
-        # TODO use multitower predictor to speed up training
        return self.predict_value.eval(feed_dict={'state:0': [state]})[0]

 def get_config():
...
@@ -24,9 +24,9 @@ pip install --user -r tensorpack/requirements.txt
 export PYTHONPATH=$PYTHONPATH:`readlink -f tensorpack`
 ```
-+ To perform training, you'll also need [pyzmq](https://github.com/zeromq/pyzmq):
++ To perform training, you'll also need [pyzmq](https://github.com/zeromq/pyzmq) and [scipy](https://www.scipy.org/):
 ```
-pip install --user pyzmq
+pip install --user pyzmq scipy
 ```
 + Pretrained model is hosted at [google drive](https://drive.google.com/open?id=0B308TeQzmFDLa0xOeVQwcXg1ZjQ)
@@ -56,8 +56,8 @@ To eval on ILSVRC12, `path/to/ILSVRC12` must have a subdirectory named 'val' con
 Please use [github issues](https://github.com/ppwwyyxx/tensorpack/issues) for any issues related to the code.
 Send email to the authors for other questions related to the paper.

-Note that although the model uses low bitwidth weights, activations and gradients, those numbers in
-this script are still represented in `tf.float32`. We're not releasing our run-time kernel to speed up.
+Note that although it uses low bitwidth weights, activations and gradients, these values
+here are still represented in `tf.float32`, since TensorFlow doesn't natively support low bitwidth computation.

 ## Citation
...
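To make the note above concrete, here is a minimal sketch (not the authors' run-time kernel) of what k-bit quantization looks like when every tensor stays in `tf.float32`: the values are restricted to a uniform grid of 2^k - 1 steps, but the dtype never changes. The function name is illustrative.

```
import tensorflow as tf

def quantize_float32(x, k):
    # x is assumed to lie in [0, 1]; the output is still a float32 tensor,
    # merely restricted to 2^k - 1 uniformly spaced values.
    n = float(2 ** k - 1)
    return tf.round(x * n) / n
```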
@@ -21,7 +21,8 @@ The original experiments are performed on a proprietary framework.
 This is our attempt to reproduce it on tensorpack.
 This config, with (W,A,G)=(1,1,4), can reach 3.1~3.2% error after 150 epochs.
-With the GaussianDeform augmentor, it will reach 2.8~2.9%.
+With the GaussianDeform augmentor, it will reach 2.8~2.9%
+(we are not using this augmentor in the paper).
 """

 BITW = 1
@@ -65,13 +66,13 @@ def get_dorefa(bitW, bitA, bitG):
         x = tf.clip_by_value(x, 0.0, 1.0)
         x = quantize(x, bitG) - 0.5
         return x * maxx * 2
-    GRAD_DEFINED = True

     def fg(x):
         if bitG == 32:
             return x
         with G.gradient_override_map({"Identity": "FGGrad"}):
             return tf.identity(x)
+    GRAD_DEFINED = True
     return fw, fa, fg

 class Model(ModelDesc):
...
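For readers unfamiliar with the `gradient_override_map` pattern used in `fg` above, here is a hedged sketch under TF 1.x graph-mode assumptions: a gradient function is registered under a custom name, then an `Identity` op is routed through it, so the forward pass is unchanged while backprop is replaced. The `FGGradSketch` name and the pass-through gradient are illustrative, not the actual `FGGrad` implementation.

```
import tensorflow as tf

@tf.RegisterGradient("FGGradSketch")
def _fg_grad_sketch(op, grad):
    # The forward op is an identity; this hook is where DoReFa-style
    # gradient quantization would go. Here the gradient passes through.
    return grad

def fg_sketch(x):
    g = tf.get_default_graph()
    # Identity ops created under this scope use FGGradSketch during backprop.
    with g.gradient_override_map({"Identity": "FGGradSketch"}):
        return tf.identity(x)
```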
@@ -39,17 +39,13 @@ class Model(ModelDesc):
         l = tf.nn.lrn(l, 2, bias=1.0, alpha=2e-5, beta=0.75, name='norm1')
         l = MaxPooling('pool1', l, 3, stride=2, padding='VALID')

-        l = Conv2D('conv2', l, out_channel=256, kernel_shape=5,
-                   padding='SAME', split=2)
+        l = Conv2D('conv2', l, out_channel=256, kernel_shape=5, split=2)
         l = tf.nn.lrn(l, 2, bias=1.0, alpha=2e-5, beta=0.75, name='norm2')
         l = MaxPooling('pool2', l, 3, stride=2, padding='VALID')

-        l = Conv2D('conv3', l, out_channel=384, kernel_shape=3,
-                   padding='SAME')
-        l = Conv2D('conv4', l, out_channel=384, kernel_shape=3,
-                   padding='SAME', split=2)
-        l = Conv2D('conv5', l, out_channel=256, kernel_shape=3,
-                   padding='SAME', split=2)
+        l = Conv2D('conv3', l, out_channel=384, kernel_shape=3)
+        l = Conv2D('conv4', l, out_channel=384, kernel_shape=3, split=2)
+        l = Conv2D('conv5', l, out_channel=256, kernel_shape=3, split=2)
         l = MaxPooling('pool3', l, 3, stride=2, padding='VALID')
         l = FullyConnected('fc6', l, 4096)
...
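Dropping the explicit `padding='SAME'` works because that is `Conv2D`'s default; the `split=2` argument that remains is tensorpack's grouped convolution (AlexNet's original two-tower trick). A rough sketch of the idea in plain TensorFlow, assuming a TF 1.x-style `tf.split` signature; `grouped_conv_sketch` is an illustrative helper, not tensorpack's implementation:

```
import tensorflow as tf

def grouped_conv_sketch(x, kernels):
    # x: NHWC tensor; kernels: one conv filter per channel group.
    groups = tf.split(x, len(kernels), axis=3)       # split the C axis
    outs = [tf.nn.conv2d(g, k, strides=[1, 1, 1, 1], padding='SAME')
            for g, k in zip(groups, kernels)]
    return tf.concat(outs, axis=3)                   # stitch groups back
```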
@@ -28,17 +28,17 @@ class Model(ModelDesc):
         image = image / 128.0 - 1

-        logits = LinearWrap(image) \
-            .Conv2D('conv1', 24, 5, padding='VALID') \
-            .MaxPooling('pool1', 2, padding='SAME') \
-            .Conv2D('conv2', 32, 3, padding='VALID') \
-            .Conv2D('conv3', 32, 3, padding='VALID') \
-            .MaxPooling('pool2', 2, padding='SAME') \
-            .Conv2D('conv4', 64, 3, padding='VALID') \
-            .tf.nn.dropout(keep_prob) \
-            .FullyConnected('fc0', 512,
-                            b_init=tf.constant_initializer(0.1)) \
-            .FullyConnected('linear', out_dim=10, nl=tf.identity)()
+        logits = (LinearWrap(image)
+                  .Conv2D('conv1', 24, 5, padding='VALID')
+                  .MaxPooling('pool1', 2, padding='SAME')
+                  .Conv2D('conv2', 32, 3, padding='VALID')
+                  .Conv2D('conv3', 32, 3, padding='VALID')
+                  .MaxPooling('pool2', 2, padding='SAME')
+                  .Conv2D('conv4', 64, 3, padding='VALID')
+                  .tf.nn.dropout(keep_prob)
+                  .FullyConnected('fc0', 512,
+                                  b_init=tf.constant_initializer(0.1))
+                  .FullyConnected('linear', out_dim=10, nl=tf.identity)())
         prob = tf.nn.softmax(logits, name='output')
         cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, label)
...
@@ -30,6 +30,7 @@ class LinearWrap(object):
     def __init__(self, mod, tensor):
         self._mod = mod
         self._t = tensor
+
     def __getattr__(self, name):
         ret = getattr(self._mod, name)
         if isinstance(ret, ModuleType):
@@ -66,4 +67,7 @@ class LinearWrap(object):

     def __call__(self):
         return self._t
+
+    def tensor(self):
+        return self._t
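For context, the chaining that the last three hunks rely on can be sketched as follows: `__getattr__` fetches a layer function from the wrapped module and returns a closure that applies it to the stored tensor and re-wraps the result, so calls chain; `__call__` (and the newly added `tensor()`) unwraps. This is a simplified model, not the real class, which also handles submodule access like `.tf.nn.dropout`:

```
class LinearWrapSketch(object):
    def __init__(self, mod, tensor):
        self._mod = mod      # module providing layer functions (Conv2D, ...)
        self._t = tensor     # the tensor threaded through the chain

    def __getattr__(self, name):
        layer = getattr(self._mod, name)
        def apply(layer_name, *args, **kwargs):
            # tensorpack layers take (name, input, ...); re-wrap the output
            # so further .Layer(...) calls can be chained.
            return LinearWrapSketch(
                self._mod, layer(layer_name, self._t, *args, **kwargs))
        return apply

    def __call__(self):      # terminal unwrap, as in .FullyConnected(...)()
        return self._t

    def tensor(self):        # the accessor added in this commit
        return self._t
```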
@@ -16,16 +16,20 @@ def Maxout(x, num_unit):
     """
     Maxout networks as in `Maxout Networks <http://arxiv.org/abs/1302.4389>`_.

-    :param input: a NHWC tensor.
+    :param input: a NHWC or NC tensor.
     :param num_unit: an int. must be divisible by C.
     :returns: a NHW(C/num_unit) tensor
     """
     input_shape = x.get_shape().as_list()
-    assert len(input_shape) == 4
-    ch = input_shape[3]
-    assert ch % num_unit == 0
-    x = tf.reshape(x, [-1, input_shape[1], input_shape[2], ch / num_unit, num_unit])
-    return tf.reduce_max(x, 4, name='output')
+    ndim = len(input_shape)
+    assert ndim == 4 or ndim == 2
+    ch = input_shape[-1]
+    assert ch is not None and ch % num_unit == 0
+    if ndim == 4:
+        x = tf.reshape(x, [-1, input_shape[1], input_shape[2], ch / num_unit, num_unit])
+    else:
+        x = tf.reshape(x, [-1, ch / num_unit, num_unit])
+    return tf.reduce_max(x, ndim, name='output')

 @layer_register(log_shape=False)
 def PReLU(x, init=tf.constant_initializer(0.001), name=None):
...
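To see what the new NC branch computes, here is a self-contained sketch (illustrative name, Python 3 integer division): each group of `num_unit` consecutive channels of a `[N, C]` input collapses to its maximum, giving `[N, C/num_unit]`.

```
import tensorflow as tf

def maxout_nc_sketch(x, num_unit):
    # x: [N, C] tensor with C divisible by num_unit.
    ch = x.get_shape().as_list()[-1]
    assert ch is not None and ch % num_unit == 0
    x = tf.reshape(x, [-1, ch // num_unit, num_unit])
    return tf.reduce_max(x, 2)   # max over each group of num_unit channels
```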
@@ -117,7 +117,7 @@ def FixedUnPooling(x, shape, unpool_mat=None):
 @layer_register()
 def BilinearUpSample(x, shape):
     """
-    Bilinear upsample the input images.
+    Non-parametric bilinear upsampling of the input images.
     :param x: input NHWC tensor
     :param shape: an integer
     """
...
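"Non-parametric" here means the interpolation kernel is fixed rather than learned. A minimal functional equivalent, assuming the TF 1.x `tf.image.resize_images(images, size)` signature (the layer itself may be implemented differently, e.g. as a fixed-weight deconvolution):

```
import tensorflow as tf

def bilinear_upsample_sketch(x, shape):
    # x: NHWC tensor; shape: integer upsampling factor.
    h, w = x.get_shape().as_list()[1:3]
    # The default resize method is bilinear; no variables are created.
    return tf.image.resize_images(x, [h * shape, w * shape])
```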