Clean up some examples; update the CIFAR10 ResNet example

fe548cf0 · Yuxin Wu · 8a748e61 · fe548cf0 · fe548cf0 · fe548cf0
Commit fe548cf0 authored Mar 14, 2016 by Yuxin Wu
4 changed files
--- a/examples/cifar10_convnet.py
+++ b/examples/cifar10_convnet.py
 #!/usr/bin/env python
 # -*- coding: UTF-8 -*-
-# File: example_cifar10.py
+# File: cifar10_convnet.py
 # Author: Yuxin Wu <ppwwyyxx@gmail.com>
 import tensorflow as tf
@@ -160,7 +160,6 @@ if __name__ == '__main__':
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    with tf.Graph().as_default():
-        with tf.device('/cpu:0'):
        config = get_config()
        if args.load:
            config.session_init = SaverRestore(args.load)

--- a/examples/cifar10_resnet.py
+++ b/examples/cifar10_resnet.py
 #!/usr/bin/env python
 # -*- coding: UTF-8 -*-
-# File: tryagain.py
+# File: cifar10_resnet.py
 # Author: Yuxin Wu <ppwwyyxx@gmail.com>
 import tensorflow as tf
@@ -19,13 +19,20 @@ from tensorpack.dataflow import imgaug
 """
 CIFAR10-resnet example.
-91.5% validation accuracy after 82 epoch (32k step)
+I can reproduce the results in:
-92.6 validation accuracy after 190 epoch
+Deep Residual Learning for Image Recognition, arXiv:1512.03385
+for n=5 and n=18
+This model achieves slightly better results due to the use of the
+whole training set instead of a 95:5 train-val split.
 """
 BATCH_SIZE = 128
 class Model(ModelDesc):
+    def __init__(self, n):
+        super(Model, self).__init__()
+        self.n = n
    def _get_input_vars(self):
        return [InputVar(tf.float32, [None, 32, 32, 3], 'input'),
                InputVar(tf.int32, [None], 'label')
@@ -63,25 +70,24 @@ class Model(ModelDesc):
                    l = tf.pad(l, [[0,0], [0,0], [0,0], [in_channel//2, in_channel//2]])
                l = b2 + l
-                l = tf.nn.relu(
+                l = tf.nn.relu(l)
-                    BatchNorm('bno', l, is_training))
                return l
        l = conv('conv1', image, 16, 1)
        l = BatchNorm('bn1', l, is_training)
        l = tf.nn.relu(l)
-        for k in range(5):
+        for k in range(self.n):
            l = residual('res1.{}'.format(k), l)
        # 32,c=16
        l = residual('res2.0', l, increase_dim=True)
-        for k in range(1, 5):
+        for k in range(1, self.n):
            l = residual('res2.{}'.format(k), l)
        # 16,c=32
        l = residual('res3.0', l, increase_dim=True)
-        for k in range(1, 5):
+        for k in range(1, self.n):
            l = residual('res3.' + str(k), l)
        # 8,c=64
        l = GlobalAvgPooling('gap', l)
@@ -146,7 +152,7 @@ def get_config():
    lr = tf.train.exponential_decay(
        learning_rate=1e-1,
        global_step=get_global_step_var(),
-        decay_steps=32000,
+        decay_steps=36000,
        decay_rate=0.1, staircase=True, name='learning_rate')
    tf.scalar_summary('learning_rate', lr)
@@ -159,7 +165,7 @@ def get_config():
            ValidationError(dataset_test, prefix='test'),
        ]),
        session_config=sess_config,
-        model=Model(),
+        model=Model(n=18),
        step_per_epoch=step_per_epoch,
        max_epoch=500,
    )

--- a/examples/svhn_digit_convnet.py
+++ b/examples/svhn_digit_convnet.py
 #!/usr/bin/env python
 # -*- coding: UTF-8 -*-
-# File: svhn_fast.py
+# File: svhn_digit_convnet.py
 # Author: Yuxin Wu <ppwwyyxx@gmail.com>
 import tensorflow as tf
@@ -137,7 +137,6 @@ if __name__ == '__main__':
        os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    with tf.Graph().as_default():
-        with tf.device('/cpu:0'):
        config = get_config()
        if args.load:
            config.session_init = SaverRestore(args.load)

--- a/tensorpack/models/batch_norm.py
+++ b/tensorpack/models/batch_norm.py
@@ -42,13 +42,6 @@ def BatchNorm(x, use_local_stat=True, decay=0.9, epsilon=1e-5):
        'gamma', [n_out],
        initializer=tf.constant_initializer(1.0))
-    # XXX hack to clear shape. see tensorflow#1162
-    if shape[0] is not None:
-        x = tf.tile(x, tf.pack([1,1,1,1]))
-        hack_shape = copy(shape)
-        hack_shape[0] = None
-        x.set_shape(hack_shape)
    batch_mean, batch_var = tf.nn.moments(x, [0, 1, 2], name='moments')
    ema = tf.train.ExponentialMovingAverage(decay=decay)