Commit e7884dd0 authored by Yuxin Wu

small updates

parent a9563678
@@ -22,9 +22,7 @@ from tensorpack.dataflow import imgaug
 CIFAR10-resnet example.
 I can reproduce the results in:
 Deep Residual Learning for Image Recognition, arxiv:1512.03385
-for n=5 and 18
-This model achieves slightly better results due to the use of the
-whole training set instead of a 95:5 train-val split.
+for n=5 and 18 (6.5% val error)
 """
 BATCH_SIZE = 128
@@ -108,7 +106,8 @@ class Model(ModelDesc):
             MOVING_SUMMARY_VARS_KEY, tf.reduce_mean(wrong, name='train_error'))
         # weight decay on all W of fc layers
-        wd_cost = regularize_cost('.*/W', l2_regularizer(0.0002), name='regularize_loss')
+        #wd_cost = regularize_cost('.*/W', l2_regularizer(0.0002), name='regularize_loss')
+        wd_cost = 0.0001 * regularize_cost('.*/W', tf.nn.l2_loss)
         tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, wd_cost)
         add_param_summary([('.*/W', ['histogram', 'sparsity'])])  # monitor W
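For reference, a small numpy sketch (not part of the commit) of what the two weight-decay forms in this commit compute. It assumes tf.nn.l2_loss(w) is sum(w**2)/2, that l2_regularizer(scale)(w) is scale times that, and that regularize_cost sums the given function over all variables whose names match the regex; the weight list below is a made-up stand-in for those matched variables.

import numpy as np

def l2_loss(w):
    # stand-in for tf.nn.l2_loss: sum(w**2) / 2
    return np.sum(w ** 2) / 2.0

def regularize_cost(weights, func):
    # stand-in for tensorpack's regularize_cost: sum func over the matched variables
    return sum(func(w) for w in weights)

weights = [np.random.randn(64, 10), np.random.randn(3, 3, 16, 16)]  # hypothetical matched W's

# CIFAR form after this commit: 0.0001 * regularize_cost('.*/W', tf.nn.l2_loss)
wd_cifar = 0.0001 * regularize_cost(weights, l2_loss)

# SVHN form after this commit: regularize_cost('fc.*/W', l2_regularizer(0.00001))
wd_svhn = regularize_cost(weights, lambda w: 0.00001 * l2_loss(w))

# On the same variable list both reduce to coef * sum_i ||W_i||^2 / 2, so they
# differ only by the coefficient (and by which variables the regex matches).
assert np.isclose(wd_cifar, 10.0 * wd_svhn)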
@@ -20,7 +20,7 @@ from tensorpack.dataflow import imgaug
 """
 SVHN convnet.
-About 3.0% validation error after 120 epoch. 2.7% after 250 epoch.
+About 3.0% validation error after 120 epoch. 2.7% after 300 epoch.
 """
 class Model(ModelDesc):
@@ -63,9 +63,7 @@ class Model(ModelDesc):
             MOVING_SUMMARY_VARS_KEY, tf.reduce_mean(wrong, name='train_error'))
         # weight decay on all W of fc layers
-        wd_cost = tf.mul(0.00001,
-                         regularize_cost('fc.*/W', tf.nn.l2_loss),
-                         name='regularize_loss')
+        wd_cost = regularize_cost('fc.*/W', l2_regularizer(0.00001))
         tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, wd_cost)
         add_param_summary([('.*/W', ['histogram', 'sparsity'])])  # monitor W
@@ -83,7 +81,7 @@ def get_config():
         imgaug.Resize((40, 40)),
         imgaug.BrightnessAdd(30),
         imgaug.Contrast((0.5,1.5)),
-        imgaug.GaussianDeform(
+        imgaug.GaussianDeform(  # this is slow
             [(0.2, 0.2), (0.2, 0.8), (0.8,0.8), (0.8,0.2)],
             (40,40), 0.2, 3),
     ]
@@ -7,6 +7,8 @@ import numpy as np
 __all__ = ['GaussianDeform', 'GaussianMap']
 
+# TODO really needs speedup
+
 class GaussianMap(object):
     def __init__(self, image_shape, sigma=0.5):
         assert len(image_shape) == 2
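As context for the two slowness notes above ("# this is slow", "# TODO really needs speedup"), here is a rough numpy sketch of the general Gaussian-anchored deformation idea, not tensorpack's exact implementation: each anchor point gets a random shift, the shift is spread over the image with a Gaussian weight map, and the image is resampled along the resulting dense flow. Building per-pixel maps and resampling in pure numpy for every sample is the expensive part. Function and parameter names below are illustrative only.

import numpy as np

def gaussian_map(shape, center, sigma=0.5):
    # Gaussian bump centered at `center` (relative coordinates), sigma in relative units.
    h, w = shape
    ys, xs = np.mgrid[0:h, 0:w].astype('float32')
    cy, cx = center[0] * h, center[1] * w
    d2 = ((ys - cy) / h) ** 2 + ((xs - cx) / w) ** 2
    return np.exp(-d2 / sigma ** 2)

def gaussian_deform(img, anchors, sigma=0.2, randrange=3, rng=np.random):
    h, w = img.shape[:2]
    flow = np.zeros((h, w, 2), dtype='float32')
    for anchor in anchors:
        # each anchor contributes a random shift, weighted by its Gaussian map
        shift = rng.uniform(-randrange, randrange, size=2)
        flow += gaussian_map((h, w), anchor, sigma)[:, :, None] * shift
    ys, xs = np.mgrid[0:h, 0:w].astype('float32')
    # nearest-neighbour resampling along the displaced coordinates
    # (a real implementation would typically use bilinear sampling)
    sy = np.clip(ys + flow[:, :, 0], 0, h - 1).astype('int32')
    sx = np.clip(xs + flow[:, :, 1], 0, w - 1).astype('int32')
    return img[sy, sx]

# usage: gaussian_deform(image, [(0.2, 0.2), (0.2, 0.8), (0.8, 0.8), (0.8, 0.2)], sigma=0.2, randrange=3)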
@@ -13,10 +13,10 @@ __all__ = ['BatchNorm']
 # http://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow
 # TF batch_norm only works for 4D tensor right now: #804
-# decay: 0.999 not good for resnet, torch use 0.9 by default
+# decay: being too close to 1 leads to slow start-up, but ends up better
 # eps: torch: 1e-5. Lasagne: 1e-4
 @layer_register()
-def BatchNorm(x, use_local_stat=True, decay=0.9, epsilon=1e-5):
+def BatchNorm(x, use_local_stat=True, decay=0.999, epsilon=1e-5):
     """
     Batch normalization layer as described in:
     Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift
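The updated decay comment refers to the exponential moving average of batch statistics that BatchNorm keeps for inference. A small self-contained sketch (plain numpy, not the layer's code) of the trade-off: with decay=0.999 the average needs on the order of 1/(1-decay) ≈ 1000 updates to warm up from its zero initialization, but it then tracks the true statistic with far less noise than decay=0.9.

import numpy as np

def ema(batch_stats, decay):
    # moving = decay * moving + (1 - decay) * batch_stat, starting from zero
    moving = 0.0
    history = []
    for stat in batch_stats:
        moving = decay * moving + (1 - decay) * stat
        history.append(moving)
    return np.array(history)

rng = np.random.RandomState(0)
batch_means = 1.0 + 0.3 * rng.randn(2000)  # noisy per-batch estimates of a true mean of 1.0

for decay in (0.9, 0.999):
    est = ema(batch_means, decay)
    # decay=0.9 warms up within ~10 steps but stays noisy;
    # decay=0.999 takes ~1000 steps to warm up, then is much smoother.
    print(decay, est[10], est[-1], est[-500:].std())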