Commit e7884dd0 authored by Yuxin Wu

small updates

parent a9563678
@@ -22,9 +22,7 @@ from tensorpack.dataflow import imgaug
 CIFAR10-resnet example.
 I can reproduce the results in:
 Deep Residual Learning for Image Recognition, arxiv:1512.03385
-for n=5 and 18
-This model achieves slightly better results due to the use of the
-whole training set instead of a 95:5 train-val split.
+for n=5 and 18 (6.5% val error)
 """
 BATCH_SIZE = 128
@@ -108,7 +106,8 @@ class Model(ModelDesc):
             MOVING_SUMMARY_VARS_KEY, tf.reduce_mean(wrong, name='train_error'))
         # weight decay on all W of fc layers
-        wd_cost = regularize_cost('.*/W', l2_regularizer(0.0002), name='regularize_loss')
+        #wd_cost = regularize_cost('.*/W', l2_regularizer(0.0002), name='regularize_loss')
+        wd_cost = 0.0001 * regularize_cost('.*/W', tf.nn.l2_loss)
         tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, wd_cost)
         add_param_summary([('.*/W', ['histogram', 'sparsity'])])  # monitor W
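For reference, a small numpy sketch (not part of the commit) of what the two weight-decay forms in this commit compute. It assumes tf.nn.l2_loss(w) is sum(w**2)/2, that l2_regularizer(scale)(w) is scale times that, and that regularize_cost sums the given function over all variables whose names match the regex; the weight list below is a made-up stand-in for those matched variables.

import numpy as np

def l2_loss(w):
    # stand-in for tf.nn.l2_loss: sum(w**2) / 2
    return np.sum(w ** 2) / 2.0

def regularize_cost(weights, func):
    # stand-in for tensorpack's regularize_cost: sum func over the matched variables
    return sum(func(w) for w in weights)

weights = [np.random.randn(64, 10), np.random.randn(3, 3, 16, 16)]  # hypothetical matched W's

# CIFAR form after this commit: 0.0001 * regularize_cost('.*/W', tf.nn.l2_loss)
wd_cifar = 0.0001 * regularize_cost(weights, l2_loss)

# SVHN form after this commit: regularize_cost('fc.*/W', l2_regularizer(0.00001))
wd_svhn = regularize_cost(weights, lambda w: 0.00001 * l2_loss(w))

# On the same variable list both reduce to coef * sum_i ||W_i||^2 / 2, so they
# differ only by the coefficient (and by which variables the regex matches).
assert np.isclose(wd_cifar, 10.0 * wd_svhn)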
@@ -20,7 +20,7 @@ from tensorpack.dataflow import imgaug
 """
 SVHN convnet.
-About 3.0% validation error after 120 epoch. 2.7% after 250 epoch.
+About 3.0% validation error after 120 epoch. 2.7% after 300 epoch.
 """
 class Model(ModelDesc):
@@ -63,9 +63,7 @@ class Model(ModelDesc):
             MOVING_SUMMARY_VARS_KEY, tf.reduce_mean(wrong, name='train_error'))
         # weight decay on all W of fc layers
-        wd_cost = tf.mul(0.00001,
-                         regularize_cost('fc.*/W', tf.nn.l2_loss),
-                         name='regularize_loss')
+        wd_cost = regularize_cost('fc.*/W', l2_regularizer(0.00001))
         tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, wd_cost)
         add_param_summary([('.*/W', ['histogram', 'sparsity'])])  # monitor W
@@ -83,7 +81,7 @@ def get_config():
         imgaug.Resize((40, 40)),
         imgaug.BrightnessAdd(30),
         imgaug.Contrast((0.5,1.5)),
-        imgaug.GaussianDeform(
+        imgaug.GaussianDeform(  # this is slow
             [(0.2, 0.2), (0.2, 0.8), (0.8,0.8), (0.8,0.2)],
             (40,40), 0.2, 3),
     ]
@@ -7,6 +7,8 @@ import numpy as np
 __all__ = ['GaussianDeform', 'GaussianMap']
 
+# TODO really needs speedup
+
 class GaussianMap(object):
     def __init__(self, image_shape, sigma=0.5):
         assert len(image_shape) == 2
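As context for the two slowness notes above ("# this is slow", "# TODO really needs speedup"), here is a rough numpy sketch of the general Gaussian-anchored deformation idea, not tensorpack's exact implementation: each anchor point gets a random shift, the shift is spread over the image with a Gaussian weight map, and the image is resampled along the resulting dense flow. Building per-pixel maps and resampling in pure numpy for every sample is the expensive part. Function and parameter names below are illustrative only.

import numpy as np

def gaussian_map(shape, center, sigma=0.5):
    # Gaussian bump centered at `center` (relative coordinates), sigma in relative units.
    h, w = shape
    ys, xs = np.mgrid[0:h, 0:w].astype('float32')
    cy, cx = center[0] * h, center[1] * w
    d2 = ((ys - cy) / h) ** 2 + ((xs - cx) / w) ** 2
    return np.exp(-d2 / sigma ** 2)

def gaussian_deform(img, anchors, sigma=0.2, randrange=3, rng=np.random):
    h, w = img.shape[:2]
    flow = np.zeros((h, w, 2), dtype='float32')
    for anchor in anchors:
        # each anchor contributes a random shift, weighted by its Gaussian map
        shift = rng.uniform(-randrange, randrange, size=2)
        flow += gaussian_map((h, w), anchor, sigma)[:, :, None] * shift
    ys, xs = np.mgrid[0:h, 0:w].astype('float32')
    # nearest-neighbour resampling along the displaced coordinates
    # (a real implementation would typically use bilinear sampling)
    sy = np.clip(ys + flow[:, :, 0], 0, h - 1).astype('int32')
    sx = np.clip(xs + flow[:, :, 1], 0, w - 1).astype('int32')
    return img[sy, sx]

# usage: gaussian_deform(image, [(0.2, 0.2), (0.2, 0.8), (0.8, 0.8), (0.8, 0.2)], sigma=0.2, randrange=3)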
@@ -13,10 +13,10 @@ __all__ = ['BatchNorm']
 # http://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow
 # TF batch_norm only works for 4D tensor right now: #804
-# decay: 0.999 not good for resnet, torch use 0.9 by default
+# decay: being too close to 1 leads to slow start-up, but ends up better
 # eps: torch: 1e-5. Lasagne: 1e-4
 @layer_register()
-def BatchNorm(x, use_local_stat=True, decay=0.9, epsilon=1e-5):
+def BatchNorm(x, use_local_stat=True, decay=0.999, epsilon=1e-5):
     """
     Batch normalization layer as described in:
     Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift
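The updated decay comment refers to the exponential moving average of batch statistics that BatchNorm keeps for inference. A small self-contained sketch (plain numpy, not the layer's code) of the trade-off: with decay=0.999 the average needs on the order of 1/(1-decay) ≈ 1000 updates to warm up from its zero initialization, but it then tracks the true statistic with far less noise than decay=0.9.

import numpy as np

def ema(batch_stats, decay):
    # moving = decay * moving + (1 - decay) * batch_stat, starting from zero
    moving = 0.0
    history = []
    for stat in batch_stats:
        moving = decay * moving + (1 - decay) * stat
        history.append(moving)
    return np.array(history)

rng = np.random.RandomState(0)
batch_means = 1.0 + 0.3 * rng.randn(2000)  # noisy per-batch estimates of a true mean of 1.0

for decay in (0.9, 0.999):
    est = ema(batch_means, decay)
    # decay=0.9 warms up within ~10 steps but stays noisy;
    # decay=0.999 takes ~1000 steps to warm up, then is much smoother.
    print(decay, est[10], est[-1], est[-500:].std())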