update latest numbers

b5a238a7 · Yuxin Wu · 9380b5fd · b5a238a7 · b5a238a7 · b5a238a7
Commit b5a238a7 authored Apr 21, 2016 by Yuxin Wu
Showing with 8 additions and 7 deletions

examples/ResNet/README.md examples/ResNet/README.md +1 -0

examples/ResNet/cifar10-resnet.py examples/ResNet/cifar10-resnet.py +5 -5

tensorpack/models/batch_norm.py tensorpack/models/batch_norm.py +2 -2

No files found.
--- a/examples/ResNet/README.md
+++ b/examples/ResNet/README.md
@@ -2,5 +2,6 @@
 ## ResNet

 Implements the paper "Deep Residual Learning for Image Recognition", [http://arxiv.org/abs/1512.03385](http://arxiv.org/abs/1512.03385)
+with the variants proposed in "Identity Mappings in Deep Residual Networks", [https://arxiv.org/abs/1603.05027](https://arxiv.org/abs/1603.05027).

 ![cifar10](https://github.com/ppwwyyxx/tensorpack/raw/master/examples/ResNet/cifar10-resnet.png)
--- a/examples/ResNet/cifar10-resnet.py
+++ b/examples/ResNet/cifar10-resnet.py
@@ -24,10 +24,10 @@ This implementation uses the variants proposed in:
 Identity Mappings in Deep Residual Networks, arxiv:1603.05027

 I can reproduce the results for
-n=5, about 7.2% val error after 93k step with 2 TitanX (6.8it/s)
-n=18, about 6.05% val error after 62k step with 2 TitanX (about 10hr)
-n=30: a 182-layer network, about 5.5% val error after 51k step with 2 GPUs
-This model uses the whole training set instead of a 95:5 train-val split.
+n=5, about 7.1% val error after 67k steps with 2 TitanX (6.1it/s)
+n=18, about 6.0% val error after 62k steps with 2 TitanX (about 10hr)
+n=30: a 182-layer network, about 5.6% val error after 51k steps with 2 GPUs
+This model uses the whole training set instead of a train-val split.
 """

 BATCH_SIZE = 128
@@ -168,7 +168,7 @@ def get_config():
                                      [(1, 0.1), (82, 0.01), (123, 0.001), (300, 0.0002)])
        ]),
        session_config=sess_config,
-        model=Model(n=5),
+        model=Model(n=18),
        step_per_epoch=step_per_epoch,
        max_epoch=500,
    )

--- a/tensorpack/models/batch_norm.py
+++ b/tensorpack/models/batch_norm.py
@@ -12,10 +12,10 @@ __all__ = ['BatchNorm']

 # http://stackoverflow.com/questions/33949786/how-could-i-use-batch-normalization-in-tensorflow
 # TF batch_norm only works for 4D tensor right now: #804
-# decay: being too close to 1 leads to slow start-up, but ends up better
+# decay: being too close to 1 leads to slow start-up. torch uses 0.9.
 # eps: torch: 1e-5. Lasagne: 1e-4
 @layer_register(log_shape=False)
-def BatchNorm(x, use_local_stat=True, decay=0.999, epsilon=1e-5):
+def BatchNorm(x, use_local_stat=True, decay=0.9, epsilon=1e-5):
    """
    Batch normalization layer as described in: