Commit a2c36b3d authored by Yuxin Wu

misc docs update; use virtual_batch_size only for TF>=1.5 (fix #737)

parent 03f18976
......
@@ -41,7 +41,9 @@ Accuracy:
     With (W,A,G)=(1,2,6), 47.6% error
     With (W,A,G)=(1,2,4), 58.4% error
-    Don't train with >4 GPUs because the batch size will be different.
+    Training with 2 or 8 GPUs is supported but the result may get slightly
+    different, due to limited per-GPU batch size.
+    You may want to adjust total batch size and learning rate accordingly.
 Speed:
     About 11 iteration/s on 4 P100s. (Each epoch is set to 10000 iterations)
......
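For the "adjust total batch size and learning rate accordingly" advice above, a common heuristic is the linear scaling rule: scale the learning rate in proportion to the total batch size. A hypothetical helper, not part of the example; the reference constants are assumptions:

# Linear scaling rule (sketch): keep lr / total_batch_size roughly constant.
BASE_TOTAL_BATCH = 256   # assumed reference: e.g. 64 per GPU x 4 GPUs
BASE_LR = 1e-4           # assumed reference learning rate

def scaled_lr(num_gpus, per_gpu_batch=64):
    """Return a learning rate scaled linearly with the total batch size."""
    total_batch = num_gpus * per_gpu_batch
    return BASE_LR * total_batch / BASE_TOTAL_BATCH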
......
@@ -15,7 +15,7 @@ A small convnet model for Cifar10 or Cifar100 dataset.
 Cifar10 trained on 1 GPU:
     91% accuracy after 50k iterations.
-    70 itr/s on P100
+    79 itr/s on P100
 Not a good model for Cifar100, just for demonstration.
 """
......
......
@@ -89,8 +89,9 @@ def BatchNorm(inputs, axis=None, training=None, momentum=0.9, epsilon=1e-5,
     if training is None:
         training = ctx.is_training
     training = bool(training)
+    TF_version = get_tf_version_number()
     if not training and ctx.is_training:
-        assert get_tf_version_number() >= 1.4, \
+        assert TF_version >= 1.4, \
             "Fine tuning a BatchNorm model with fixed statistics is only " \
             "supported after https://github.com/tensorflow/tensorflow/pull/12580 "
         if ctx.is_main_training_tower:  # only warn in first tower
......
@@ -102,15 +103,26 @@ def BatchNorm(inputs, axis=None, training=None, momentum=0.9, epsilon=1e-5,
     with rename_get_variable(
             {'moving_mean': 'mean/EMA',
              'moving_variance': 'variance/EMA'}):
-        layer = tf.layers.BatchNormalization(
-            axis=axis,
-            momentum=momentum, epsilon=epsilon,
-            center=center, scale=scale,
-            beta_initializer=beta_initializer,
-            gamma_initializer=gamma_initializer,
-            virtual_batch_size=virtual_batch_size,
-            fused=True
-        )
+        if TF_version >= 1.5:
+            layer = tf.layers.BatchNormalization(
+                axis=axis,
+                momentum=momentum, epsilon=epsilon,
+                center=center, scale=scale,
+                beta_initializer=beta_initializer,
+                gamma_initializer=gamma_initializer,
+                virtual_batch_size=virtual_batch_size,
+                fused=True
+            )
+        else:
+            assert virtual_batch_size is None, "Feature not supported in this version of TF!"
+            layer = tf.layers.BatchNormalization(
+                axis=axis,
+                momentum=momentum, epsilon=epsilon,
+                center=center, scale=scale,
+                beta_initializer=beta_initializer,
+                gamma_initializer=gamma_initializer,
+                fused=True
+            )
     xn = layer.apply(inputs, training=training, scope=tf.get_variable_scope())
     # maintain EMA only on one GPU is OK, even in replicated mode.
......
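The two constructor calls in the new branch differ only in whether virtual_batch_size is passed, because that keyword argument was added to tf.layers.BatchNormalization in TF 1.5. A minimal sketch of the same gate, for illustration only: make_bn_layer and the tf.__version__ parsing below are not tensorpack code (tensorpack uses get_tf_version_number(), as in the diff), and fused=True is omitted to keep the sketch minimal.

import tensorflow as tf

def make_bn_layer(axis, momentum=0.9, epsilon=1e-5, center=True, scale=True,
                  virtual_batch_size=None):
    # Parse e.g. "1.5.0" -> (1, 5); a stand-in for get_tf_version_number().
    tf_version = tuple(int(x) for x in tf.__version__.split('.')[:2])
    kwargs = dict(axis=axis, momentum=momentum, epsilon=epsilon,
                  center=center, scale=scale)
    if tf_version >= (1, 5):
        # The kwarg exists only in TF>=1.5; passing None there is harmless.
        kwargs['virtual_batch_size'] = virtual_batch_size
    else:
        assert virtual_batch_size is None, \
            "virtual_batch_size requires TF>=1.5"
    return tf.layers.BatchNormalization(**kwargs)

virtual_batch_size implements "ghost batch normalization": when set, statistics are computed per virtual sub-batch of that size rather than over the whole input batch.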
......
@@ -6,8 +6,9 @@ GIT_ARG="--git-dir ../.git --work-tree .."
 # find out modified python files, so that we ignored unstaged files
 # exclude ../docs
-MOD=$(git $GIT_ARG status -s | grep -E '\.py$' \
-    | grep -E '^\b+M\b+|^A' | cut -c 4- | grep -v '../docs')
+MOD=$(git $GIT_ARG status -s \
+    | grep -E '\.py$' | grep -v '../docs' \
+    | grep -E '^ *M|^ *A' | cut -c 4- )
 if [[ -n $MOD ]]; then
     flake8 $MOD
 fi
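Why the grep change: \b in the old pattern is a zero-width word-boundary assertion, and there is no word boundary before the leading space of a " M" line in `git status -s` output, so files modified only in the worktree were never selected; '^ *M|^ *A' matches the two-character status column directly. A small illustration in Python (the sample lines below are made up):

import re

# Hypothetical `git status -s` lines: columns 1-2 are the status, path starts at column 4.
sample = [
    " M tensorpack/models/batch_norm.py",   # modified, unstaged -> should match
    "A  some/new_file.py",                  # added to the index -> should match
    "?? scratch_notes.txt",                 # untracked -> should not match
]
for line in sample:
    if re.match(r'^ *M|^ *A', line):
        print(line[3:])   # equivalent of `cut -c 4-`: strip the status column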
......
@@ -4,6 +4,7 @@ ignore = E265,E741,E742,E743
 exclude = .git,
           __init__.py,
           setup.py,
+          tensorpack/train/eager.py,
           docs,
           examples,
           docs/conf.py
......