Commit 81bb9ac2 authored by Yuxin Wu

use a better bn variable name

parent 12d27154
@@ -48,17 +48,20 @@ def BatchNorm(x, use_local_stat=True, decay=0.9, epsilon=1e-5):
         batch_mean, batch_var = tf.nn.moments(x, [0], keep_dims=False)
     else:
         batch_mean, batch_var = tf.nn.moments(x, [0, 1, 2], keep_dims=False)
+    # just to make a clear name.
+    batch_mean = tf.identity(batch_mean, 'mean')
+    batch_var = tf.identity(batch_var, 'variance')
     emaname = 'EMA'
-    in_train_tower = not batch_mean.name.startswith('towerp')
-    if in_train_tower:
+    in_main_tower = not batch_mean.name.startswith('towerp')
+    if in_main_tower:
         ema = tf.train.ExponentialMovingAverage(decay=decay, name=emaname)
         ema_apply_op = ema.apply([batch_mean, batch_var])
         ema_mean, ema_var = ema.average(batch_mean), ema.average(batch_var)
     else:
         # use training-statistics in prediction
         assert not use_local_stat
-        # have to do this again to get actual name. see issue:
+        # XXX have to do this again to get actual name. see issue:
         # https://github.com/tensorflow/tensorflow/issues/2740
         ema = tf.train.ExponentialMovingAverage(decay=decay, name=emaname)
         ema_apply_op = ema.apply([batch_mean, batch_var])
...
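The tf.identity renames matter because the EMA shadow variables inherit
their names from the tensors passed to apply(); without the rename they
pick up the autogenerated 'moments/...' prefix from tf.nn.moments. A
minimal sketch of the effect (not part of the commit), assuming TF 1.x
graph-mode APIs via tf.compat.v1; the placeholder shape is made up:

    import tensorflow.compat.v1 as tf
    tf.disable_v2_behavior()

    x = tf.placeholder(tf.float32, [None, 64], name='input')
    batch_mean, batch_var = tf.nn.moments(x, [0], keep_dims=False)

    # Same rename as in the diff: gives the EMA variables clean names.
    batch_mean = tf.identity(batch_mean, 'mean')
    batch_var = tf.identity(batch_var, 'variance')

    ema = tf.train.ExponentialMovingAverage(decay=0.9, name='EMA')
    ema_apply_op = ema.apply([batch_mean, batch_var])
    print(ema.average(batch_mean).name)  # mean/EMA:0
    print(ema.average(batch_var).name)   # variance/EMA:0

This is also why the prediction branch re-creates the
ExponentialMovingAverage object: per tensorflow/tensorflow#2740, the
shadow-variable name can only be recovered by applying an EMA with the
same name again.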
@@ -132,7 +132,8 @@ class ParamRestore(SessionInit):
     def _init(self, sess):
         sess.run(tf.initialize_all_variables())
-        variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
+        # allow restore non-trainable variables
+        variables = tf.get_collection(tf.GraphKeys.VARIABLES)
         var_dict = dict([v.name, v] for v in variables)
         for name, value in six.iteritems(self.prms):
             if not name.endswith(':0'):
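Context for the collection switch: EMA shadow variables such as
'mean/EMA' above are created with trainable=False, so a
TRAINABLE_VARIABLES lookup can never find them when restoring. A small
sketch of the difference (again via tf.compat.v1; note that
GraphKeys.VARIABLES was later renamed GraphKeys.GLOBAL_VARIABLES):

    import tensorflow.compat.v1 as tf
    tf.disable_v2_behavior()

    w = tf.get_variable('w', shape=[3])    # trainable
    ema = tf.train.ExponentialMovingAverage(decay=0.9, name='EMA')
    ema_op = ema.apply([w])                # adds 'w/EMA', trainable=False

    print([v.name for v in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)])
    # ['w:0']
    print([v.name for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)])
    # ['w:0', 'w/EMA:0']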
@@ -145,7 +146,8 @@ class ParamRestore(SessionInit):
             logger.info("Restoring param {}".format(name))
             varshape = tuple(var.get_shape().as_list())
             if varshape != value.shape:
-                assert np.prod(varshape) == np.prod(value.shape)
+                assert np.prod(varshape) == np.prod(value.shape), \
+                    "{}: {}!={}".format(name, varshape, value.shape)
                 logger.warn("Param {} is reshaped during loading!".format(name))
                 value = value.reshape(varshape)
             sess.run(var.assign(value))
...
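The widened assert now reports which parameter failed and both shapes
instead of a bare AssertionError. The reshape branch only runs when the
element counts already agree; a small NumPy illustration with a
hypothetical parameter name:

    import numpy as np

    name = 'fc0/W'                  # hypothetical
    varshape = (4, 4, 64)           # shape of the variable in the graph
    value = np.zeros(4 * 4 * 64)    # same element count, flat in the checkpoint
    if varshape != value.shape:
        assert np.prod(varshape) == np.prod(value.shape), \
            "{}: {}!={}".format(name, varshape, value.shape)
        value = value.reshape(varshape)  # restored with a warning in ParamRestore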