Commit 94fc5acb authored by Yuxin Wu

widely use add_moving_summary

parent 5a31fc32
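The commit replaces manual writes to the MOVING_SUMMARY_VARS_KEY collection with the add_moving_summary helper throughout the examples and tfutils. A minimal sketch of the pattern being applied (the cost tensor here is only illustrative):

    # before: push each scalar tensor into the moving-summary collection by hand
    tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, cost)

    # after: let the helper manage the collection
    add_moving_summary(cost)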
@@ -16,6 +16,7 @@ from collections import deque
 from tensorpack import *
 from tensorpack.utils.concurrency import *
 from tensorpack.tfutils import symbolic_functions as symbf
+from tensorpack.tfutils.summary import add_moving_summary
 from tensorpack.RL import *
 import common
 from common import play_model, Evaluator, eval_model_multithread
@@ -95,7 +96,7 @@ class Model(ModelDesc):
 pred_action_value = tf.reduce_sum(self.predict_value * action_onehot, 1)  #N,
 max_pred_reward = tf.reduce_mean(tf.reduce_max(
     self.predict_value, 1), name='predict_reward')
-tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, max_pred_reward)
+add_moving_summary(max_pred_reward)
 self.greedy_choice = tf.argmax(self.predict_value, 1)   # N,
 with tf.variable_scope('target'):
...
@@ -15,10 +15,8 @@ Claimed performance in the paper can be reproduced, on several games I've tested
 A demo trained with Double-DQN on breakout game is available at [youtube](https://youtu.be/o21mddZtE5Y).
-DQN would typically take 2~3 days of training to reach a score of 400 on breakout, but my A3C implementation only takes <4 hours on 1 GPU.
-This is probably the fastest RL trainer you'd find.
+<!-- DQN would typically take 2~3 days of training to reach a score of 400 on breakout, but my A3C implementation only takes <2 hours on 1 GPU.
+This is probably the fastest RL trainer you'd find.
+-->
 ## How to use
...
@@ -42,22 +42,19 @@ class Model(mnist_example.Model):
     .FullyConnected('fc1', out_dim=10, nl=tf.identity)())
 prob = tf.nn.softmax(logits, name='prob')
-cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, label)
-cost = tf.reduce_mean(cost, name='cross_entropy_loss')
-tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, cost)
 # compute the number of failed samples, for ClassificationError to use at test time
 wrong = symbolic_functions.prediction_incorrect(logits, label)
 nr_wrong = tf.reduce_sum(wrong, name='wrong')
 # monitor training error
-tf.add_to_collection(
-    MOVING_SUMMARY_VARS_KEY, tf.reduce_mean(wrong, name='train_error'))
+add_moving_summary(tf.reduce_mean(wrong, name='train_error'))
+cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, label)
+cost = tf.reduce_mean(cost, name='cross_entropy_loss')
 # weight decay on all W of fc layers
 wd_cost = tf.mul(1e-5,
                  regularize_cost('fc.*/W', tf.nn.l2_loss),
                  name='regularize_loss')
-tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, wd_cost)
+add_moving_summary(cost, wd_cost)
 self.cost = tf.add_n([wd_cost, cost], name='cost')
...
@@ -151,20 +151,17 @@ class Model(ModelDesc):
 tf.get_variable = old_get_variable
 prob = tf.nn.softmax(logits, name='output')
-cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, label)
-cost = tf.reduce_mean(cost, name='cross_entropy_loss')
-tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, cost)
 # compute the number of failed samples
 wrong = prediction_incorrect(logits, label)
 nr_wrong = tf.reduce_sum(wrong, name='wrong')
 # monitor training error
-tf.add_to_collection(MOVING_SUMMARY_VARS_KEY,
-    tf.reduce_mean(wrong, name='train_error'))
+add_moving_summary(tf.reduce_mean(wrong, name='train_error'))
+cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, label)
+cost = tf.reduce_mean(cost, name='cross_entropy_loss')
 # weight decay on all W of fc layers
 wd_cost = regularize_cost('fc.*/W', l2_regularizer(1e-7))
-tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, wd_cost)
+add_moving_summary(cost, wd_cost)
 add_param_summary([('.*/W', ['histogram', 'rms'])])
 self.cost = tf.add_n([cost, wd_cost], name='cost')
...
@@ -98,24 +98,21 @@ class Model(ModelDesc):
 loss3 = tf.reduce_mean(loss3, name='loss3')
 cost = tf.add_n([loss3, 0.3 * loss2, 0.3 * loss1], name='weighted_cost')
-for k in [cost, loss1, loss2, loss3]:
-    tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, k)
+add_moving_summary([cost, loss1, loss2, loss3])
 wrong = prediction_incorrect(logits, label, 1)
 nr_wrong = tf.reduce_sum(wrong, name='wrong-top1')
-tf.add_to_collection(
-    MOVING_SUMMARY_VARS_KEY, tf.reduce_mean(wrong, name='train_error_top1'))
+add_moving_summary(tf.reduce_mean(wrong, name='train_error_top1'))
 wrong = prediction_incorrect(logits, label, 5)
 nr_wrong = tf.reduce_sum(wrong, name='wrong-top5')
-tf.add_to_collection(
-    MOVING_SUMMARY_VARS_KEY, tf.reduce_mean(wrong, name='train_error_top5'))
+add_moving_summary(tf.reduce_mean(wrong, name='train_error_top5'))
 # weight decay on all W of fc layers
 wd_w = tf.train.exponential_decay(0.0002, get_global_step_var(),
                                   80000, 0.7, True)
 wd_cost = tf.mul(wd_w, regularize_cost('.*/W', tf.nn.l2_loss), name='l2_regularize_loss')
-tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, wd_cost)
+add_moving_summary(wd_cost)
 add_param_summary([('.*/W', ['histogram'])])   # monitor W
 self.cost = tf.add_n([cost, wd_cost], name='cost')
@@ -144,7 +141,7 @@ def get_data(train_or_test):
 ds = AugmentImageComponent(ds, augmentors)
 ds = BatchData(ds, BATCH_SIZE, remainder=not isTrain)
 if isTrain:
-    ds = PrefetchDataZMQ(ds, 5)
+    ds = PrefetchDataZMQ(ds, 6)
 return ds
...
@@ -100,19 +100,17 @@ class Model(ModelDesc):
 cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, label)
 cost = tf.reduce_mean(cost, name='cross_entropy_loss')
-tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, cost)
 wrong = prediction_incorrect(logits, label)
 nr_wrong = tf.reduce_sum(wrong, name='wrong')
 # monitor training error
-tf.add_to_collection(
-    MOVING_SUMMARY_VARS_KEY, tf.reduce_mean(wrong, name='train_error'))
+add_moving_summary(tf.reduce_mean(wrong, name='train_error'))
 # weight decay on all W of fc layers
 wd_w = tf.train.exponential_decay(0.0002, get_global_step_var(),
                                   480000, 0.2, True)
 wd_cost = tf.mul(wd_w, regularize_cost('.*/W', tf.nn.l2_loss), name='wd_cost')
-tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, wd_cost)
+add_moving_summary(cost, wd_cost)
 add_param_summary([('.*/W', ['histogram'])])   # monitor W
 self.cost = tf.add_n([cost, wd_cost], name='cost')
...
@@ -60,20 +60,18 @@ class Model(ModelDesc):
 cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, label)
 cost = tf.reduce_mean(cost, name='cross_entropy_loss')
-tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, cost)
 # compute the number of failed samples, for ClassificationError to use at test time
 wrong = symbf.prediction_incorrect(logits, label)
 nr_wrong = tf.reduce_sum(wrong, name='wrong')
 # monitor training error
-tf.add_to_collection(
-    MOVING_SUMMARY_VARS_KEY, tf.reduce_mean(wrong, name='train_error'))
+add_moving_summary(tf.reduce_mean(wrong, name='train_error'))
 # weight decay on all W of fc layers
 wd_cost = tf.mul(0.004,
                  regularize_cost('fc.*/W', tf.nn.l2_loss),
                  name='regularize_loss')
-tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, wd_cost)
+add_moving_summary(cost, wd_cost)
 add_param_summary([('.*/W', ['histogram'])])   # monitor W
 self.cost = tf.add_n([cost, wd_cost], name='cost')
...
@@ -9,6 +9,7 @@ import os, sys
 import argparse
 from tensorpack import *
+from tensorpack.tfutils.summary import add_moving_summary
 """
 MNIST ConvNet example.
@@ -46,20 +47,18 @@ class Model(ModelDesc):
 cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, label)
 cost = tf.reduce_mean(cost, name='cross_entropy_loss')
-tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, cost)
 # compute the number of failed samples, for ClassificationError to use at test time
 wrong = symbolic_functions.prediction_incorrect(logits, label)
 nr_wrong = tf.reduce_sum(wrong, name='wrong')
 # monitor training error
-tf.add_to_collection(
-    MOVING_SUMMARY_VARS_KEY, tf.reduce_mean(wrong, name='train_error'))
+add_moving_summary(tf.reduce_mean(wrong, name='train_error'))
 # weight decay on all W of fc layers
 wd_cost = tf.mul(1e-5,
                  regularize_cost('fc.*/W', tf.nn.l2_loss),
                  name='regularize_loss')
-tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, wd_cost)
+add_moving_summary(cost, wd_cost)
 summary.add_param_summary([('.*/W', ['histogram'])])   # monitor histogram of all W
 self.cost = tf.add_n([wd_cost, cost], name='cost')
...
@@ -41,20 +41,18 @@ class Model(ModelDesc):
     .FullyConnected('linear', out_dim=10, nl=tf.identity)())
 prob = tf.nn.softmax(logits, name='output')
-cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, label)
-cost = tf.reduce_mean(cost, name='cross_entropy_loss')
-tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, cost)
 # compute the number of failed samples, for ClassificationError to use at test time
 wrong = prediction_incorrect(logits, label)
 nr_wrong = tf.reduce_sum(wrong, name='wrong')
 # monitor training error
-tf.add_to_collection(
-    MOVING_SUMMARY_VARS_KEY, tf.reduce_mean(wrong, name='train_error'))
+add_moving_summary(tf.reduce_mean(wrong, name='train_error'))
+cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, label)
+cost = tf.reduce_mean(cost, name='cross_entropy_loss')
 # weight decay on all W of fc layers
 wd_cost = regularize_cost('fc.*/W', l2_regularizer(0.00001))
-tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, wd_cost)
+add_moving_summary(cost, wd_cost)
 add_param_summary([('.*/W', ['histogram', 'rms'])])   # monitor W
 self.cost = tf.add_n([cost, wd_cost], name='cost')
...
@@ -107,9 +107,8 @@ class ILSVRC12(RNGDataFlow):
 command to build the above structure for `train/`:
 .. code-block:: none
-    tar xvf ILSVRC12_img_train.tar -C train && cd train
-    find -type f | parallel -P 10 'mkdir -p {/.} && tar xf {} -C {/.}'
+    find -type f -name '*.tar' | parallel -P 10 'echo {} && mkdir -p {/.} && tar xf {} -C {/.}'
 Or:
     for i in *.tar; do dir=${i%.tar}; echo $dir; mkdir -p $dir; tar xf $i -C $dir; done
 """
...
@@ -6,7 +6,8 @@
 import tensorflow as tf
 from abc import ABCMeta, abstractmethod
 import re
-from ..utils import logger, MOVING_SUMMARY_VARS_KEY
+from ..utils import logger
+from .summary import add_moving_summary
 __all__ = ['GradientProcessor', 'SummaryGradient', 'CheckGradient',
            'ScaleGradient', 'MapGradient']
@@ -41,9 +42,9 @@ class SummaryGradient(GradientProcessor):
 continue
 _summaried_gradient.add(name)
 tf.histogram_summary(name + '/grad', grad)
-tf.add_to_collection(MOVING_SUMMARY_VARS_KEY,
-    tf.sqrt(tf.reduce_mean(tf.square(grad)),
-        name=name + '/gradRMS'))
+add_moving_summary(tf.sqrt(
+    tf.reduce_mean(tf.square(grad)),
+    name=name + '/gradRMS'))
 return grads
...
@@ -79,8 +79,15 @@ def add_param_summary(summary_lists):
         for act in actions:
             perform(p, act)
-def add_moving_summary(v):
-    tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, v)
+def add_moving_summary(v, *args):
+    """
+    :param v: tensor or list of tensor to summary
+    """
+    if not isinstance(v, list):
+        v = [v]
+    v.extend(args)
+    for x in v:
+        tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, x)
 def summary_moving_average():
     """ Create a MovingAverage op and summary for all variables in
...
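With the variadic signature above, add_moving_summary accepts a single tensor, several tensors, or a list, matching the call sites updated in this commit; a short usage sketch (tensor names borrowed from the example diffs):

    # single tensor, as in the DQN example
    add_moving_summary(max_pred_reward)

    # several tensors at once, as in the classification examples
    add_moving_summary(cost, wd_cost)

    # an explicit list, as in the Inception example
    add_moving_summary([cost, loss1, loss2, loss3])

Every tensor passed in ends up in MOVING_SUMMARY_VARS_KEY, which summary_moving_average later turns into moving-average ops and summaries.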