Commit 031dd698 authored by ppwwyyxx

histogram summary

parent bbe4faf4
@@ -24,8 +24,8 @@ def get_model(inputs):
"""
Args:
inputs: a list of input variables,
e.g.: [input_var, label_var] with:
input_var: bx28x28
e.g.: [image_var, label_var] with:
image_var: bx28x28
label_var: bx1 integer
Returns:
(outputs, cost)
@@ -35,19 +35,18 @@ def get_model(inputs):
# use this variable in dropout! Tensorpack will automatically set it to 1 at test time
keep_prob = tf.placeholder(tf.float32, shape=tuple(), name=DROPOUT_PROB_OP_NAME)
input, label = inputs
image, label = inputs
input = tf.reshape(input, [-1, IMAGE_SIZE, IMAGE_SIZE, 1])
conv0 = Conv2D('conv0', input, out_channel=32, kernel_shape=5,
image = tf.reshape(image, [-1, IMAGE_SIZE, IMAGE_SIZE, 1])
conv0 = Conv2D('conv0', image, out_channel=32, kernel_shape=5,
padding='valid')
conv0 = tf.nn.relu(conv0)
pool0 = tf.nn.max_pool(conv0, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
padding='SAME')
conv1 = Conv2D('conv1', pool0, out_channel=40, kernel_shape=3,
padding='valid')
pool0 = tf.nn.max_pool(conv0, ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1], padding='SAME')
conv1 = Conv2D('conv1', pool0, out_channel=40, kernel_shape=3, padding='valid')
conv1 = tf.nn.relu(conv1)
pool1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
padding='SAME')
pool1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1], padding='SAME')
feature = batch_flatten(pool1)
@@ -60,19 +59,23 @@ def get_model(inputs):
y = one_hot(label, 10)
cost = tf.nn.softmax_cross_entropy_with_logits(fc1, y)
cost = tf.reduce_mean(cost, name='cost')
cost = tf.reduce_mean(cost)
# number of correctly classified samples
# compute the number of correctly classified samples, for ValidationAccuracy to use
correct = tf.equal(
tf.cast(tf.argmax(prob, 1), tf.int32), label)
correct = tf.reduce_sum(tf.cast(correct, tf.int32), name='correct')
return [prob, correct], cost
# weight decay on all W of fc layers
wd_cost = 1e-4 * regularize_cost('fc.*/W', tf.nn.l2_loss)
return [prob, correct], tf.add(cost, wd_cost, name='cost')
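(The total loss is created with tf.add(cost, wd_cost, name='cost'), so it stays addressable as the tensor 'cost:0', which is the default cost_var_name that ValidationAccuracy resolves below.)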
def main():
BATCH_SIZE = 128
with tf.Graph().as_default():
dataset_train = BatchData(Mnist('train'), 128)
dataset_test = BatchData(Mnist('test'), 128, remainder=True)
dataset_train = BatchData(Mnist('train'), BATCH_SIZE)
dataset_test = BatchData(Mnist('test'), 256, remainder=True)
sess_config = tf.ConfigProto()
sess_config.device_count['GPU'] = 1
@@ -87,11 +90,11 @@ def main():
dataset_train=dataset_train,
optimizer=tf.train.AdamOptimizer(1e-4),
callbacks=[
TrainingAccuracy(),
AccuracyValidation(dataset_test,
TrainingAccuracy(batch_size=BATCH_SIZE),
ValidationAccuracy(dataset_test,
prefix='test', period=1),
PeriodicSaver(LOG_DIR, period=1),
SummaryWriter(LOG_DIR),
SummaryWriter(LOG_DIR, histogram_regex='.*/W'),
],
session_config=sess_config,
inputs=input_vars,
#!/usr/bin/env python2
# -*- coding: UTF-8 -*-
# File: regularize.py
# Author: Yuxin Wu <ppwwyyxx@gmail.com>
import tensorflow as tf
import re
__all__ = ['regularize_cost']
def regularize_cost(regex, func):
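"""
Return the sum of func(v) over all trainable variables v whose names match regex.
Used above to implement weight decay on the fc-layer weights.
"""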
G = tf.get_default_graph()
params = G.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
cost = 0
for p in params:
name = p.name
if re.search(regex, name):
print("Weight decay for {}".format(name))
cost += func(p)
return cost
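For illustration, a hedged sketch of how this helper is meant to be used (mirroring the mnist example above; the weight names 'fc0/W:0' and 'fc1/W:0' are assumptions about how the fc layers name their variables):

# Sketch: with fc weights named like 'fc0/W:0' and 'fc1/W:0', the regex
# 'fc.*/W' matches both, so wd_cost sums tf.nn.l2_loss over them.
wd_cost = 1e-4 * regularize_cost('fc.*/W', tf.nn.l2_loss)
total_cost = tf.add(cost, wd_cost, name='cost')  # cost as in get_model above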
@@ -58,6 +58,6 @@ def start_train(config):
[train_op, cost_var] + output_vars, feed_dict=feed)
cost = results[1]
outputs = results[2:]
callbacks.trigger_step(dp, outputs, cost)
callbacks.trigger_step(feed, outputs, cost)
callbacks.trigger_epoch()
@@ -9,8 +9,6 @@ import numpy as np
import os
from abc import abstractmethod
from .stat import *
from .utils import *
from .naming import *
class Callback(object):
@@ -24,11 +22,11 @@ class Callback(object):
Called before starting iterative training
"""
def trigger_step(self, dp, outputs, cost):
def trigger_step(self, inputs, outputs, cost):
"""
Callback to be triggered after every step (every backpropagation)
Args:
dp: the input dict fed into the graph
inputs: the input dict fed into the graph
outputs: list of output values after running these inputs
cost: the cost value after running these inputs
"""
@@ -54,105 +52,6 @@ class PeriodicCallback(Callback):
def _trigger(self):
pass
class AccuracyValidation(PeriodicCallback):
"""
Validate the accuracy for the given correct and cost variables.
Use it under the following setup:
correct_var: integer, number of correct samples in this batch
ds: batched dataset
"""
def __init__(self, ds, prefix,
period=1,
correct_var_name='correct:0',
cost_var_name='cost:0'):
super(AccuracyValidation, self).__init__(period)
self.ds = ds
self.prefix = prefix
self.correct_var_name = correct_var_name
self.cost_var_name = cost_var_name
def get_tensor(self, name):
return self.graph.get_tensor_by_name(name)
def _before_train(self):
self.input_vars = self.graph.get_collection(INPUT_VARS_KEY)
self.dropout_var = self.get_tensor(DROPOUT_PROB_VAR_NAME)
self.correct_var = self.get_tensor(self.correct_var_name)
self.cost_var = self.get_tensor(self.cost_var_name)
self.writer = tf.get_collection(SUMMARY_WRITER_COLLECTION_KEY)[0]
def _trigger(self):
cnt = 0
correct_stat = Accuracy()
cost_sum = 0
for dp in self.ds.get_data():
feed = {self.dropout_var: 1.0}
feed.update(dict(zip(self.input_vars, dp)))
batch_size = dp[0].shape[0] # assume batched input
cnt += batch_size
correct, cost = self.sess.run(
[self.correct_var, self.cost_var], feed_dict=feed)
correct_stat.feed(correct, batch_size)
# each batch might not have the same size in validation
cost_sum += cost * batch_size
cost_avg = cost_sum / cnt
self.writer.add_summary(
create_summary('{} accuracy'.format(self.prefix),
correct_stat.accuracy),
self.epoch_num)
self.writer.add_summary(
create_summary('{} cost'.format(self.prefix),
cost_avg),
self.epoch_num)
print "{} validation after epoch {}: acc={}, cost={}".format(
self.prefix, self.epoch_num, correct_stat.accuracy, cost_avg)
class TrainingAccuracy(Callback):
def __init__(self, correct_var_name='correct:0'):
"""
correct_var: number of correct samples in this batch
"""
self.correct_var_name = correct_var_name
self.epoch_num = 0
def _before_train(self):
self.writer = tf.get_collection(SUMMARY_WRITER_COLLECTION_KEY)[0]
output_vars = self.graph.get_collection(OUTPUT_VARS_KEY)
for idx, var in enumerate(output_vars):
if var.name == self.correct_var_name:
self.correct_output_idx = idx
break
else:
raise RuntimeError(
"'correct' variable must be in the model outputs to use TrainingAccuracy")
self.running_cost = StatCounter()
self.running_acc = Accuracy()
def trigger_step(self, inputs, outputs, cost):
self.running_cost.feed(cost)
self.running_acc.feed(
outputs[self.correct_output_idx],
inputs[0].shape[0]) # assume batch input
def trigger_epoch(self):
self.epoch_num += 1
print('Training average in Epoch {}: cost={}, acc={}'.format(
self.epoch_num, self.running_cost.average,
self.running_acc.accuracy))
self.writer.add_summary(
create_summary('training average accuracy', self.running_acc.accuracy),
self.epoch_num)
self.writer.add_summary(
create_summary('training average cost', self.running_cost.average),
self.epoch_num)
self.running_cost.reset()
self.running_acc.reset()
class PeriodicSaver(PeriodicCallback):
def __init__(self, log_dir, period=1):
super(PeriodicSaver, self).__init__(period)
@@ -166,25 +65,35 @@ class PeriodicSaver(PeriodicCallback):
global_step=self.epoch_num, latest_filename='latest')
class SummaryWriter(Callback):
def __init__(self, log_dir):
def __init__(self, log_dir, histogram_regex=None):
self.log_dir = log_dir
self.epoch_num = 0
self.hist_regex = histogram_regex
def _before_train(self):
sess = tf.get_default_session()
graph = tf.get_default_graph()
self.writer = tf.train.SummaryWriter(
self.log_dir, graph_def=sess.graph_def)
graph.add_to_collection(SUMMARY_WRITER_COLLECTION_KEY, self.writer)
self.log_dir, graph_def=self.sess.graph_def)
self.graph.add_to_collection(SUMMARY_WRITER_COLLECTION_KEY, self.writer)
# create histogram summaries for all variables matching the regex
if self.hist_regex is not None:
import re
params = self.graph.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
for p in params:
name = p.name
if re.search(self.hist_regex, name):
tf.histogram_summary(name, p)
self.summary_op = tf.merge_all_summaries()
def trigger_step(self, dp, outputs, cost):
self.last_dp = dp
def trigger_step(self, inputs, outputs, cost):
self.last_dp = inputs
def trigger_epoch(self):
# check if there is any summary
if self.summary_op is None:
return
summary_str = self.summary_op.eval(self.last_dp)
self.epoch_num += 1
self.writer.add_summary(summary_str, self.epoch_num)
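The None check above exists because tf.merge_all_summaries() returns None when the graph contains no summary ops at all, e.g. when histogram_regex is not given and nothing else created summaries; a minimal sketch:

# Sketch: a fresh graph contains no summary ops, so there is nothing to merge.
with tf.Graph().as_default():
    assert tf.merge_all_summaries() is None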
@@ -196,6 +105,8 @@ class Callbacks(Callback):
if type(cb) == SummaryWriter:
callbacks.insert(0, callbacks.pop(idx))
break
else:
raise RuntimeError("callbacks must contain a SummaryWriter!")
self.callbacks = callbacks
@@ -203,9 +114,9 @@ class Callbacks(Callback):
for cb in self.callbacks:
cb.before_train()
def trigger_step(self, dp, outputs, cost):
def trigger_step(self, inputs, outputs, cost):
for cb in self.callbacks:
cb.trigger_step(dp, outputs, cost)
cb.trigger_step(inputs, outputs, cost)
def trigger_epoch(self):
for cb in self.callbacks:
#!/usr/bin/env python2
# -*- coding: UTF-8 -*-
# File: validation_callback.py
# Author: Yuxin Wu <ppwwyyxx@gmail.com>
import tensorflow as tf
from .stat import *
from .callback import PeriodicCallback, Callback
from .naming import *
from .utils import *
class ValidationAccuracy(PeriodicCallback):
"""
Validate the accuracy for the given correct and cost variables.
Use it under the following setup:
correct_var: integer, number of correct samples in this batch
ds: batched dataset
"""
def __init__(self, ds, prefix,
period=1,
correct_var_name='correct:0',
cost_var_name='cost:0'):
super(ValidationAccuracy, self).__init__(period)
self.ds = ds
self.prefix = prefix
self.correct_var_name = correct_var_name
self.cost_var_name = cost_var_name
def get_tensor(self, name):
return self.graph.get_tensor_by_name(name)
def _before_train(self):
self.input_vars = self.graph.get_collection(INPUT_VARS_KEY)
self.dropout_var = self.get_tensor(DROPOUT_PROB_VAR_NAME)
self.correct_var = self.get_tensor(self.correct_var_name)
self.cost_var = self.get_tensor(self.cost_var_name)
self.writer = tf.get_collection(SUMMARY_WRITER_COLLECTION_KEY)[0]
def _trigger(self):
cnt = 0
correct_stat = Accuracy()
cost_sum = 0
for dp in self.ds.get_data():
feed = {self.dropout_var: 1.0}
feed.update(dict(zip(self.input_vars, dp)))
batch_size = dp[0].shape[0] # assume batched input
cnt += batch_size
correct, cost = self.sess.run(
[self.correct_var, self.cost_var], feed_dict=feed)
correct_stat.feed(correct, batch_size)
# each batch might not have the same size in validation
cost_sum += cost * batch_size
cost_avg = cost_sum / cnt
self.writer.add_summary(
create_summary('{} accuracy'.format(self.prefix),
correct_stat.accuracy),
self.epoch_num)
self.writer.add_summary(
create_summary('{} cost'.format(self.prefix),
cost_avg),
self.epoch_num)
print "{} validation after epoch {}: acc={}, cost={}".format(
self.prefix, self.epoch_num, correct_stat.accuracy, cost_avg)
class TrainingAccuracy(Callback):
"""
Record the accuracy and cost during each step of training.
The result is a running average, and thus not directly comparable with the numbers from ValidationAccuracy
"""
def __init__(self, batch_size, correct_var_name='correct:0'):
"""
correct_var: number of correct samples in this batch
"""
self.correct_var_name = correct_var_name
self.batch_size = batch_size
self.epoch_num = 0
def _before_train(self):
self.writer = tf.get_collection(SUMMARY_WRITER_COLLECTION_KEY)[0]
output_vars = self.graph.get_collection(OUTPUT_VARS_KEY)
for idx, var in enumerate(output_vars):
if var.name == self.correct_var_name:
self.correct_output_idx = idx
break
else:
raise RuntimeError(
"'correct' variable must be in the model outputs to use TrainingAccuracy")
self.running_cost = StatCounter()
self.running_acc = Accuracy()
def trigger_step(self, inputs, outputs, cost):
self.running_cost.feed(cost)
self.running_acc.feed(
outputs[self.correct_output_idx],
self.batch_size) # assume batch input
def trigger_epoch(self):
self.epoch_num += 1
print('Training average in Epoch {}: cost={}, acc={}'.format(
self.epoch_num, self.running_cost.average,
self.running_acc.accuracy))
self.writer.add_summary(
create_summary('training average accuracy', self.running_acc.accuracy),
self.epoch_num)
self.writer.add_summary(
create_summary('training average cost', self.running_cost.average),
self.epoch_num)
self.running_cost.reset()
self.running_acc.reset()
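For reference, this is how the two callbacks above are wired together in the mnist example from this commit (BATCH_SIZE, dataset_test, and LOG_DIR as defined there); note that Callbacks moves the SummaryWriter to the front of the list and raises a RuntimeError if it is missing:

callbacks=[
    TrainingAccuracy(batch_size=BATCH_SIZE),
    ValidationAccuracy(dataset_test, prefix='test', period=1),
    PeriodicSaver(LOG_DIR, period=1),
    SummaryWriter(LOG_DIR, histogram_regex='.*/W'),
],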