Commit 28599036 authored by ppwwyyxx

summary activation, layer with nl

parent 5ec865d8
@@ -18,6 +18,14 @@ class BatchData(object):
         self.batch_size = batch_size
         self.remainder = remainder
 
+    def size(self):
+        ds_size = self.ds.size()
+        div = ds_size / self.batch_size
+        rem = ds_size % self.batch_size
+        if rem == 0:
+            return div
+        return div + int(self.remainder)
+
     def get_data(self):
         holder = []
         for data in self.ds.get_data():
...
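The batch-count arithmetic above relies on Python 2 integer division. A standalone sanity check with hypothetical sizes (not part of the commit):

    # 100 samples in batches of 32: three full batches plus a remainder of 4
    ds_size, batch_size = 100, 32
    div, rem = ds_size / batch_size, ds_size % batch_size  # 3, 4 under Python 2
    assert div + int(True) == 4    # remainder=True keeps the final partial batch
    assert div + int(False) == 3   # remainder=False drops it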
@@ -128,6 +128,10 @@ class Mnist(object):
         self.dataset = read_data_sets(dir)
         self.train_or_test = train_or_test
 
+    def size(self):
+        ds = self.dataset.train if self.train_or_test == 'train' else self.dataset.test
+        return ds.num_examples
+
     def get_data(self):
         ds = self.dataset.train if self.train_or_test == 'train' else self.dataset.test
         for k in xrange(ds.num_examples):
...
@@ -40,21 +40,16 @@ def get_model(inputs):
     image = tf.reshape(image, [-1, IMAGE_SIZE, IMAGE_SIZE, 1])
     conv0 = Conv2D('conv0', image, out_channel=32, kernel_shape=5,
                    padding='valid')
-    conv0 = tf.nn.relu(conv0)
-    pool0 = tf.nn.max_pool(conv0, ksize=[1, 2, 2, 1],
-                           strides=[1, 2, 2, 1], padding='SAME')
-    conv1 = Conv2D('conv1', pool0, out_channel=40, kernel_shape=3, padding='valid')
-    conv1 = tf.nn.relu(conv1)
-    pool1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1],
-                           strides=[1, 2, 2, 1], padding='SAME')
-    feature = batch_flatten(pool1)
-    fc0 = FullyConnected('fc0', feature, 1024)
-    fc0 = tf.nn.relu(fc0)
+    pool0 = MaxPooling('pool0', conv0, 2)
+    conv1 = Conv2D('conv1', pool0, out_channel=40, kernel_shape=3)
+    pool1 = MaxPooling('pool1', conv1, 2)
+    fc0 = FullyConnected('fc0', pool1, 1024)
     fc0 = tf.nn.dropout(fc0, keep_prob)
-    fc1 = FullyConnected('lr', fc0, out_dim=10)
+    # fc will have activation summary by default. disable this for the output layer
+    fc1 = FullyConnected('fc1', fc0, out_dim=10,
+                         summary_activation=False, nl=tf.identity)
     prob = tf.nn.softmax(fc1, name='output')
 
     y = one_hot(label, 10)
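For reference, MaxPooling('pool0', conv0, 2) stands in for the removed raw-TF call; a sketch of the rough equivalence (note the wrapper's default padding is 'VALID', where the old code used 'SAME'):

    pool0 = tf.nn.max_pool(conv0, ksize=[1, 2, 2, 1],
                           strides=[1, 2, 2, 1], padding='VALID')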
@@ -62,16 +57,16 @@ def get_model(inputs):
     cost = tf.reduce_mean(cost, name='cross_entropy_loss')
     tf.add_to_collection(COST_VARS_KEY, cost)
 
-    # compute the number of correctly classified samples, for ValidationAccuracy to use at test time
-    correct = tf.equal(
-        tf.cast(tf.argmax(prob, 1), tf.int32), label)
-    correct = tf.cast(correct, tf.float32)
-    nr_correct = tf.reduce_sum(correct, name='correct')
+    # compute the number of failed samples, for ValidationError to use at test time
+    wrong = tf.not_equal(
+        tf.cast(tf.argmax(prob, 1), tf.int32), label)
+    wrong = tf.cast(wrong, tf.float32)
+    nr_wrong = tf.reduce_sum(wrong, name='wrong')
 
     # monitor training accuracy
     tf.add_to_collection(
         SUMMARY_VARS_KEY,
-        1 - tf.reduce_mean(correct, name='train_error'))
+        tf.sub(1.0, tf.reduce_mean(wrong), name='train_error'))
 
     # weight decay on all W of fc layers
     wd_cost = tf.mul(1e-4,
@@ -79,7 +74,7 @@ def get_model(inputs):
                      name='regularize_loss')
     tf.add_to_collection(COST_VARS_KEY, wd_cost)
 
-    return [prob, nr_correct], tf.add_n(tf.get_collection(COST_VARS_KEY), name='cost')
+    return [prob, nr_wrong], tf.add_n(tf.get_collection(COST_VARS_KEY), name='cost')
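A toy NumPy check of the wrong-count logic above (illustrative values only, not part of the commit):

    import numpy as np
    pred = np.array([3, 1, 4])                   # argmax of prob along axis 1
    label = np.array([3, 0, 4])
    wrong = (pred != label).astype('float32')    # [0., 1., 0.]
    assert wrong.sum() == 1.0                    # nr_wrong for this batch
    assert abs(wrong.mean() - 1.0 / 3) < 1e-6    # per-batch error rate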
 def main(argv=None):
     BATCH_SIZE = 128
@@ -97,11 +92,20 @@ def main(argv=None):
     output_vars, cost_var = get_model(input_vars)
     add_histogram_summary('.*/W')   # monitor histogram of all W
 
+    global_step_var = tf.Variable(
+        0, trainable=False, name=GLOBAL_STEP_OP_NAME)
+    lr = tf.train.exponential_decay(
+        learning_rate=1e-4,
+        global_step=global_step_var,
+        decay_steps=dataset_train.size() * 50,
+        decay_rate=0.1, staircase=True, name='learning_rate')
+    tf.scalar_summary('learning_rate', lr)
+
     config = dict(
         dataset_train=dataset_train,
-        optimizer=tf.train.AdamOptimizer(1e-4),
+        optimizer=tf.train.AdamOptimizer(lr),
         callbacks=[
-            ValidationAccuracy(
+            ValidationError(
                 dataset_test,
                 prefix='test'),
             PeriodicSaver(LOG_DIR, period=1),
...
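With staircase=True the schedule above is a step function: the rate drops by 10x after every dataset_train.size() * 50 steps, i.e. every 50 epochs. A sketch of the resulting value (469 steps per epoch is a hypothetical figure for MNIST at batch size 128):

    def decayed_lr(step, base=1e-4, rate=0.1, steps_per_epoch=469):
        # staircase: integer division makes the rate piecewise-constant
        return base * rate ** (step // (steps_per_epoch * 50))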
@@ -4,15 +4,43 @@
 # Author: Yuxin Wu <ppwwyyxx@gmail.com>
 
 import tensorflow as tf
+from utils.summary import *
 
-def layer_register():
+def layer_register(summary_activation=False):
+    """
+    summary_activation: default behavior of whether to summarize the output of this layer
+    """
     def wrapper(func):
         def inner(*args, **kwargs):
             name = args[0]
             assert isinstance(name, basestring)
             args = args[1:]
+            do_summary = kwargs.pop(
+                'summary_activation', summary_activation)
-            with tf.variable_scope(name):
-                return func(*args, **kwargs)
+            with tf.variable_scope(name) as scope:
+                ret = func(*args, **kwargs)
+                if do_summary:
+                    ndim = ret.get_shape().ndims
+                    assert ndim >= 2, \
+                        "Summary a scalar with histogram? Maybe use scalar instead. FIXME!"
+                    add_activation_summary(ret, scope.name)
+                return ret
         return inner
     return wrapper
+
+def shape2d(a):
+    """
+    a: an int or a tuple/list of length 2
+    """
+    if type(a) == int:
+        return [a, a]
+    if type(a) in [list, tuple]:
+        assert len(a) == 2
+        return list(a)
+    raise RuntimeError("Illegal shape: {}".format(a))
+
+def shape4d(a):
+    # for use with tensorflow
+    return [1] + shape2d(a) + [1]
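Taken together, the decorator gives every layer a uniform calling convention: the first positional argument is a variable-scope name, and summary_activation can be overridden per call. A minimal usage sketch (Dummy is a hypothetical layer, not from the commit):

    @layer_register(summary_activation=True)
    def Dummy(x):
        return tf.identity(x)

    y = Dummy('dummy', x)                             # summarized, built under scope 'dummy'
    z = Dummy('dummy2', x, summary_activation=False)  # summary disabled for this call

    # shape helpers: scalars broadcast to square windows, NHWC-style for TF ops
    assert shape2d(3) == [3, 3]
    assert shape4d(2) == [1, 2, 2, 1]   # ksize/strides format for tf.nn ops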
@@ -5,14 +5,14 @@
 import tensorflow as tf
 import math
 
-from ._common import layer_register
+from ._common import *
 
 __all__ = ['Conv2D']
 
-@layer_register()
+@layer_register(summary_activation=True)
 def Conv2D(x, out_channel, kernel_shape,
-           padding='VALID', stride=None,
-           W_init=None, b_init=None):
+           padding='VALID', stride=1,
+           W_init=None, b_init=None, nl=tf.nn.relu):
     """
     kernel_shape: (h, w) or an int
     stride: (h, w) or an int
@@ -21,19 +21,10 @@ def Conv2D(x, out_channel, kernel_shape,
     in_shape = x.get_shape().as_list()
     in_channel = in_shape[-1]
-    if type(kernel_shape) == int:
-        kernel_shape = [kernel_shape, kernel_shape]
+    kernel_shape = shape2d(kernel_shape)
     padding = padding.upper()
     filter_shape = kernel_shape + [in_channel, out_channel]
 
-    if stride is None:
-        stride = [1, 1, 1, 1]
-    elif type(stride) == int:
-        stride = [1, stride, stride, 1]
-    elif type(stride) in [list, tuple]:
-        assert len(stride) == 2
-        stride = [1] + list(stride) + [1]
+    stride = shape4d(stride)
 
     if W_init is None:
         W_init = tf.truncated_normal_initializer(stddev=0.04)
@@ -44,5 +35,5 @@ def Conv2D(x, out_channel, kernel_shape,
     b = tf.get_variable('b', [out_channel], initializer=b_init)
     conv = tf.nn.conv2d(x, W, stride, padding)
-    return tf.nn.bias_add(conv, b)
+    return nl(tf.nn.bias_add(conv, b))
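So a plain Conv2D call now yields a ReLU-activated output by default, and a linear convolution has to be requested explicitly. A sketch of both call styles (x is assumed to be an NHWC tensor):

    c1 = Conv2D('c1', x, out_channel=32, kernel_shape=3)    # ReLU applied
    c2 = Conv2D('c2', x, out_channel=32, kernel_shape=3,
                nl=tf.identity, summary_activation=False)   # raw linear output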
@@ -6,15 +6,14 @@
 from ._common import layer_register
 import tensorflow as tf
+from utils.symbolic_functions import *
 import math
 
 __all__ = ['FullyConnected']
 
-@layer_register()
-def FullyConnected(x, out_dim, W_init=None, b_init=None):
-    """
-    x: matrix of bxn
-    """
+@layer_register(summary_activation=True)
+def FullyConnected(x, out_dim, W_init=None, b_init=None, nl=tf.nn.relu):
+    x = batch_flatten(x)
     in_dim = x.get_shape().as_list()[1]
 
     if W_init is None:
@@ -22,6 +21,6 @@ def FullyConnected(x, out_dim, W_init=None, b_init=None):
     if b_init is None:
         b_init = tf.constant_initializer()
 
-    W = tf.get_variable('W', [in_dim, out_dim], initializer=W_init)  # TODO collections
+    W = tf.get_variable('W', [in_dim, out_dim], initializer=W_init)
     b = tf.get_variable('b', [out_dim], initializer=b_init)
-    return tf.matmul(x, W) + b
+    return nl(tf.matmul(x, W) + b)
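batch_flatten comes from utils.symbolic_functions, which this commit doesn't show; presumably it collapses all non-batch dimensions, along the lines of this sketch:

    import numpy as np
    def batch_flatten(x):
        # [b, d1, d2, ...] -> [b, d1*d2*...]; assumes the static shape is known
        total_dim = int(np.prod(x.get_shape().as_list()[1:]))
        return tf.reshape(x, [-1, total_dim])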
new file: pool.py
+#!/usr/bin/env python2
+# -*- coding: UTF-8 -*-
+# File: pool.py
+# Author: Yuxin Wu <ppwwyyxx@gmail.com>
+
+from ._common import *
+import tensorflow as tf
+
+__all__ = ['MaxPooling']
+
+@layer_register()
+def MaxPooling(x, shape, stride=None, padding='VALID'):
+    """
+    shape, stride: int or list/tuple of length 2
+    if stride is None, use shape by default
+    padding: 'VALID' or 'SAME'
+    """
+    padding = padding.upper()
+    shape = shape4d(shape)
+    if stride is None:
+        stride = shape
+    else:
+        stride = shape4d(stride)
+
+    return tf.nn.max_pool(x, ksize=shape, strides=stride, padding=padding)
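With stride=None the pooling window and stride coincide, so MaxPooling('p', x, 2) is the common non-overlapping 2x2 pool:

    # both ksize and strides become [1, 2, 2, 1] via shape4d
    out = MaxPooling('p', x, 2)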
@@ -40,7 +40,11 @@ def start_train(config):
     for v in output_vars:
         G.add_to_collection(OUTPUT_VARS_KEY, v)
 
-    global_step_var = tf.Variable(0, trainable=False, name='global_step')
+    try:
+        global_step_var = G.get_tensor_by_name(GLOBAL_STEP_VAR_NAME)
+    except KeyError:   # not created
+        global_step_var = tf.Variable(
+            0, trainable=False, name=GLOBAL_STEP_OP_NAME)
 
     # add some summary ops to the graph
     averager = tf.train.ExponentialMovingAverage(
...
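The try/except works because get_tensor_by_name raises KeyError for a name that was never created, so the global step is built at most once. The lookup pattern in isolation (a sketch, outside the trainer):

    G = tf.get_default_graph()
    try:
        t = G.get_tensor_by_name('global_step:0')   # KeyError if absent
    except KeyError:
        t = tf.Variable(0, trainable=False, name='global_step')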
@@ -63,7 +63,7 @@ class PeriodicSaver(PeriodicCallback):
             global_step=self.epoch_num, latest_filename='latest')
 
 class SummaryWriter(Callback):
-    def __init__(self, log_dir, histogram_regex=None):
+    def __init__(self, log_dir):
         self.log_dir = log_dir
         self.epoch_num = 0
@@ -100,6 +100,7 @@ class Callbacks(Callback):
     def before_train(self):
         for cb in self.callbacks:
             cb.before_train()
+        self.writer = tf.get_collection(SUMMARY_WRITER_COLLECTION_KEY)[0]
 
     def trigger_step(self, inputs, outputs, cost):
         for cb in self.callbacks:
@@ -108,4 +109,5 @@ class Callbacks(Callback):
     def trigger_epoch(self):
         for cb in self.callbacks:
             cb.trigger_epoch()
+        self.writer.flush()
@@ -6,8 +6,10 @@
 DROPOUT_PROB_OP_NAME = 'dropout_prob'
 DROPOUT_PROB_VAR_NAME = 'dropout_prob:0'
+GLOBAL_STEP_OP_NAME = 'global_step'
+GLOBAL_STEP_VAR_NAME = 'global_step:0'
 
 SUMMARY_WRITER_COLLECTION_KEY = 'summary_writer'
+MERGE_SUMMARY_OP_NAME = 'MergeSummary/MergeSummary:0'
 
 INPUT_VARS_KEY = 'INPUT_VARIABLES'
 OUTPUT_VARS_KEY = 'OUTPUT_VARIABLES'
...
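The *_OP_NAME / *_VAR_NAME pairing follows TensorFlow's naming rule: an op named 'global_step' exposes its first output as the tensor 'global_step:0'. A quick check:

    v = tf.Variable(0, trainable=False, name=GLOBAL_STEP_OP_NAME)
    assert v.name == GLOBAL_STEP_VAR_NAME   # i.e. 'global_step:0'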
@@ -5,7 +5,7 @@
 import tensorflow as tf
 
-__all__ = ['create_summary', 'add_histogram_summary']
+__all__ = ['create_summary', 'add_histogram_summary', 'add_activation_summary']
 
 def create_summary(name, v):
     """
@@ -19,6 +19,17 @@ def create_summary(name, v):
     s.value.add(tag=name, simple_value=v)
     return s
 
+def add_activation_summary(x, name=None):
+    """
+    Summary for an activation tensor x.
+    If name is None, use x.name
+    """
+    if name is None:
+        name = x.name
+    tf.histogram_summary(name + '/activations', x)
+    tf.scalar_summary(name + '/sparsity', tf.nn.zero_fraction(x))
+    # TODO avoid repeating activations on multiple GPUs
+
 def add_histogram_summary(regex):
     """
     Add histogram summary for all trainable variables matching the regex
...
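A hypothetical use of the new helper, mirroring what layer_register does with it: summarize a layer output under its scope name, yielding a 'conv0/activations' histogram and a 'conv0/sparsity' scalar:

    with tf.variable_scope('conv0') as scope:
        out = tf.nn.relu(some_tensor)        # some_tensor is a stand-in input
        add_activation_summary(out, scope.name)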
@@ -7,24 +7,24 @@ import tensorflow as tf
 from .stat import *
 from .callback import PeriodicCallback, Callback
 from .naming import *
-from .utils import *
+from .summary import *
 
-class ValidationAccuracy(PeriodicCallback):
+class ValidationError(PeriodicCallback):
     """
-    Validate the accuracy for the given correct and cost variable
+    Validate the error for the given wrong and cost variables
     Use under the following setup:
-    correct_var: integer, number of correct samples in this batch
+    wrong_var: integer, number of failed samples in this batch
     ds: batched dataset
     """
     def __init__(self, ds, prefix,
                  period=1,
-                 correct_var_name='correct:0',
+                 wrong_var_name='wrong:0',
                  cost_var_name='cost:0'):
-        super(ValidationAccuracy, self).__init__(period)
+        super(ValidationError, self).__init__(period)
         self.ds = ds
         self.prefix = prefix
-        self.correct_var_name = correct_var_name
+        self.wrong_var_name = wrong_var_name
         self.cost_var_name = cost_var_name
 
     def get_tensor(self, name):
@@ -33,13 +33,13 @@ class ValidationAccuracy(PeriodicCallback):
     def _before_train(self):
         self.input_vars = tf.get_collection(INPUT_VARS_KEY)
         self.dropout_var = self.get_tensor(DROPOUT_PROB_VAR_NAME)
-        self.correct_var = self.get_tensor(self.correct_var_name)
+        self.wrong_var = self.get_tensor(self.wrong_var_name)
         self.cost_var = self.get_tensor(self.cost_var_name)
         self.writer = tf.get_collection(SUMMARY_WRITER_COLLECTION_KEY)[0]
 
     def _trigger(self):
         cnt = 0
-        correct_stat = Accuracy()
+        err_stat = Accuracy()
         cost_sum = 0
         for dp in self.ds.get_data():
             feed = {self.dropout_var: 1.0}
@@ -48,20 +48,20 @@ class ValidationAccuracy(PeriodicCallback):
             batch_size = dp[0].shape[0]   # assume batched input
             cnt += batch_size
-            correct, cost = self.sess.run(
-                [self.correct_var, self.cost_var], feed_dict=feed)
-            correct_stat.feed(correct, batch_size)
+            wrong, cost = self.sess.run(
+                [self.wrong_var, self.cost_var], feed_dict=feed)
+            err_stat.feed(wrong, batch_size)
             # each batch might not have the same size in validation
             cost_sum += cost * batch_size
 
         cost_avg = cost_sum / cnt
         self.writer.add_summary(
             create_summary('{}_error'.format(self.prefix),
-                           1 - correct_stat.accuracy),
+                           err_stat.accuracy),
             self.epoch_num)
         self.writer.add_summary(
             create_summary('{}_cost'.format(self.prefix),
                            cost_avg),
             self.epoch_num)
         print "{} validation after epoch {}: err={}, cost={}".format(
-            self.prefix, self.epoch_num, 1 - correct_stat.accuracy, cost_avg)
+            self.prefix, self.epoch_num, err_stat.accuracy, cost_avg)
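The Accuracy stat comes from .stat and isn't shown in this commit; judging by its use above, feed(k, n) accumulates k hits out of n samples and .accuracy returns their ratio, so feeding nr_wrong per batch makes .accuracy the overall error rate. A hypothetical stand-in:

    class Accuracy(object):
        # sketch only; the real class lives in .stat
        def __init__(self):
            self.tot = 0.0
            self.cnt = 0.0
        def feed(self, k, n):
            self.tot += k
            self.cnt += n
        @property
        def accuracy(self):
            return self.tot / self.cnt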