Commit 2634a254 authored by Yuxin Wu

refine docs and examples

parent df74a4a9
@@ -3,7 +3,7 @@ Neural Network Toolbox on TensorFlow
 Still in development, but usable.
-See some interesting [examples](examples) to learn about the framework:
+See some interesting [training examples](examples) to learn about the framework:
 + [DoReFa-Net: training binary / low bitwidth CNN](examples/DoReFa-Net)
 + [Double-DQN and A3C for playing Atari games](examples/Atari2600)
......
@@ -164,11 +164,11 @@ def get_config():
             ModelSaver(),
             ScheduledHyperParamSetter('learning_rate',
                 [(150, 4e-4), (250, 1e-4), (350, 5e-5)]),
-            HumanHyperParamSetter('learning_rate', 'hyper.txt'),
-            HumanHyperParamSetter(ObjAttrParam(dataset_train, 'exploration'), 'hyper.txt'),
             RunOp(lambda: M.update_target_param()),
             dataset_train,
             PeriodicCallback(Evaluator(EVAL_EPISODE, ['state'], ['fct/output']), 3),
+            #HumanHyperParamSetter('learning_rate', 'hyper.txt'),
+            #HumanHyperParamSetter(ObjAttrParam(dataset_train, 'exploration'), 'hyper.txt'),
         ]),
         # save memory for multiprocess evaluator
         session_config=get_default_sess_config(0.6),
......
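The callback list above combines a fixed schedule with an optional human override: `ScheduledHyperParamSetter` takes `(epoch, value)` pairs, while the now-commented `HumanHyperParamSetter` lines would instead read the value from `hyper.txt`. A rough, self-contained sketch of the schedule semantics (plain Python for illustration only, not the library's implementation):

```python
# Illustrative only -- mimics the (epoch, value) schedule used by
# ScheduledHyperParamSetter above; not tensorpack code.
def scheduled_value(schedule, epoch, default):
    """Return the value of the most recent (epoch, value) entry already reached."""
    value = default
    for e, v in sorted(schedule):
        if epoch >= e:
            value = v
    return value

schedule = [(150, 4e-4), (250, 1e-4), (350, 5e-5)]
assert scheduled_value(schedule, 100, 1e-3) == 1e-3   # before epoch 150: keep default
assert scheduled_value(schedule, 200, 1e-3) == 4e-4   # epochs 150-249
assert scheduled_value(schedule, 400, 1e-3) == 5e-5   # after epoch 350
```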
@@ -26,9 +26,8 @@ IMAGE_SIZE = 28
 class Model(mnist_example.Model):
     def _build_graph(self, input_vars):
         image, label = input_vars
-        image = tf.expand_dims(image, 3)    # add a single channel
+        image = tf.expand_dims(image, 3)
         with argscope(Conv2D, kernel_shape=5):
             logits = (LinearWrap(image)    # the starting brace is only for line-breaking
@@ -40,17 +39,13 @@ class Model(mnist_example.Model):
                       .FullyConnected('fc1', out_dim=10, nl=tf.identity)())
         prob = tf.nn.softmax(logits, name='prob')
-        # compute the number of failed samples, for ClassificationError to use at test time
         wrong = symbolic_functions.prediction_incorrect(logits, label)
         nr_wrong = tf.reduce_sum(wrong, name='wrong')
-        # monitor training error
         add_moving_summary(tf.reduce_mean(wrong, name='train_error'))
         cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, label)
         cost = tf.reduce_mean(cost, name='cross_entropy_loss')
-        # weight decay on all W of fc layers
-        wd_cost = tf.mul(1e-5,
-                         regularize_cost('fc.*/W', tf.nn.l2_loss),
+        wd_cost = tf.mul(1e-5, regularize_cost('fc.*/W', tf.nn.l2_loss),
                          name='regularize_loss')
         add_moving_summary(cost, wd_cost)
......
@@ -44,7 +44,7 @@ svhn_example.get_data = get_data
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
-    parser.add_argument('--gpu', help='comma separated list of GPU(s) to use.') # nargs='*' in multi mode
+    parser.add_argument('--gpu', help='a gpu to use')
     parser.add_argument('--load', help='load model')
     parser.add_argument('--prob', help='disturb prob', type=float, required=True)
     args = parser.parse_args()
@@ -57,6 +57,4 @@ if __name__ == '__main__':
     config = get_config(args.prob)
     if args.load:
         config.session_init = SaverRestore(args.load)
-    if args.gpu:
-        config.nr_tower = len(args.gpu.split(','))
     QueueInputTrainer(config).train()
@@ -228,8 +228,7 @@ def get_config():
         dataset=data_train,
         optimizer=tf.train.AdamOptimizer(lr, epsilon=1e-5),
         callbacks=Callbacks([
-            StatPrinter(),
-            ModelSaver(),
+            StatPrinter(), ModelSaver(),
             #HumanHyperParamSetter('learning_rate'),
             ScheduledHyperParamSetter(
                 'learning_rate', [(56, 2e-5), (64, 4e-6)]),
......
@@ -154,8 +154,6 @@ def get_config():
     step_per_epoch = 5000
     dataset_val = get_data('val')
-    sess_config = get_default_sess_config(0.99)
     lr = tf.Variable(0.045, trainable=False, name='learning_rate')
     tf.scalar_summary('learning_rate', lr)
@@ -163,8 +161,7 @@ def get_config():
         dataset=dataset_train,
         optimizer=tf.train.MomentumOptimizer(lr, 0.9),
         callbacks=Callbacks([
-            StatPrinter(),
-            ModelSaver(),
+            StatPrinter(), ModelSaver(),
             InferenceRunner(dataset_val, [
                 ClassificationError('wrong-top1', 'val-top1-error'),
                 ClassificationError('wrong-top5', 'val-top5-error')]),
@@ -174,7 +171,7 @@ def get_config():
                 (19, 3e-3), (24, 1e-3), (26, 2e-4),
                 (30, 5e-5) ])
         ]),
-        session_config=sess_config,
+        session_config=get_default_sess_config(0.99),
         model=Model(),
         step_per_epoch=step_per_epoch,
         max_epoch=80,
......
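Several of these configs now pass `get_default_sess_config(0.99)` (0.9 and 0.6 elsewhere) directly, instead of keeping an intermediate `sess_config` variable. The helper itself is not part of this diff; the sketch below is only an assumption of what such a helper typically builds with the TensorFlow API of that era, where the argument bounds the fraction of GPU memory the process may claim:

```python
import tensorflow as tf

def make_sess_config(mem_fraction=0.99):
    # Hypothetical stand-in for get_default_sess_config(); the real tensorpack
    # helper may set more options than shown here.
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=mem_fraction)
    return tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options)

# usage: tf.Session(config=make_sess_config(0.9))
```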
@@ -151,7 +151,6 @@ class Model(ModelDesc):
                 ], name='concat')
         for x in ['a', 'b']:
             with tf.variable_scope('incep-8-2048{}'.format(x)) as scope:
-                #print scope.name
                 br11 = Conv2D('conv11', l, 320, 1)
                 br33 = Conv2D('conv133r', l, 384, 1)
                 br33 = tf.concat(3, [
@@ -261,8 +260,6 @@ def get_config():
     dataset_train = get_data('train')
     dataset_val = get_data('val')
-    sess_config = get_default_sess_config(0.9)
     lr = tf.Variable(0.045, trainable=False, name='learning_rate')
     tf.scalar_summary('learning_rate', lr)
@@ -280,7 +277,7 @@ def get_config():
                 (41, 8e-5), (48, 1e-5), (53, 2e-6)]),
             HumanHyperParamSetter('learning_rate')
         ]),
-        session_config=sess_config,
+        session_config=get_default_sess_config(0.9),
         model=Model(),
         step_per_epoch=5000,
         max_epoch=100,
......
 # tensorpack examples
-Examples with __reproducible__ and meaningful performance.
-+ [An illustrative mnist example](mnist-convnet.py)
+Training examples with __reproducible__ and meaningful performance.
++ [An illustrative mnist example with explanation of the framework](mnist-convnet.py)
 + [A tiny SVHN ConvNet with 97.5% accuracy](svhn-digit-convnet.py)
 + Reinforcement learning (DQN, A3C) on [Atari games](Atari2600) and [demos on OpenAI Gym](OpenAIGym).
 + [char-rnn for fun](char-rnn)
......
@@ -34,8 +34,7 @@ class Model(ModelDesc):
     def _get_input_vars(self):
         return [InputVar(tf.float32, [None, 32, 32, 3], 'input'),
-                InputVar(tf.int32, [None], 'label')
-               ]
+                InputVar(tf.int32, [None], 'label') ]
     def _build_graph(self, input_vars):
         image, label = input_vars
@@ -144,9 +143,6 @@ def get_config():
     step_per_epoch = dataset_train.size()
     dataset_test = get_data('test')
-    sess_config = get_default_sess_config(0.9)
-    get_global_step_var()
     lr = tf.Variable(0.01, trainable=False, name='learning_rate')
     tf.scalar_summary('learning_rate', lr)
@@ -154,14 +150,13 @@ def get_config():
         dataset=dataset_train,
         optimizer=tf.train.MomentumOptimizer(lr, 0.9),
         callbacks=Callbacks([
-            StatPrinter(),
-            ModelSaver(),
+            StatPrinter(), ModelSaver(),
             InferenceRunner(dataset_test,
                 [ScalarStats('cost'), ClassificationError()]),
             ScheduledHyperParamSetter('learning_rate',
                 [(1, 0.1), (82, 0.01), (123, 0.001), (300, 0.0002)])
         ]),
-        session_config=sess_config,
+        session_config=get_default_sess_config(0.9),
         model=Model(n=18),
         step_per_epoch=step_per_epoch,
         max_epoch=400,
......
@@ -19,10 +19,6 @@ Usage:
     ./load-alexnet.py --load alexnet.npy --input cat.png
 """
-BATCH_SIZE = 10
-MIN_AFTER_DEQUEUE = 500
-CAPACITY = MIN_AFTER_DEQUEUE + 3 * BATCH_SIZE
 class Model(ModelDesc):
     def _get_input_vars(self):
         return [InputVar(tf.float32, (None, 227, 227, 3), 'input'),
@@ -60,7 +56,7 @@ def run_test(path, input):
         input_var_names=['input'],
         session_init=ParamRestore(param_dict),
         session_config=get_default_sess_config(0.9),
-        output_var_names=['output']    # output:0 is the probability distribution
+        output_var_names=['output']    # the variable 'output' is the probability distribution
     )
     predict_func = get_predict_func(pred_config)
......
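The updated comment clarifies that the fetched variable `output` is the probability distribution over classes. As a small illustration (numpy only; `prob` below is a random stand-in for whatever `predict_func` would return for one image), the top-5 prediction can be read off like this:

```python
import numpy as np

# Stand-in for the softmax output of the network for a single image.
prob = np.random.rand(1000)
prob /= prob.sum()

top5 = prob.argsort()[-5:][::-1]          # indices of the 5 most likely classes
for idx in top5:
    print("class {:4d}  prob {:.4f}".format(idx, prob[idx]))
```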
@@ -8,58 +8,80 @@ import tensorflow as tf
 import os, sys
 import argparse
-from tensorpack import *
 """
 MNIST ConvNet example.
 about 0.6% validation error after 30 epochs.
 """
+# Just import everything into current namespace
+from tensorpack import *
 IMAGE_SIZE = 28
 class Model(ModelDesc):
     def _get_input_vars(self):
+        """Define all the input variables (with type, shape, name) that'll be
+        fed into the graph to produce a cost. """
         return [InputVar(tf.float32, (None, IMAGE_SIZE, IMAGE_SIZE), 'input'),
                 InputVar(tf.int32, (None,), 'label') ]
     def _build_graph(self, input_vars):
-        is_training = get_current_tower_context().is_training
-        keep_prob = tf.constant(0.5 if is_training else 1.0)
+        """This function should build the model which takes the input variables
+        and define self.cost at the end"""
+        # input_vars contains a list of input variables defined above
         image, label = input_vars
-        image = tf.expand_dims(image, 3)    # add a single channel
-        nl = PReLU.f
-        image = image * 2 - 1
-        with argscope(Conv2D, kernel_shape=3, nl=nl, out_channel=32):
+        # In tensorflow, inputs to convolution functions are assumed to be
+        # NHWC. Add a single channel here.
+        image = tf.expand_dims(image, 3)
+        image = image * 2 - 1    # center the pixel values at zero
+        # The context manager `argscope` sets the default option for all the layers under
+        # this context. Here we use 32-channel convolution with shape 3x3 and
+        # PReLU as the nonlinearity.
+        with argscope(Conv2D, kernel_shape=3, nl=PReLU.f, out_channel=32):
+            """
+            LinearWrap is just a convenient way to compose a linear symbolic graph.
+            You can also do the equivalent in tensorflow style:
+            l = Conv2D('conv0', image)
+            l = MaxPooling('pool0', l, 2)
+            ... """
             logits = (LinearWrap(image)    # the starting brace is only for line-breaking
-                    .Conv2D('conv0', padding='VALID')
+                    .Conv2D('conv0')
                     .MaxPooling('pool0', 2)
                     .Conv2D('conv1', padding='SAME')
-                    .Conv2D('conv2', padding='VALID')
+                    .Conv2D('conv2')
                     .MaxPooling('pool1', 2)
-                    .Conv2D('conv3', padding='VALID')
+                    .Conv2D('conv3')
                     .FullyConnected('fc0', 512)
-                    .tf.nn.dropout(keep_prob)
+                    .Dropout('dropout', 0.5)
                     .FullyConnected('fc1', out_dim=10, nl=tf.identity)())
-        prob = tf.nn.softmax(logits, name='prob')
-        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, label)
-        cost = tf.reduce_mean(cost, name='cross_entropy_loss')
-        # compute the number of failed samples, for ClassificationError to use at test time
+        prob = tf.nn.softmax(logits, name='prob')    # a Bx10 tensor of probabilities
+        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, label)    # a vector of length B with the loss of each sample
+        cost = tf.reduce_mean(cost, name='cross_entropy_loss')    # the average cross-entropy loss
+        # compute the number of failed samples, for the callback ClassificationError to use at test time
         wrong = symbolic_functions.prediction_incorrect(logits, label)
         nr_wrong = tf.reduce_sum(wrong, name='wrong')
-        # monitor training error
-        summary.add_moving_summary(tf.reduce_mean(wrong, name='train_error'))
-        # weight decay on all W of fc layers
+        # This will monitor the training error (in a moving-average fashion), i.e.:
+        # 1. write the value to tensorboard
+        # 2. write the value to stat.json
+        # 3. print the value after each epoch
+        train_error = tf.reduce_mean(wrong, name='train_error')
+        summary.add_moving_summary(train_error)
+        # Use a regex to find the parameters to apply weight decay to.
+        # Here we apply weight decay to all W (weight matrices) of the fc layers.
         wd_cost = tf.mul(1e-5,
                          regularize_cost('fc.*/W', tf.nn.l2_loss),
                          name='regularize_loss')
         summary.add_moving_summary(cost, wd_cost)
-        summary.add_param_summary([('.*/W', ['histogram'])])    # monitor histogram of all W
+        # monitor the histogram of all weights (of conv and fc layers) in tensorboard
+        summary.add_param_summary([('.*/W', ['histogram'])])
         self.cost = tf.add_n([wd_cost, cost], name='cost')
 def get_data():
@@ -68,9 +90,11 @@ def get_data():
     return train, test
 def get_config():
+    # automatically setup the directory train_log/mnist-convnet for logging
     logger.auto_set_dir()
     dataset_train, dataset_test = get_data()
+    # how many iterations you want in each epoch
     step_per_epoch = dataset_train.size()
     lr = tf.train.exponential_decay(
@@ -78,18 +102,23 @@ def get_config():
         global_step=get_global_step_var(),
         decay_steps=dataset_train.size() * 10,
         decay_rate=0.3, staircase=True, name='learning_rate')
+    # This will also put the summary in tensorboard, stat.json and print it in
+    # the terminal, but without the moving average
     tf.scalar_summary('learning_rate', lr)
+    # get the config which contains everything necessary for training
     return TrainConfig(
-        dataset=dataset_train,
+        dataset=dataset_train,    # the DataFlow instance for training
         optimizer=tf.train.AdamOptimizer(lr),
         callbacks=Callbacks([
-            StatPrinter(),
-            ModelSaver(),
-            InferenceRunner(dataset_test,
+            StatPrinter(),    # print statistics in terminal after every epoch
+            ModelSaver(),     # save the model after every epoch
+            InferenceRunner(  # run inference (for validation) after every epoch
+                dataset_test, # the DataFlow instance used for validation
+                # Calculate both the cost and the error for this DataFlow
                 [ScalarStats('cost'), ClassificationError() ]),
         ]),
-        session_config=get_default_sess_config(0.5),
         model=Model(),
         step_per_epoch=step_per_epoch,
         max_epoch=100,
@@ -106,6 +135,5 @@ if __name__ == '__main__':
     config = get_config()
     if args.load:
         config.session_init = SaverRestore(args.load)
-    #QueueInputTrainer(config).train()
     SimpleTrainer(config).train()
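The new comments describe `add_moving_summary` as monitoring `train_error` "in a moving-average fashion". As a rough illustration of why a moving average is used for such a noisy per-step signal, here is a plain-numpy exponential moving average (unrelated to tensorpack's actual implementation):

```python
import numpy as np

def ema(values, decay=0.95):
    # Exponential moving average: each point blends the previous average with
    # the new value, which smooths out step-to-step noise.
    avg, out = values[0], []
    for v in values:
        avg = decay * avg + (1 - decay) * v
        out.append(avg)
    return np.array(out)

noisy_error = 0.05 + 0.02 * np.random.randn(500)   # fake per-step train_error
smooth_error = ema(noisy_error)
print(noisy_error[-1], smooth_error[-1])
```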
@@ -51,7 +51,6 @@ class Model(ModelDesc):
         cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, label)
         cost = tf.reduce_mean(cost, name='cross_entropy_loss')
-        # weight decay on all W of fc layers
         wd_cost = regularize_cost('fc.*/W', l2_regularizer(0.00001))
         add_moving_summary(cost, wd_cost)
@@ -98,8 +97,7 @@ def get_config():
         dataset=data_train,
         optimizer=tf.train.AdamOptimizer(lr),
         callbacks=Callbacks([
-            StatPrinter(),
-            ModelSaver(),
+            StatPrinter(), ModelSaver(),
             InferenceRunner(data_test,
                 [ScalarStats('cost'), ClassificationError()])
         ]),
......
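Both the MNIST and SVHN models build their weight-decay term with `regularize_cost('fc.*/W', ...)`: an L2 penalty applied only to parameters whose names match the regex. A small numpy sketch of that idea (illustrative only; the parameter names and the 1e-5 scale follow the examples above):

```python
import re
import numpy as np

# Pretend parameter dictionary; names follow the 'layer/W' convention above.
params = {
    'conv0/W': np.random.randn(3, 3, 1, 32),
    'fc0/W':   np.random.randn(512, 10),
    'fc1/W':   np.random.randn(10, 10),
}

# l2_loss(W) = sum(W ** 2) / 2; only the fc layers' W match the regex 'fc.*/W'.
wd_cost = 1e-5 * sum(np.sum(w ** 2) / 2.0
                     for name, w in params.items()
                     if re.match('fc.*/W', name))
print('weight decay cost:', wd_cost)
```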