Commit 5aaf6410 authored by Yuxin Wu

misc docs update

parent 527c425b
@@ -9,7 +9,7 @@ You can actually train them and reproduce the performance... not just to see how
+ [InceptionV3 on ImageNet](examples/Inception/inceptionv3.py)
+ [Fully-convolutional Network for Holistically-Nested Edge Detection](examples/HED)
+ [Spatial Transformer Networks on MNIST addition](examples/SpatialTransformer)
+ [Generative Adversarial Networks (GAN) variants](examples/GAN)
+ [DQN variants on Atari games](examples/Atari2600)
+ [Asynchronous Advantage Actor-Critic (A3C) with demos on OpenAI Gym](examples/OpenAIGym)
+ [char-rnn language model](examples/char-rnn)
@@ -18,17 +18,17 @@ You can actually train them and reproduce the performance... not just to see how
Describe your training task with three components:

1. __Model__, or graph. `models/` has some scoped abstractions of common models, but you can simply use
   any symbolic function available in TensorFlow, or most functions in slim/tflearn/tensorlayer.
   `LinearWrap` and `argscope` make large models look simpler ([vgg example](https://github.com/ppwwyyxx/tensorpack/blob/master/examples/load-vgg16.py)).
2. __DataFlow__. tensorpack allows and encourages complex data processing.
   + All data producers share a unified `generator` interface, allowing them to be composed to perform complex preprocessing (see the sketch after this list).
   + Use Python to easily handle any data format, yet still keep a good training speed thanks to multiprocess prefetching & TF Queue prefetching.
     For example, InceptionV3 can run at the same speed as the official code, which reads data using TF operators.
3. __Callbacks__, including everything you want to do apart from the training iterations, such as:
   + Change hyperparameters during training
   + Print some variables of interest
   + Run inference on a test dataset
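To make the `generator` interface concrete, here is a minimal sketch of DataFlow composition. The classes used below exist in tensorpack, but this exact chain is an illustration, not code from this commit:

```python
# A hedged sketch of composing DataFlows; the chain is illustrative.
from tensorpack import *

df = dataset.Cifar10('train')                              # yields [image, label] datapoints
df = AugmentImageComponent(df, [imgaug.Flip(horiz=True)])  # augment the image component
df = BatchData(df, 128)                                    # stack datapoints into batches
df = PrefetchData(df, 12, 3)                               # prefetch 12 batches in 3 processes

df.reset_state()                  # required before iterating manually
for dp in df.get_data():          # every DataFlow exposes this generator interface
    images, labels = dp           # a datapoint is a list of components
    break
```

Each wrapper consumes the generator of the DataFlow below it, which is what makes arbitrary composition possible.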
@@ -39,6 +39,8 @@ With the above components defined, tensorpack trainer will run the training iter
Multi-GPU training is off-the-shelf by simply switching the trainer.
You can also define your own trainer for non-standard training (e.g. GAN).

The components are designed to be independent: you can use only the Model or only the DataFlow in your project.
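Putting the three components together, a hedged sketch of a complete training setup, assuming the `TrainConfig`/`SimpleTrainer` API from around this commit (`MyModel` and the hyperparameters are hypothetical):

```python
import tensorflow as tf
from tensorpack import *

class MyModel(ModelDesc):                                  # 1. the Model
    def _get_input_vars(self):
        return [InputVar(tf.float32, (None, 28, 28), 'input'),
                InputVar(tf.int32, (None,), 'label')]

    def _build_graph(self, input_vars):
        image, label = input_vars
        l = tf.reshape(image, [-1, 28 * 28])
        logits = FullyConnected('fc', l, out_dim=10, nl=tf.identity)
        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, label)
        self.cost = tf.reduce_mean(cost, name='cost')

config = TrainConfig(
    dataset=BatchData(dataset.Mnist('train'), 128),        # 2. the DataFlow
    optimizer=tf.train.AdamOptimizer(1e-3),
    callbacks=Callbacks([StatPrinter(), ModelSaver()]),    # 3. the Callbacks
    model=MyModel(),
    max_epoch=10)
SimpleTrainer(config).train()
```

Switching to multi-GPU training would then only change the last line to a multi-GPU trainer (e.g. `SyncMultiGPUTrainer(config).train()`).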
## Dependencies:

+ Python 2 or 3
...
@@ -4,13 +4,14 @@
# Author: Yuxin Wu <ppwwyyxxc@gmail.com>

import tensorflow as tf
from tensorpack.utils.argtools import memoized

@memoized
def get_dorefa(bitW, bitA, bitG):
    """
    Return the three quantization functions fw, fa, fg, for weights,
    activations and gradients respectively.
    It is unsafe to call this function multiple times with different parameters.
    """
    G = tf.get_default_graph()

    def quantize(x, k):
@@ -34,8 +35,6 @@ def get_dorefa(bitW, bitA, bitG):
            return x
        return quantize(x, bitA)

    @tf.RegisterGradient("FGGrad")
    def grad_fg(op, x):
        rank = x.get_shape().ndims
@@ -48,7 +47,6 @@ def get_dorefa(bitW, bitA, bitG):
        x = tf.clip_by_value(x, 0.0, 1.0)
        x = quantize(x, bitG) - 0.5
        return x * maxx * 2

    def fg(x):
        if bitG == 32:
...
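The `@memoized` decorator is what lets the commit delete the module-level `GRAD_DEFINED` flag: `tf.RegisterGradient("FGGrad")` may only run once per process, and caching `get_dorefa`'s result guarantees the registration code is not re-executed on repeated calls with the same arguments. A hedged illustration of the idea (tensorpack's actual implementation may differ; it behaves like `functools.lru_cache`):

```python
import functools

def memoized(func):
    """Illustrative stand-in for tensorpack.utils.argtools.memoized."""
    cache = {}
    @functools.wraps(func)
    def wrapper(*args):
        if args not in cache:
            cache[args] = func(*args)  # the body (incl. gradient registration) runs once per args
        return cache[args]
    return wrapper
```

This is also why the new docstring warns against calling `get_dorefa` with different parameters: a second distinct `(bitW, bitA, bitG)` tuple would miss the cache and attempt to register `"FGGrad"` again.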
@@ -84,9 +84,9 @@ class Model(ModelDesc):
                 .BatchNorm('bn1').LeakyReLU()
                 .Conv2D('conv2', NF*4)
                 .BatchNorm('bn2').LeakyReLU()
                 .Conv2D('conv3', NF*8, stride=1, padding='VALID')
                 .BatchNorm('bn3').LeakyReLU()
                 .Conv2D('convlast', 1, stride=1, padding='VALID')())
        return l

    def _build_graph(self, input_vars):
...
@@ -119,7 +119,6 @@ def sample(model_path):
    eye = [k for k in np.eye(10)]
    inputs = np.asarray(eye * 10)
    while True:
        o = pred([inputs])
        o = (o[0] + 1) * 128.0
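For reference, the sampling batch built above can be read as follows; this snippet only restates the two lines from the diff:

```python
import numpy as np

eye = [k for k in np.eye(10)]     # the 10 one-hot latent codes
inputs = np.asarray(eye * 10)     # repeat the list 10 times -> a batch of 100
assert inputs.shape == (100, 10)  # 10 samples per code, matching the --sample help text below
```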
@@ -131,7 +130,7 @@ if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', help='comma separated list of GPU(s) to use.')
    parser.add_argument('--load', help='load model')
    parser.add_argument('--sample', action='store_true', help='visualize the space of the 10 latent codes')
    args = parser.parse_args()
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
...
### Code and models for my Gym submissions on Atari games

Use the A3C algorithm from [Asynchronous Methods for Deep Reinforcement Learning](http://arxiv.org/abs/1602.01783).

### To train on an Atari game:
@@ -9,9 +11,9 @@
1. Download models from the [model zoo](https://drive.google.com/open?id=0B9IPQTvr2BBkS0VhX0xmS1c5aFk)
2. `ENV=Breakout-v0; ./run-atari.py --load "$ENV".tfmodel --env "$ENV" --episode 100 --output output_dir`

Models are available for the following Gym Atari environments (click the links for videos on gym):
+ [AirRaid](https://gym.openai.com/evaluations/eval_zIeNk5MxSGOmvGEUxrZDUw) (this one flickers; I don't know why)
+ [Alien](https://gym.openai.com/evaluations/eval_8NR1IvjTQkSIT6En4xSMA)
+ [Amidar](https://gym.openai.com/evaluations/eval_HwEazbHtTYGpCialv9uPhA)
+ [Assault](https://gym.openai.com/evaluations/eval_tCiHwy5QrSdFVucSbBV6Q)
...
@@ -124,9 +124,11 @@ def get_data(train_or_test):
    if isTrain:
        class Resize(imgaug.ImageAugmentor):
            """
            crop 8%~100% of the original image
            See `Going Deeper with Convolutions` by Google.
            """
            def _augment(self, img, _):
                h, w = img.shape[:2]
                area = h * w
                for _ in range(10):
...
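The `_augment` body is collapsed in this diff. For reference, a hedged sketch of the GoogLeNet-style crop the new docstring describes: sample a target area of 8%~100% of the image at a random aspect ratio, retry up to 10 times, and fall back to a plain resize. The constants below are assumptions, not the file's exact code:

```python
import numpy as np
import cv2

def inception_crop(img, target_shape=299, rng=np.random):
    h, w = img.shape[:2]
    area = h * w
    for _ in range(10):                                # up to 10 attempts
        target_area = rng.uniform(0.08, 1.0) * area    # crop 8%~100% of the image
        aspect_ratio = rng.uniform(0.75, 1.333)
        ww = int(np.sqrt(target_area * aspect_ratio))
        hh = int(np.sqrt(target_area / aspect_ratio))
        if rng.uniform() < 0.5:
            ww, hh = hh, ww                            # randomly swap the two sides
        if hh <= h and ww <= w:
            x = rng.randint(0, w - ww + 1)
            y = rng.randint(0, h - hh + 1)
            return cv2.resize(img[y:y + hh, x:x + ww], (target_shape, target_shape))
    return cv2.resize(img, (target_shape, target_shape))   # fallback: resize the whole image
```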
@@ -5,8 +5,7 @@
import tensorflow as tf
import numpy as np
import os, cv2, argparse

from tensorpack import *
from tensorpack.tfutils.symbolic_functions import *
@@ -46,41 +45,34 @@ class Model(ModelDesc):
        l = FullyConnected('fc7', l, out_dim=4096)
        # fc will have an activation summary by default; disable it for the output layer
        logits = FullyConnected('fc8', l, out_dim=1000, nl=tf.identity)
        prob = tf.nn.softmax(logits, name='prob')
def run_test(path, input):
    param_dict = np.load(path, encoding='latin1').item()
    predict_func = OfflinePredictor(PredictConfig(
        model=Model(),
        session_init=ParamRestore(param_dict),
        input_names=['input'],
        output_names=['prob']
    ))

    im = cv2.imread(input)
    assert im is not None, input
    im = cv2.resize(im, (227, 227))[:, :, ::-1].reshape(
        (1, 227, 227, 3)).astype('float32') - 110
    outputs = predict_func([im])[0]
    prob = outputs[0]
    ret = prob.argsort()[-10:][::-1]
    print("Top10 predictions:", ret)

    meta = ILSVRCMeta().get_synset_words_1000()
    print("Top10 class names:", [meta[k] for k in ret])

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', help='comma separated list of GPU(s) to use.')
    parser.add_argument('--load', required=True,
                        help='.npy model file generated by tensorpack.utils.loadcaffe')
    parser.add_argument('--input', help='an input image', required=True)
    args = parser.parse_args()
    if args.gpu:
...
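The condensed preprocessing expression in `run_test` above packs several steps into one line; unpacked for clarity (the `- 110` is a rough per-pixel mean subtraction):

```python
im = cv2.imread(input)               # cv2 loads images as BGR, uint8
im = cv2.resize(im, (227, 227))      # AlexNet input resolution
im = im[:, :, ::-1]                  # reverse the channel axis: BGR -> RGB
im = im.reshape((1, 227, 227, 3))    # add a batch dimension
im = im.astype('float32') - 110      # rough zero-centering
```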
@@ -3,22 +3,15 @@
# File: load-vgg16.py
# Author: Yuxin Wu <ppwwyyxxc@gmail.com>

from __future__ import print_function
import cv2
import tensorflow as tf
import numpy as np
import os, argparse

from tensorpack import *
from tensorpack.tfutils.symbolic_functions import *
from tensorpack.tfutils.summary import *
from tensorpack.dataflow.dataset import ILSVRCMeta

"""
@@ -32,11 +25,8 @@ class Model(ModelDesc):
        return [InputVar(tf.float32, (None, 224, 224, 3), 'input')]

    def _build_graph(self, inputs):
        image = inputs[0]

        with argscope(Conv2D, kernel_shape=3, nl=tf.nn.relu):
            logits = (LinearWrap(image)
                      .Conv2D('conv1_1', 64)
                      .Conv2D('conv1_2', 64)
@@ -66,38 +56,33 @@ class Model(ModelDesc):
                      .FullyConnected('fc7', 4096, nl=tf.nn.relu)
                      .Dropout('drop1', 0.5)
                      .FullyConnected('fc8', out_dim=1000, nl=tf.identity)())
        prob = tf.nn.softmax(logits, name='prob')
def run_test(path, input):
    param_dict = np.load(path, encoding='latin1').item()
    predict_func = OfflinePredictor(PredictConfig(
        model=Model(),
        session_init=ParamRestore(param_dict),
        input_names=['input'],
        output_names=['prob']   # prob:0 is the probability distribution
    ))

    im = cv2.imread(input)
    assert im is not None, input
    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    im = cv2.resize(im, (224, 224)).reshape((1, 224, 224, 3)).astype('float32')
    im = im - 110
    outputs = predict_func([im])[0]
    prob = outputs[0]
    ret = prob.argsort()[-10:][::-1]
    print("Top10 predictions:", ret)

    meta = ILSVRCMeta().get_synset_words_1000()
    print("Top10 class names:", [meta[k] for k in ret])

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', help='comma separated list of GPU(s) to use.')
    parser.add_argument('--load', required=True,
                        help='.npy model file generated by tensorpack.utils.loadcaffe')
    parser.add_argument('--input', help='an input image', required=True)
...