Commit 5aaf6410 authored by Yuxin Wu

misc docs update

parent 527c425b
@@ -9,7 +9,7 @@ You can actually train them and reproduce the performance... not just to see how
+ [InceptionV3 on ImageNet](examples/Inception/inceptionv3.py)
+ [Fully-convolutional Network for Holistically-Nested Edge Detection](examples/HED)
+ [Spatial Transformer Networks on MNIST addition](examples/SpatialTransformer)
+ [Generative Adversarial Networks (GAN) variants](examples/GAN)
+ [DQN variants on Atari games](examples/Atari2600)
+ [Asynchronous Advantage Actor-Critic (A3C) with demos on OpenAI Gym](examples/OpenAIGym)
+ [char-rnn language model](examples/char-rnn)
@@ -18,17 +18,17 @@ You can actually train them and reproduce the performance... not just to see how
Describe your training task with three components:

1. __Model__, or graph. `models/` has some scoped abstractions of common models, but you can simply use
   any symbolic function available in TensorFlow, or most functions in slim/tflearn/tensorlayer.
   `LinearWrap` and `argscope` make large models look simpler ([vgg example](https://github.com/ppwwyyxx/tensorpack/blob/master/examples/load-vgg16.py)).
2. __DataFlow__. tensorpack allows and encourages complex data processing.
   + All data producers share a unified `generator` interface, allowing them to be composed to perform complex preprocessing (see the sketch after this list).
   + Use Python to easily handle any data format, yet still keep a good training speed thanks to multiprocess prefetching & TF Queue prefetching.
     For example, InceptionV3 can run at the same speed as the official code, which reads data using TF operators.
3. __Callbacks__, including everything you want to do apart from the training iterations, such as:
   + Change hyperparameters during training
   + Print some variables of interest
   + Run inference on a test dataset
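To make the `generator` interface concrete, here is a minimal sketch of DataFlow composition. The classes used below exist in tensorpack, but this exact chain is an illustration, not code from this commit:

```python
# A hedged sketch of composing DataFlows; the chain is illustrative.
from tensorpack import *

df = dataset.Cifar10('train')                              # yields [image, label] datapoints
df = AugmentImageComponent(df, [imgaug.Flip(horiz=True)])  # augment the image component
df = BatchData(df, 128)                                    # stack datapoints into batches
df = PrefetchData(df, 12, 3)                               # prefetch 12 batches in 3 processes

df.reset_state()                  # required before iterating manually
for dp in df.get_data():          # every DataFlow exposes this generator interface
    images, labels = dp           # a datapoint is a list of components
    break
```

Each wrapper consumes the generator of the DataFlow below it, which is what makes arbitrary composition possible.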
@@ -39,6 +39,8 @@ With the above components defined, tensorpack trainer will run the training iter
Multi-GPU training is off-the-shelf by simply switching the trainer.
You can also define your own trainer for non-standard training (e.g. GAN).

The components are designed to be independent: you can use only the Model or only the DataFlow in your project.
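Putting the three components together, a hedged sketch of a complete training setup, assuming the `TrainConfig`/`SimpleTrainer` API from around this commit (`MyModel` and the hyperparameters are hypothetical):

```python
import tensorflow as tf
from tensorpack import *

class MyModel(ModelDesc):                                  # 1. the Model
    def _get_input_vars(self):
        return [InputVar(tf.float32, (None, 28, 28), 'input'),
                InputVar(tf.int32, (None,), 'label')]

    def _build_graph(self, input_vars):
        image, label = input_vars
        l = tf.reshape(image, [-1, 28 * 28])
        logits = FullyConnected('fc', l, out_dim=10, nl=tf.identity)
        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, label)
        self.cost = tf.reduce_mean(cost, name='cost')

config = TrainConfig(
    dataset=BatchData(dataset.Mnist('train'), 128),        # 2. the DataFlow
    optimizer=tf.train.AdamOptimizer(1e-3),
    callbacks=Callbacks([StatPrinter(), ModelSaver()]),    # 3. the Callbacks
    model=MyModel(),
    max_epoch=10)
SimpleTrainer(config).train()
```

Switching to multi-GPU training would then only change the last line to a multi-GPU trainer (e.g. `SyncMultiGPUTrainer(config).train()`).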
## Dependencies:

+ Python 2 or 3
...
@@ -4,13 +4,14 @@
# Author: Yuxin Wu <ppwwyyxxc@gmail.com>

import tensorflow as tf
from tensorpack.utils.argtools import memoized

@memoized
def get_dorefa(bitW, bitA, bitG):
    """
    Return the three quantization functions fw, fa, fg, for weights,
    activations and gradients respectively.
    It is unsafe to call this function multiple times with different parameters.
    """
    G = tf.get_default_graph()

    def quantize(x, k):
@@ -34,8 +35,6 @@ def get_dorefa(bitW, bitA, bitG):
            return x
        return quantize(x, bitA)

    @tf.RegisterGradient("FGGrad")
    def grad_fg(op, x):
        rank = x.get_shape().ndims
@@ -48,7 +47,6 @@ def get_dorefa(bitW, bitA, bitG):
        x = tf.clip_by_value(x, 0.0, 1.0)
        x = quantize(x, bitG) - 0.5
        return x * maxx * 2

    def fg(x):
        if bitG == 32:
...
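The `@memoized` decorator is what lets the commit delete the module-level `GRAD_DEFINED` flag: `tf.RegisterGradient("FGGrad")` may only run once per process, and caching `get_dorefa`'s result guarantees the registration code is not re-executed on repeated calls with the same arguments. A hedged illustration of the idea (tensorpack's actual implementation may differ; it behaves like `functools.lru_cache`):

```python
import functools

def memoized(func):
    """Illustrative stand-in for tensorpack.utils.argtools.memoized."""
    cache = {}
    @functools.wraps(func)
    def wrapper(*args):
        if args not in cache:
            cache[args] = func(*args)  # the body (incl. gradient registration) runs once per args
        return cache[args]
    return wrapper
```

This is also why the new docstring warns against calling `get_dorefa` with different parameters: a second distinct `(bitW, bitA, bitG)` tuple would miss the cache and attempt to register `"FGGrad"` again.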
@@ -84,9 +84,9 @@ class Model(ModelDesc):
                 .BatchNorm('bn1').LeakyReLU()
                 .Conv2D('conv2', NF*4)
                 .BatchNorm('bn2').LeakyReLU()
                 .Conv2D('conv3', NF*8, stride=1, padding='VALID')
                 .BatchNorm('bn3').LeakyReLU()
                 .Conv2D('convlast', 1, stride=1, padding='VALID')())
        return l

    def _build_graph(self, input_vars):
...
@@ -119,7 +119,6 @@ def sample(model_path):
    eye = [k for k in np.eye(10)]
    inputs = np.asarray(eye * 10)
    while True:
        o = pred([inputs])
        o = (o[0] + 1) * 128.0
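For reference, the sampling batch built above can be read as follows; this snippet only restates the two lines from the diff:

```python
import numpy as np

eye = [k for k in np.eye(10)]     # the 10 one-hot latent codes
inputs = np.asarray(eye * 10)     # repeat the list 10 times -> a batch of 100
assert inputs.shape == (100, 10)  # 10 samples per code, matching the --sample help text below
```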
@@ -131,7 +130,7 @@ if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', help='comma separated list of GPU(s) to use.')
    parser.add_argument('--load', help='load model')
    parser.add_argument('--sample', action='store_true', help='visualize the space of the 10 latent codes')
    args = parser.parse_args()
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
...
### Code and models for my Gym submissions on Atari games

Use the A3C algorithm from [Asynchronous Methods for Deep Reinforcement Learning](http://arxiv.org/abs/1602.01783).

### To train on an Atari game:
@@ -9,9 +11,9 @@
1. Download models from the [model zoo](https://drive.google.com/open?id=0B9IPQTvr2BBkS0VhX0xmS1c5aFk)
2. `ENV=Breakout-v0; ./run-atari.py --load "$ENV".tfmodel --env "$ENV" --episode 100 --output output_dir`

Models are available for the following Gym Atari environments (click the links for videos on gym):
+ [AirRaid](https://gym.openai.com/evaluations/eval_zIeNk5MxSGOmvGEUxrZDUw) (this one flickers; I don't know why)
+ [Alien](https://gym.openai.com/evaluations/eval_8NR1IvjTQkSIT6En4xSMA)
+ [Amidar](https://gym.openai.com/evaluations/eval_HwEazbHtTYGpCialv9uPhA)
+ [Assault](https://gym.openai.com/evaluations/eval_tCiHwy5QrSdFVucSbBV6Q)
...
@@ -124,9 +124,11 @@ def get_data(train_or_test):
    if isTrain:
        class Resize(imgaug.ImageAugmentor):
            """
            crop 8%~100% of the original image
            See `Going Deeper with Convolutions` by Google.
            """
            def _augment(self, img, _):
                h, w = img.shape[:2]
                area = h * w
                for _ in range(10):
...
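The `_augment` body is collapsed in this diff. For reference, a hedged sketch of the GoogLeNet-style crop the new docstring describes: sample a target area of 8%~100% of the image at a random aspect ratio, retry up to 10 times, and fall back to a plain resize. The constants below are assumptions, not the file's exact code:

```python
import numpy as np
import cv2

def inception_crop(img, target_shape=299, rng=np.random):
    h, w = img.shape[:2]
    area = h * w
    for _ in range(10):                                # up to 10 attempts
        target_area = rng.uniform(0.08, 1.0) * area    # crop 8%~100% of the image
        aspect_ratio = rng.uniform(0.75, 1.333)
        ww = int(np.sqrt(target_area * aspect_ratio))
        hh = int(np.sqrt(target_area / aspect_ratio))
        if rng.uniform() < 0.5:
            ww, hh = hh, ww                            # randomly swap the two sides
        if hh <= h and ww <= w:
            x = rng.randint(0, w - ww + 1)
            y = rng.randint(0, h - hh + 1)
            return cv2.resize(img[y:y + hh, x:x + ww], (target_shape, target_shape))
    return cv2.resize(img, (target_shape, target_shape))   # fallback: resize the whole image
```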
@@ -5,8 +5,7 @@
import tensorflow as tf
import numpy as np
import os, cv2, argparse

from tensorpack import *
from tensorpack.tfutils.symbolic_functions import *
@@ -46,41 +45,34 @@ class Model(ModelDesc):
        l = FullyConnected('fc7', l, out_dim=4096)
        # fc will have an activation summary by default; disable it for the output layer
        logits = FullyConnected('fc8', l, out_dim=1000, nl=tf.identity)
        prob = tf.nn.softmax(logits, name='prob')
def run_test(path, input):
    param_dict = np.load(path, encoding='latin1').item()
    predict_func = OfflinePredictor(PredictConfig(
        model=Model(),
        session_init=ParamRestore(param_dict),
        input_names=['input'],
        output_names=['prob']
    ))

    im = cv2.imread(input)
    assert im is not None, input
    im = cv2.resize(im, (227, 227))[:, :, ::-1].reshape(
        (1, 227, 227, 3)).astype('float32') - 110
    outputs = predict_func([im])[0]
    prob = outputs[0]
    ret = prob.argsort()[-10:][::-1]
    print("Top10 predictions:", ret)

    meta = ILSVRCMeta().get_synset_words_1000()
    print("Top10 class names:", [meta[k] for k in ret])

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', help='comma separated list of GPU(s) to use.')
    parser.add_argument('--load', required=True,
                        help='.npy model file generated by tensorpack.utils.loadcaffe')
    parser.add_argument('--input', help='an input image', required=True)
    args = parser.parse_args()
    if args.gpu:
...
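The condensed preprocessing expression in `run_test` above packs several steps into one line; unpacked for clarity (the `- 110` is a rough per-pixel mean subtraction):

```python
im = cv2.imread(input)               # cv2 loads images as BGR, uint8
im = cv2.resize(im, (227, 227))      # AlexNet input resolution
im = im[:, :, ::-1]                  # reverse the channel axis: BGR -> RGB
im = im.reshape((1, 227, 227, 3))    # add a batch dimension
im = im.astype('float32') - 110      # rough zero-centering
```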
@@ -3,22 +3,15 @@
# File: load-vgg16.py
# Author: Yuxin Wu <ppwwyyxxc@gmail.com>

from __future__ import print_function
import cv2
import tensorflow as tf
import numpy as np
import os, argparse

from tensorpack import *
from tensorpack.tfutils.symbolic_functions import *
from tensorpack.tfutils.summary import *
from tensorpack.dataflow.dataset import ILSVRCMeta

"""
@@ -32,11 +25,8 @@ class Model(ModelDesc):
        return [InputVar(tf.float32, (None, 224, 224, 3), 'input')]

    def _build_graph(self, inputs):
        image = inputs[0]

        with argscope(Conv2D, kernel_shape=3, nl=tf.nn.relu):
            logits = (LinearWrap(image)
                      .Conv2D('conv1_1', 64)
                      .Conv2D('conv1_2', 64)
@@ -66,38 +56,33 @@ class Model(ModelDesc):
                      .FullyConnected('fc7', 4096, nl=tf.nn.relu)
                      .Dropout('drop1', 0.5)
                      .FullyConnected('fc8', out_dim=1000, nl=tf.identity)())
        prob = tf.nn.softmax(logits, name='prob')
def run_test(path, input):
    param_dict = np.load(path, encoding='latin1').item()
    predict_func = OfflinePredictor(PredictConfig(
        model=Model(),
        session_init=ParamRestore(param_dict),
        input_names=['input'],
        output_names=['prob']   # prob:0 is the probability distribution
    ))

    im = cv2.imread(input)
    assert im is not None, input
    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    im = cv2.resize(im, (224, 224)).reshape((1, 224, 224, 3)).astype('float32')
    im = im - 110
    outputs = predict_func([im])[0]
    prob = outputs[0]
    ret = prob.argsort()[-10:][::-1]
    print("Top10 predictions:", ret)

    meta = ILSVRCMeta().get_synset_words_1000()
    print("Top10 class names:", [meta[k] for k in ret])

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', help='comma separated list of GPU(s) to use.')
    parser.add_argument('--load', required=True,
                        help='.npy model file generated by tensorpack.utils.loadcaffe')
    parser.add_argument('--input', help='an input image', required=True)
...