Commit 233b3b90 authored by Yuxin Wu

run autopep8 over examples

parent fb2a051c
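For context: the diff below is the mechanical result of an autopep8 pass (splitting comma-separated imports, re-indenting continuation lines, normalizing spacing around operators and inline comments). The exact invocation is not recorded in the commit, so the following is only a hypothetical sketch of how such a pass could be reproduced; the examples/ path, the recursive glob, and the default option set are assumptions.

# Hypothetical sketch -- not the command actually used for this commit.
# A roughly equivalent CLI form would be: autopep8 --in-place --recursive examples/
import glob

import autopep8

for path in glob.glob('examples/**/*.py', recursive=True):
    with open(path) as f:
        source = f.read()
    # fix_code() applies PEP 8 fixes such as one-import-per-line (E401),
    # continuation-line indentation, and inline-comment spacing (E261/E262).
    fixed = autopep8.fix_code(source)
    with open(path, 'w') as f:
        f.write(fixed)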
......@@ -6,11 +6,15 @@
import numpy as np
import tensorflow as tf
import os, sys, re, time
import os
import sys
import re
import time
import random
import argparse
import subprocess
import multiprocessing, threading
import multiprocessing
import threading
from collections import deque
from tensorpack import *
......@@ -47,9 +51,10 @@ NUM_ACTIONS = None
ROM_FILE = None
METHOD = None
def get_player(viz=False, train=False):
pl = AtariPlayer(ROM_FILE, frame_skip=ACTION_REPEAT,
image_shape=IMAGE_SIZE[::-1], viz=viz, live_lost_as_eoe=train)
image_shape=IMAGE_SIZE[::-1], viz=viz, live_lost_as_eoe=train)
global NUM_ACTIONS
NUM_ACTIONS = pl.get_action_space().num_actions()
if not train:
......@@ -59,15 +64,18 @@ def get_player(viz=False, train=False):
return pl
common.get_player = get_player # so that eval functions in common can use the player
class Model(ModelDesc):
def _get_input_vars(self):
if NUM_ACTIONS is None:
p = get_player(); del p
p = get_player()
del p
return [InputVar(tf.float32, (None,) + IMAGE_SHAPE3, 'state'),
InputVar(tf.int64, (None,), 'action'),
InputVar(tf.float32, (None,), 'reward'),
InputVar(tf.float32, (None,) + IMAGE_SHAPE3, 'next_state'),
InputVar(tf.bool, (None,), 'isOver') ]
InputVar(tf.bool, (None,), 'isOver')]
def _get_DQN_prediction(self, image):
""" image: [0,255]"""
......@@ -75,20 +83,20 @@ class Model(ModelDesc):
with argscope(Conv2D, nl=PReLU.f, use_bias=True), \
argscope(LeakyReLU, alpha=0.01):
l = (LinearWrap(image)
.Conv2D('conv0', out_channel=32, kernel_shape=5)
.MaxPooling('pool0', 2)
.Conv2D('conv1', out_channel=32, kernel_shape=5)
.MaxPooling('pool1', 2)
.Conv2D('conv2', out_channel=64, kernel_shape=4)
.MaxPooling('pool2', 2)
.Conv2D('conv3', out_channel=64, kernel_shape=3)
# the original arch
#.Conv2D('conv0', image, out_channel=32, kernel_shape=8, stride=4)
#.Conv2D('conv1', out_channel=64, kernel_shape=4, stride=2)
#.Conv2D('conv2', out_channel=64, kernel_shape=3)
.FullyConnected('fc0', 512, nl=LeakyReLU)())
.Conv2D('conv0', out_channel=32, kernel_shape=5)
.MaxPooling('pool0', 2)
.Conv2D('conv1', out_channel=32, kernel_shape=5)
.MaxPooling('pool1', 2)
.Conv2D('conv2', out_channel=64, kernel_shape=4)
.MaxPooling('pool2', 2)
.Conv2D('conv3', out_channel=64, kernel_shape=3)
# the original arch
#.Conv2D('conv0', image, out_channel=32, kernel_shape=8, stride=4)
#.Conv2D('conv1', out_channel=64, kernel_shape=4, stride=2)
#.Conv2D('conv2', out_channel=64, kernel_shape=3)
.FullyConnected('fc0', 512, nl=LeakyReLU)())
if METHOD != 'Dueling':
Q = FullyConnected('fct', l, NUM_ACTIONS, nl=tf.identity)
else:
......@@ -101,7 +109,7 @@ class Model(ModelDesc):
state, action, reward, next_state, isOver = inputs
self.predict_value = self._get_DQN_prediction(state)
action_onehot = tf.one_hot(action, NUM_ACTIONS, 1.0, 0.0)
pred_action_value = tf.reduce_sum(self.predict_value * action_onehot, 1) #N,
pred_action_value = tf.reduce_sum(self.predict_value * action_onehot, 1) # N,
max_pred_reward = tf.reduce_mean(tf.reduce_max(
self.predict_value, 1), name='predict_reward')
add_moving_summary(max_pred_reward)
......@@ -125,7 +133,7 @@ class Model(ModelDesc):
self.cost = tf.truediv(symbf.huber_loss(target - pred_action_value),
tf.cast(BATCH_SIZE, tf.float32), name='cost')
summary.add_param_summary([('conv.*/W', ['histogram', 'rms']),
('fc.*/W', ['histogram', 'rms']) ]) # monitor all W
('fc.*/W', ['histogram', 'rms'])]) # monitor all W
def update_target_param(self):
vars = tf.trainable_variables()
......@@ -142,22 +150,23 @@ class Model(ModelDesc):
return [MapGradient(lambda grad: tf.clip_by_global_norm([grad], 5)[0][0]),
SummaryGradient()]
def get_config():
logger.auto_set_dir()
M = Model()
dataset_train = ExpReplay(
predictor_io_names=(['state'], ['Qvalue']),
player=get_player(train=True),
batch_size=BATCH_SIZE,
memory_size=MEMORY_SIZE,
init_memory_size=INIT_MEMORY_SIZE,
exploration=INIT_EXPLORATION,
end_exploration=END_EXPLORATION,
exploration_epoch_anneal=EXPLORATION_EPOCH_ANNEAL,
update_frequency=4,
reward_clip=(-1, 1),
history_len=FRAME_HISTORY)
predictor_io_names=(['state'], ['Qvalue']),
player=get_player(train=True),
batch_size=BATCH_SIZE,
memory_size=MEMORY_SIZE,
init_memory_size=INIT_MEMORY_SIZE,
exploration=INIT_EXPLORATION,
end_exploration=END_EXPLORATION,
exploration_epoch_anneal=EXPLORATION_EPOCH_ANNEAL,
update_frequency=4,
reward_clip=(-1, 1),
history_len=FRAME_HISTORY)
lr = symbf.get_scalar_var('learning_rate', 1e-3, summary=True)
......@@ -167,7 +176,7 @@ def get_config():
callbacks=Callbacks([
StatPrinter(), ModelSaver(),
ScheduledHyperParamSetter('learning_rate',
[(150, 4e-4), (250, 1e-4), (350, 5e-5)]),
[(150, 4e-4), (250, 1e-4), (350, 5e-5)]),
RunOp(lambda: M.update_target_param()),
dataset_train,
PeriodicCallback(Evaluator(EVAL_EPISODE, ['state'], ['Qvalue']), 3),
......@@ -185,10 +194,10 @@ if __name__ == '__main__':
parser.add_argument('--gpu', help='comma separated list of GPU(s) to use.')
parser.add_argument('--load', help='load model')
parser.add_argument('--task', help='task to perform',
choices=['play', 'eval', 'train'], default='train')
choices=['play', 'eval', 'train'], default='train')
parser.add_argument('--rom', help='atari rom', required=True)
parser.add_argument('--algo', help='algorithm',
choices=['DQN', 'Double', 'Dueling'], default='Double')
choices=['DQN', 'Double', 'Dueling'], default='Double')
args = parser.parse_args()
if args.gpu:
......@@ -200,10 +209,10 @@ if __name__ == '__main__':
if args.task != 'train':
cfg = PredictConfig(
model=Model(),
session_init=SaverRestore(args.load),
input_names=['state'],
output_names=['Qvalue'])
model=Model(),
session_init=SaverRestore(args.load),
input_names=['state'],
output_names=['Qvalue'])
if args.task == 'play':
play_model(cfg)
elif args.task == 'eval':
......@@ -213,4 +222,3 @@ if __name__ == '__main__':
if args.load:
config.session_init = SaverRestore(args.load)
QueueInputTrainer(config).train()
......@@ -4,7 +4,8 @@
# Author: Yuxin Wu <ppwwyyxxc@gmail.com>
import numpy as np
import time, os
import time
import os
import cv2
from collections import deque
import threading
......@@ -22,15 +23,17 @@ __all__ = ['AtariPlayer']
ROM_URL = "https://github.com/openai/atari-py/tree/master/atari_py/atari_roms"
_ALE_LOCK = threading.Lock()
class AtariPlayer(RLEnvironment):
"""
A wrapper for the Atari emulator.
Will automatically restart when a real episode ends (isOver might be just
loss of lives but not game over).
"""
def __init__(self, rom_file, viz=0, height_range=(None,None),
frame_skip=4, image_shape=(84, 84), nullop_start=30,
live_lost_as_eoe=True):
def __init__(self, rom_file, viz=0, height_range=(None, None),
frame_skip=4, image_shape=(84, 84), nullop_start=30,
live_lost_as_eoe=True):
"""
:param rom_file: path to the rom
:param frame_skip: skip every k frames and repeat the action
......@@ -47,7 +50,7 @@ class AtariPlayer(RLEnvironment):
if not os.path.isfile(rom_file) and '/' not in rom_file:
rom_file = get_dataset_path('atari_rom', rom_file)
assert os.path.isfile(rom_file), \
"rom {} not found. Please download at {}".format(rom_file, ROM_URL)
"rom {} not found. Please download at {}".format(rom_file, ROM_URL)
try:
ALEInterface.setLoggerMode(ALEInterface.Logger.Warning)
......@@ -84,7 +87,6 @@ class AtariPlayer(RLEnvironment):
self.width, self.height = self.ale.getScreenDims()
self.actions = self.ale.getMinimalActionSet()
self.live_lost_as_eoe = live_lost_as_eoe
self.frame_skip = frame_skip
self.nullop_start = nullop_start
......@@ -112,7 +114,7 @@ class AtariPlayer(RLEnvironment):
if isinstance(self.viz, float):
cv2.imshow(self.windowname, ret)
time.sleep(self.viz)
ret = ret[self.height_range[0]:self.height_range[1],:].astype('float32')
ret = ret[self.height_range[0]:self.height_range[1], :].astype('float32')
# 0.299, 0.587, 0.114. same as rgb2y in torch/image
ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)
ret = cv2.resize(ret, self.image_shape)
......@@ -169,7 +171,7 @@ if __name__ == '__main__':
import time
def benchmark():
a = AtariPlayer(sys.argv[1], viz=False, height_range=(28,-8))
a = AtariPlayer(sys.argv[1], viz=False, height_range=(28, -8))
num = a.get_action_space().num_actions()
rng = get_rng(num)
start = time.time()
......@@ -184,7 +186,8 @@ if __name__ == '__main__':
print(time.time() - start)
if len(sys.argv) == 3 and sys.argv[2] == 'benchmark':
import threading, multiprocessing
import threading
import multiprocessing
for k in range(3):
#th = multiprocessing.Process(target=benchmark)
th = threading.Thread(target=benchmark)
......@@ -193,7 +196,7 @@ if __name__ == '__main__':
benchmark()
else:
a = AtariPlayer(sys.argv[1],
viz=0.03, height_range=(28,-8))
viz=0.03, height_range=(28, -8))
num = a.get_action_space().num_actions()
rng = get_rng(num)
import time
......@@ -204,6 +207,5 @@ if __name__ == '__main__':
print(act)
r, o = a.action(act)
a.current_state()
#time.sleep(0.1)
# time.sleep(0.1)
print(r, o)
......@@ -2,8 +2,10 @@
# -*- coding: utf-8 -*-
# File: common.py
# Author: Yuxin Wu <ppwwyyxxc@gmail.com>
import random, time
import threading, multiprocessing
import random
import time
import threading
import multiprocessing
import numpy as np
from tqdm import tqdm
from six.moves import queue
......@@ -11,11 +13,12 @@ from six.moves import queue
from tensorpack import *
from tensorpack.predict import get_predict_func
from tensorpack.utils.concurrency import *
from tensorpack.utils.stats import *
from tensorpack.utils.stats import *
global get_player
get_player = None
def play_one_episode(player, func, verbose=False):
def f(s):
spc = player.get_action_space()
......@@ -27,6 +30,7 @@ def play_one_episode(player, func, verbose=False):
return act
return np.mean(player.play_one_episode(f))
def play_model(cfg):
player = get_player(viz=0.01)
predfunc = get_predict_func(cfg)
......@@ -34,8 +38,10 @@ def play_model(cfg):
score = play_one_episode(player, predfunc)
print("Total:", score)
def eval_with_funcs(predict_funcs, nr_eval):
class Worker(StoppableThread):
def __init__(self, func, queue):
super(Worker, self).__init__()
self._func = func
......@@ -51,7 +57,7 @@ def eval_with_funcs(predict_funcs, nr_eval):
while not self.stopped():
try:
score = play_one_episode(player, self.func)
#print "Score, ", score
# print "Score, ", score
except RuntimeError:
return
self.queue_put_stoppable(self.q, score)
......@@ -61,15 +67,17 @@ def eval_with_funcs(predict_funcs, nr_eval):
for k in threads:
k.start()
time.sleep(0.1) # avoid simulator bugs
time.sleep(0.1) # avoid simulator bugs
stat = StatCounter()
try:
for _ in tqdm(range(nr_eval), **get_tqdm_kwargs()):
r = q.get()
stat.feed(r)
logger.info("Waiting for all the workers to finish the last run...")
for k in threads: k.stop()
for k in threads: k.join()
for k in threads:
k.stop()
for k in threads:
k.join()
while q.qsize():
r = q.get()
stat.feed(r)
......@@ -80,13 +88,16 @@ def eval_with_funcs(predict_funcs, nr_eval):
return (stat.average, stat.max)
return (0, 0)
def eval_model_multithread(cfg, nr_eval):
func = get_predict_func(cfg)
NR_PROC = min(multiprocessing.cpu_count() // 2, 8)
mean, max = eval_with_funcs([func] * NR_PROC, nr_eval)
logger.info("Average Score: {}; Max Score: {}".format(mean, max))
class Evaluator(Callback):
def __init__(self, nr_eval, input_names, output_names):
self.eval_episode = nr_eval
self.input_names = input_names
......
......@@ -13,26 +13,31 @@ from tensorpack.utils.argtools import memoized
import matplotlib.pyplot as plt
_CM = plt.get_cmap('jet')
def colorize(img, heatmap):
""" img: bgr, [0,255]
heatmap: [0,1]
"""
heatmap = _CM(heatmap)[:,:,[2,1,0]] * 255.0
heatmap = _CM(heatmap)[:, :, [2, 1, 0]] * 255.0
return img * 0.5 + heatmap * 0.5
@memoized
def get_gaussian_map():
sigma = 21
gaussian_map = np.zeros((368, 368), dtype='float32')
for x_p in range(368):
for y_p in range(368):
dist_sq = (x_p - 368/2) * (x_p - 368/2) + \
(y_p - 368/2) * (y_p - 368/2)
dist_sq = (x_p - 368 / 2) * (x_p - 368 / 2) + \
(y_p - 368 / 2) * (y_p - 368 / 2)
exponent = dist_sq / 2.0 / (21**2)
gaussian_map[y_p, x_p] = np.exp(-exponent)
return gaussian_map.reshape((1,368,368,1))
return gaussian_map.reshape((1, 368, 368, 1))
class Model(ModelDesc):
def _get_input_vars(self):
return [InputVar(tf.float32, (None, 368, 368, 3), 'input'),
InputVar(tf.float32, (None, 368, 368, 15), 'label'),
......@@ -43,32 +48,32 @@ class Model(ModelDesc):
image = image / 256.0 - 0.5
gmap = tf.constant(get_gaussian_map())
gmap = tf.pad(gmap, [[0,0],[0,1],[0,1],[0,0]])
gmap = tf.pad(gmap, [[0, 0], [0, 1], [0, 1], [0, 0]])
pool_center = AvgPooling('mappool', gmap, 9, stride=8, padding='VALID')
with argscope(Conv2D, kernel_shape=3, nl=tf.nn.relu,
W_init=tf.random_normal_initializer(stddev=0.01)):
W_init=tf.random_normal_initializer(stddev=0.01)):
shared = (LinearWrap(image)
.Conv2D('conv1_1', 64)
.Conv2D('conv1_2', 64)
.MaxPooling('pool1', 2)
# 184
.Conv2D('conv2_1', 128)
.Conv2D('conv2_2', 128)
.MaxPooling('pool2', 2)
# 92
.Conv2D('conv3_1', 256)
.Conv2D('conv3_2', 256)
.Conv2D('conv3_3', 256)
.Conv2D('conv3_4', 256)
.MaxPooling('pool3', 2)
# 46
.Conv2D('conv4_1', 512)
.Conv2D('conv4_2', 512)
.Conv2D('conv4_3_CPM', 256)
.Conv2D('conv4_4_CPM', 256)
.Conv2D('conv4_5_CPM', 256)
.Conv2D('conv4_6_CPM', 256)
.Conv2D('conv4_7_CPM', 128)())
.Conv2D('conv1_1', 64)
.Conv2D('conv1_2', 64)
.MaxPooling('pool1', 2)
# 184
.Conv2D('conv2_1', 128)
.Conv2D('conv2_2', 128)
.MaxPooling('pool2', 2)
# 92
.Conv2D('conv3_1', 256)
.Conv2D('conv3_2', 256)
.Conv2D('conv3_3', 256)
.Conv2D('conv3_4', 256)
.MaxPooling('pool3', 2)
# 46
.Conv2D('conv4_1', 512)
.Conv2D('conv4_2', 512)
.Conv2D('conv4_3_CPM', 256)
.Conv2D('conv4_4_CPM', 256)
.Conv2D('conv4_5_CPM', 256)
.Conv2D('conv4_6_CPM', 256)
.Conv2D('conv4_7_CPM', 128)())
def add_stage(stage, l):
l = tf.concat(3, [l, shared, pool_center], name='concat_stage{}'.format(stage))
......@@ -76,20 +81,21 @@ class Model(ModelDesc):
l = Conv2D('Mconv{}_stage{}'.format(i, stage), l, 128)
l = Conv2D('Mconv6_stage{}'.format(stage), l, 128, kernel_shape=1)
l = Conv2D('Mconv7_stage{}'.format(stage),
l, 15, kernel_shape=1, nl=tf.identity)
l, 15, kernel_shape=1, nl=tf.identity)
return l
with argscope(Conv2D, kernel_shape=7, nl=tf.nn.relu):
out1 = (LinearWrap(shared)
.Conv2D('conv5_1_CPM', 512, kernel_shape=1)
.Conv2D('conv5_2_CPM', 15, kernel_shape=1, nl=tf.identity)())
.Conv2D('conv5_1_CPM', 512, kernel_shape=1)
.Conv2D('conv5_2_CPM', 15, kernel_shape=1, nl=tf.identity)())
out2 = add_stage(2, out1)
out3 = add_stage(3, out2)
out4 = add_stage(4, out3)
out5 = add_stage(5, out4)
out6 = add_stage(6, out5)
resized_map = tf.image.resize_bilinear(out6,
[368,368], name='resized_map')
[368, 368], name='resized_map')
def run_test(model_path, img_file):
param_dict = np.load(model_path, encoding='latin1').item()
......@@ -101,9 +107,9 @@ def run_test(model_path, img_file):
))
im = cv2.imread(img_file, cv2.IMREAD_COLOR).astype('float32')
im = cv2.resize(im, (368,368))
im = cv2.resize(im, (368, 368))
out = predict_func([[im]])[0][0]
hm = out[:,:,:14].sum(axis=2)
hm = out[:, :, :14].sum(axis=2)
viz = colorize(im, hm)
cv2.imwrite("output.jpg", viz)
......
......@@ -5,7 +5,9 @@
from tensorpack import ProxyDataFlow, get_rng
class DisturbLabel(ProxyDataFlow):
def __init__(self, ds, prob):
super(DisturbLabel, self).__init__(ds)
self.prob = prob
......@@ -19,4 +21,3 @@ class DisturbLabel(ProxyDataFlow):
if self.rng.rand() < self.prob:
l = self.rng.choice(10)
yield [img, l]
......@@ -5,7 +5,8 @@
import numpy as np
import tensorflow as tf
import os, sys
import os
import sys
import argparse
from tensorpack import *
......@@ -13,9 +14,10 @@ from disturb import DisturbLabel
import imp
mnist_example = imp.load_source('mnist_example',
os.path.join(os.path.dirname(__file__), '..', 'mnist-convnet.py'))
os.path.join(os.path.dirname(__file__), '..', 'mnist-convnet.py'))
get_config = mnist_example.get_config
def get_data():
dataset_train = BatchData(DisturbLabel(dataset.Mnist('train'), args.prob), 128)
dataset_test = BatchData(dataset.Mnist('test'), 256, remainder=True)
......@@ -24,19 +26,21 @@ mnist_example.get_data = get_data
IMAGE_SIZE = 28
class Model(mnist_example.Model):
def _build_graph(self, input_vars):
image, label = input_vars
image = tf.expand_dims(image, 3)
with argscope(Conv2D, kernel_shape=5, nl=tf.nn.relu):
logits = (LinearWrap(image) # the starting brace is only for line-breaking
.Conv2D('conv0', out_channel=32, padding='VALID')
.MaxPooling('pool0', 2)
.Conv2D('conv1', out_channel=64, padding='VALID')
.MaxPooling('pool1', 2)
.FullyConnected('fc0', 512, nl=tf.nn.relu)
.FullyConnected('fc1', out_dim=10, nl=tf.identity)())
logits = (LinearWrap(image) # the starting brace is only for line-breaking
.Conv2D('conv0', out_channel=32, padding='VALID')
.MaxPooling('pool0', 2)
.Conv2D('conv1', out_channel=64, padding='VALID')
.MaxPooling('pool1', 2)
.FullyConnected('fc0', 512, nl=tf.nn.relu)
.FullyConnected('fc1', out_dim=10, nl=tf.identity)())
prob = tf.nn.softmax(logits, name='prob')
wrong = symbolic_functions.prediction_incorrect(logits, label)
......@@ -63,4 +67,3 @@ if __name__ == '__main__':
if args.load:
config.session_init = SaverRestore(args.load)
QueueInputTrainer(config).train()
......@@ -16,20 +16,20 @@ import imp
svhn_example = imp.load_source('svhn_example',
os.path.join(os.path.dirname(__file__), '..', 'svhn-digit-convnet.py'))
Model = svhn_example.Model
get_config = svhn_example.get_config
Model=svhn_example.Model
get_config=svhn_example.get_config
def get_data():
d1 = dataset.SVHNDigit('train')
d2 = dataset.SVHNDigit('extra')
data_train = RandomMixData([d1, d2])
data_train = DisturbLabel(data_train, args.prob)
data_test = dataset.SVHNDigit('test')
d1=dataset.SVHNDigit('train')
d2=dataset.SVHNDigit('extra')
data_train=RandomMixData([d1, d2])
data_train=DisturbLabel(data_train, args.prob)
data_test=dataset.SVHNDigit('test')
augmentors = [
augmentors=[
imgaug.Resize((40, 40)),
imgaug.Brightness(30),
imgaug.Contrast((0.5,1.5)),
imgaug.Contrast((0.5, 1.5)),
]
data_train = AugmentImageComponent(data_train, augmentors)
data_train = BatchData(data_train, 128)
......
......@@ -9,7 +9,8 @@ import argparse
import numpy as np
import multiprocessing
import msgpack
import os, sys
import os
import sys
from tensorpack import *
from tensorpack.tfutils.symbolic_functions import *
......@@ -69,10 +70,12 @@ BITG = 6
TOTAL_BATCH_SIZE = 128
BATCH_SIZE = 64
class Model(ModelDesc):
def _get_input_vars(self):
return [InputVar(tf.float32, [None, 224, 224, 3], 'input'),
InputVar(tf.int32, [None], 'label') ]
InputVar(tf.int32, [None], 'label')]
def _build_graph(self, input_vars):
image, label = input_vars
......@@ -81,6 +84,7 @@ class Model(ModelDesc):
fw, fa, fg = get_dorefa(BITW, BITA, BITG)
# monkey-patch tf.get_variable to apply fw
old_get_variable = tf.get_variable
def new_get_variable(name, shape=None, **kwargs):
v = old_get_variable(name, shape, **kwargs)
# don't binarize first and last layer
......@@ -102,41 +106,41 @@ class Model(ModelDesc):
with argscope(BatchNorm, decay=0.9, epsilon=1e-4), \
argscope([Conv2D, FullyConnected], use_bias=False, nl=tf.identity):
logits = (LinearWrap(image)
.Conv2D('conv0', 96, 12, stride=4, padding='VALID')
.apply(activate)
.Conv2D('conv1', 256, 5, padding='SAME', split=2)
.apply(fg)
.BatchNorm('bn1')
.MaxPooling('pool1', 3, 2, padding='SAME')
.apply(activate)
.Conv2D('conv2', 384, 3)
.apply(fg)
.BatchNorm('bn2')
.MaxPooling('pool2', 3, 2, padding='SAME')
.apply(activate)
.Conv2D('conv3', 384, 3, split=2)
.apply(fg)
.BatchNorm('bn3')
.apply(activate)
.Conv2D('conv4', 256, 3, split=2)
.apply(fg)
.BatchNorm('bn4')
.MaxPooling('pool4', 3, 2, padding='VALID')
.apply(activate)
.FullyConnected('fc0', 4096)
.apply(fg)
.BatchNorm('bnfc0')
.apply(activate)
.FullyConnected('fc1', 4096)
.apply(fg)
.BatchNorm('bnfc1')
.apply(nonlin)
.FullyConnected('fct', 1000, use_bias=True)())
.Conv2D('conv0', 96, 12, stride=4, padding='VALID')
.apply(activate)
.Conv2D('conv1', 256, 5, padding='SAME', split=2)
.apply(fg)
.BatchNorm('bn1')
.MaxPooling('pool1', 3, 2, padding='SAME')
.apply(activate)
.Conv2D('conv2', 384, 3)
.apply(fg)
.BatchNorm('bn2')
.MaxPooling('pool2', 3, 2, padding='SAME')
.apply(activate)
.Conv2D('conv3', 384, 3, split=2)
.apply(fg)
.BatchNorm('bn3')
.apply(activate)
.Conv2D('conv4', 256, 3, split=2)
.apply(fg)
.BatchNorm('bn4')
.MaxPooling('pool4', 3, 2, padding='VALID')
.apply(activate)
.FullyConnected('fc0', 4096)
.apply(fg)
.BatchNorm('bnfc0')
.apply(activate)
.FullyConnected('fc1', 4096)
.apply(fg)
.BatchNorm('bnfc1')
.apply(nonlin)
.FullyConnected('fct', 1000, use_bias=True)())
tf.get_variable = old_get_variable
prob = tf.nn.softmax(logits, name='output')
......@@ -156,28 +160,31 @@ class Model(ModelDesc):
add_param_summary([('.*/W', ['histogram', 'rms'])])
self.cost = tf.add_n([cost, wd_cost], name='cost')
def get_data(dataset_name):
isTrain = dataset_name == 'train'
ds = dataset.ILSVRC12(args.data, dataset_name, shuffle=isTrain)
meta = dataset.ILSVRCMeta()
pp_mean = meta.get_per_pixel_mean()
pp_mean_224 = pp_mean[16:-16,16:-16,:]
pp_mean_224 = pp_mean[16:-16, 16:-16, :]
if isTrain:
class Resize(imgaug.ImageAugmentor):
def __init__(self):
self._init(locals())
def _augment(self, img, _):
h, w = img.shape[:2]
size = 224
scale = self.rng.randint(size, 308) * 1.0 / min(h, w)
scaleX = scale * self.rng.uniform(0.85, 1.15)
scaleY = scale * self.rng.uniform(0.85, 1.15)
desSize = map(int, (max(size, min(w, scaleX * w)),\
max(size, min(h, scaleY * h))))
desSize = map(int, (max(size, min(w, scaleX * w)),
max(size, min(h, scaleY * h))))
dst = cv2.resize(img, tuple(desSize),
interpolation=cv2.INTER_CUBIC)
interpolation=cv2.INTER_CUBIC)
return dst
augmentors = [
......@@ -186,11 +193,11 @@ def get_data(dataset_name):
imgaug.RandomApplyAug(imgaug.GaussianBlur(3), 0.5),
imgaug.Brightness(30, True),
imgaug.Gamma(),
imgaug.Contrast((0.8,1.2), True),
imgaug.Contrast((0.8, 1.2), True),
imgaug.RandomCrop((224, 224)),
imgaug.RandomApplyAug(imgaug.JpegNoise(), 0.8),
imgaug.RandomApplyAug(imgaug.GaussianDeform(
[(0.2, 0.2), (0.2, 0.8), (0.8,0.8), (0.8,0.2)],
[(0.2, 0.2), (0.2, 0.8), (0.8, 0.8), (0.8, 0.2)],
(224, 224), 0.2, 3), 0.1),
imgaug.Flip(horiz=True),
imgaug.MapImage(lambda x: x - pp_mean_224),
......@@ -199,7 +206,7 @@ def get_data(dataset_name):
def resize_func(im):
h, w = im.shape[:2]
scale = 256.0 / min(h, w)
desSize = map(int, (max(224, min(w, scale * w)),\
desSize = map(int, (max(224, min(w, scale * w)),
max(224, min(h, scale * h))))
im = cv2.resize(im, tuple(desSize), interpolation=cv2.INTER_CUBIC)
return im
......@@ -214,6 +221,7 @@ def get_data(dataset_name):
ds = PrefetchDataZMQ(ds, min(12, multiprocessing.cpu_count()))
return ds
def get_config():
logger.auto_set_dir()
......@@ -228,19 +236,20 @@ def get_config():
optimizer=tf.train.AdamOptimizer(lr, epsilon=1e-5),
callbacks=Callbacks([
StatPrinter(), ModelSaver(),
#HumanHyperParamSetter('learning_rate'),
# HumanHyperParamSetter('learning_rate'),
ScheduledHyperParamSetter(
'learning_rate', [(56, 2e-5), (64, 4e-6)]),
InferenceRunner(data_test,
[ScalarStats('cost'),
ClassificationError('wrong-top1', 'val-error-top1'),
ClassificationError('wrong-top5', 'val-error-top5')])
[ScalarStats('cost'),
ClassificationError('wrong-top1', 'val-error-top1'),
ClassificationError('wrong-top5', 'val-error-top5')])
]),
model=Model(),
step_per_epoch=10000,
max_epoch=100,
)
def run_image(model, sess_init, inputs):
pred_config = PredictConfig(
model=model,
......@@ -252,13 +261,13 @@ def run_image(model, sess_init, inputs):
predict_func = get_predict_func(pred_config)
meta = dataset.ILSVRCMeta()
pp_mean = meta.get_per_pixel_mean()
pp_mean_224 = pp_mean[16:-16,16:-16,:]
pp_mean_224 = pp_mean[16:-16, 16:-16, :]
words = meta.get_synset_words_1000()
def resize_func(im):
h, w = im.shape[:2]
scale = 256.0 / min(h, w)
desSize = map(int, (max(224, min(w, scale * w)),\
desSize = map(int, (max(224, min(w, scale * w)),
max(224, min(h, scale * h))))
im = cv2.resize(im, tuple(desSize), interpolation=cv2.INTER_CUBIC)
return im
......@@ -272,7 +281,7 @@ def run_image(model, sess_init, inputs):
img = cv2.imread(f).astype('float32')
assert img is not None
img = transformers.augment(img)[np.newaxis, :,:,:]
img = transformers.augment(img)[np.newaxis, :, :, :]
outputs = predict_func([img])[0]
prob = outputs[0]
ret = prob.argsort()[-10:][::-1]
......@@ -287,7 +296,7 @@ if __name__ == '__main__':
parser.add_argument('--load', help='load a checkpoint, or a npy (given as the pretrained model)')
parser.add_argument('--data', help='ILSVRC dataset dir')
parser.add_argument('--dorefa',
help='number of bits for W,A,G, separated by comma', required=True)
help='number of bits for W,A,G, separated by comma', required=True)
parser.add_argument('--run', help='run on a list of images with the pretrained model', nargs='*')
args = parser.parse_args()
......
......@@ -6,6 +6,7 @@
import tensorflow as tf
from tensorpack.utils.argtools import memoized
@memoized
def get_dorefa(bitW, bitA, bitG):
"""
......@@ -15,7 +16,7 @@ def get_dorefa(bitW, bitA, bitG):
G = tf.get_default_graph()
def quantize(x, k):
n = float(2**k-1)
n = float(2**k - 1)
with G.gradient_override_map({"Floor": "Identity"}):
return tf.floor(x * n + 0.5) / n
......@@ -39,11 +40,11 @@ def get_dorefa(bitW, bitA, bitG):
def grad_fg(op, x):
rank = x.get_shape().ndims
assert rank is not None
maxx = tf.reduce_max(tf.abs(x), list(range(1,rank)), keep_dims=True)
maxx = tf.reduce_max(tf.abs(x), list(range(1, rank)), keep_dims=True)
x = x / maxx
n = float(2**bitG-1)
n = float(2**bitG - 1)
x = x * 0.5 + 0.5 + tf.random_uniform(
tf.shape(x), minval=-0.5/n, maxval=0.5/n)
tf.shape(x), minval=-0.5 / n, maxval=0.5 / n)
x = tf.clip_by_value(x, 0.0, 1.0)
x = quantize(x, bitG) - 0.5
return x * maxx * 2
......@@ -54,4 +55,3 @@ def get_dorefa(bitW, bitA, bitG):
with G.gradient_override_map({"Identity": "FGGrad"}):
return tf.identity(x)
return fw, fa, fg
......@@ -40,10 +40,12 @@ BITW = 1
BITA = 2
BITG = 4
class Model(ModelDesc):
def _get_input_vars(self):
return [InputVar(tf.float32, [None, 40, 40, 3], 'input'),
InputVar(tf.int32, [None], 'label') ]
InputVar(tf.int32, [None], 'label')]
def _build_graph(self, input_vars):
image, label = input_vars
......@@ -52,6 +54,7 @@ class Model(ModelDesc):
fw, fa, fg = get_dorefa(BITW, BITA, BITG)
# monkey-patch tf.get_variable to apply fw
old_get_variable = tf.get_variable
def new_get_variable(name, shape=None, **kwargs):
v = old_get_variable(name, shape, **kwargs)
# don't binarize first and last layer
......@@ -62,9 +65,9 @@ class Model(ModelDesc):
return fw(v)
tf.get_variable = new_get_variable
def cabs(x):
return tf.minimum(1.0, tf.abs(x), name='cabs')
def activate(x):
return fa(cabs(x))
......@@ -73,38 +76,38 @@ class Model(ModelDesc):
with argscope(BatchNorm, decay=0.9, epsilon=1e-4), \
argscope(Conv2D, use_bias=False, nl=tf.identity):
logits = (LinearWrap(image)
.Conv2D('conv0', 48, 5, padding='VALID', use_bias=True)
.MaxPooling('pool0', 2, padding='SAME')
.apply(activate)
# 18
.Conv2D('conv1', 64, 3, padding='SAME')
.apply(fg)
.BatchNorm('bn1').apply(activate)
.Conv2D('conv2', 64, 3, padding='SAME')
.apply(fg)
.BatchNorm('bn2')
.MaxPooling('pool1', 2, padding='SAME')
.apply(activate)
# 9
.Conv2D('conv3', 128, 3, padding='VALID')
.apply(fg)
.BatchNorm('bn3').apply(activate)
# 7
.Conv2D('conv4', 128, 3, padding='SAME')
.apply(fg)
.BatchNorm('bn4').apply(activate)
.Conv2D('conv5', 128, 3, padding='VALID')
.apply(fg)
.BatchNorm('bn5').apply(activate)
# 5
.tf.nn.dropout(0.5 if is_training else 1.0)
.Conv2D('conv6', 512, 5, padding='VALID')
.apply(fg).BatchNorm('bn6')
.apply(cabs)
.FullyConnected('fc1', 10, nl=tf.identity)())
.Conv2D('conv0', 48, 5, padding='VALID', use_bias=True)
.MaxPooling('pool0', 2, padding='SAME')
.apply(activate)
# 18
.Conv2D('conv1', 64, 3, padding='SAME')
.apply(fg)
.BatchNorm('bn1').apply(activate)
.Conv2D('conv2', 64, 3, padding='SAME')
.apply(fg)
.BatchNorm('bn2')
.MaxPooling('pool1', 2, padding='SAME')
.apply(activate)
# 9
.Conv2D('conv3', 128, 3, padding='VALID')
.apply(fg)
.BatchNorm('bn3').apply(activate)
# 7
.Conv2D('conv4', 128, 3, padding='SAME')
.apply(fg)
.BatchNorm('bn4').apply(activate)
.Conv2D('conv5', 128, 3, padding='VALID')
.apply(fg)
.BatchNorm('bn5').apply(activate)
# 5
.tf.nn.dropout(0.5 if is_training else 1.0)
.Conv2D('conv6', 512, 5, padding='VALID')
.apply(fg).BatchNorm('bn6')
.apply(cabs)
.FullyConnected('fc1', 10, nl=tf.identity)())
tf.get_variable = old_get_variable
prob = tf.nn.softmax(logits, name='output')
......@@ -122,6 +125,7 @@ class Model(ModelDesc):
add_param_summary([('.*/W', ['histogram', 'rms'])])
self.cost = tf.add_n([cost, wd_cost], name='cost')
def get_config():
logger.auto_set_dir()
......@@ -134,17 +138,17 @@ def get_config():
augmentors = [
imgaug.Resize((40, 40)),
imgaug.Brightness(30),
imgaug.Contrast((0.5,1.5)),
#imgaug.GaussianDeform( # this is slow but helpful. only use it when you have lots of cpus
#[(0.2, 0.2), (0.2, 0.8), (0.8,0.8), (0.8,0.2)],
#(40,40), 0.2, 3),
imgaug.Contrast((0.5, 1.5)),
# imgaug.GaussianDeform( # this is slow but helpful. only use it when you have lots of cpus
#[(0.2, 0.2), (0.2, 0.8), (0.8,0.8), (0.8,0.2)],
#(40,40), 0.2, 3),
]
data_train = AugmentImageComponent(data_train, augmentors)
data_train = BatchData(data_train, 128)
data_train = PrefetchDataZMQ(data_train, 5)
step_per_epoch = data_train.size()
augmentors = [ imgaug.Resize((40, 40)) ]
augmentors = [imgaug.Resize((40, 40))]
data_test = AugmentImageComponent(data_test, augmentors)
data_test = BatchData(data_test, 128, remainder=True)
......@@ -162,7 +166,7 @@ def get_config():
StatPrinter(),
ModelSaver(),
InferenceRunner(data_test,
[ScalarStats('cost'), ClassificationError()])
[ScalarStats('cost'), ClassificationError()])
]),
model=Model(),
step_per_epoch=step_per_epoch,
......@@ -174,8 +178,8 @@ if __name__ == '__main__':
parser.add_argument('--gpu', help='the GPU to use')
parser.add_argument('--load', help='load a checkpoint')
parser.add_argument('--dorefa',
help='number of bits for W,A,G, separated by comma. Defaults to \'1,2,4\'',
default='1,2,4')
help='number of bits for W,A,G, separated by comma. Defaults to \'1,2,4\'',
default='1,2,4')
args = parser.parse_args()
BITW, BITA, BITG = map(int, args.dorefa.split(','))
......
......@@ -5,8 +5,10 @@
import numpy as np
import tensorflow as tf
import glob, pickle
import os, sys
import glob
import pickle
import os
import sys
import argparse
import cv2
......@@ -32,15 +34,17 @@ CFG.SHAPE = 64
CFG.BATCH = 128
CFG.Z_DIM = 100
class Model(ModelDesc):
def _get_input_vars(self):
return [InputVar(tf.float32, (None, CFG.SHAPE, CFG.SHAPE, 3), 'input') ]
return [InputVar(tf.float32, (None, CFG.SHAPE, CFG.SHAPE, 3), 'input')]
def generator(self, z):
""" return a image generated from z"""
nf = 64
l = FullyConnected('fc0', z, nf * 8 * 4 * 4, nl=tf.identity)
l = tf.reshape(l, [-1, 4, 4, nf*8])
l = tf.reshape(l, [-1, 4, 4, nf * 8])
l = BNReLU(l)
with argscope(Deconv2D, nl=BNReLU, kernel_shape=4, stride=2):
l = Deconv2D('deconv1', l, [8, 8, nf * 4])
......@@ -56,14 +60,14 @@ class Model(ModelDesc):
with argscope(Conv2D, nl=tf.identity, kernel_shape=4, stride=2), \
argscope(LeakyReLU, alpha=0.2):
l = (LinearWrap(imgs)
.Conv2D('conv0', nf, nl=LeakyReLU)
.Conv2D('conv1', nf*2)
.BatchNorm('bn1').LeakyReLU()
.Conv2D('conv2', nf*4)
.BatchNorm('bn2').LeakyReLU()
.Conv2D('conv3', nf*8)
.BatchNorm('bn3').LeakyReLU()
.FullyConnected('fct', 1, nl=tf.identity)())
.Conv2D('conv0', nf, nl=LeakyReLU)
.Conv2D('conv1', nf * 2)
.BatchNorm('bn1').LeakyReLU()
.Conv2D('conv2', nf * 4)
.BatchNorm('bn2').LeakyReLU()
.Conv2D('conv3', nf * 8)
.BatchNorm('bn3').LeakyReLU()
.FullyConnected('fct', 1, nl=tf.identity)())
return l
def _build_graph(self, input_vars):
......@@ -74,7 +78,7 @@ class Model(ModelDesc):
z = tf.placeholder_with_default(z, [None, CFG.Z_DIM], name='z')
with argscope([Conv2D, Deconv2D, FullyConnected],
W_init=tf.truncated_normal_initializer(stddev=0.02)):
W_init=tf.truncated_normal_initializer(stddev=0.02)):
with tf.variable_scope('gen'):
image_gen = self.generator(z)
tf.summary.image('gen', image_gen, max_outputs=30)
......@@ -88,16 +92,18 @@ class Model(ModelDesc):
self.g_vars = [v for v in all_vars if v.name.startswith('gen/')]
self.d_vars = [v for v in all_vars if v.name.startswith('discrim/')]
def get_data():
datadir = CFG.data
imgs = glob.glob(datadir + '/*.jpg')
ds = ImageFromFile(imgs, channel=3, shuffle=True)
augs = [ imgaug.CenterCrop(140), imgaug.Resize(64) ]
augs = [imgaug.CenterCrop(140), imgaug.Resize(64)]
ds = AugmentImageComponent(ds, augs)
ds = BatchData(ds, CFG.BATCH)
ds = PrefetchDataZMQ(ds, 1)
return ds
def get_config():
logger.auto_set_dir()
dataset = get_data()
......@@ -114,17 +120,18 @@ def get_config():
max_epoch=200,
)
def sample(model_path):
pred = PredictConfig(
session_init=get_model_loader(model_path),
model=Model(),
input_names=['z'],
output_names=['gen/gen', 'z'])
session_init=get_model_loader(model_path),
model=Model(),
input_names=['z'],
output_names=['gen/gen', 'z'])
pred = SimpleDatasetPredictor(pred, RandomZData((100, 100)))
for o in pred.get_result():
o, zs = o[0] + 1, o[1]
o = o * 128.0
o = o[:,:,:,::-1]
o = o[:, :, :, ::-1]
viz = next(build_patch_list(o, nr_row=10, nr_col=10, viz=True))
if __name__ == '__main__':
......
......@@ -7,11 +7,13 @@ import tensorflow as tf
import numpy as np
import time
from tensorpack import (FeedfreeTrainer, TowerContext,
get_global_step_var, QueueInput)
get_global_step_var, QueueInput)
from tensorpack.tfutils.summary import summary_moving_average, add_moving_summary
from tensorpack.dataflow import DataFlow
class GANTrainer(FeedfreeTrainer):
def __init__(self, config):
self._input_method = QueueInput(config.dataset)
super(GANTrainer, self).__init__(config)
......@@ -22,10 +24,10 @@ class GANTrainer(FeedfreeTrainer):
actual_inputs = self._get_input_tensors()
self.model.build_graph(actual_inputs)
self.g_min = self.config.optimizer.minimize(self.model.g_loss,
var_list=self.model.g_vars, name='g_op')
var_list=self.model.g_vars, name='g_op')
with tf.control_dependencies([self.g_min]):
self.d_min = self.config.optimizer.minimize(self.model.d_loss,
var_list=self.model.d_vars, name='d_op')
var_list=self.model.d_vars, name='d_op')
self.gs_incr = tf.assign_add(get_global_step_var(), 1, name='global_step_incr')
self.summary_op = summary_moving_average()
self.train_op = tf.group(self.d_min, self.summary_op, self.gs_incr)
......@@ -33,14 +35,18 @@ class GANTrainer(FeedfreeTrainer):
def run_step(self):
self.sess.run(self.train_op)
class RandomZData(DataFlow):
def __init__(self, shape):
super(RandomZData, self).__init__()
self.shape = shape
def get_data(self):
while True:
yield [np.random.uniform(-1, 1, size=self.shape)]
def build_GAN_losses(vecpos, vecneg):
"""
:param vecpos, vecneg: output of the discriminator (logits) for real
......@@ -64,6 +70,6 @@ def build_GAN_losses(vecpos, vecneg):
vecneg, tf.ones_like(vecneg)), name='g_CE_loss')
d_loss = tf.add(d_loss_pos, d_loss_neg, name='d_CE_loss')
add_moving_summary(d_loss_pos, d_loss_neg,
g_loss, d_loss,
d_pos_acc, d_neg_acc)
g_loss, d_loss,
d_pos_acc, d_neg_acc)
return g_loss, d_loss
......@@ -5,8 +5,10 @@
import numpy as np
import tensorflow as tf
import glob, pickle
import os, sys
import glob
import pickle
import os
import sys
import argparse
import cv2
......@@ -36,12 +38,14 @@ BATCH = 1
IN_CH = 3
OUT_CH = 3
LAMBDA = 100
NF = 64 # number of filters
NF = 64  # number of filters
class Model(ModelDesc):
def _get_input_vars(self):
return [InputVar(tf.float32, (None, SHAPE, SHAPE, IN_CH), 'input') ,
InputVar(tf.float32, (None, SHAPE, SHAPE, OUT_CH), 'output') ]
return [InputVar(tf.float32, (None, SHAPE, SHAPE, IN_CH), 'input'),
InputVar(tf.float32, (None, SHAPE, SHAPE, OUT_CH), 'output')]
def generator(self, imgs):
# imgs: input: 256x256xch
......@@ -50,49 +54,49 @@ class Model(ModelDesc):
argscope(Dropout, is_training=True):
# always use local stat for BN, and apply dropout even in testing
with argscope(Conv2D, kernel_shape=4, stride=2,
nl=lambda x, name: LeakyReLU(BatchNorm('bn', x), name=name)):
nl=lambda x, name: LeakyReLU(BatchNorm('bn', x), name=name)):
e1 = Conv2D('conv1', imgs, NF, nl=LeakyReLU)
e2 = Conv2D('conv2', e1, NF*2)
e3 = Conv2D('conv3', e2, NF*4)
e4 = Conv2D('conv4', e3, NF*8)
e5 = Conv2D('conv5', e4, NF*8)
e6 = Conv2D('conv6', e5, NF*8)
e7 = Conv2D('conv7', e6, NF*8)
e8 = Conv2D('conv8', e7, NF*8, nl=BNReLU) # 1x1
e2 = Conv2D('conv2', e1, NF * 2)
e3 = Conv2D('conv3', e2, NF * 4)
e4 = Conv2D('conv4', e3, NF * 8)
e5 = Conv2D('conv5', e4, NF * 8)
e6 = Conv2D('conv6', e5, NF * 8)
e7 = Conv2D('conv7', e6, NF * 8)
e8 = Conv2D('conv8', e7, NF * 8, nl=BNReLU) # 1x1
with argscope(Deconv2D, nl=BNReLU, kernel_shape=4, stride=2):
return (LinearWrap(e8)
.Deconv2D('deconv1', NF*8)
.Dropout()
.ConcatWith(3, e7)
.Deconv2D('deconv2', NF*8)
.Dropout()
.ConcatWith(3, e6)
.Deconv2D('deconv3', NF*8)
.Dropout()
.ConcatWith(3, e5)
.Deconv2D('deconv4', NF*8)
.ConcatWith(3, e4)
.Deconv2D('deconv5', NF*4)
.ConcatWith(3, e3)
.Deconv2D('deconv6', NF*2)
.ConcatWith(3, e2)
.Deconv2D('deconv7', NF*1)
.ConcatWith(3, e1)
.Deconv2D('deconv8', OUT_CH, nl=tf.tanh)())
.Deconv2D('deconv1', NF * 8)
.Dropout()
.ConcatWith(3, e7)
.Deconv2D('deconv2', NF * 8)
.Dropout()
.ConcatWith(3, e6)
.Deconv2D('deconv3', NF * 8)
.Dropout()
.ConcatWith(3, e5)
.Deconv2D('deconv4', NF * 8)
.ConcatWith(3, e4)
.Deconv2D('deconv5', NF * 4)
.ConcatWith(3, e3)
.Deconv2D('deconv6', NF * 2)
.ConcatWith(3, e2)
.Deconv2D('deconv7', NF * 1)
.ConcatWith(3, e1)
.Deconv2D('deconv8', OUT_CH, nl=tf.tanh)())
def discriminator(self, inputs, outputs):
""" return a (b, 1) logits"""
l = tf.concat(3, [inputs, outputs])
with argscope(Conv2D, nl=tf.identity, kernel_shape=4, stride=2):
l = (LinearWrap(l)
.Conv2D('conv0', NF, nl=LeakyReLU)
.Conv2D('conv1', NF*2)
.BatchNorm('bn1').LeakyReLU()
.Conv2D('conv2', NF*4)
.BatchNorm('bn2').LeakyReLU()
.Conv2D('conv3', NF*8, stride=1, padding='VALID')
.BatchNorm('bn3').LeakyReLU()
.Conv2D('convlast', 1, stride=1, padding='VALID')())
.Conv2D('conv0', NF, nl=LeakyReLU)
.Conv2D('conv1', NF * 2)
.BatchNorm('bn1').LeakyReLU()
.Conv2D('conv2', NF * 4)
.BatchNorm('bn2').LeakyReLU()
.Conv2D('conv3', NF * 8, stride=1, padding='VALID')
.BatchNorm('bn3').LeakyReLU()
.Conv2D('convlast', 1, stride=1, padding='VALID')())
return l
def _build_graph(self, input_vars):
......@@ -100,7 +104,7 @@ class Model(ModelDesc):
input, output = input / 128.0 - 1, output / 128.0 - 1
with argscope([Conv2D, Deconv2D],
W_init=tf.truncated_normal_initializer(stddev=0.02)), \
W_init=tf.truncated_normal_initializer(stddev=0.02)), \
argscope(LeakyReLU, alpha=0.2):
with tf.variable_scope('gen'):
fake_output = self.generator(input)
......@@ -128,33 +132,36 @@ class Model(ModelDesc):
self.g_vars = [v for v in all_vars if v.name.startswith('gen/')]
self.d_vars = [v for v in all_vars if v.name.startswith('discrim/')]
def split_input(img):
"""
img: an image with shape (s, 2s, 3)
:return: [input, output]
"""
s = img.shape[0]
input, output = img[:,:s,:], img[:,s:,:]
input, output = img[:, :s, :], img[:, s:, :]
if args.mode == 'BtoA':
input, output = output, input
if IN_CH == 1:
input = cv2.cvtColor(input, cv2.COLOR_RGB2GRAY)[:,:,np.newaxis]
input = cv2.cvtColor(input, cv2.COLOR_RGB2GRAY)[:, :, np.newaxis]
if OUT_CH == 1:
output = cv2.cvtColor(output, cv2.COLOR_RGB2GRAY)[:,:,np.newaxis]
output = cv2.cvtColor(output, cv2.COLOR_RGB2GRAY)[:, :, np.newaxis]
return [input, output]
def get_data():
datadir = args.data
# assume each image is 512x256 split to left and right
imgs = glob.glob(os.path.join(datadir, '*.jpg'))
ds = ImageFromFile(imgs, channel=3, shuffle=True)
ds = MapData(ds, lambda dp: split_input(dp[0]))
augs = [ imgaug.Resize(286), imgaug.RandomCrop(256) ]
augs = [imgaug.Resize(286), imgaug.RandomCrop(256)]
ds = AugmentImageComponents(ds, augs, (0, 1))
ds = BatchData(ds, BATCH)
ds = PrefetchDataZMQ(ds, 1)
return ds
def get_config():
logger.auto_set_dir()
dataset = get_data()
......@@ -171,12 +178,13 @@ def get_config():
max_epoch=300,
)
def sample(datadir, model_path):
pred = PredictConfig(
session_init=get_model_loader(model_path),
model=Model(),
input_names=['input', 'output'],
output_names=['viz'])
session_init=get_model_loader(model_path),
model=Model(),
input_names=['input', 'output'],
output_names=['viz'])
imgs = glob.glob(os.path.join(datadir, '*.jpg'))
ds = ImageFromFile(imgs, channel=3, shuffle=True)
......@@ -184,7 +192,7 @@ def sample(datadir, model_path):
pred = SimpleDatasetPredictor(pred, ds)
for o in pred.get_result():
o = o[0][:,:,:,::-1]
o = o[0][:, :, :, ::-1]
viz = next(build_patch_list(o, nr_row=3, nr_col=2, viz=True))
if __name__ == '__main__':
......
......@@ -5,7 +5,8 @@
import numpy as np
import tensorflow as tf
import os, sys
import os
import sys
import cv2
import argparse
......@@ -16,9 +17,11 @@ from GAN import GANTrainer, build_GAN_losses
BATCH = 128
class Model(ModelDesc):
def _get_input_vars(self):
return [InputVar(tf.float32, (None, 28, 28), 'input') ]
return [InputVar(tf.float32, (None, 28, 28), 'input')]
def generator(self, z):
l = FullyConnected('fc0', z, 1024, nl=BNReLU)
......@@ -34,18 +37,18 @@ class Model(ModelDesc):
with argscope(Conv2D, nl=tf.identity, kernel_shape=4, stride=2), \
argscope(LeakyReLU, alpha=0.2):
l = (LinearWrap(imgs)
.Conv2D('conv0', 64)
.LeakyReLU()
.Conv2D('conv1', 128)
.BatchNorm('bn1').LeakyReLU()
.FullyConnected('fc1', 1024, nl=tf.identity)
.BatchNorm('bn2').LeakyReLU()())
.Conv2D('conv0', 64)
.LeakyReLU()
.Conv2D('conv1', 128)
.BatchNorm('bn1').LeakyReLU()
.FullyConnected('fc1', 1024, nl=tf.identity)
.BatchNorm('bn2').LeakyReLU()())
logits = FullyConnected('fct', l, 1, nl=tf.identity)
encoder = (LinearWrap(l)
.FullyConnected('fce1', 128, nl=tf.identity)
.BatchNorm('bne').LeakyReLU()
.FullyConnected('fce-out', 10, nl=tf.identity)())
.FullyConnected('fce1', 128, nl=tf.identity)
.BatchNorm('bne').LeakyReLU()
.FullyConnected('fce-out', 10, nl=tf.identity)())
return logits, encoder
def _build_graph(self, input_vars):
......@@ -54,7 +57,7 @@ class Model(ModelDesc):
prior_prob = tf.constant([0.1] * 10, name='prior_prob')
# assume first 10 is categorical
ids = tf.multinomial(tf.zeros([BATCH, 10]), num_samples=1)[:,0]
ids = tf.multinomial(tf.zeros([BATCH, 10]), num_samples=1)[:, 0]
zc = tf.one_hot(ids, 10, name='zc_train')
zc = tf.placeholder_with_default(zc, [None, 10], name='zc')
......@@ -63,7 +66,7 @@ class Model(ModelDesc):
z = tf.concat(1, [zc, z], name='fullz')
with argscope([Conv2D, Deconv2D, FullyConnected],
W_init=tf.truncated_normal_initializer(stddev=0.02)):
W_init=tf.truncated_normal_initializer(stddev=0.02)):
with tf.variable_scope('gen'):
image_gen = self.generator(z)
tf.summary.image('gen', image_gen, max_outputs=30)
......@@ -71,10 +74,10 @@ class Model(ModelDesc):
vecpos, _ = self.discriminator(image_pos)
with tf.variable_scope('discrim', reuse=True):
vecneg, dist_param = self.discriminator(image_gen)
logprob = tf.nn.log_softmax(dist_param) # log prob of each category
logprob = tf.nn.log_softmax(dist_param) # log prob of each category
# Q(c|x) = Q(zc | image_gen)
log_qc_given_x = tf.reduce_sum(logprob * zc, 1, name='logQc_x') # bx1
log_qc_given_x = tf.reduce_sum(logprob * zc, 1, name='logQc_x') # bx1
log_qc = tf.reduce_sum(prior_prob * zc, 1, name='logQc')
Elog_qc_given_x = tf.reduce_mean(log_qc_given_x, name='ElogQc_x')
Hc = tf.reduce_mean(-log_qc, name='Hc')
......@@ -89,11 +92,13 @@ class Model(ModelDesc):
self.g_vars = [v for v in all_vars if v.name.startswith('gen/')]
self.d_vars = [v for v in all_vars if v.name.startswith('discrim/')]
def get_data():
ds = ConcatData([dataset.Mnist('train'), dataset.Mnist('test')])
ds = BatchData(ds, BATCH)
return ds
def get_config():
logger.auto_set_dir()
dataset = get_data()
......@@ -110,12 +115,13 @@ def get_config():
max_epoch=100,
)
def sample(model_path):
pred = OfflinePredictor(PredictConfig(
session_init=get_model_loader(model_path),
model=Model(),
input_names=['zc'],
output_names=['gen/gen']))
session_init=get_model_loader(model_path),
model=Model(),
input_names=['zc'],
output_names=['gen/gen']))
eye = []
for k in np.eye(10):
......@@ -143,4 +149,3 @@ if __name__ == '__main__':
if args.load:
config.session_init = SaverRestore(args.load)
GANTrainer(config).train()
......@@ -8,16 +8,19 @@ import tensorflow as tf
import argparse
import numpy as np
from six.moves import zip
import os, sys
import os
import sys
from tensorpack import *
from tensorpack.tfutils.symbolic_functions import *
from tensorpack.tfutils.summary import *
class Model(ModelDesc):
def _get_input_vars(self):
return [InputVar(tf.float32, [None, None, None, 3], 'image'),
InputVar(tf.int32, [None, None, None], 'edgemap') ]
InputVar(tf.int32, [None, None, None], 'edgemap')]
def _build_graph(self, input_vars):
image, edgemap = input_vars
......@@ -27,9 +30,9 @@ class Model(ModelDesc):
def branch(name, l, up):
with tf.variable_scope(name) as scope:
l = Conv2D('convfc', l, 1, kernel_shape=1, nl=tf.identity,
use_bias=True,
W_init=tf.constant_initializer(),
b_init=tf.constant_initializer())
use_bias=True,
W_init=tf.constant_initializer(),
b_init=tf.constant_initializer())
while up != 1:
l = BilinearUpSample('upsample{}'.format(up), l, 2)
up = up / 2
......@@ -64,15 +67,15 @@ class Model(ModelDesc):
b5 = branch('branch5', l, 16)
final_map = Conv2D('convfcweight',
tf.concat(3, [b1, b2, b3, b4, b5]), 1, 1,
W_init=tf.constant_initializer(0.2),
use_bias=False, nl=tf.identity)
tf.concat(3, [b1, b2, b3, b4, b5]), 1, 1,
W_init=tf.constant_initializer(0.2),
use_bias=False, nl=tf.identity)
costs = []
for idx, b in enumerate([b1, b2, b3, b4, b5, final_map]):
output = tf.nn.sigmoid(b, name='output{}'.format(idx+1))
output = tf.nn.sigmoid(b, name='output{}'.format(idx + 1))
xentropy = class_balanced_sigmoid_cross_entropy(
b, edgemap,
name='xentropy{}'.format(idx+1))
name='xentropy{}'.format(idx + 1))
costs.append(xentropy)
# some magic threshold
......@@ -91,13 +94,15 @@ class Model(ModelDesc):
self.cost = tf.add_n(costs, name='cost')
def get_gradient_processor(self):
return [ScaleGradient([('convfcweight.*', 0.1), ('conv5_.*', 5)]) ]
return [ScaleGradient([('convfcweight.*', 0.1), ('conv5_.*', 5)])]
def get_data(name):
isTrain = name == 'train'
ds = dataset.BSDS500(name, shuffle=True)
class CropMultiple16(imgaug.ImageAugmentor):
def _get_augment_params(self, img):
newh = img.shape[0] // 16 * 16
neww = img.shape[1] // 16 * 16
......@@ -110,12 +115,12 @@ def get_data(name):
def _augment(self, img, param):
h0, w0, newh, neww = param
return img[h0:h0+newh,w0:w0+neww]
return img[h0:h0 + newh, w0:w0 + neww]
if isTrain:
shape_aug = [
imgaug.RandomResize(xrange=(0.7,1.5), yrange=(0.7,1.5),
aspect_ratio_thres=0.15),
imgaug.RandomResize(xrange=(0.7, 1.5), yrange=(0.7, 1.5),
aspect_ratio_thres=0.15),
imgaug.RotationAndCropValid(90),
CropMultiple16(),
imgaug.Flip(horiz=True),
......@@ -128,15 +133,15 @@ def get_data(name):
ds = AugmentImageComponents(ds, shape_aug, (0, 1))
def f(m):
m[m>=0.50] = 1
m[m<0.50] = 0
m[m >= 0.50] = 1
m[m < 0.50] = 0
return m
ds = MapDataComponent(ds, f, 1)
if isTrain:
augmentors = [
imgaug.Brightness(63, clip=False),
imgaug.Contrast((0.4,1.5)),
imgaug.Contrast((0.4, 1.5)),
]
ds = AugmentImageComponent(ds, augmentors)
ds = BatchDataByShape(ds, 8, idx=0)
......@@ -145,6 +150,7 @@ def get_data(name):
ds = BatchData(ds, 1)
return ds
def view_data():
ds = RepeatedData(get_data('train'), -1)
ds.reset_state()
......@@ -156,6 +162,7 @@ def view_data():
cv2.imshow("edge", edgemap)
cv2.waitKey(1000)
def get_config():
logger.auto_set_dir()
dataset_train = get_data('train')
......@@ -178,12 +185,13 @@ def get_config():
max_epoch=100,
)
def run(model_path, image_path, output):
pred_config = PredictConfig(
model=Model(),
session_init=get_model_loader(model_path),
input_names=['image'],
output_names=['output' + str(k) for k in range(1, 7)])
model=Model(),
session_init=get_model_loader(model_path),
input_names=['image'],
output_names=['output' + str(k) for k in range(1, 7)])
predict_func = get_predict_func(pred_config)
im = cv2.imread(image_path)
assert im is not None
......@@ -193,7 +201,7 @@ def run(model_path, image_path, output):
for k in range(6):
pred = outputs[k][0]
cv2.imwrite("out{}.png".format(
'-fused' if k == 5 else str(k+1)), pred * 255)
'-fused' if k == 5 else str(k + 1)), pred * 255)
else:
pred = outputs[5][0]
cv2.imwrite(output, pred * 255)
......
......@@ -27,10 +27,12 @@ This config reaches 71% single-crop validation accuracy after 150k steps with 6
Learning rate may need a different schedule for a different number of GPUs (because the batch size will be different).
"""
class Model(ModelDesc):
def _get_input_vars(self):
return [InputVar(tf.float32, [None, INPUT_SHAPE, INPUT_SHAPE, 3], 'input'),
InputVar(tf.int32, [None], 'label') ]
InputVar(tf.int32, [None], 'label')]
def _build_graph(self, input_vars):
image, label = input_vars
......@@ -117,6 +119,7 @@ class Model(ModelDesc):
add_param_summary([('.*/W', ['histogram'])]) # monitor W
self.cost = tf.add_n([cost, wd_cost], name='cost')
def get_data(train_or_test):
isTrain = train_or_test == 'train'
ds = dataset.ILSVRC12(args.data, train_or_test, shuffle=True if isTrain else False)
......@@ -128,7 +131,7 @@ def get_data(train_or_test):
augmentors = [
imgaug.Resize((256, 256)),
imgaug.Brightness(30, False),
imgaug.Contrast((0.8,1.2), True),
imgaug.Contrast((0.8, 1.2), True),
imgaug.MapImage(lambda x: x - pp_mean),
imgaug.RandomCrop((224, 224)),
imgaug.Flip(horiz=True),
......@@ -166,7 +169,7 @@ def get_config():
ScheduledHyperParamSetter('learning_rate',
[(8, 0.03), (14, 0.02), (17, 5e-3),
(19, 3e-3), (24, 1e-3), (26, 2e-4),
(30, 5e-5) ])
(30, 5e-5)])
]),
session_config=get_default_sess_config(0.99),
model=Model(),
......
......@@ -32,10 +32,12 @@ NR_GPU = 8
BATCH_SIZE = TOTAL_BATCH_SIZE // NR_GPU
INPUT_SHAPE = 299
class Model(ModelDesc):
def _get_input_vars(self):
return [InputVar(tf.float32, [None, INPUT_SHAPE, INPUT_SHAPE, 3], 'input'),
InputVar(tf.int32, [None], 'label') ]
InputVar(tf.int32, [None], 'label')]
def _build_graph(self, input_vars):
image, label = input_vars
......@@ -61,28 +63,28 @@ class Model(ModelDesc):
def proj_77(l, ch_r, ch):
return (LinearWrap(l)
.Conv2D('conv77r', ch_r, 1)
.Conv2D('conv77a', ch_r, [1,7])
.Conv2D('conv77b', ch, [7,1])())
.Conv2D('conv77r', ch_r, 1)
.Conv2D('conv77a', ch_r, [1, 7])
.Conv2D('conv77b', ch, [7, 1])())
def proj_277(l, ch_r, ch):
return (LinearWrap(l)
.Conv2D('conv277r', ch_r, 1)
.Conv2D('conv277aa', ch_r, [7,1])
.Conv2D('conv277ab', ch_r, [1,7])
.Conv2D('conv277ba', ch_r, [7,1])
.Conv2D('conv277bb', ch, [1,7])())
.Conv2D('conv277r', ch_r, 1)
.Conv2D('conv277aa', ch_r, [7, 1])
.Conv2D('conv277ab', ch_r, [1, 7])
.Conv2D('conv277ba', ch_r, [7, 1])
.Conv2D('conv277bb', ch, [1, 7])())
with argscope(Conv2D, nl=BNReLU, use_bias=False),\
argscope(BatchNorm, decay=0.9997, epsilon=1e-3):
l = (LinearWrap(image)
.Conv2D('conv0', 32, 3, stride=2, padding='VALID') #299
.Conv2D('conv1', 32, 3, padding='VALID') #149
.Conv2D('conv2', 64, 3, padding='SAME') # 147
.MaxPooling('pool2', 3, 2)
.Conv2D('conv3', 80, 1, padding='SAME') # 73
.Conv2D('conv4', 192, 3, padding='VALID') # 71
.MaxPooling('pool4', 3, 2)()) # 35
.Conv2D('conv0', 32, 3, stride=2, padding='VALID') # 299
.Conv2D('conv1', 32, 3, padding='VALID') # 149
.Conv2D('conv2', 64, 3, padding='SAME') # 147
.MaxPooling('pool2', 3, 2)
.Conv2D('conv3', 80, 1, padding='SAME') # 73
.Conv2D('conv4', 192, 3, padding='VALID') # 71
.MaxPooling('pool4', 3, 2)()) # 35
with tf.variable_scope('incep-35-256a'):
l = tf.concat(3, [
......@@ -140,7 +142,7 @@ class Model(ModelDesc):
br1 = AvgPooling('avgpool', l, 5, 3, padding='VALID')
br1 = Conv2D('conv11', br1, 128, 1)
shape = br1.get_shape().as_list()
br1 = Conv2D('convout', br1, 768, shape[1:3], padding='VALID') # TODO gauss, stddev=0.01
br1 = Conv2D('convout', br1, 768, shape[1:3], padding='VALID') # TODO gauss, stddev=0.01
br1 = FullyConnected('fc', br1, 1000, nl=tf.identity)
with tf.variable_scope('incep-17-1280a'):
......@@ -194,27 +196,30 @@ class Model(ModelDesc):
self.cost = tf.add_n([0.4 * loss1, loss2, wd_cost], name='cost')
def get_data(train_or_test):
isTrain = train_or_test == 'train'
ds = dataset.ILSVRC12(args.data, train_or_test,
shuffle=True if isTrain else False, dir_structure='train')
shuffle=True if isTrain else False, dir_structure='train')
meta = dataset.ILSVRCMeta()
pp_mean = meta.get_per_pixel_mean()
pp_mean_299 = cv2.resize(pp_mean, (299, 299))
if isTrain:
class Resize(imgaug.ImageAugmentor):
def __init__(self):
self._init(locals())
def _augment(self, img, _):
h, w = img.shape[:2]
size = 299
scale = self.rng.randint(size, 340) * 1.0 / min(h, w)
scaleX = scale * self.rng.uniform(0.85, 1.15)
scaleY = scale * self.rng.uniform(0.85, 1.15)
desSize = map(int, (max(size, min(w, scaleX * w)),\
max(size, min(h, scaleY * h))))
desSize = map(int, (max(size, min(w, scaleX * w)),
max(size, min(h, scaleY * h))))
dst = cv2.resize(img, tuple(desSize), interpolation=cv2.INTER_CUBIC)
return dst
......@@ -224,11 +229,11 @@ def get_data(train_or_test):
imgaug.RandomApplyAug(imgaug.GaussianBlur(3), 0.5),
imgaug.Brightness(30, True),
imgaug.Gamma(),
imgaug.Contrast((0.8,1.2), True),
imgaug.Contrast((0.8, 1.2), True),
imgaug.RandomCrop((299, 299)),
imgaug.RandomApplyAug(imgaug.JpegNoise(), 0.8),
imgaug.RandomApplyAug(imgaug.GaussianDeform(
[(0.2, 0.2), (0.2, 0.8), (0.8,0.8), (0.8,0.2)],
[(0.2, 0.2), (0.2, 0.8), (0.8, 0.8), (0.8, 0.2)],
(299, 299), 0.2, 3), 0.1),
imgaug.Flip(horiz=True),
imgaug.MapImage(lambda x: x - pp_mean_299),
......@@ -237,7 +242,7 @@ def get_data(train_or_test):
def resize_func(im):
h, w = im.shape[:2]
scale = 340.0 / min(h, w)
desSize = map(int, (max(299, min(w, scale * w)),\
desSize = map(int, (max(299, min(w, scale * w)),
max(299, min(h, scale * h))))
im = cv2.resize(im, tuple(desSize), interpolation=cv2.INTER_CUBIC)
return im
......
......@@ -5,7 +5,10 @@
import numpy as np
import tensorflow as tf
import os, sys, re, time
import os
import sys
import re
import time
import random
import argparse
import six
......@@ -23,6 +26,7 @@ ENV_NAME = None
from common import play_one_episode
def get_player(dumpdir=None):
pl = GymEnv(ENV_NAME, dumpdir=dumpdir, auto_restart=False)
pl = MapPlayerState(pl, lambda img: cv2.resize(img, IMAGE_SIZE[::-1]))
......@@ -33,12 +37,14 @@ def get_player(dumpdir=None):
pl = HistoryFramePlayer(pl, FRAME_HISTORY)
return pl
class Model(ModelDesc):
def _get_input_vars(self):
assert NUM_ACTIONS is not None
return [InputVar(tf.float32, (None,) + IMAGE_SHAPE3, 'state'),
InputVar(tf.int32, (None,), 'action'),
InputVar(tf.float32, (None,), 'futurereward') ]
InputVar(tf.float32, (None,), 'futurereward')]
def _get_NN_prediction(self, image):
image = image / 255.0
......@@ -61,6 +67,7 @@ class Model(ModelDesc):
policy = self._get_NN_prediction(state)
self.logits = tf.nn.softmax(policy, name='logits')
def run_submission(cfg, output, nr):
player = get_player(dumpdir=output)
predfunc = get_predict_func(cfg)
......@@ -71,6 +78,7 @@ def run_submission(cfg, output, nr):
score = play_one_episode(player, predfunc)
print("Score:", score)
def do_submit(output):
gym.upload(output, api_key='xxx')
......@@ -80,21 +88,22 @@ if __name__ == '__main__':
parser.add_argument('--load', help='load model', required=True)
parser.add_argument('--env', help='environment name', required=True)
parser.add_argument('--episode', help='number of episodes to run',
type=int, default=100)
type=int, default=100)
parser.add_argument('--output', help='output directory', default='gym-submit')
args = parser.parse_args()
ENV_NAME = args.env
assert ENV_NAME
logger.info("Environment Name: {}".format(ENV_NAME))
p = get_player(); del p # set NUM_ACTIONS
p = get_player()
del p # set NUM_ACTIONS
if args.gpu:
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
cfg = PredictConfig(
model=Model(),
session_init=SaverRestore(args.load),
input_names=['state'],
output_names=['logits'])
model=Model(),
session_init=SaverRestore(args.load),
input_names=['state'],
output_names=['logits'])
run_submission(cfg, args.output, args.episode)
......@@ -5,11 +5,15 @@
import numpy as np
import tensorflow as tf
import os, sys, re, time
import os
import sys
import re
import time
import random
import uuid
import argparse
import multiprocessing, threading
import multiprocessing
import threading
from collections import deque
import six
from six.moves import queue
......@@ -17,7 +21,7 @@ from six.moves import queue
from tensorpack import *
from tensorpack.utils.concurrency import *
from tensorpack.utils.serialize import *
from tensorpack.utils.stats import *
from tensorpack.utils.stats import *
from tensorpack.tfutils import symbolic_functions as symbf
from tensorpack.RL import *
......@@ -42,8 +46,10 @@ EVALUATE_PROC = min(multiprocessing.cpu_count() // 2, 20)
NUM_ACTIONS = None
ENV_NAME = None
def get_player(viz=False, train=False, dumpdir=None):
pl = GymEnv(ENV_NAME, dumpdir=dumpdir)
def func(img):
return cv2.resize(img, IMAGE_SIZE[::-1])
pl = MapPlayerState(pl, func)
......@@ -58,16 +64,20 @@ def get_player(viz=False, train=False, dumpdir=None):
return pl
common.get_player = get_player
class MySimulatorWorker(SimulatorProcess):
def _build_player(self):
return get_player(train=True)
class Model(ModelDesc):
def _get_input_vars(self):
assert NUM_ACTIONS is not None
return [InputVar(tf.float32, (None,) + IMAGE_SHAPE3, 'state'),
InputVar(tf.int64, (None,), 'action'),
InputVar(tf.float32, (None,), 'futurereward') ]
InputVar(tf.float32, (None,), 'futurereward')]
def _get_NN_prediction(self, image):
image = image / 255.0
......@@ -89,11 +99,11 @@ class Model(ModelDesc):
def _build_graph(self, inputs):
state, action, futurereward = inputs
policy, self.value = self._get_NN_prediction(state)
self.value = tf.squeeze(self.value, [1], name='pred_value') # (B,)
self.value = tf.squeeze(self.value, [1], name='pred_value') # (B,)
self.logits = tf.nn.softmax(policy, name='logits')
expf = tf.get_variable('explore_factor', shape=[],
initializer=tf.constant_initializer(1), trainable=False)
initializer=tf.constant_initializer(1), trainable=False)
logitsT = tf.nn.softmax(policy * expf, name='logitsT')
is_training = get_current_tower_context().is_training
if not is_training:
......@@ -101,38 +111,40 @@ class Model(ModelDesc):
log_probs = tf.log(self.logits + 1e-6)
log_pi_a_given_s = tf.reduce_sum(
log_probs * tf.one_hot(action, NUM_ACTIONS), 1)
log_probs * tf.one_hot(action, NUM_ACTIONS), 1)
advantage = tf.sub(tf.stop_gradient(self.value), futurereward, name='advantage')
policy_loss = tf.reduce_sum(log_pi_a_given_s * advantage, name='policy_loss')
xentropy_loss = tf.reduce_sum(
self.logits * log_probs, name='xentropy_loss')
self.logits * log_probs, name='xentropy_loss')
value_loss = tf.nn.l2_loss(self.value - futurereward, name='value_loss')
pred_reward = tf.reduce_mean(self.value, name='predict_reward')
advantage = symbf.rms(advantage, name='rms_advantage')
summary.add_moving_summary(policy_loss, xentropy_loss, value_loss, pred_reward, advantage)
entropy_beta = tf.get_variable('entropy_beta', shape=[],
initializer=tf.constant_initializer(0.01), trainable=False)
initializer=tf.constant_initializer(0.01), trainable=False)
self.cost = tf.add_n([policy_loss, xentropy_loss * entropy_beta, value_loss])
self.cost = tf.truediv(self.cost,
tf.cast(tf.shape(futurereward)[0], tf.float32),
name='cost')
tf.cast(tf.shape(futurereward)[0], tf.float32),
name='cost')
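For reference, a minimal NumPy sketch of the cost assembled above; the names mirror the diff, `logits` holds the softmax probabilities as in the graph, and this is an illustration rather than the trainer's code:

import numpy as np

def a3c_cost(logits, value, action, futurereward, entropy_beta=0.01):
    # logits: (B, A) softmax policy; value/action/futurereward: (B,) arrays
    B, A = logits.shape
    log_probs = np.log(logits + 1e-6)
    log_pi_a = np.sum(log_probs * np.eye(A)[action], axis=1)   # log pi(a|s) of the taken action
    advantage = value - futurereward        # value acts as a constant (stop_gradient in the graph)
    policy_loss = np.sum(log_pi_a * advantage)
    xentropy_loss = np.sum(logits * log_probs)                 # negative policy entropy
    value_loss = 0.5 * np.sum((value - futurereward) ** 2)     # tf.nn.l2_loss
    return (policy_loss + entropy_beta * xentropy_loss + value_loss) / B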
def get_gradient_processor(self):
return [MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.1)),
SummaryGradient()]
class MySimulatorMaster(SimulatorMaster, Callback):
def __init__(self, pipe_c2s, pipe_s2c, model):
super(MySimulatorMaster, self).__init__(pipe_c2s, pipe_s2c)
self.M = model
self.queue = queue.Queue(maxsize=BATCH_SIZE*8*2)
self.queue = queue.Queue(maxsize=BATCH_SIZE * 8 * 2)
def _setup_graph(self):
self.sess = self.trainer.sess
self.async_predictor = MultiThreadAsyncPredictor(
self.trainer.get_predict_funcs(['state'], ['logitsT', 'pred_value'],
PREDICTOR_THREAD), batch_size=15)
self.trainer.get_predict_funcs(['state'], ['logitsT', 'pred_value'],
PREDICTOR_THREAD), batch_size=15)
self.async_predictor.run()
def _on_state(self, state, ident):
......@@ -172,6 +184,7 @@ class MySimulatorMaster(SimulatorMaster, Callback):
else:
client.memory = []
def get_config():
logger.auto_set_dir()
M = Model()
......@@ -196,7 +209,7 @@ def get_config():
ScheduledHyperParamSetter('learning_rate', [(80, 0.0003), (120, 0.0001)]),
ScheduledHyperParamSetter('entropy_beta', [(80, 0.005)]),
ScheduledHyperParamSetter('explore_factor',
[(80, 2), (100, 3), (120, 4), (140, 5)]),
[(80, 2), (100, 3), (120, 4), (140, 5)]),
master,
StartProcOrThread(master),
PeriodicCallback(Evaluator(EVAL_EPISODE, ['state'], ['logits']), 2),
......@@ -213,12 +226,13 @@ if __name__ == '__main__':
parser.add_argument('--load', help='load model')
parser.add_argument('--env', help='env', required=True)
parser.add_argument('--task', help='task to perform',
choices=['play', 'eval', 'train'], default='train')
choices=['play', 'eval', 'train'], default='train')
args = parser.parse_args()
ENV_NAME = args.env
assert ENV_NAME
p = get_player(); del p # set NUM_ACTIONS
p = get_player()
del p # set NUM_ACTIONS
if args.gpu:
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
......@@ -227,10 +241,10 @@ if __name__ == '__main__':
if args.task != 'train':
cfg = PredictConfig(
model=Model(),
session_init=SaverRestore(args.load),
input_names=['state'],
output_names=['logits'])
model=Model(),
session_init=SaverRestore(args.load),
input_names=['state'],
output_names=['logits'])
if args.task == 'play':
play_model(cfg)
elif args.task == 'eval':
......@@ -239,11 +253,11 @@ if __name__ == '__main__':
if args.gpu:
nr_gpu = get_nr_gpu()
if nr_gpu > 1:
predict_tower = range(nr_gpu)[-nr_gpu//2:]
predict_tower = range(nr_gpu)[-nr_gpu // 2:]
else:
predict_tower = [0]
PREDICTOR_THREAD = len(predict_tower) * PREDICTOR_THREAD_PER_GPU
train_tower = range(nr_gpu)[:-nr_gpu//2] or [0]
train_tower = range(nr_gpu)[:-nr_gpu // 2] or [0]
logger.info("[BA3C] Train on gpu {} and infer on gpu {}".format(
','.join(map(str, train_tower)), ','.join(map(str, predict_tower))))
trainer = AsyncMultiGPUTrainer
......
......@@ -30,14 +30,16 @@ This model uses the whole training set instead of a train-val split.
BATCH_SIZE = 128
NUM_UNITS = None
class Model(ModelDesc):
def __init__(self, n):
super(Model, self).__init__()
self.n = n
def _get_input_vars(self):
return [InputVar(tf.float32, [None, 32, 32, 3], 'input'),
InputVar(tf.int32, [None], 'label') ]
InputVar(tf.int32, [None], 'label')]
def _build_graph(self, input_vars):
image, label = input_vars
......@@ -60,13 +62,13 @@ class Model(ModelDesc):
c2 = Conv2D('conv2', c1, out_channel)
if increase_dim:
l = AvgPooling('pool', l, 2)
l = tf.pad(l, [[0,0], [0,0], [0,0], [in_channel//2, in_channel//2]])
l = tf.pad(l, [[0, 0], [0, 0], [0, 0], [in_channel // 2, in_channel // 2]])
l = c2 + l
return l
with argscope(Conv2D, nl=tf.identity, use_bias=False, kernel_shape=3,
W_init=variance_scaling_initializer(mode='FAN_OUT')):
W_init=variance_scaling_initializer(mode='FAN_OUT')):
l = Conv2D('conv0', image, 16, nl=BNReLU)
l = residual('res1.0', l, first=True)
for k in range(1, self.n):
......@@ -104,6 +106,7 @@ class Model(ModelDesc):
add_param_summary([('.*/W', ['histogram'])]) # monitor W
self.cost = tf.add_n([cost, wd_cost], name='cost')
def get_data(train_or_test):
isTrain = train_or_test == 'train'
ds = dataset.Cifar10(train_or_test)
......@@ -125,6 +128,7 @@ def get_data(train_or_test):
ds = PrefetchData(ds, 3, 2)
return ds
def get_config():
logger.auto_set_dir()
......@@ -140,7 +144,7 @@ def get_config():
callbacks=Callbacks([
StatPrinter(), ModelSaver(),
InferenceRunner(dataset_test,
[ScalarStats('cost'), ClassificationError()]),
[ScalarStats('cost'), ClassificationError()]),
ScheduledHyperParamSetter('learning_rate',
[(1, 0.1), (82, 0.01), (123, 0.001), (300, 0.0002)])
]),
......@@ -153,8 +157,8 @@ if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--gpu', help='comma separated list of GPU(s) to use.')
parser.add_argument('-n', '--num_units',
help='number of units in each stage',
type=int, default=18)
help='number of units in each stage',
type=int, default=18)
parser.add_argument('--load', help='load model')
args = parser.parse_args()
NUM_UNITS = args.num_units
......
......@@ -26,10 +26,12 @@ TOTAL_BATCH_SIZE = 256
INPUT_SHAPE = 224
DEPTH = None
class Model(ModelDesc):
def _get_input_vars(self):
return [InputVar(tf.float32, [None, INPUT_SHAPE, INPUT_SHAPE, 3], 'input'),
InputVar(tf.int32, [None], 'label') ]
InputVar(tf.int32, [None], 'label')]
def _build_graph(self, input_vars):
image, label = input_vars
......@@ -73,32 +75,32 @@ class Model(ModelDesc):
with tf.variable_scope(layername):
with tf.variable_scope('block0'):
l = block_func(l, features, stride,
'no_preact' if first else 'both_preact')
'no_preact' if first else 'both_preact')
for i in range(1, count):
with tf.variable_scope('block{}'.format(i)):
l = block_func(l, features, 1, 'default')
return l
cfg = {
18: ([2,2,2,2], basicblock),
34: ([3,4,6,3], basicblock),
50: ([3,4,6,3], bottleneck),
101: ([3,4,23,3], bottleneck)
18: ([2, 2, 2, 2], basicblock),
34: ([3, 4, 6, 3], basicblock),
50: ([3, 4, 6, 3], bottleneck),
101: ([3, 4, 23, 3], bottleneck)
}
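# Quick sanity check of the block counts above (a sketch counting weighted
# layers only): each basicblock has 2 convs, each bottleneck has 3, plus
# conv0 and the final FullyConnected layer:
#   depth 18:  1 + (2 + 2 + 2 + 2) * 2 + 1 = 18
#   depth 50:  1 + (3 + 4 + 6 + 3) * 3 + 1 = 50
#   depth 101: 1 + (3 + 4 + 23 + 3) * 3 + 1 = 101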
defs, block_func = cfg[DEPTH]
with argscope(Conv2D, nl=tf.identity, use_bias=False,
W_init=variance_scaling_initializer(mode='FAN_OUT')):
W_init=variance_scaling_initializer(mode='FAN_OUT')):
logits = (LinearWrap(image)
.Conv2D('conv0', 64, 7, stride=2, nl=BNReLU)
.MaxPooling('pool0', shape=3, stride=2, padding='SAME')
.apply(layer, 'group0', block_func, 64, defs[0], 1, first=True)
.apply(layer, 'group1', block_func, 128, defs[1], 2)
.apply(layer, 'group2', block_func, 256, defs[2], 2)
.apply(layer, 'group3', block_func, 512, defs[3], 2)
.BNReLU('bnlast')
.GlobalAvgPooling('gap')
.FullyConnected('linear', 1000, nl=tf.identity)())
.Conv2D('conv0', 64, 7, stride=2, nl=BNReLU)
.MaxPooling('pool0', shape=3, stride=2, padding='SAME')
.apply(layer, 'group0', block_func, 64, defs[0], 1, first=True)
.apply(layer, 'group1', block_func, 128, defs[1], 2)
.apply(layer, 'group2', block_func, 256, defs[2], 2)
.apply(layer, 'group3', block_func, 512, defs[3], 2)
.BNReLU('bnlast')
.GlobalAvgPooling('gap')
.FullyConnected('linear', 1000, nl=tf.identity)())
loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, label)
loss = tf.reduce_mean(loss, name='xentropy-loss')
......@@ -113,12 +115,13 @@ class Model(ModelDesc):
add_moving_summary(loss, wd_cost)
self.cost = tf.add_n([loss, wd_cost], name='cost')
def get_data(train_or_test):
isTrain = train_or_test == 'train'
datadir = args.data
ds = dataset.ILSVRC12(datadir, train_or_test,
shuffle=True if isTrain else False, dir_structure='original')
shuffle=True if isTrain else False, dir_structure='original')
image_mean = np.array([0.485, 0.456, 0.406], dtype='float32')
image_std = np.array([0.229, 0.224, 0.225], dtype='float32')
......@@ -128,12 +131,13 @@ def get_data(train_or_test):
crop 8%~100% of the original image
See `Going Deeper with Convolutions` by Google.
"""
def _augment(self, img, _):
h, w = img.shape[:2]
area = h * w
for _ in range(10):
targetArea = self.rng.uniform(0.08, 1.0) * area
aspectR = self.rng.uniform(0.75,1.333)
aspectR = self.rng.uniform(0.75, 1.333)
ww = int(np.sqrt(targetArea * aspectR))
hh = int(np.sqrt(targetArea / aspectR))
if self.rng.uniform() < 0.5:
......@@ -141,10 +145,10 @@ def get_data(train_or_test):
if hh <= h and ww <= w:
x1 = 0 if w == ww else self.rng.randint(0, w - ww)
y1 = 0 if h == hh else self.rng.randint(0, h - hh)
out = img[y1:y1+hh,x1:x1+ww]
out = cv2.resize(out, (224,224), interpolation=cv2.INTER_CUBIC)
out = img[y1:y1 + hh, x1:x1 + ww]
out = cv2.resize(out, (224, 224), interpolation=cv2.INTER_CUBIC)
return out
out = cv2.resize(img, (224,224), interpolation=cv2.INTER_CUBIC)
out = cv2.resize(img, (224, 224), interpolation=cv2.INTER_CUBIC)
return out
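A self-contained sketch of the crop policy documented above (sample 8%~100% of the area with an aspect ratio in [3/4, 4/3], fall back to a plain resize if no valid crop is found); the side swap is an assumption since that line sits behind the fold:

import numpy as np
import cv2

def googlenet_random_crop(img, rng=np.random, out_size=224, attempts=10):
    h, w = img.shape[:2]
    area = h * w
    for _ in range(attempts):
        target_area = rng.uniform(0.08, 1.0) * area
        aspect_ratio = rng.uniform(0.75, 1.333)
        ww = int(np.sqrt(target_area * aspect_ratio))
        hh = int(np.sqrt(target_area / aspect_ratio))
        if rng.uniform() < 0.5:
            ww, hh = hh, ww                      # assumed: swap the two sides half the time
        if hh <= h and ww <= w:
            y1 = 0 if h == hh else rng.randint(0, h - hh)
            x1 = 0 if w == ww else rng.randint(0, w - ww)
            crop = img[y1:y1 + hh, x1:x1 + ww]
            return cv2.resize(crop, (out_size, out_size), interpolation=cv2.INTER_CUBIC)
    return cv2.resize(img, (out_size, out_size), interpolation=cv2.INTER_CUBIC)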
augmentors = [
......@@ -154,11 +158,11 @@ def get_data(train_or_test):
imgaug.Contrast((0.8, 1.2), clip=False),
imgaug.Saturation(0.4),
imgaug.Lighting(0.1,
eigval=[0.2175, 0.0188, 0.0045],
eigvec=[[ -0.5675, 0.7192, 0.4009],
[ -0.5808, -0.0045, -0.8140],
[ -0.5836, -0.6948, 0.4203]]
)]),
eigval=[0.2175, 0.0188, 0.0045],
eigvec=[[-0.5675, 0.7192, 0.4009],
[-0.5808, -0.0045, -0.8140],
[-0.5836, -0.6948, 0.4203]]
)]),
imgaug.Clip(),
imgaug.Flip(horiz=True),
imgaug.MapImage(lambda x: (x * (1.0 / 255) - image_mean) / image_std),
......@@ -175,6 +179,7 @@ def get_data(train_or_test):
ds = PrefetchDataZMQ(ds, min(12, multiprocessing.cpu_count()))
return ds
def get_config():
# prepare dataset
dataset_train = get_data('train')
......@@ -190,7 +195,7 @@ def get_config():
ClassificationError('wrong-top1', 'val-error-top1'),
ClassificationError('wrong-top5', 'val-error-top5')]),
ScheduledHyperParamSetter('learning_rate',
[(30, 1e-2), (60, 1e-3), (85, 1e-4), (95, 1e-5)]),
[(30, 1e-2), (60, 1e-3), (85, 1e-4), (95, 1e-5)]),
HumanHyperParamSetter('learning_rate'),
]),
model=Model(),
......@@ -198,6 +203,7 @@ def get_config():
max_epoch=110,
)
def eval_on_ILSVRC12(model_file, data_dir):
ds = get_data('val')
pred_config = PredictConfig(
......@@ -221,7 +227,7 @@ if __name__ == '__main__':
parser.add_argument('--data', help='ILSVRC dataset dir')
parser.add_argument('--load', help='load model')
parser.add_argument('-d', '--depth', help='resnet depth',
type=int, default=18, choices=[18, 34, 50, 101])
type=int, default=18, choices=[18, 34, 50, 101])
parser.add_argument('--eval', action='store_true')
args = parser.parse_args()
......
......@@ -7,7 +7,8 @@
import cv2
import tensorflow as tf
import argparse
import os, re
import os
import re
import numpy as np
import six
from six.moves import zip
......@@ -22,7 +23,9 @@ from tensorpack.dataflow.dataset import ILSVRCMeta
MODEL_DEPTH = None
class Model(ModelDesc):
def _get_input_vars(self):
return [InputVar(tf.float32, [None, 224, 224, 3], 'input'),
InputVar(tf.int32, [None], 'label')]
......@@ -57,48 +60,49 @@ class Model(ModelDesc):
with tf.variable_scope(layername):
with tf.variable_scope('block0'):
l = bottleneck(l, features, stride,
'no_preact' if first else 'both_preact')
'no_preact' if first else 'both_preact')
for i in range(1, count):
with tf.variable_scope('block{}'.format(i)):
l = bottleneck(l, features, 1, 'both_preact')
return l
cfg = {
50: ([3,4,6,3]),
101: ([3,4,23,3]),
152: ([3,8,36,3])
50: ([3, 4, 6, 3]),
101: ([3, 4, 23, 3]),
152: ([3, 8, 36, 3])
}
defs = cfg[MODEL_DEPTH]
with argscope(Conv2D, nl=tf.identity, use_bias=False,
W_init=variance_scaling_initializer(mode='FAN_OUT')):
W_init=variance_scaling_initializer(mode='FAN_OUT')):
# tensorflow with padding=SAME will by default pad [2,3] here.
# but caffe conv with stride will pad [3,3]
image = tf.pad(image, [[0,0],[3,3],[3,3],[0,0]])
image = tf.pad(image, [[0, 0], [3, 3], [3, 3], [0, 0]])
fc1000 = (LinearWrap(image)
.Conv2D('conv0', 64, 7, stride=2, nl=BNReLU, padding='VALID')
.MaxPooling('pool0', shape=3, stride=2, padding='SAME')
.apply(layer, 'group0', 64, defs[0], 1, first=True)
.apply(layer, 'group1', 128, defs[1], 2)
.apply(layer, 'group2', 256, defs[2], 2)
.apply(layer, 'group3', 512, defs[3], 2)
.tf.nn.relu()
.GlobalAvgPooling('gap')
.FullyConnected('fc1000', 1000, nl=tf.identity)())
.Conv2D('conv0', 64, 7, stride=2, nl=BNReLU, padding='VALID')
.MaxPooling('pool0', shape=3, stride=2, padding='SAME')
.apply(layer, 'group0', 64, defs[0], 1, first=True)
.apply(layer, 'group1', 128, defs[1], 2)
.apply(layer, 'group2', 256, defs[2], 2)
.apply(layer, 'group3', 512, defs[3], 2)
.tf.nn.relu()
.GlobalAvgPooling('gap')
.FullyConnected('fc1000', 1000, nl=tf.identity)())
prob = tf.nn.softmax(fc1000, name='prob')
nr_wrong = prediction_incorrect(fc1000, label, name='wrong-top1')
nr_wrong = prediction_incorrect(fc1000, label, 5, name='wrong-top5')
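The manual tf.pad above exists because TF's 'SAME' padding and caffe's explicit padding disagree by one pixel for a 7x7, stride-2 conv; a quick check of the numbers in that comment (illustrative arithmetic only):

in_size, k, s = 224, 7, 2
tf_out = -(-in_size // s)                          # ceil(224 / 2) = 112 with 'SAME'
tf_pad = max((tf_out - 1) * s + k - in_size, 0)    # = 5, split as [2, 3]
caffe_out = (in_size + 2 * 3 - k) // s + 1         # pad 3 on both sides -> also 112
print(tf_out, tf_pad, caffe_out)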
def get_inference_augmentor():
# load ResNet mean from Kaiming:
#from tensorpack.utils.loadcaffe import get_caffe_pb
#obj = get_caffe_pb().BlobProto()
#obj.ParseFromString(open('ResNet_mean.binaryproto').read())
# obj.ParseFromString(open('ResNet_mean.binaryproto').read())
#pp_mean_224 = np.array(obj.data).reshape(3, 224, 224).transpose(1,2,0)
meta = ILSVRCMeta()
pp_mean = meta.get_per_pixel_mean()
pp_mean_224 = pp_mean[16:-16,16:-16,:]
pp_mean_224 = pp_mean[16:-16, 16:-16, :]
transformers = imgaug.AugmentorList([
imgaug.ResizeShortestEdge(256),
......@@ -107,6 +111,7 @@ def get_inference_augmentor():
])
return transformers
def run_test(params, input):
pred_config = PredictConfig(
model=Model(),
......@@ -119,7 +124,7 @@ def run_test(params, input):
prepro = get_inference_augmentor()
im = cv2.imread(input).astype('float32')
im = prepro.augment(im)
im = np.reshape( im, (1, 224, 224, 3))
im = np.reshape(im, (1, 224, 224, 3))
outputs = predict_func([im])
prob = outputs[0]
......@@ -128,6 +133,7 @@ def run_test(params, input):
meta = ILSVRCMeta().get_synset_words_1000()
print([meta[k] for k in ret])
def eval_on_ILSVRC12(params, data_dir):
ds = dataset.ILSVRC12(data_dir, 'val', shuffle=False, dir_structure='train')
ds = AugmentImageComponent(ds, get_inference_augmentor())
......@@ -147,16 +153,17 @@ def eval_on_ILSVRC12(params, data_dir):
print("Top1 Error: {}".format(acc1.ratio))
print("Top5 Error: {}".format(acc5.ratio))
def name_conversion(caffe_layer_name):
""" Convert a caffe parameter name to a tensorflow parameter name as
defined in the above model """
# beginning & end mapping
NAME_MAP = {'bn_conv1/beta': 'conv0/bn/beta',
'bn_conv1/gamma': 'conv0/bn/gamma',
'bn_conv1/mean/EMA': 'conv0/bn/mean/EMA',
'bn_conv1/variance/EMA': 'conv0/bn/variance/EMA',
'conv1/W': 'conv0/W', 'conv1/b': 'conv0/b',
'fc1000/W': 'fc1000/W', 'fc1000/b': 'fc1000/b'}
'bn_conv1/gamma': 'conv0/bn/gamma',
'bn_conv1/mean/EMA': 'conv0/bn/mean/EMA',
'bn_conv1/variance/EMA': 'conv0/bn/variance/EMA',
'conv1/W': 'conv0/W', 'conv1/b': 'conv0/b',
'fc1000/W': 'fc1000/W', 'fc1000/b': 'fc1000/b'}
if caffe_layer_name in NAME_MAP:
return NAME_MAP[caffe_layer_name]
......@@ -178,13 +185,13 @@ def name_conversion(caffe_layer_name):
layer_id = re.search('_branch[0-9]([a-z])/', caffe_layer_name).group(1)
layer_id = ord(layer_id) - ord('a') + 1
TYPE_DICT = {'res':'conv', 'bn':'bn'}
TYPE_DICT = {'res': 'conv', 'bn': 'bn'}
tf_name = caffe_layer_name[caffe_layer_name.index('/'):]
layer_type = TYPE_DICT[layer_type] + \
(str(layer_id) if layer_branch == 2 else 'shortcut')
tf_name = 'group{}/block{}/{}'.format(
int(layer_group) - 2, layer_block, layer_type) + tf_name
int(layer_group) - 2, layer_block, layer_type) + tf_name
return tf_name
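A hypothetical spot check of the conversion; the middle of the parsing logic is collapsed in this diff, so the expected names below are assumptions based on the mapping sketched above rather than verified outputs:

for caffe_name, expected in [
        ('bn_conv1/beta', 'conv0/bn/beta'),              # direct hit in NAME_MAP
        ('res2a_branch2a/W', 'group0/block0/conv1/W'),
        ('bn4f_branch2c/gamma', 'group2/block5/bn3/gamma')]:
    print(caffe_name, '->', name_conversion(caffe_name), '(expected', expected + ')')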
if __name__ == '__main__':
......
......@@ -20,11 +20,12 @@ You might need to adjust the learning rate schedule when running with 1 GPU.
import imp
cifar_example = imp.load_source('cifar_example',
os.path.join(os.path.dirname(__file__), 'cifar10-resnet.py'))
os.path.join(os.path.dirname(__file__), 'cifar10-resnet.py'))
Model = cifar_example.Model
BATCH_SIZE = 128
def get_data(train_or_test):
isTrain = train_or_test == 'train'
pp_mean = dataset.SVHNDigit.get_per_pixel_mean()
......@@ -39,9 +40,9 @@ def get_data(train_or_test):
augmentors = [
imgaug.CenterPaste((40, 40)),
imgaug.Brightness(10),
imgaug.Contrast((0.8,1.2)),
imgaug.Contrast((0.8, 1.2)),
imgaug.GaussianDeform( # this is slow. without it, can only reach 1.9% error
[(0.2, 0.2), (0.2, 0.8), (0.8,0.8), (0.8,0.2)],
[(0.2, 0.2), (0.2, 0.8), (0.8, 0.8), (0.8, 0.2)],
(40, 40), 0.2, 3),
imgaug.RandomCrop((32, 32)),
imgaug.MapImage(lambda x: x - pp_mean),
......@@ -56,6 +57,7 @@ def get_data(train_or_test):
ds = PrefetchData(ds, 5, 5)
return ds
def get_config():
logger.auto_set_dir()
......@@ -72,7 +74,7 @@ def get_config():
StatPrinter(),
ModelSaver(),
InferenceRunner(dataset_test,
[ScalarStats('cost'), ClassificationError() ]),
[ScalarStats('cost'), ClassificationError()]),
ScheduledHyperParamSetter('learning_rate',
[(1, 0.1), (20, 0.01), (28, 0.001), (50, 0.0001)])
]),
......
......@@ -5,7 +5,8 @@
import numpy as np
import tensorflow as tf
import os, sys
import os
import sys
import argparse
from tensorpack import *
......@@ -15,36 +16,38 @@ IMAGE_SIZE = 42
WARP_TARGET_SIZE = 28
HALF_DIFF = (IMAGE_SIZE - WARP_TARGET_SIZE) // 2
class Model(ModelDesc):
def _get_input_vars(self):
return [InputVar(tf.float32, (None, IMAGE_SIZE, IMAGE_SIZE, 2), 'input'),
InputVar(tf.int32, (None,), 'label') ]
InputVar(tf.int32, (None,), 'label')]
def _build_graph(self, input_vars):
xys = np.array([(y,x,1) for y in range(WARP_TARGET_SIZE)
xys = np.array([(y, x, 1) for y in range(WARP_TARGET_SIZE)
for x in range(WARP_TARGET_SIZE)], dtype='float32')
xys = tf.constant(xys, dtype=tf.float32, name='xys') # p x 3
image, label = input_vars
image = image / 255.0 - 0.5 # bhw2
image = image / 255.0 - 0.5 # bhw2
def get_stn(image):
stn = (LinearWrap(image)
.AvgPooling('downsample', 2)
.Conv2D('conv0', 20, 5, padding='VALID')
.MaxPooling('pool0', 2)
.Conv2D('conv1', 20, 5, padding='VALID')
.FullyConnected('fc1', out_dim=32)
.FullyConnected('fct', out_dim=6, nl=tf.identity,
W_init=tf.constant_initializer(),
b_init=tf.constant_initializer([1, 0, HALF_DIFF, 0, 1, HALF_DIFF]))())
.AvgPooling('downsample', 2)
.Conv2D('conv0', 20, 5, padding='VALID')
.MaxPooling('pool0', 2)
.Conv2D('conv1', 20, 5, padding='VALID')
.FullyConnected('fc1', out_dim=32)
.FullyConnected('fct', out_dim=6, nl=tf.identity,
W_init=tf.constant_initializer(),
b_init=tf.constant_initializer([1, 0, HALF_DIFF, 0, 1, HALF_DIFF]))())
# output 6 parameters for affine transformation
stn = tf.reshape(stn, [-1, 2, 3], name='affine') # bx2x3
stn = tf.reshape(tf.transpose(stn, [2, 0, 1]), [3, -1]) # 3 x (bx2)
stn = tf.reshape(stn, [-1, 2, 3], name='affine') # bx2x3
stn = tf.reshape(tf.transpose(stn, [2, 0, 1]), [3, -1]) # 3 x (bx2)
coor = tf.reshape(tf.matmul(xys, stn),
[WARP_TARGET_SIZE, WARP_TARGET_SIZE, -1, 2])
coor = tf.transpose(coor, [2, 0, 1, 3], 'sampled_coords') # b h w 2
[WARP_TARGET_SIZE, WARP_TARGET_SIZE, -1, 2])
coor = tf.transpose(coor, [2, 0, 1, 3], 'sampled_coords') # b h w 2
sampled = ImageSample('warp', [image, coor], borderMode='constant')
return sampled
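The reshape/transpose dance above simply applies each predicted 2x3 affine matrix to every target-grid coordinate; a NumPy sketch of the same mapping, assuming theta has shape (B, 2, 3):

import numpy as np

def affine_sample_coords(theta, target_size):
    # returns (B, H, W, 2): for each target pixel (y, x), the source point A . [y, x, 1]^T
    H = W = target_size
    ys, xs = np.meshgrid(np.arange(H), np.arange(W), indexing='ij')
    grid = np.stack([ys, xs, np.ones_like(ys)], axis=-1).astype('float32')   # (H, W, 3)
    return np.einsum('hwk,bjk->bhwj', grid, theta)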
......@@ -55,21 +58,21 @@ class Model(ModelDesc):
sampled2 = get_stn(image)
# For visualization in tensorboard
padded1 = tf.pad(sampled1, [[0,0],[HALF_DIFF,HALF_DIFF],[HALF_DIFF,HALF_DIFF],[0,0]])
padded2 = tf.pad(sampled2, [[0,0],[HALF_DIFF,HALF_DIFF],[HALF_DIFF,HALF_DIFF],[0,0]])
img_orig = tf.concat(1, [image[:,:,:,0], image[:,:,:,1]]) #b x 2h x w
transform1 = tf.concat(1, [padded1[:,:,:,0], padded1[:,:,:,1]])
transform2 = tf.concat(1, [padded2[:,:,:,0], padded2[:,:,:,1]])
padded1 = tf.pad(sampled1, [[0, 0], [HALF_DIFF, HALF_DIFF], [HALF_DIFF, HALF_DIFF], [0, 0]])
padded2 = tf.pad(sampled2, [[0, 0], [HALF_DIFF, HALF_DIFF], [HALF_DIFF, HALF_DIFF], [0, 0]])
img_orig = tf.concat(1, [image[:, :, :, 0], image[:, :, :, 1]]) # b x 2h x w
transform1 = tf.concat(1, [padded1[:, :, :, 0], padded1[:, :, :, 1]])
transform2 = tf.concat(1, [padded2[:, :, :, 0], padded2[:, :, :, 1]])
stacked = tf.concat(2, [img_orig, transform1, transform2], 'viz')
tf.summary.image('visualize',
tf.expand_dims(stacked, -1), max_images=30)
tf.expand_dims(stacked, -1), max_images=30)
sampled = tf.concat(3, [sampled1, sampled2], 'sampled_concat')
logits = (LinearWrap(sampled)
.apply(symbf.batch_flatten)
.FullyConnected('fc1', out_dim=256, nl=tf.nn.relu)
.FullyConnected('fc2', out_dim=128, nl=tf.nn.relu)
.FullyConnected('fct', out_dim=19, nl=tf.identity)())
.apply(symbf.batch_flatten)
.FullyConnected('fc1', out_dim=256, nl=tf.nn.relu)
.FullyConnected('fc2', out_dim=128, nl=tf.nn.relu)
.FullyConnected('fct', out_dim=19, nl=tf.identity)())
prob = tf.nn.softmax(logits, name='prob')
cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, label)
......@@ -87,6 +90,7 @@ class Model(ModelDesc):
return [MapGradient(lambda grad: tf.clip_by_global_norm([grad], 5)[0][0]),
ScaleGradient([('STN.*', 0.1)]), SummaryGradient()]
def get_data(isTrain):
ds = dataset.Mnist('train' if isTrain else 'test')
# create augmentation for both training and testing
......@@ -105,20 +109,21 @@ def get_data(isTrain):
ds = BatchData(ds, 128)
return ds
def view_warp(modelpath):
pred = OfflinePredictor(PredictConfig(
session_init=get_model_loader(modelpath),
model=Model(),
input_names=['input'],
output_names=['viz', 'STN1/affine', 'STN2/affine']))
session_init=get_model_loader(modelpath),
model=Model(),
input_names=['input'],
output_names=['viz', 'STN1/affine', 'STN2/affine']))
xys = np.array([[0, 0, 1],
[WARP_TARGET_SIZE, 0, 1],
[WARP_TARGET_SIZE, WARP_TARGET_SIZE, 1],
[0, WARP_TARGET_SIZE, 1]], dtype='float32')
[WARP_TARGET_SIZE, 0, 1],
[WARP_TARGET_SIZE, WARP_TARGET_SIZE, 1],
[0, WARP_TARGET_SIZE, 1]], dtype='float32')
def draw_rect(img, affine, c, offset=[0,0]):
a = np.transpose(affine) #3x2
def draw_rect(img, affine, c, offset=[0, 0]):
a = np.transpose(affine) # 3x2
a = (np.matmul(xys, a) + offset).astype('int32')
cv2.line(img, tuple(a[0][::-1]), tuple(a[1][::-1]), c)
cv2.line(img, tuple(a[1][::-1]), tuple(a[2][::-1]), c)
......@@ -133,11 +138,12 @@ def view_warp(modelpath):
for idx, viz in enumerate(outputs):
viz = cv2.cvtColor(viz, cv2.COLOR_GRAY2BGR)
# Here we assume the second branch focuses on the first digit
draw_rect(viz, affine2[idx], (0,0,255))
draw_rect(viz, affine1[idx], (0,0,255), offset=[IMAGE_SIZE, 0])
draw_rect(viz, affine2[idx], (0, 0, 255))
draw_rect(viz, affine1[idx], (0, 0, 255), offset=[IMAGE_SIZE, 0])
cv2.imwrite('{:03d}.png'.format(idx), (viz + 0.5) * 255)
break
def get_config():
logger.auto_set_dir()
......@@ -152,7 +158,7 @@ def get_config():
callbacks=Callbacks([
StatPrinter(), ModelSaver(),
InferenceRunner(dataset_test,
[ScalarStats('cost'), ClassificationError() ]),
[ScalarStats('cost'), ClassificationError()]),
ScheduledHyperParamSetter('learning_rate', [(200, 1e-4)])
]),
session_config=get_default_sess_config(0.5),
......@@ -176,4 +182,3 @@ if __name__ == '__main__':
if args.load:
config.session_init = SaverRestore(args.load)
SimpleTrainer(config).train()
......@@ -2,7 +2,8 @@
# -*- coding: utf-8 -*-
# File: create-lmdb.py
# Author: Yuxin Wu <ppwwyyxxc@gmail.com>
import sys, os
import sys
import os
import scipy.io.wavfile as wavfile
import string
import numpy as np
......@@ -14,10 +15,12 @@ from tensorpack.utils.stats import OnlineMoments
import bob.ap
CHARSET = set(string.ascii_lowercase + ' ')
PHONEME_LIST = "aa,ae,ah,ao,aw,ax,ax-h,axr,ay,b,bcl,ch,d,dcl,dh,dx,eh,el,em,en,eng,epi,er,ey,f,g,gcl,h#,hh,hv,ih,ix,iy,jh,k,kcl,l,m,n,ng,nx,ow,oy,p,pau,pcl,q,r,s,sh,t,tcl,th,uh,uw,ux,v,w,y,z,zh".split(',')
PHONEME_LIST = "aa,ae,ah,ao,aw,ax,ax-h,axr,ay,b,bcl,ch,d,dcl,dh,dx,eh,el,em,en,eng,epi,er,ey,f,g,gcl,h#,hh,hv,ih,ix,iy,jh,k,kcl,l,m,n,ng,nx,ow,oy,p,pau,pcl,q,r,s,sh,t,tcl,th,uh,uw,ux,v,w,y,z,zh".split(
',')
PHONEME_DIC = {v: k for k, v in enumerate(PHONEME_LIST)}
WORD_DIC = {v: k for k, v in enumerate(string.ascii_lowercase + ' ')}
def read_timit_txt(f):
f = open(f)
line = f.readlines()[0].strip().split(' ')
......@@ -30,6 +33,7 @@ def read_timit_txt(f):
ret.append(WORD_DIC[c])
return np.asarray(ret)
def read_timit_phoneme(f):
f = open(f)
pho = []
......@@ -39,15 +43,17 @@ def read_timit_phoneme(f):
f.close()
return np.asarray(pho)
@memoized
def get_bob_extractor(fs, win_length_ms=10, win_shift_ms=5,
n_filters=55, n_ceps=15, f_min=0., f_max=6000,
delta_win=2, pre_emphasis_coef=0.95, dct_norm=True,
mel_scale=True):
ret = bob.ap.Ceps(fs, win_length_ms, win_shift_ms, n_filters, n_ceps, f_min,
f_max, delta_win, pre_emphasis_coef, mel_scale, dct_norm)
f_max, delta_win, pre_emphasis_coef, mel_scale, dct_norm)
return ret
def diff_feature(feat, nd=1):
diff = feat[1:] - feat[:-1]
feat = feat[1:]
......@@ -57,6 +63,7 @@ def diff_feature(feat, nd=1):
d2 = diff[1:] - diff[:-1]
return np.concatenate((feat[1:], diff[1:], d2), axis=1)
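A tiny worked example of diff_feature with nd=2 (one-dimensional frames for readability; the nd=1 branch is collapsed above):

import numpy as np
feat = np.array([[1.], [2.], [4.], [7.]])
# first difference: [[1],[2],[3]]; second difference: [[1],[1]]
# each remaining frame becomes (static, delta, delta-delta):
print(diff_feature(feat, nd=2))   # -> [[4. 2. 1.]
                                  #     [7. 3. 1.]]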
def get_feature(f):
fs, signal = wavfile.read(f)
signal = signal.astype('float64')
......@@ -64,12 +71,14 @@ def get_feature(f):
feat = diff_feature(feat, nd=2)
return feat
class RawTIMIT(DataFlow):
def __init__(self, dirname, label='phoneme'):
self.dirname = dirname
assert os.path.isdir(dirname), dirname
self.filelists = [k for k in fs.recursive_walk(self.dirname)
if k.endswith('.wav')]
if k.endswith('.wav')]
logger.info("Found {} wav files ...".format(len(self.filelists)))
assert len(self.filelists), self.filelists
assert label in ['phoneme', 'letter'], label
......@@ -87,12 +96,13 @@ class RawTIMIT(DataFlow):
label = read_timit_txt(f[:-4] + '.TXT')
yield [feat, label]
def compute_mean_std(db, fname):
ds = LMDBDataPoint(db, shuffle=False)
o = OnlineMoments()
with get_tqdm(total=ds.size()) as bar:
for dp in ds.get_data():
feat = dp[0] #len x dim
feat = dp[0] # len x dim
for f in feat:
o.feed(f)
bar.update()
......@@ -105,13 +115,13 @@ if __name__ == '__main__':
subparsers = parser.add_subparsers(title='command', dest='command')
parser_db = subparsers.add_parser('build', help='build a LMDB database')
parser_db.add_argument('--dataset',
help='path to TIMIT TRAIN or TEST directory', required=True)
help='path to TIMIT TRAIN or TEST directory', required=True)
parser_db.add_argument('--db', help='output lmdb file', required=True)
parser_stat = subparsers.add_parser('stat', help='compute statistics (mean/std) of dataset')
parser_stat.add_argument('--db', help='input lmdb file', required=True)
parser_stat.add_argument('-o', '--output',
help='output statistics file', default='stats.data')
help='output statistics file', default='stats.data')
args = parser.parse_args()
if args.command == 'build':
......@@ -119,4 +129,3 @@ if __name__ == '__main__':
dftools.dump_dataflow_to_lmdb(ds, args.db)
elif args.command == 'stat':
compute_mean_std(args.db, args.output)
......@@ -9,15 +9,17 @@ from six.moves import range
__all__ = ['TIMITBatch']
def batch_feature(feats):
# pad to the longest in the batch
maxlen = max([k.shape[0] for k in feats])
bsize = len(feats)
ret = np.zeros((bsize, maxlen, feats[0].shape[1]))
for idx, feat in enumerate(feats):
ret[idx,:feat.shape[0],:] = feat
ret[idx, :feat.shape[0], :] = feat
return ret
def sparse_label(labels):
maxlen = max([k.shape[0] for k in labels])
shape = [len(labels), maxlen] # bxt
......@@ -31,7 +33,9 @@ def sparse_label(labels):
values = np.asarray(values)
return (indices, values, shape)
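For two label sequences [3, 1] and [7], the sparse triple would look like the following; illustrative, matching the batch-major sparse format tf.nn.ctc_loss expects:

#   indices = [[0, 0], [0, 1], [1, 0]]   # (batch index, position) of every label entry
#   values  = [3, 1, 7]
#   shape   = [2, 2]                     # batch size x longest label length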
class TIMITBatch(ProxyDataFlow):
def __init__(self, ds, batch):
self.batch = batch
self.ds = ds
......@@ -52,4 +56,3 @@ class TIMITBatch(ProxyDataFlow):
batchlab = sparse_label(labs)
seqlen = np.asarray([k.shape[0] for k in feats])
yield [batchfeat, batchlab[0], batchlab[1], batchlab[2], seqlen]
......@@ -5,7 +5,8 @@
import tensorflow as tf
import numpy as np
import os, sys
import os
import sys
import argparse
from collections import Counter
import operator
......@@ -13,7 +14,7 @@ import six
from six.moves import map, range
from tensorpack import *
from tensorpack.tfutils.gradproc import *
from tensorpack.tfutils.gradproc import *
from tensorpack.utils.globvars import globalns as param
import tensorpack.tfutils.symbolic_functions as symbf
from timitdata import TIMITBatch
......@@ -21,13 +22,15 @@ from timitdata import TIMITBatch
BATCH = 64
NLAYER = 2
HIDDEN = 128
NR_CLASS = 61 + 1
NR_CLASS = 61 + 1
FEATUREDIM = 39
class Model(ModelDesc):
def _get_input_vars(self):
return [InputVar(tf.float32, [None, None, FEATUREDIM], 'feat'), # bxmaxseqx39
InputVar(tf.int64, None, 'labelidx'), #label is b x maxlen, sparse
InputVar(tf.int64, None, 'labelidx'), # label is b x maxlen, sparse
InputVar(tf.int32, None, 'labelvalue'),
InputVar(tf.int64, None, 'labelshape'),
InputVar(tf.int32, [None], 'seqlen'), # b
......@@ -43,36 +46,37 @@ class Model(ModelDesc):
initial = cell.zero_state(tf.shape(feat)[0], tf.float32)
outputs, last_state = tf.nn.dynamic_rnn(cell, feat,
seqlen, initial,
dtype=tf.float32, scope='rnn')
seqlen, initial,
dtype=tf.float32, scope='rnn')
# o: b x t x HIDDEN
output = tf.reshape(outputs, [-1, HIDDEN]) # (Bxt) x rnnsize
logits = FullyConnected('fc', output, NR_CLASS, nl=tf.identity,
W_init=tf.truncated_normal_initializer(stddev=0.01))
W_init=tf.truncated_normal_initializer(stddev=0.01))
logits = tf.reshape(logits, (BATCH, -1, NR_CLASS))
loss = tf.nn.ctc_loss(logits, label, seqlen, time_major=False)
self.cost = tf.reduce_mean(loss, name='cost')
logits = tf.transpose(logits, [1,0,2])
logits = tf.transpose(logits, [1, 0, 2])
isTrain = get_current_tower_context().is_training
if isTrain:
# beam search is too slow to run in training
predictions = tf.to_int32(
tf.nn.ctc_greedy_decoder(logits, seqlen)[0][0])
tf.nn.ctc_greedy_decoder(logits, seqlen)[0][0])
else:
predictions = tf.to_int32(
tf.nn.ctc_beam_search_decoder(logits, seqlen)[0][0])
tf.nn.ctc_beam_search_decoder(logits, seqlen)[0][0])
err = tf.edit_distance(predictions, label, normalize=True)
err.set_shape([None])
err = tf.reduce_mean(err, name='error')
summary.add_moving_summary(err)
def get_gradient_processor(self):
return [GlobalNormClip(5), SummaryGradient() ]
return [GlobalNormClip(5), SummaryGradient()]
def get_data(path, isTrain, stat_file):
ds = LMDBDataPoint(path, shuffle=isTrain)
......@@ -83,6 +87,7 @@ def get_data(path, isTrain, stat_file):
ds = PrefetchDataZMQ(ds, 1)
return ds
def get_config(ds_train, ds_test):
step_per_epoch = ds_train.size()
......@@ -94,7 +99,7 @@ def get_config(ds_train, ds_test):
callbacks=Callbacks([
StatPrinter(), ModelSaver(),
StatMonitorParamSetter('learning_rate', 'error',
lambda x: x * 0.2, 0, 5),
lambda x: x * 0.2, 0, 5),
HumanHyperParamSetter('learning_rate'),
PeriodicCallback(
InferenceRunner(ds_test, [ScalarStats('error')]), 2),
......@@ -124,4 +129,3 @@ if __name__ == '__main__':
if args.load:
config.session_init = SaverRestore(args.load)
QueueInputTrainer(config).train()
......@@ -5,7 +5,8 @@
import tensorflow as tf
import numpy as np
import os, sys
import os
import sys
import argparse
from collections import Counter
import operator
......@@ -13,7 +14,7 @@ import six
from six.moves import map, range
from tensorpack import *
from tensorpack.tfutils.gradproc import *
from tensorpack.tfutils.gradproc import *
from tensorpack.utils.lut import LookUpTable
from tensorpack.utils.globvars import globalns as param
......@@ -27,7 +28,9 @@ param.vocab_size = None
param.softmax_temprature = 1
param.corpus = 'input.txt'
class CharRNNData(RNGDataFlow):
def __init__(self, input_file, size):
self.seq_length = param.seq_len
self._size = size
......@@ -51,16 +54,17 @@ class CharRNNData(RNGDataFlow):
def get_data(self):
random_starts = self.rng.randint(0,
self.whole_seq.shape[0] - self.seq_length - 1, (self._size,))
self.whole_seq.shape[0] - self.seq_length - 1, (self._size,))
for st in random_starts:
seq = self.whole_seq[st:st + self.seq_length + 1]
yield [seq[:-1], seq[1:]]
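Each datapoint is just a random window of the corpus paired with its one-step-shifted copy as the target, e.g. (illustrative):

# with seq_len = 4 and whole_seq holding the character indices of "hello":
#   seq    = [h, e, l, l, o]
#   input  = seq[:-1] = [h, e, l, l]
#   target = seq[1:]  = [e, l, l, o]   # next-character prediction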
class Model(ModelDesc):
def _get_input_vars(self):
return [InputVar(tf.int32, (None, param.seq_len), 'input'),
InputVar(tf.int32, (None, param.seq_len), 'nextinput') ]
InputVar(tf.int32, (None, param.seq_len), 'nextinput')]
def _build_graph(self, input_vars):
input, nextinput = input_vars
......@@ -71,9 +75,9 @@ class Model(ModelDesc):
self.initial = initial = cell.zero_state(tf.shape(input)[0], tf.float32)
embeddingW = tf.get_variable('embedding', [param.vocab_size, param.rnn_size])
input_feature = tf.nn.embedding_lookup(embeddingW, input) # B x seqlen x rnnsize
input_feature = tf.nn.embedding_lookup(embeddingW, input) # B x seqlen x rnnsize
input_list = tf.unstack(input_feature, axis=1) #seqlen x (Bxrnnsize)
input_list = tf.unstack(input_feature, axis=1) # seqlen x (Bxrnnsize)
# seqlen is 1 in inference. don't need loop_function
outputs, last_state = tf.nn.rnn(cell, input_list, initial, scope='rnnlm')
......@@ -85,13 +89,14 @@ class Model(ModelDesc):
self.prob = tf.nn.softmax(logits / param.softmax_temprature)
xent_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
logits, symbolic_functions.flatten(nextinput))
logits, symbolic_functions.flatten(nextinput))
self.cost = tf.reduce_mean(xent_loss, name='cost')
summary.add_param_summary([('.*/W', ['histogram'])]) # monitor histogram of all W
def get_gradient_processor(self):
return [GlobalNormClip(5)]
def get_config():
logger.auto_set_dir()
......@@ -114,6 +119,8 @@ def get_config():
)
# TODO rewrite using Predictor interface
def sample(path, start, length):
"""
:param path: path to the model
......@@ -130,7 +137,7 @@ def sample(path, start, length):
sess = tf.Session()
tfutils.SaverRestore(path).init(sess)
dummy_input = np.zeros((1,1), dtype='int32')
dummy_input = np.zeros((1, 1), dtype='int32')
with sess.as_default():
# feed the starting sentence
state = model.initial.eval({input_vars[0]: dummy_input})
......@@ -149,7 +156,7 @@ def sample(path, start, length):
for k in range(length):
x = np.array([[ds.lut.get_idx(c)]], dtype='int32')
[prob, state] = sess.run([model.prob, model.last_state],
{input_vars[0]: x, model.initial: state})
{input_vars[0]: x, model.initial: state})
c = ds.lut.get_obj(pick(prob[0]))
ret += c
print(ret)
......@@ -161,11 +168,11 @@ if __name__ == '__main__':
subparsers = parser.add_subparsers(title='command', dest='command')
parser_sample = subparsers.add_parser('sample', help='sample a trained model')
parser_sample.add_argument('-n', '--num', type=int,
default=300, help='length of text to generate')
default=300, help='length of text to generate')
parser_sample.add_argument('-s', '--start',
default='The ', help='initial text sequence')
default='The ', help='initial text sequence')
parser_sample.add_argument('-t', '--temperature', type=float,
default=1, help='softmax temperature')
default=1, help='softmax temperature')
parser_train = subparsers.add_parser('train', help='train')
args = parser.parse_args()
if args.gpu:
......@@ -181,4 +188,3 @@ if __name__ == '__main__':
if args.load:
config.session_init = SaverRestore(args.load)
QueueInputTrainer(config).train()
......@@ -22,7 +22,9 @@ Cifar10:
Not a good model for Cifar100, just for demonstration.
"""
class Model(ModelDesc):
def __init__(self, cifar_classnum):
super(Model, self).__init__()
self.cifar_classnum = cifar_classnum
......@@ -30,7 +32,7 @@ class Model(ModelDesc):
def _get_input_vars(self):
return [InputVar(tf.float32, [None, 30, 30, 3], 'input'),
InputVar(tf.int32, [None], 'label')
]
]
def _build_graph(self, input_vars):
image, label = input_vars
......@@ -43,18 +45,18 @@ class Model(ModelDesc):
image = image / 4.0 # just to make range smaller
with argscope(Conv2D, nl=BNReLU, use_bias=False, kernel_shape=3):
logits = LinearWrap(image) \
.Conv2D('conv1.1', out_channel=64) \
.Conv2D('conv1.2', out_channel=64) \
.MaxPooling('pool1', 3, stride=2, padding='SAME') \
.Conv2D('conv2.1', out_channel=128) \
.Conv2D('conv2.2', out_channel=128) \
.MaxPooling('pool2', 3, stride=2, padding='SAME') \
.Conv2D('conv3.1', out_channel=128, padding='VALID') \
.Conv2D('conv3.2', out_channel=128, padding='VALID') \
.FullyConnected('fc0', 1024 + 512, nl=tf.nn.relu) \
.tf.nn.dropout(keep_prob) \
.FullyConnected('fc1', 512, nl=tf.nn.relu) \
.FullyConnected('linear', out_dim=self.cifar_classnum, nl=tf.identity)()
.Conv2D('conv1.1', out_channel=64) \
.Conv2D('conv1.2', out_channel=64) \
.MaxPooling('pool1', 3, stride=2, padding='SAME') \
.Conv2D('conv2.1', out_channel=128) \
.Conv2D('conv2.2', out_channel=128) \
.MaxPooling('pool2', 3, stride=2, padding='SAME') \
.Conv2D('conv3.1', out_channel=128, padding='VALID') \
.Conv2D('conv3.2', out_channel=128, padding='VALID') \
.FullyConnected('fc0', 1024 + 512, nl=tf.nn.relu) \
.tf.nn.dropout(keep_prob) \
.FullyConnected('fc1', 512, nl=tf.nn.relu) \
.FullyConnected('linear', out_dim=self.cifar_classnum, nl=tf.identity)()
cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, label)
cost = tf.reduce_mean(cost, name='cross_entropy_loss')
......@@ -72,6 +74,7 @@ class Model(ModelDesc):
add_param_summary([('.*/W', ['histogram'])]) # monitor W
self.cost = tf.add_n([cost, wd_cost], name='cost')
def get_data(train_or_test, cifar_classnum):
isTrain = train_or_test == 'train'
if cifar_classnum == 10:
......@@ -83,10 +86,10 @@ def get_data(train_or_test, cifar_classnum):
imgaug.RandomCrop((30, 30)),
imgaug.Flip(horiz=True),
imgaug.Brightness(63),
imgaug.Contrast((0.2,1.8)),
imgaug.Contrast((0.2, 1.8)),
imgaug.GaussianDeform(
[(0.2, 0.2), (0.2, 0.8), (0.8,0.8), (0.8,0.2)],
(30,30), 0.2, 3),
[(0.2, 0.2), (0.2, 0.8), (0.8, 0.8), (0.8, 0.2)],
(30, 30), 0.2, 3),
imgaug.MeanVarianceNormalize(all_channel=True)
]
else:
......@@ -100,6 +103,7 @@ def get_data(train_or_test, cifar_classnum):
ds = PrefetchData(ds, 3, 2)
return ds
def get_config(cifar_classnum):
logger.auto_set_dir()
......@@ -111,6 +115,7 @@ def get_config(cifar_classnum):
sess_config = get_default_sess_config(0.5)
lr = symbf.get_scalar_var('learning_rate', 1e-2, summary=True)
def lr_func(lr):
if lr < 3e-5:
raise StopTraining()
......@@ -123,7 +128,7 @@ def get_config(cifar_classnum):
StatPrinter(), ModelSaver(),
InferenceRunner(dataset_test, ClassificationError()),
StatMonitorParamSetter('learning_rate', 'val_error', lr_func,
threshold=0.001, last_k=10),
threshold=0.001, last_k=10),
]),
session_config=sess_config,
model=Model(cifar_classnum),
......
......@@ -6,7 +6,9 @@
from __future__ import print_function
import tensorflow as tf
import numpy as np
import os, cv2, argparse
import os
import cv2
import argparse
from tensorpack import *
from tensorpack.tfutils.symbolic_functions import *
......@@ -19,9 +21,11 @@ Usage:
./load-alexnet.py --load alexnet.npy --input cat.png
"""
class Model(ModelDesc):
def _get_input_vars(self):
return [InputVar(tf.float32, (None, 227, 227, 3), 'input') ]
return [InputVar(tf.float32, (None, 227, 227, 3), 'input')]
def _build_graph(self, inputs):
# img: 227x227x3
......@@ -48,6 +52,7 @@ class Model(ModelDesc):
logits = FullyConnected('fc8', l, out_dim=1000, nl=tf.identity)
prob = tf.nn.softmax(logits, name='prob')
def run_test(path, input):
param_dict = np.load(path, encoding='latin1').item()
predict_func = OfflinePredictor(PredictConfig(
......@@ -59,8 +64,8 @@ def run_test(path, input):
im = cv2.imread(input)
assert im is not None, input
im = cv2.resize(im, (227, 227))[:,:,::-1].reshape(
(1,227,227,3)).astype('float32') - 110
im = cv2.resize(im, (227, 227))[:, :, ::-1].reshape(
(1, 227, 227, 3)).astype('float32') - 110
outputs = predict_func([im])[0]
prob = outputs[0]
ret = prob.argsort()[-10:][::-1]
......
......@@ -7,7 +7,8 @@ from __future__ import print_function
import cv2
import tensorflow as tf
import numpy as np
import os, argparse
import os
import argparse
from tensorpack import *
from tensorpack.tfutils.symbolic_functions import *
......@@ -20,44 +21,47 @@ Usage:
./load-vgg16.py --load vgg16.npy --input cat.png
"""
class Model(ModelDesc):
def _get_input_vars(self):
return [InputVar(tf.float32, (None, 224, 224, 3), 'input') ]
return [InputVar(tf.float32, (None, 224, 224, 3), 'input')]
def _build_graph(self, inputs):
image = inputs[0]
with argscope(Conv2D, kernel_shape=3, nl=tf.nn.relu):
logits = (LinearWrap(image)
.Conv2D('conv1_1', 64)
.Conv2D('conv1_2', 64)
.MaxPooling('pool1', 2)
# 112
.Conv2D('conv2_1', 128)
.Conv2D('conv2_2', 128)
.MaxPooling('pool2', 2)
# 56
.Conv2D('conv3_1', 256)
.Conv2D('conv3_2', 256)
.Conv2D('conv3_3', 256)
.MaxPooling('pool3', 2)
# 28
.Conv2D('conv4_1', 512)
.Conv2D('conv4_2', 512)
.Conv2D('conv4_3', 512)
.MaxPooling('pool4', 2)
# 14
.Conv2D('conv5_1', 512)
.Conv2D('conv5_2', 512)
.Conv2D('conv5_3', 512)
.MaxPooling('pool5', 2)
# 7
.FullyConnected('fc6', 4096, nl=tf.nn.relu)
.Dropout('drop0', 0.5)
.FullyConnected('fc7', 4096, nl=tf.nn.relu)
.Dropout('drop1', 0.5)
.FullyConnected('fc8', out_dim=1000, nl=tf.identity)())
.Conv2D('conv1_1', 64)
.Conv2D('conv1_2', 64)
.MaxPooling('pool1', 2)
# 112
.Conv2D('conv2_1', 128)
.Conv2D('conv2_2', 128)
.MaxPooling('pool2', 2)
# 56
.Conv2D('conv3_1', 256)
.Conv2D('conv3_2', 256)
.Conv2D('conv3_3', 256)
.MaxPooling('pool3', 2)
# 28
.Conv2D('conv4_1', 512)
.Conv2D('conv4_2', 512)
.Conv2D('conv4_3', 512)
.MaxPooling('pool4', 2)
# 14
.Conv2D('conv5_1', 512)
.Conv2D('conv5_2', 512)
.Conv2D('conv5_3', 512)
.MaxPooling('pool5', 2)
# 7
.FullyConnected('fc6', 4096, nl=tf.nn.relu)
.Dropout('drop0', 0.5)
.FullyConnected('fc7', 4096, nl=tf.nn.relu)
.Dropout('drop1', 0.5)
.FullyConnected('fc8', out_dim=1000, nl=tf.identity)())
prob = tf.nn.softmax(logits, name='prob')
def run_test(path, input):
param_dict = np.load(path, encoding='latin1').item()
predict_func = OfflinePredictor(PredictConfig(
......@@ -70,7 +74,7 @@ def run_test(path, input):
im = cv2.imread(input)
assert im is not None, input
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
im = cv2.resize(im, (224, 224)).reshape((1,224,224,3)).astype('float32')
im = cv2.resize(im, (224, 224)).reshape((1, 224, 224, 3)).astype('float32')
im = im - 110
outputs = predict_func([im])[0]
prob = outputs[0]
......
......@@ -5,7 +5,8 @@
import numpy as np
import tensorflow as tf
import os, sys
import os
import sys
import argparse
"""
......@@ -18,12 +19,14 @@ from tensorpack import *
IMAGE_SIZE = 28
class Model(ModelDesc):
def _get_input_vars(self):
"""Define all the input variables (with type, shape, name) that'll be
fed into the graph to produce a cost. """
return [InputVar(tf.float32, (None, IMAGE_SIZE, IMAGE_SIZE), 'input'),
InputVar(tf.int32, (None,), 'label') ]
InputVar(tf.int32, (None,), 'label')]
def _build_graph(self, input_vars):
"""This function should build the model which takes the input variables
......@@ -47,19 +50,20 @@ class Model(ModelDesc):
l = MaxPooling('pool0', image, 2)
... """
logits = (LinearWrap(image) # the starting brace is only for line-breaking
.Conv2D('conv0')
.MaxPooling('pool0', 2)
.Conv2D('conv1', padding='SAME')
.Conv2D('conv2')
.MaxPooling('pool1', 2)
.Conv2D('conv3')
.FullyConnected('fc0', 512, nl=tf.nn.relu)
.Dropout('dropout', 0.5)
.FullyConnected('fc1', out_dim=10, nl=tf.identity)())
logits = (LinearWrap(image) # the starting brace is only for line-breaking
.Conv2D('conv0')
.MaxPooling('pool0', 2)
.Conv2D('conv1', padding='SAME')
.Conv2D('conv2')
.MaxPooling('pool1', 2)
.Conv2D('conv3')
.FullyConnected('fc0', 512, nl=tf.nn.relu)
.Dropout('dropout', 0.5)
.FullyConnected('fc1', out_dim=10, nl=tf.identity)())
prob = tf.nn.softmax(logits, name='prob') # a Bx10 with probabilities
cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, label) # a vector of length B with loss of each sample
cost = tf.nn.sparse_softmax_cross_entropy_with_logits(
logits, label) # a vector of length B with loss of each sample
cost = tf.reduce_mean(cost, name='cross_entropy_loss') # the average cross-entropy loss
# compute the "incorrect vector", for the callback ClassificationError to use at validation time
......@@ -83,11 +87,13 @@ class Model(ModelDesc):
summary.add_param_summary([('.*/W', ['histogram'])])
self.cost = tf.add_n([wd_cost, cost], name='cost')
def get_data():
train = BatchData(dataset.Mnist('train'), 128)
test = BatchData(dataset.Mnist('test'), 256, remainder=True)
return train, test
def get_config():
# automatically setup the directory train_log/mnist-convnet for logging
logger.auto_set_dir()
......@@ -135,4 +141,3 @@ if __name__ == '__main__':
if args.load:
config.session_init = SaverRestore(args.load)
SimpleTrainer(config).train()
......@@ -20,10 +20,12 @@ Each epoch iterates over the whole training set (4721 iterations).
Speed is about 43 it/s on TitanX.
"""
class Model(ModelDesc):
def _get_input_vars(self):
return [InputVar(tf.float32, [None, 40, 40, 3], 'input'),
InputVar(tf.int32, [None], 'label') ]
InputVar(tf.int32, [None], 'label')]
def _build_graph(self, input_vars):
image, label = input_vars
......@@ -32,16 +34,16 @@ class Model(ModelDesc):
with argscope(Conv2D, nl=BNReLU, use_bias=False):
logits = (LinearWrap(image)
.Conv2D('conv1', 24, 5, padding='VALID')
.MaxPooling('pool1', 2, padding='SAME')
.Conv2D('conv2', 32, 3, padding='VALID')
.Conv2D('conv3', 32, 3, padding='VALID')
.MaxPooling('pool2', 2, padding='SAME')
.Conv2D('conv4', 64, 3, padding='VALID')
.Dropout('drop', 0.5)
.FullyConnected('fc0', 512,
b_init=tf.constant_initializer(0.1), nl=tf.nn.relu)
.FullyConnected('linear', out_dim=10, nl=tf.identity)())
.Conv2D('conv1', 24, 5, padding='VALID')
.MaxPooling('pool1', 2, padding='SAME')
.Conv2D('conv2', 32, 3, padding='VALID')
.Conv2D('conv3', 32, 3, padding='VALID')
.MaxPooling('pool2', 2, padding='SAME')
.Conv2D('conv4', 64, 3, padding='VALID')
.Dropout('drop', 0.5)
.FullyConnected('fc0', 512,
b_init=tf.constant_initializer(0.1), nl=tf.nn.relu)
.FullyConnected('linear', out_dim=10, nl=tf.identity)())
prob = tf.nn.softmax(logits, name='output')
# compute the number of failed samples, for ClassificationError to use at test time
......@@ -58,6 +60,7 @@ class Model(ModelDesc):
add_param_summary([('.*/W', ['histogram', 'rms'])]) # monitor W
self.cost = tf.add_n([cost, wd_cost], name='cost')
def get_data():
d1 = dataset.SVHNDigit('train')
d2 = dataset.SVHNDigit('extra')
......@@ -67,20 +70,21 @@ def get_data():
augmentors = [
imgaug.Resize((40, 40)),
imgaug.Brightness(30),
imgaug.Contrast((0.5,1.5)),
imgaug.Contrast((0.5, 1.5)),
imgaug.GaussianDeform( # this is slow. only use it when you have lots of cpus
[(0.2, 0.2), (0.2, 0.8), (0.8,0.8), (0.8,0.2)],
(40,40), 0.2, 3),
[(0.2, 0.2), (0.2, 0.8), (0.8, 0.8), (0.8, 0.2)],
(40, 40), 0.2, 3),
]
data_train = AugmentImageComponent(data_train, augmentors)
data_train = BatchData(data_train, 128)
data_train = PrefetchData(data_train, 5, 5)
augmentors = [ imgaug.Resize((40, 40)) ]
augmentors = [imgaug.Resize((40, 40))]
data_test = AugmentImageComponent(data_test, augmentors)
data_test = BatchData(data_test, 128, remainder=True)
return data_train, data_test
def get_config():
logger.auto_set_dir()
......@@ -100,7 +104,7 @@ def get_config():
callbacks=Callbacks([
StatPrinter(), ModelSaver(),
InferenceRunner(data_test,
[ScalarStats('cost'), ClassificationError()])
[ScalarStats('cost'), ClassificationError()])
]),
model=Model(),
step_per_epoch=step_per_epoch,
......