Commit 233b3b90 authored by Yuxin Wu

run autopep8 over examples

parent fb2a051c
...@@ -6,11 +6,15 @@ ...@@ -6,11 +6,15 @@
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
import os, sys, re, time import os
import sys
import re
import time
import random import random
import argparse import argparse
import subprocess import subprocess
import multiprocessing, threading import multiprocessing
import threading
from collections import deque from collections import deque
from tensorpack import * from tensorpack import *
...@@ -47,6 +51,7 @@ NUM_ACTIONS = None ...@@ -47,6 +51,7 @@ NUM_ACTIONS = None
ROM_FILE = None ROM_FILE = None
METHOD = None METHOD = None
def get_player(viz=False, train=False): def get_player(viz=False, train=False):
pl = AtariPlayer(ROM_FILE, frame_skip=ACTION_REPEAT, pl = AtariPlayer(ROM_FILE, frame_skip=ACTION_REPEAT,
image_shape=IMAGE_SIZE[::-1], viz=viz, live_lost_as_eoe=train) image_shape=IMAGE_SIZE[::-1], viz=viz, live_lost_as_eoe=train)
...@@ -59,15 +64,18 @@ def get_player(viz=False, train=False): ...@@ -59,15 +64,18 @@ def get_player(viz=False, train=False):
return pl return pl
common.get_player = get_player # so that eval functions in common can use the player common.get_player = get_player # so that eval functions in common can use the player
class Model(ModelDesc): class Model(ModelDesc):
def _get_input_vars(self): def _get_input_vars(self):
if NUM_ACTIONS is None: if NUM_ACTIONS is None:
p = get_player(); del p p = get_player()
del p
return [InputVar(tf.float32, (None,) + IMAGE_SHAPE3, 'state'), return [InputVar(tf.float32, (None,) + IMAGE_SHAPE3, 'state'),
InputVar(tf.int64, (None,), 'action'), InputVar(tf.int64, (None,), 'action'),
InputVar(tf.float32, (None,), 'reward'), InputVar(tf.float32, (None,), 'reward'),
InputVar(tf.float32, (None,) + IMAGE_SHAPE3, 'next_state'), InputVar(tf.float32, (None,) + IMAGE_SHAPE3, 'next_state'),
InputVar(tf.bool, (None,), 'isOver') ] InputVar(tf.bool, (None,), 'isOver')]
def _get_DQN_prediction(self, image): def _get_DQN_prediction(self, image):
""" image: [0,255]""" """ image: [0,255]"""
...@@ -101,7 +109,7 @@ class Model(ModelDesc): ...@@ -101,7 +109,7 @@ class Model(ModelDesc):
state, action, reward, next_state, isOver = inputs state, action, reward, next_state, isOver = inputs
self.predict_value = self._get_DQN_prediction(state) self.predict_value = self._get_DQN_prediction(state)
action_onehot = tf.one_hot(action, NUM_ACTIONS, 1.0, 0.0) action_onehot = tf.one_hot(action, NUM_ACTIONS, 1.0, 0.0)
pred_action_value = tf.reduce_sum(self.predict_value * action_onehot, 1) #N, pred_action_value = tf.reduce_sum(self.predict_value * action_onehot, 1) # N,
max_pred_reward = tf.reduce_mean(tf.reduce_max( max_pred_reward = tf.reduce_mean(tf.reduce_max(
self.predict_value, 1), name='predict_reward') self.predict_value, 1), name='predict_reward')
add_moving_summary(max_pred_reward) add_moving_summary(max_pred_reward)
...@@ -125,7 +133,7 @@ class Model(ModelDesc): ...@@ -125,7 +133,7 @@ class Model(ModelDesc):
self.cost = tf.truediv(symbf.huber_loss(target - pred_action_value), self.cost = tf.truediv(symbf.huber_loss(target - pred_action_value),
tf.cast(BATCH_SIZE, tf.float32), name='cost') tf.cast(BATCH_SIZE, tf.float32), name='cost')
summary.add_param_summary([('conv.*/W', ['histogram', 'rms']), summary.add_param_summary([('conv.*/W', ['histogram', 'rms']),
('fc.*/W', ['histogram', 'rms']) ]) # monitor all W ('fc.*/W', ['histogram', 'rms'])]) # monitor all W
def update_target_param(self): def update_target_param(self):
vars = tf.trainable_variables() vars = tf.trainable_variables()
...@@ -142,6 +150,7 @@ class Model(ModelDesc): ...@@ -142,6 +150,7 @@ class Model(ModelDesc):
return [MapGradient(lambda grad: tf.clip_by_global_norm([grad], 5)[0][0]), return [MapGradient(lambda grad: tf.clip_by_global_norm([grad], 5)[0][0]),
SummaryGradient()] SummaryGradient()]
def get_config(): def get_config():
logger.auto_set_dir() logger.auto_set_dir()
...@@ -213,4 +222,3 @@ if __name__ == '__main__': ...@@ -213,4 +222,3 @@ if __name__ == '__main__':
if args.load: if args.load:
config.session_init = SaverRestore(args.load) config.session_init = SaverRestore(args.load)
QueueInputTrainer(config).train() QueueInputTrainer(config).train()
...@@ -4,7 +4,8 @@ ...@@ -4,7 +4,8 @@
# Author: Yuxin Wu <ppwwyyxxc@gmail.com> # Author: Yuxin Wu <ppwwyyxxc@gmail.com>
import numpy as np import numpy as np
import time, os import time
import os
import cv2 import cv2
from collections import deque from collections import deque
import threading import threading
...@@ -22,13 +23,15 @@ __all__ = ['AtariPlayer'] ...@@ -22,13 +23,15 @@ __all__ = ['AtariPlayer']
ROM_URL = "https://github.com/openai/atari-py/tree/master/atari_py/atari_roms" ROM_URL = "https://github.com/openai/atari-py/tree/master/atari_py/atari_roms"
_ALE_LOCK = threading.Lock() _ALE_LOCK = threading.Lock()
class AtariPlayer(RLEnvironment): class AtariPlayer(RLEnvironment):
""" """
A wrapper for atari emulator. A wrapper for atari emulator.
Will automatically restart when a real episode ends (isOver might be just Will automatically restart when a real episode ends (isOver might be just
lost of lives but not game over). lost of lives but not game over).
""" """
def __init__(self, rom_file, viz=0, height_range=(None,None),
def __init__(self, rom_file, viz=0, height_range=(None, None),
frame_skip=4, image_shape=(84, 84), nullop_start=30, frame_skip=4, image_shape=(84, 84), nullop_start=30,
live_lost_as_eoe=True): live_lost_as_eoe=True):
""" """
...@@ -84,7 +87,6 @@ class AtariPlayer(RLEnvironment): ...@@ -84,7 +87,6 @@ class AtariPlayer(RLEnvironment):
self.width, self.height = self.ale.getScreenDims() self.width, self.height = self.ale.getScreenDims()
self.actions = self.ale.getMinimalActionSet() self.actions = self.ale.getMinimalActionSet()
self.live_lost_as_eoe = live_lost_as_eoe self.live_lost_as_eoe = live_lost_as_eoe
self.frame_skip = frame_skip self.frame_skip = frame_skip
self.nullop_start = nullop_start self.nullop_start = nullop_start
...@@ -112,7 +114,7 @@ class AtariPlayer(RLEnvironment): ...@@ -112,7 +114,7 @@ class AtariPlayer(RLEnvironment):
if isinstance(self.viz, float): if isinstance(self.viz, float):
cv2.imshow(self.windowname, ret) cv2.imshow(self.windowname, ret)
time.sleep(self.viz) time.sleep(self.viz)
ret = ret[self.height_range[0]:self.height_range[1],:].astype('float32') ret = ret[self.height_range[0]:self.height_range[1], :].astype('float32')
# 0.299,0.587.0.114. same as rgb2y in torch/image # 0.299,0.587.0.114. same as rgb2y in torch/image
ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY) ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)
ret = cv2.resize(ret, self.image_shape) ret = cv2.resize(ret, self.image_shape)
...@@ -169,7 +171,7 @@ if __name__ == '__main__': ...@@ -169,7 +171,7 @@ if __name__ == '__main__':
import time import time
def benchmark(): def benchmark():
a = AtariPlayer(sys.argv[1], viz=False, height_range=(28,-8)) a = AtariPlayer(sys.argv[1], viz=False, height_range=(28, -8))
num = a.get_action_space().num_actions() num = a.get_action_space().num_actions()
rng = get_rng(num) rng = get_rng(num)
start = time.time() start = time.time()
...@@ -184,7 +186,8 @@ if __name__ == '__main__': ...@@ -184,7 +186,8 @@ if __name__ == '__main__':
print(time.time() - start) print(time.time() - start)
if len(sys.argv) == 3 and sys.argv[2] == 'benchmark': if len(sys.argv) == 3 and sys.argv[2] == 'benchmark':
import threading, multiprocessing import threading
import multiprocessing
for k in range(3): for k in range(3):
#th = multiprocessing.Process(target=benchmark) #th = multiprocessing.Process(target=benchmark)
th = threading.Thread(target=benchmark) th = threading.Thread(target=benchmark)
...@@ -193,7 +196,7 @@ if __name__ == '__main__': ...@@ -193,7 +196,7 @@ if __name__ == '__main__':
benchmark() benchmark()
else: else:
a = AtariPlayer(sys.argv[1], a = AtariPlayer(sys.argv[1],
viz=0.03, height_range=(28,-8)) viz=0.03, height_range=(28, -8))
num = a.get_action_space().num_actions() num = a.get_action_space().num_actions()
rng = get_rng(num) rng = get_rng(num)
import time import time
...@@ -204,6 +207,5 @@ if __name__ == '__main__': ...@@ -204,6 +207,5 @@ if __name__ == '__main__':
print(act) print(act)
r, o = a.action(act) r, o = a.action(act)
a.current_state() a.current_state()
#time.sleep(0.1) # time.sleep(0.1)
print(r, o) print(r, o)
...@@ -2,8 +2,10 @@ ...@@ -2,8 +2,10 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# File: common.py # File: common.py
# Author: Yuxin Wu <ppwwyyxxc@gmail.com> # Author: Yuxin Wu <ppwwyyxxc@gmail.com>
import random, time import random
import threading, multiprocessing import time
import threading
import multiprocessing
import numpy as np import numpy as np
from tqdm import tqdm from tqdm import tqdm
from six.moves import queue from six.moves import queue
...@@ -16,6 +18,7 @@ from tensorpack.utils.stats import * ...@@ -16,6 +18,7 @@ from tensorpack.utils.stats import *
global get_player global get_player
get_player = None get_player = None
def play_one_episode(player, func, verbose=False): def play_one_episode(player, func, verbose=False):
def f(s): def f(s):
spc = player.get_action_space() spc = player.get_action_space()
...@@ -27,6 +30,7 @@ def play_one_episode(player, func, verbose=False): ...@@ -27,6 +30,7 @@ def play_one_episode(player, func, verbose=False):
return act return act
return np.mean(player.play_one_episode(f)) return np.mean(player.play_one_episode(f))
def play_model(cfg): def play_model(cfg):
player = get_player(viz=0.01) player = get_player(viz=0.01)
predfunc = get_predict_func(cfg) predfunc = get_predict_func(cfg)
...@@ -34,8 +38,10 @@ def play_model(cfg): ...@@ -34,8 +38,10 @@ def play_model(cfg):
score = play_one_episode(player, predfunc) score = play_one_episode(player, predfunc)
print("Total:", score) print("Total:", score)
def eval_with_funcs(predict_funcs, nr_eval): def eval_with_funcs(predict_funcs, nr_eval):
class Worker(StoppableThread): class Worker(StoppableThread):
def __init__(self, func, queue): def __init__(self, func, queue):
super(Worker, self).__init__() super(Worker, self).__init__()
self._func = func self._func = func
...@@ -51,7 +57,7 @@ def eval_with_funcs(predict_funcs, nr_eval): ...@@ -51,7 +57,7 @@ def eval_with_funcs(predict_funcs, nr_eval):
while not self.stopped(): while not self.stopped():
try: try:
score = play_one_episode(player, self.func) score = play_one_episode(player, self.func)
#print "Score, ", score # print "Score, ", score
except RuntimeError: except RuntimeError:
return return
self.queue_put_stoppable(self.q, score) self.queue_put_stoppable(self.q, score)
...@@ -68,8 +74,10 @@ def eval_with_funcs(predict_funcs, nr_eval): ...@@ -68,8 +74,10 @@ def eval_with_funcs(predict_funcs, nr_eval):
r = q.get() r = q.get()
stat.feed(r) stat.feed(r)
logger.info("Waiting for all the workers to finish the last run...") logger.info("Waiting for all the workers to finish the last run...")
for k in threads: k.stop() for k in threads:
for k in threads: k.join() k.stop()
for k in threads:
k.join()
while q.qsize(): while q.qsize():
r = q.get() r = q.get()
stat.feed(r) stat.feed(r)
...@@ -80,13 +88,16 @@ def eval_with_funcs(predict_funcs, nr_eval): ...@@ -80,13 +88,16 @@ def eval_with_funcs(predict_funcs, nr_eval):
return (stat.average, stat.max) return (stat.average, stat.max)
return (0, 0) return (0, 0)
def eval_model_multithread(cfg, nr_eval): def eval_model_multithread(cfg, nr_eval):
func = get_predict_func(cfg) func = get_predict_func(cfg)
NR_PROC = min(multiprocessing.cpu_count() // 2, 8) NR_PROC = min(multiprocessing.cpu_count() // 2, 8)
mean, max = eval_with_funcs([func] * NR_PROC, nr_eval) mean, max = eval_with_funcs([func] * NR_PROC, nr_eval)
logger.info("Average Score: {}; Max Score: {}".format(mean, max)) logger.info("Average Score: {}; Max Score: {}".format(mean, max))
class Evaluator(Callback): class Evaluator(Callback):
def __init__(self, nr_eval, input_names, output_names): def __init__(self, nr_eval, input_names, output_names):
self.eval_episode = nr_eval self.eval_episode = nr_eval
self.input_names = input_names self.input_names = input_names
......
...@@ -13,26 +13,31 @@ from tensorpack.utils.argtools import memoized ...@@ -13,26 +13,31 @@ from tensorpack.utils.argtools import memoized
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
_CM = plt.get_cmap('jet') _CM = plt.get_cmap('jet')
def colorize(img, heatmap): def colorize(img, heatmap):
""" img: bgr, [0,255] """ img: bgr, [0,255]
heatmap: [0,1] heatmap: [0,1]
""" """
heatmap = _CM(heatmap)[:,:,[2,1,0]] * 255.0 heatmap = _CM(heatmap)[:, :, [2, 1, 0]] * 255.0
return img * 0.5 + heatmap * 0.5 return img * 0.5 + heatmap * 0.5
@memoized @memoized
def get_gaussian_map(): def get_gaussian_map():
sigma = 21 sigma = 21
gaussian_map = np.zeros((368, 368), dtype='float32') gaussian_map = np.zeros((368, 368), dtype='float32')
for x_p in range(368): for x_p in range(368):
for y_p in range(368): for y_p in range(368):
dist_sq = (x_p - 368/2) * (x_p - 368/2) + \ dist_sq = (x_p - 368 / 2) * (x_p - 368 / 2) + \
(y_p - 368/2) * (y_p - 368/2) (y_p - 368 / 2) * (y_p - 368 / 2)
exponent = dist_sq / 2.0 / (21**2) exponent = dist_sq / 2.0 / (21**2)
gaussian_map[y_p, x_p] = np.exp(-exponent) gaussian_map[y_p, x_p] = np.exp(-exponent)
return gaussian_map.reshape((1,368,368,1)) return gaussian_map.reshape((1, 368, 368, 1))
class Model(ModelDesc): class Model(ModelDesc):
def _get_input_vars(self): def _get_input_vars(self):
return [InputVar(tf.float32, (None, 368, 368, 3), 'input'), return [InputVar(tf.float32, (None, 368, 368, 3), 'input'),
InputVar(tf.float32, (None, 368, 368, 15), 'label'), InputVar(tf.float32, (None, 368, 368, 15), 'label'),
...@@ -43,7 +48,7 @@ class Model(ModelDesc): ...@@ -43,7 +48,7 @@ class Model(ModelDesc):
image = image / 256.0 - 0.5 image = image / 256.0 - 0.5
gmap = tf.constant(get_gaussian_map()) gmap = tf.constant(get_gaussian_map())
gmap = tf.pad(gmap, [[0,0],[0,1],[0,1],[0,0]]) gmap = tf.pad(gmap, [[0, 0], [0, 1], [0, 1], [0, 0]])
pool_center = AvgPooling('mappool', gmap, 9, stride=8, padding='VALID') pool_center = AvgPooling('mappool', gmap, 9, stride=8, padding='VALID')
with argscope(Conv2D, kernel_shape=3, nl=tf.nn.relu, with argscope(Conv2D, kernel_shape=3, nl=tf.nn.relu,
W_init=tf.random_normal_initializer(stddev=0.01)): W_init=tf.random_normal_initializer(stddev=0.01)):
...@@ -89,7 +94,8 @@ class Model(ModelDesc): ...@@ -89,7 +94,8 @@ class Model(ModelDesc):
out5 = add_stage(5, out4) out5 = add_stage(5, out4)
out6 = add_stage(6, out4) out6 = add_stage(6, out4)
resized_map = tf.image.resize_bilinear(out6, resized_map = tf.image.resize_bilinear(out6,
[368,368], name='resized_map') [368, 368], name='resized_map')
def run_test(model_path, img_file): def run_test(model_path, img_file):
param_dict = np.load(model_path, encoding='latin1').item() param_dict = np.load(model_path, encoding='latin1').item()
...@@ -101,9 +107,9 @@ def run_test(model_path, img_file): ...@@ -101,9 +107,9 @@ def run_test(model_path, img_file):
)) ))
im = cv2.imread(img_file, cv2.IMREAD_COLOR).astype('float32') im = cv2.imread(img_file, cv2.IMREAD_COLOR).astype('float32')
im = cv2.resize(im, (368,368)) im = cv2.resize(im, (368, 368))
out = predict_func([[im]])[0][0] out = predict_func([[im]])[0][0]
hm = out[:,:,:14].sum(axis=2) hm = out[:, :, :14].sum(axis=2)
viz = colorize(im, hm) viz = colorize(im, hm)
cv2.imwrite("output.jpg", viz) cv2.imwrite("output.jpg", viz)
......
...@@ -5,7 +5,9 @@ ...@@ -5,7 +5,9 @@
from tensorpack import ProxyDataFlow, get_rng from tensorpack import ProxyDataFlow, get_rng
class DisturbLabel(ProxyDataFlow): class DisturbLabel(ProxyDataFlow):
def __init__(self, ds, prob): def __init__(self, ds, prob):
super(DisturbLabel, self).__init__(ds) super(DisturbLabel, self).__init__(ds)
self.prob = prob self.prob = prob
...@@ -19,4 +21,3 @@ class DisturbLabel(ProxyDataFlow): ...@@ -19,4 +21,3 @@ class DisturbLabel(ProxyDataFlow):
if self.rng.rand() < self.prob: if self.rng.rand() < self.prob:
l = self.rng.choice(10) l = self.rng.choice(10)
yield [img, l] yield [img, l]
...@@ -5,7 +5,8 @@ ...@@ -5,7 +5,8 @@
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
import os, sys import os
import sys
import argparse import argparse
from tensorpack import * from tensorpack import *
...@@ -16,6 +17,7 @@ mnist_example = imp.load_source('mnist_example', ...@@ -16,6 +17,7 @@ mnist_example = imp.load_source('mnist_example',
os.path.join(os.path.dirname(__file__), '..', 'mnist-convnet.py')) os.path.join(os.path.dirname(__file__), '..', 'mnist-convnet.py'))
get_config = mnist_example.get_config get_config = mnist_example.get_config
def get_data(): def get_data():
dataset_train = BatchData(DisturbLabel(dataset.Mnist('train'), args.prob), 128) dataset_train = BatchData(DisturbLabel(dataset.Mnist('train'), args.prob), 128)
dataset_test = BatchData(dataset.Mnist('test'), 256, remainder=True) dataset_test = BatchData(dataset.Mnist('test'), 256, remainder=True)
...@@ -24,7 +26,9 @@ mnist_example.get_data = get_data ...@@ -24,7 +26,9 @@ mnist_example.get_data = get_data
IMAGE_SIZE = 28 IMAGE_SIZE = 28
class Model(mnist_example.Model): class Model(mnist_example.Model):
def _build_graph(self, input_vars): def _build_graph(self, input_vars):
image, label = input_vars image, label = input_vars
image = tf.expand_dims(image, 3) image = tf.expand_dims(image, 3)
...@@ -63,4 +67,3 @@ if __name__ == '__main__': ...@@ -63,4 +67,3 @@ if __name__ == '__main__':
if args.load: if args.load:
config.session_init = SaverRestore(args.load) config.session_init = SaverRestore(args.load)
QueueInputTrainer(config).train() QueueInputTrainer(config).train()
...@@ -16,20 +16,20 @@ import imp ...@@ -16,20 +16,20 @@ import imp
svhn_example = imp.load_source('svhn_example', svhn_example = imp.load_source('svhn_example',
os.path.join(os.path.dirname(__file__), '..', 'svhn-digit-convnet.py'))) os.path.join(os.path.dirname(__file__), '..', 'svhn-digit-convnet.py')))
Model = svhn_example.Model Model=svhn_example.Model
get_config = svhn_example.get_config get_config=svhn_example.get_config
def get_data(): def get_data():
d1 = dataset.SVHNDigit('train') d1=dataset.SVHNDigit('train')
d2 = dataset.SVHNDigit('extra') d2=dataset.SVHNDigit('extra')
data_train = RandomMixData([d1, d2]) data_train=RandomMixData([d1, d2])
data_train = DisturbLabel(data_train, args.prob) data_train=DisturbLabel(data_train, args.prob)
data_test = dataset.SVHNDigit('test') data_test=dataset.SVHNDigit('test')
augmentors = [ augmentors=[
imgaug.Resize((40, 40)), imgaug.Resize((40, 40)),
imgaug.Brightness(30), imgaug.Brightness(30),
imgaug.Contrast((0.5,1.5)), imgaug.Contrast((0.5, 1.5)),
] ]
data_train = AugmentImageComponent(data_train, augmentors) data_train = AugmentImageComponent(data_train, augmentors)
data_train = BatchData(data_train, 128) data_train = BatchData(data_train, 128)
......
...@@ -9,7 +9,8 @@ import argparse ...@@ -9,7 +9,8 @@ import argparse
import numpy as np import numpy as np
import multiprocessing import multiprocessing
import msgpack import msgpack
import os, sys import os
import sys
from tensorpack import * from tensorpack import *
from tensorpack.tfutils.symbolic_functions import * from tensorpack.tfutils.symbolic_functions import *
...@@ -69,10 +70,12 @@ BITG = 6 ...@@ -69,10 +70,12 @@ BITG = 6
TOTAL_BATCH_SIZE = 128 TOTAL_BATCH_SIZE = 128
BATCH_SIZE = 64 BATCH_SIZE = 64
class Model(ModelDesc): class Model(ModelDesc):
def _get_input_vars(self): def _get_input_vars(self):
return [InputVar(tf.float32, [None, 224, 224, 3], 'input'), return [InputVar(tf.float32, [None, 224, 224, 3], 'input'),
InputVar(tf.int32, [None], 'label') ] InputVar(tf.int32, [None], 'label')]
def _build_graph(self, input_vars): def _build_graph(self, input_vars):
image, label = input_vars image, label = input_vars
...@@ -81,6 +84,7 @@ class Model(ModelDesc): ...@@ -81,6 +84,7 @@ class Model(ModelDesc):
fw, fa, fg = get_dorefa(BITW, BITA, BITG) fw, fa, fg = get_dorefa(BITW, BITA, BITG)
# monkey-patch tf.get_variable to apply fw # monkey-patch tf.get_variable to apply fw
old_get_variable = tf.get_variable old_get_variable = tf.get_variable
def new_get_variable(name, shape=None, **kwargs): def new_get_variable(name, shape=None, **kwargs):
v = old_get_variable(name, shape, **kwargs) v = old_get_variable(name, shape, **kwargs)
# don't binarize first and last layer # don't binarize first and last layer
...@@ -156,25 +160,28 @@ class Model(ModelDesc): ...@@ -156,25 +160,28 @@ class Model(ModelDesc):
add_param_summary([('.*/W', ['histogram', 'rms'])]) add_param_summary([('.*/W', ['histogram', 'rms'])])
self.cost = tf.add_n([cost, wd_cost], name='cost') self.cost = tf.add_n([cost, wd_cost], name='cost')
def get_data(dataset_name): def get_data(dataset_name):
isTrain = dataset_name == 'train' isTrain = dataset_name == 'train'
ds = dataset.ILSVRC12(args.data, dataset_name, shuffle=isTrain) ds = dataset.ILSVRC12(args.data, dataset_name, shuffle=isTrain)
meta = dataset.ILSVRCMeta() meta = dataset.ILSVRCMeta()
pp_mean = meta.get_per_pixel_mean() pp_mean = meta.get_per_pixel_mean()
pp_mean_224 = pp_mean[16:-16,16:-16,:] pp_mean_224 = pp_mean[16:-16, 16:-16, :]
if isTrain: if isTrain:
class Resize(imgaug.ImageAugmentor): class Resize(imgaug.ImageAugmentor):
def __init__(self): def __init__(self):
self._init(locals()) self._init(locals())
def _augment(self, img, _): def _augment(self, img, _):
h, w = img.shape[:2] h, w = img.shape[:2]
size = 224 size = 224
scale = self.rng.randint(size, 308) * 1.0 / min(h, w) scale = self.rng.randint(size, 308) * 1.0 / min(h, w)
scaleX = scale * self.rng.uniform(0.85, 1.15) scaleX = scale * self.rng.uniform(0.85, 1.15)
scaleY = scale * self.rng.uniform(0.85, 1.15) scaleY = scale * self.rng.uniform(0.85, 1.15)
desSize = map(int, (max(size, min(w, scaleX * w)),\ desSize = map(int, (max(size, min(w, scaleX * w)),
max(size, min(h, scaleY * h)))) max(size, min(h, scaleY * h))))
dst = cv2.resize(img, tuple(desSize), dst = cv2.resize(img, tuple(desSize),
interpolation=cv2.INTER_CUBIC) interpolation=cv2.INTER_CUBIC)
...@@ -186,11 +193,11 @@ def get_data(dataset_name): ...@@ -186,11 +193,11 @@ def get_data(dataset_name):
imgaug.RandomApplyAug(imgaug.GaussianBlur(3), 0.5), imgaug.RandomApplyAug(imgaug.GaussianBlur(3), 0.5),
imgaug.Brightness(30, True), imgaug.Brightness(30, True),
imgaug.Gamma(), imgaug.Gamma(),
imgaug.Contrast((0.8,1.2), True), imgaug.Contrast((0.8, 1.2), True),
imgaug.RandomCrop((224, 224)), imgaug.RandomCrop((224, 224)),
imgaug.RandomApplyAug(imgaug.JpegNoise(), 0.8), imgaug.RandomApplyAug(imgaug.JpegNoise(), 0.8),
imgaug.RandomApplyAug(imgaug.GaussianDeform( imgaug.RandomApplyAug(imgaug.GaussianDeform(
[(0.2, 0.2), (0.2, 0.8), (0.8,0.8), (0.8,0.2)], [(0.2, 0.2), (0.2, 0.8), (0.8, 0.8), (0.8, 0.2)],
(224, 224), 0.2, 3), 0.1), (224, 224), 0.2, 3), 0.1),
imgaug.Flip(horiz=True), imgaug.Flip(horiz=True),
imgaug.MapImage(lambda x: x - pp_mean_224), imgaug.MapImage(lambda x: x - pp_mean_224),
...@@ -199,7 +206,7 @@ def get_data(dataset_name): ...@@ -199,7 +206,7 @@ def get_data(dataset_name):
def resize_func(im): def resize_func(im):
h, w = im.shape[:2] h, w = im.shape[:2]
scale = 256.0 / min(h, w) scale = 256.0 / min(h, w)
desSize = map(int, (max(224, min(w, scale * w)),\ desSize = map(int, (max(224, min(w, scale * w)),
max(224, min(h, scale * h)))) max(224, min(h, scale * h))))
im = cv2.resize(im, tuple(desSize), interpolation=cv2.INTER_CUBIC) im = cv2.resize(im, tuple(desSize), interpolation=cv2.INTER_CUBIC)
return im return im
...@@ -214,6 +221,7 @@ def get_data(dataset_name): ...@@ -214,6 +221,7 @@ def get_data(dataset_name):
ds = PrefetchDataZMQ(ds, min(12, multiprocessing.cpu_count())) ds = PrefetchDataZMQ(ds, min(12, multiprocessing.cpu_count()))
return ds return ds
def get_config(): def get_config():
logger.auto_set_dir() logger.auto_set_dir()
...@@ -228,7 +236,7 @@ def get_config(): ...@@ -228,7 +236,7 @@ def get_config():
optimizer=tf.train.AdamOptimizer(lr, epsilon=1e-5), optimizer=tf.train.AdamOptimizer(lr, epsilon=1e-5),
callbacks=Callbacks([ callbacks=Callbacks([
StatPrinter(), ModelSaver(), StatPrinter(), ModelSaver(),
#HumanHyperParamSetter('learning_rate'), # HumanHyperParamSetter('learning_rate'),
ScheduledHyperParamSetter( ScheduledHyperParamSetter(
'learning_rate', [(56, 2e-5), (64, 4e-6)]), 'learning_rate', [(56, 2e-5), (64, 4e-6)]),
InferenceRunner(data_test, InferenceRunner(data_test,
...@@ -241,6 +249,7 @@ def get_config(): ...@@ -241,6 +249,7 @@ def get_config():
max_epoch=100, max_epoch=100,
) )
def run_image(model, sess_init, inputs): def run_image(model, sess_init, inputs):
pred_config = PredictConfig( pred_config = PredictConfig(
model=model, model=model,
...@@ -252,13 +261,13 @@ def run_image(model, sess_init, inputs): ...@@ -252,13 +261,13 @@ def run_image(model, sess_init, inputs):
predict_func = get_predict_func(pred_config) predict_func = get_predict_func(pred_config)
meta = dataset.ILSVRCMeta() meta = dataset.ILSVRCMeta()
pp_mean = meta.get_per_pixel_mean() pp_mean = meta.get_per_pixel_mean()
pp_mean_224 = pp_mean[16:-16,16:-16,:] pp_mean_224 = pp_mean[16:-16, 16:-16, :]
words = meta.get_synset_words_1000() words = meta.get_synset_words_1000()
def resize_func(im): def resize_func(im):
h, w = im.shape[:2] h, w = im.shape[:2]
scale = 256.0 / min(h, w) scale = 256.0 / min(h, w)
desSize = map(int, (max(224, min(w, scale * w)),\ desSize = map(int, (max(224, min(w, scale * w)),
max(224, min(h, scale * h)))) max(224, min(h, scale * h))))
im = cv2.resize(im, tuple(desSize), interpolation=cv2.INTER_CUBIC) im = cv2.resize(im, tuple(desSize), interpolation=cv2.INTER_CUBIC)
return im return im
...@@ -272,7 +281,7 @@ def run_image(model, sess_init, inputs): ...@@ -272,7 +281,7 @@ def run_image(model, sess_init, inputs):
img = cv2.imread(f).astype('float32') img = cv2.imread(f).astype('float32')
assert img is not None assert img is not None
img = transformers.augment(img)[np.newaxis, :,:,:] img = transformers.augment(img)[np.newaxis, :, :, :]
outputs = predict_func([img])[0] outputs = predict_func([img])[0]
prob = outputs[0] prob = outputs[0]
ret = prob.argsort()[-10:][::-1] ret = prob.argsort()[-10:][::-1]
......
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
import tensorflow as tf import tensorflow as tf
from tensorpack.utils.argtools import memoized from tensorpack.utils.argtools import memoized
@memoized @memoized
def get_dorefa(bitW, bitA, bitG): def get_dorefa(bitW, bitA, bitG):
""" """
...@@ -15,7 +16,7 @@ def get_dorefa(bitW, bitA, bitG): ...@@ -15,7 +16,7 @@ def get_dorefa(bitW, bitA, bitG):
G = tf.get_default_graph() G = tf.get_default_graph()
def quantize(x, k): def quantize(x, k):
n = float(2**k-1) n = float(2**k - 1)
with G.gradient_override_map({"Floor": "Identity"}): with G.gradient_override_map({"Floor": "Identity"}):
return tf.floor(x * n + 0.5) / n return tf.floor(x * n + 0.5) / n
...@@ -39,11 +40,11 @@ def get_dorefa(bitW, bitA, bitG): ...@@ -39,11 +40,11 @@ def get_dorefa(bitW, bitA, bitG):
def grad_fg(op, x): def grad_fg(op, x):
rank = x.get_shape().ndims rank = x.get_shape().ndims
assert rank is not None assert rank is not None
maxx = tf.reduce_max(tf.abs(x), list(range(1,rank)), keep_dims=True) maxx = tf.reduce_max(tf.abs(x), list(range(1, rank)), keep_dims=True)
x = x / maxx x = x / maxx
n = float(2**bitG-1) n = float(2**bitG - 1)
x = x * 0.5 + 0.5 + tf.random_uniform( x = x * 0.5 + 0.5 + tf.random_uniform(
tf.shape(x), minval=-0.5/n, maxval=0.5/n) tf.shape(x), minval=-0.5 / n, maxval=0.5 / n)
x = tf.clip_by_value(x, 0.0, 1.0) x = tf.clip_by_value(x, 0.0, 1.0)
x = quantize(x, bitG) - 0.5 x = quantize(x, bitG) - 0.5
return x * maxx * 2 return x * maxx * 2
...@@ -54,4 +55,3 @@ def get_dorefa(bitW, bitA, bitG): ...@@ -54,4 +55,3 @@ def get_dorefa(bitW, bitA, bitG):
with G.gradient_override_map({"Identity": "FGGrad"}): with G.gradient_override_map({"Identity": "FGGrad"}):
return tf.identity(x) return tf.identity(x)
return fw, fa, fg return fw, fa, fg
...@@ -40,10 +40,12 @@ BITW = 1 ...@@ -40,10 +40,12 @@ BITW = 1
BITA = 2 BITA = 2
BITG = 4 BITG = 4
class Model(ModelDesc): class Model(ModelDesc):
def _get_input_vars(self): def _get_input_vars(self):
return [InputVar(tf.float32, [None, 40, 40, 3], 'input'), return [InputVar(tf.float32, [None, 40, 40, 3], 'input'),
InputVar(tf.int32, [None], 'label') ] InputVar(tf.int32, [None], 'label')]
def _build_graph(self, input_vars): def _build_graph(self, input_vars):
image, label = input_vars image, label = input_vars
...@@ -52,6 +54,7 @@ class Model(ModelDesc): ...@@ -52,6 +54,7 @@ class Model(ModelDesc):
fw, fa, fg = get_dorefa(BITW, BITA, BITG) fw, fa, fg = get_dorefa(BITW, BITA, BITG)
# monkey-patch tf.get_variable to apply fw # monkey-patch tf.get_variable to apply fw
old_get_variable = tf.get_variable old_get_variable = tf.get_variable
def new_get_variable(name, shape=None, **kwargs): def new_get_variable(name, shape=None, **kwargs):
v = old_get_variable(name, shape, **kwargs) v = old_get_variable(name, shape, **kwargs)
# don't binarize first and last layer # don't binarize first and last layer
...@@ -62,9 +65,9 @@ class Model(ModelDesc): ...@@ -62,9 +65,9 @@ class Model(ModelDesc):
return fw(v) return fw(v)
tf.get_variable = new_get_variable tf.get_variable = new_get_variable
def cabs(x): def cabs(x):
return tf.minimum(1.0, tf.abs(x), name='cabs') return tf.minimum(1.0, tf.abs(x), name='cabs')
def activate(x): def activate(x):
return fa(cabs(x)) return fa(cabs(x))
...@@ -122,6 +125,7 @@ class Model(ModelDesc): ...@@ -122,6 +125,7 @@ class Model(ModelDesc):
add_param_summary([('.*/W', ['histogram', 'rms'])]) add_param_summary([('.*/W', ['histogram', 'rms'])])
self.cost = tf.add_n([cost, wd_cost], name='cost') self.cost = tf.add_n([cost, wd_cost], name='cost')
def get_config(): def get_config():
logger.auto_set_dir() logger.auto_set_dir()
...@@ -134,8 +138,8 @@ def get_config(): ...@@ -134,8 +138,8 @@ def get_config():
augmentors = [ augmentors = [
imgaug.Resize((40, 40)), imgaug.Resize((40, 40)),
imgaug.Brightness(30), imgaug.Brightness(30),
imgaug.Contrast((0.5,1.5)), imgaug.Contrast((0.5, 1.5)),
#imgaug.GaussianDeform( # this is slow but helpful. only use it when you have lots of cpus # imgaug.GaussianDeform( # this is slow but helpful. only use it when you have lots of cpus
#[(0.2, 0.2), (0.2, 0.8), (0.8,0.8), (0.8,0.2)], #[(0.2, 0.2), (0.2, 0.8), (0.8,0.8), (0.8,0.2)],
#(40,40), 0.2, 3), #(40,40), 0.2, 3),
] ]
...@@ -144,7 +148,7 @@ def get_config(): ...@@ -144,7 +148,7 @@ def get_config():
data_train = PrefetchDataZMQ(data_train, 5) data_train = PrefetchDataZMQ(data_train, 5)
step_per_epoch = data_train.size() step_per_epoch = data_train.size()
augmentors = [ imgaug.Resize((40, 40)) ] augmentors = [imgaug.Resize((40, 40))]
data_test = AugmentImageComponent(data_test, augmentors) data_test = AugmentImageComponent(data_test, augmentors)
data_test = BatchData(data_test, 128, remainder=True) data_test = BatchData(data_test, 128, remainder=True)
......
...@@ -5,8 +5,10 @@ ...@@ -5,8 +5,10 @@
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
import glob, pickle import glob
import os, sys import pickle
import os
import sys
import argparse import argparse
import cv2 import cv2
...@@ -32,15 +34,17 @@ CFG.SHAPE = 64 ...@@ -32,15 +34,17 @@ CFG.SHAPE = 64
CFG.BATCH = 128 CFG.BATCH = 128
CFG.Z_DIM = 100 CFG.Z_DIM = 100
class Model(ModelDesc): class Model(ModelDesc):
def _get_input_vars(self): def _get_input_vars(self):
return [InputVar(tf.float32, (None, CFG.SHAPE, CFG.SHAPE, 3), 'input') ] return [InputVar(tf.float32, (None, CFG.SHAPE, CFG.SHAPE, 3), 'input')]
def generator(self, z): def generator(self, z):
""" return a image generated from z""" """ return a image generated from z"""
nf = 64 nf = 64
l = FullyConnected('fc0', z, nf * 8 * 4 * 4, nl=tf.identity) l = FullyConnected('fc0', z, nf * 8 * 4 * 4, nl=tf.identity)
l = tf.reshape(l, [-1, 4, 4, nf*8]) l = tf.reshape(l, [-1, 4, 4, nf * 8])
l = BNReLU(l) l = BNReLU(l)
with argscope(Deconv2D, nl=BNReLU, kernel_shape=4, stride=2): with argscope(Deconv2D, nl=BNReLU, kernel_shape=4, stride=2):
l = Deconv2D('deconv1', l, [8, 8, nf * 4]) l = Deconv2D('deconv1', l, [8, 8, nf * 4])
...@@ -57,11 +61,11 @@ class Model(ModelDesc): ...@@ -57,11 +61,11 @@ class Model(ModelDesc):
argscope(LeakyReLU, alpha=0.2): argscope(LeakyReLU, alpha=0.2):
l = (LinearWrap(imgs) l = (LinearWrap(imgs)
.Conv2D('conv0', nf, nl=LeakyReLU) .Conv2D('conv0', nf, nl=LeakyReLU)
.Conv2D('conv1', nf*2) .Conv2D('conv1', nf * 2)
.BatchNorm('bn1').LeakyReLU() .BatchNorm('bn1').LeakyReLU()
.Conv2D('conv2', nf*4) .Conv2D('conv2', nf * 4)
.BatchNorm('bn2').LeakyReLU() .BatchNorm('bn2').LeakyReLU()
.Conv2D('conv3', nf*8) .Conv2D('conv3', nf * 8)
.BatchNorm('bn3').LeakyReLU() .BatchNorm('bn3').LeakyReLU()
.FullyConnected('fct', 1, nl=tf.identity)()) .FullyConnected('fct', 1, nl=tf.identity)())
return l return l
...@@ -88,16 +92,18 @@ class Model(ModelDesc): ...@@ -88,16 +92,18 @@ class Model(ModelDesc):
self.g_vars = [v for v in all_vars if v.name.startswith('gen/')] self.g_vars = [v for v in all_vars if v.name.startswith('gen/')]
self.d_vars = [v for v in all_vars if v.name.startswith('discrim/')] self.d_vars = [v for v in all_vars if v.name.startswith('discrim/')]
def get_data(): def get_data():
datadir = CFG.data datadir = CFG.data
imgs = glob.glob(datadir + '/*.jpg') imgs = glob.glob(datadir + '/*.jpg')
ds = ImageFromFile(imgs, channel=3, shuffle=True) ds = ImageFromFile(imgs, channel=3, shuffle=True)
augs = [ imgaug.CenterCrop(140), imgaug.Resize(64) ] augs = [imgaug.CenterCrop(140), imgaug.Resize(64)]
ds = AugmentImageComponent(ds, augs) ds = AugmentImageComponent(ds, augs)
ds = BatchData(ds, CFG.BATCH) ds = BatchData(ds, CFG.BATCH)
ds = PrefetchDataZMQ(ds, 1) ds = PrefetchDataZMQ(ds, 1)
return ds return ds
def get_config(): def get_config():
logger.auto_set_dir() logger.auto_set_dir()
dataset = get_data() dataset = get_data()
...@@ -114,6 +120,7 @@ def get_config(): ...@@ -114,6 +120,7 @@ def get_config():
max_epoch=200, max_epoch=200,
) )
def sample(model_path): def sample(model_path):
pred = PredictConfig( pred = PredictConfig(
session_init=get_model_loader(model_path), session_init=get_model_loader(model_path),
...@@ -124,7 +131,7 @@ def sample(model_path): ...@@ -124,7 +131,7 @@ def sample(model_path):
for o in pred.get_result(): for o in pred.get_result():
o, zs = o[0] + 1, o[1] o, zs = o[0] + 1, o[1]
o = o * 128.0 o = o * 128.0
o = o[:,:,:,::-1] o = o[:, :, :, ::-1]
viz = next(build_patch_list(o, nr_row=10, nr_col=10, viz=True)) viz = next(build_patch_list(o, nr_row=10, nr_col=10, viz=True))
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -11,7 +11,9 @@ from tensorpack import (FeedfreeTrainer, TowerContext, ...@@ -11,7 +11,9 @@ from tensorpack import (FeedfreeTrainer, TowerContext,
from tensorpack.tfutils.summary import summary_moving_average, add_moving_summary from tensorpack.tfutils.summary import summary_moving_average, add_moving_summary
from tensorpack.dataflow import DataFlow from tensorpack.dataflow import DataFlow
class GANTrainer(FeedfreeTrainer): class GANTrainer(FeedfreeTrainer):
def __init__(self, config): def __init__(self, config):
self._input_method = QueueInput(config.dataset) self._input_method = QueueInput(config.dataset)
super(GANTrainer, self).__init__(config) super(GANTrainer, self).__init__(config)
...@@ -33,14 +35,18 @@ class GANTrainer(FeedfreeTrainer): ...@@ -33,14 +35,18 @@ class GANTrainer(FeedfreeTrainer):
def run_step(self): def run_step(self):
self.sess.run(self.train_op) self.sess.run(self.train_op)
class RandomZData(DataFlow): class RandomZData(DataFlow):
def __init__(self, shape): def __init__(self, shape):
super(RandomZData, self).__init__() super(RandomZData, self).__init__()
self.shape = shape self.shape = shape
def get_data(self): def get_data(self):
while True: while True:
yield [np.random.uniform(-1, 1, size=self.shape)] yield [np.random.uniform(-1, 1, size=self.shape)]
def build_GAN_losses(vecpos, vecneg): def build_GAN_losses(vecpos, vecneg):
""" """
:param vecpos, vecneg: output of the discriminator (logits) for real :param vecpos, vecneg: output of the discriminator (logits) for real
......
...@@ -5,8 +5,10 @@ ...@@ -5,8 +5,10 @@
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
import glob, pickle import glob
import os, sys import pickle
import os
import sys
import argparse import argparse
import cv2 import cv2
...@@ -38,10 +40,12 @@ OUT_CH = 3 ...@@ -38,10 +40,12 @@ OUT_CH = 3
LAMBDA = 100 LAMBDA = 100
NF = 64 # number of filter NF = 64 # number of filter
class Model(ModelDesc): class Model(ModelDesc):
def _get_input_vars(self): def _get_input_vars(self):
return [InputVar(tf.float32, (None, SHAPE, SHAPE, IN_CH), 'input') , return [InputVar(tf.float32, (None, SHAPE, SHAPE, IN_CH), 'input'),
InputVar(tf.float32, (None, SHAPE, SHAPE, OUT_CH), 'output') ] InputVar(tf.float32, (None, SHAPE, SHAPE, OUT_CH), 'output')]
def generator(self, imgs): def generator(self, imgs):
# imgs: input: 256x256xch # imgs: input: 256x256xch
...@@ -52,31 +56,31 @@ class Model(ModelDesc): ...@@ -52,31 +56,31 @@ class Model(ModelDesc):
with argscope(Conv2D, kernel_shape=4, stride=2, with argscope(Conv2D, kernel_shape=4, stride=2,
nl=lambda x, name: LeakyReLU(BatchNorm('bn', x), name=name)): nl=lambda x, name: LeakyReLU(BatchNorm('bn', x), name=name)):
e1 = Conv2D('conv1', imgs, NF, nl=LeakyReLU) e1 = Conv2D('conv1', imgs, NF, nl=LeakyReLU)
e2 = Conv2D('conv2', e1, NF*2) e2 = Conv2D('conv2', e1, NF * 2)
e3 = Conv2D('conv3', e2, NF*4) e3 = Conv2D('conv3', e2, NF * 4)
e4 = Conv2D('conv4', e3, NF*8) e4 = Conv2D('conv4', e3, NF * 8)
e5 = Conv2D('conv5', e4, NF*8) e5 = Conv2D('conv5', e4, NF * 8)
e6 = Conv2D('conv6', e5, NF*8) e6 = Conv2D('conv6', e5, NF * 8)
e7 = Conv2D('conv7', e6, NF*8) e7 = Conv2D('conv7', e6, NF * 8)
e8 = Conv2D('conv8', e7, NF*8, nl=BNReLU) # 1x1 e8 = Conv2D('conv8', e7, NF * 8, nl=BNReLU) # 1x1
with argscope(Deconv2D, nl=BNReLU, kernel_shape=4, stride=2): with argscope(Deconv2D, nl=BNReLU, kernel_shape=4, stride=2):
return (LinearWrap(e8) return (LinearWrap(e8)
.Deconv2D('deconv1', NF*8) .Deconv2D('deconv1', NF * 8)
.Dropout() .Dropout()
.ConcatWith(3, e7) .ConcatWith(3, e7)
.Deconv2D('deconv2', NF*8) .Deconv2D('deconv2', NF * 8)
.Dropout() .Dropout()
.ConcatWith(3, e6) .ConcatWith(3, e6)
.Deconv2D('deconv3', NF*8) .Deconv2D('deconv3', NF * 8)
.Dropout() .Dropout()
.ConcatWith(3, e5) .ConcatWith(3, e5)
.Deconv2D('deconv4', NF*8) .Deconv2D('deconv4', NF * 8)
.ConcatWith(3, e4) .ConcatWith(3, e4)
.Deconv2D('deconv5', NF*4) .Deconv2D('deconv5', NF * 4)
.ConcatWith(3, e3) .ConcatWith(3, e3)
.Deconv2D('deconv6', NF*2) .Deconv2D('deconv6', NF * 2)
.ConcatWith(3, e2) .ConcatWith(3, e2)
.Deconv2D('deconv7', NF*1) .Deconv2D('deconv7', NF * 1)
.ConcatWith(3, e1) .ConcatWith(3, e1)
.Deconv2D('deconv8', OUT_CH, nl=tf.tanh)()) .Deconv2D('deconv8', OUT_CH, nl=tf.tanh)())
...@@ -86,11 +90,11 @@ class Model(ModelDesc): ...@@ -86,11 +90,11 @@ class Model(ModelDesc):
with argscope(Conv2D, nl=tf.identity, kernel_shape=4, stride=2): with argscope(Conv2D, nl=tf.identity, kernel_shape=4, stride=2):
l = (LinearWrap(l) l = (LinearWrap(l)
.Conv2D('conv0', NF, nl=LeakyReLU) .Conv2D('conv0', NF, nl=LeakyReLU)
.Conv2D('conv1', NF*2) .Conv2D('conv1', NF * 2)
.BatchNorm('bn1').LeakyReLU() .BatchNorm('bn1').LeakyReLU()
.Conv2D('conv2', NF*4) .Conv2D('conv2', NF * 4)
.BatchNorm('bn2').LeakyReLU() .BatchNorm('bn2').LeakyReLU()
.Conv2D('conv3', NF*8, stride=1, padding='VALID') .Conv2D('conv3', NF * 8, stride=1, padding='VALID')
.BatchNorm('bn3').LeakyReLU() .BatchNorm('bn3').LeakyReLU()
.Conv2D('convlast', 1, stride=1, padding='VALID')()) .Conv2D('convlast', 1, stride=1, padding='VALID')())
return l return l
...@@ -128,33 +132,36 @@ class Model(ModelDesc): ...@@ -128,33 +132,36 @@ class Model(ModelDesc):
self.g_vars = [v for v in all_vars if v.name.startswith('gen/')] self.g_vars = [v for v in all_vars if v.name.startswith('gen/')]
self.d_vars = [v for v in all_vars if v.name.startswith('discrim/')] self.d_vars = [v for v in all_vars if v.name.startswith('discrim/')]
def split_input(img): def split_input(img):
""" """
img: an image with shape (s, 2s, 3) img: an image with shape (s, 2s, 3)
:return: [input, output] :return: [input, output]
""" """
s = img.shape[0] s = img.shape[0]
input, output = img[:,:s,:], img[:,s:,:] input, output = img[:, :s, :], img[:, s:, :]
if args.mode == 'BtoA': if args.mode == 'BtoA':
input, output = output, input input, output = output, input
if IN_CH == 1: if IN_CH == 1:
input = cv2.cvtColor(input, cv2.COLOR_RGB2GRAY)[:,:,np.newaxis] input = cv2.cvtColor(input, cv2.COLOR_RGB2GRAY)[:, :, np.newaxis]
if OUT_CH == 1: if OUT_CH == 1:
output = cv2.cvtColor(output, cv2.COLOR_RGB2GRAY)[:,:,np.newaxis] output = cv2.cvtColor(output, cv2.COLOR_RGB2GRAY)[:, :, np.newaxis]
return [input, output] return [input, output]
def get_data(): def get_data():
datadir = args.data datadir = args.data
# assume each image is 512x256 split to left and right # assume each image is 512x256 split to left and right
imgs = glob.glob(os.path.join(datadir, '*.jpg')) imgs = glob.glob(os.path.join(datadir, '*.jpg'))
ds = ImageFromFile(imgs, channel=3, shuffle=True) ds = ImageFromFile(imgs, channel=3, shuffle=True)
ds = MapData(ds, lambda dp: split_input(dp[0])) ds = MapData(ds, lambda dp: split_input(dp[0]))
augs = [ imgaug.Resize(286), imgaug.RandomCrop(256) ] augs = [imgaug.Resize(286), imgaug.RandomCrop(256)]
ds = AugmentImageComponents(ds, augs, (0, 1)) ds = AugmentImageComponents(ds, augs, (0, 1))
ds = BatchData(ds, BATCH) ds = BatchData(ds, BATCH)
ds = PrefetchDataZMQ(ds, 1) ds = PrefetchDataZMQ(ds, 1)
return ds return ds
def get_config(): def get_config():
logger.auto_set_dir() logger.auto_set_dir()
dataset = get_data() dataset = get_data()
...@@ -171,6 +178,7 @@ def get_config(): ...@@ -171,6 +178,7 @@ def get_config():
max_epoch=300, max_epoch=300,
) )
def sample(datadir, model_path): def sample(datadir, model_path):
pred = PredictConfig( pred = PredictConfig(
session_init=get_model_loader(model_path), session_init=get_model_loader(model_path),
...@@ -184,7 +192,7 @@ def sample(datadir, model_path): ...@@ -184,7 +192,7 @@ def sample(datadir, model_path):
pred = SimpleDatasetPredictor(pred, ds) pred = SimpleDatasetPredictor(pred, ds)
for o in pred.get_result(): for o in pred.get_result():
o = o[0][:,:,:,::-1] o = o[0][:, :, :, ::-1]
viz = next(build_patch_list(o, nr_row=3, nr_col=2, viz=True)) viz = next(build_patch_list(o, nr_row=3, nr_col=2, viz=True))
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -5,7 +5,8 @@ ...@@ -5,7 +5,8 @@
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
import os, sys import os
import sys
import cv2 import cv2
import argparse import argparse
...@@ -16,9 +17,11 @@ from GAN import GANTrainer, build_GAN_losses ...@@ -16,9 +17,11 @@ from GAN import GANTrainer, build_GAN_losses
BATCH = 128 BATCH = 128
class Model(ModelDesc): class Model(ModelDesc):
def _get_input_vars(self): def _get_input_vars(self):
return [InputVar(tf.float32, (None, 28, 28), 'input') ] return [InputVar(tf.float32, (None, 28, 28), 'input')]
def generator(self, z): def generator(self, z):
l = FullyConnected('fc0', z, 1024, nl=BNReLU) l = FullyConnected('fc0', z, 1024, nl=BNReLU)
...@@ -54,7 +57,7 @@ class Model(ModelDesc): ...@@ -54,7 +57,7 @@ class Model(ModelDesc):
prior_prob = tf.constant([0.1] * 10, name='prior_prob') prior_prob = tf.constant([0.1] * 10, name='prior_prob')
# assume first 10 is categorical # assume first 10 is categorical
ids = tf.multinomial(tf.zeros([BATCH, 10]), num_samples=1)[:,0] ids = tf.multinomial(tf.zeros([BATCH, 10]), num_samples=1)[:, 0]
zc = tf.one_hot(ids, 10, name='zc_train') zc = tf.one_hot(ids, 10, name='zc_train')
zc = tf.placeholder_with_default(zc, [None, 10], name='zc') zc = tf.placeholder_with_default(zc, [None, 10], name='zc')
...@@ -89,11 +92,13 @@ class Model(ModelDesc): ...@@ -89,11 +92,13 @@ class Model(ModelDesc):
self.g_vars = [v for v in all_vars if v.name.startswith('gen/')] self.g_vars = [v for v in all_vars if v.name.startswith('gen/')]
self.d_vars = [v for v in all_vars if v.name.startswith('discrim/')] self.d_vars = [v for v in all_vars if v.name.startswith('discrim/')]
def get_data(): def get_data():
ds = ConcatData([dataset.Mnist('train'), dataset.Mnist('test')]) ds = ConcatData([dataset.Mnist('train'), dataset.Mnist('test')])
ds = BatchData(ds, BATCH) ds = BatchData(ds, BATCH)
return ds return ds
def get_config(): def get_config():
logger.auto_set_dir() logger.auto_set_dir()
dataset = get_data() dataset = get_data()
...@@ -110,6 +115,7 @@ def get_config(): ...@@ -110,6 +115,7 @@ def get_config():
max_epoch=100, max_epoch=100,
) )
def sample(model_path): def sample(model_path):
pred = OfflinePredictor(PredictConfig( pred = OfflinePredictor(PredictConfig(
session_init=get_model_loader(model_path), session_init=get_model_loader(model_path),
...@@ -143,4 +149,3 @@ if __name__ == '__main__': ...@@ -143,4 +149,3 @@ if __name__ == '__main__':
if args.load: if args.load:
config.session_init = SaverRestore(args.load) config.session_init = SaverRestore(args.load)
GANTrainer(config).train() GANTrainer(config).train()
...@@ -8,16 +8,19 @@ import tensorflow as tf ...@@ -8,16 +8,19 @@ import tensorflow as tf
import argparse import argparse
import numpy as np import numpy as np
from six.moves import zip from six.moves import zip
import os, sys import os
import sys
from tensorpack import * from tensorpack import *
from tensorpack.tfutils.symbolic_functions import * from tensorpack.tfutils.symbolic_functions import *
from tensorpack.tfutils.summary import * from tensorpack.tfutils.summary import *
class Model(ModelDesc): class Model(ModelDesc):
def _get_input_vars(self): def _get_input_vars(self):
return [InputVar(tf.float32, [None, None, None, 3], 'image'), return [InputVar(tf.float32, [None, None, None, 3], 'image'),
InputVar(tf.int32, [None, None, None], 'edgemap') ] InputVar(tf.int32, [None, None, None], 'edgemap')]
def _build_graph(self, input_vars): def _build_graph(self, input_vars):
image, edgemap = input_vars image, edgemap = input_vars
...@@ -69,10 +72,10 @@ class Model(ModelDesc): ...@@ -69,10 +72,10 @@ class Model(ModelDesc):
use_bias=False, nl=tf.identity) use_bias=False, nl=tf.identity)
costs = [] costs = []
for idx, b in enumerate([b1, b2, b3, b4, b5, final_map]): for idx, b in enumerate([b1, b2, b3, b4, b5, final_map]):
output = tf.nn.sigmoid(b, name='output{}'.format(idx+1)) output = tf.nn.sigmoid(b, name='output{}'.format(idx + 1))
xentropy = class_balanced_sigmoid_cross_entropy( xentropy = class_balanced_sigmoid_cross_entropy(
b, edgemap, b, edgemap,
name='xentropy{}'.format(idx+1)) name='xentropy{}'.format(idx + 1))
costs.append(xentropy) costs.append(xentropy)
# some magic threshold # some magic threshold
...@@ -91,13 +94,15 @@ class Model(ModelDesc): ...@@ -91,13 +94,15 @@ class Model(ModelDesc):
self.cost = tf.add_n(costs, name='cost') self.cost = tf.add_n(costs, name='cost')
def get_gradient_processor(self): def get_gradient_processor(self):
return [ScaleGradient([('convfcweight.*', 0.1), ('conv5_.*', 5)]) ] return [ScaleGradient([('convfcweight.*', 0.1), ('conv5_.*', 5)])]
def get_data(name): def get_data(name):
isTrain = name == 'train' isTrain = name == 'train'
ds = dataset.BSDS500(name, shuffle=True) ds = dataset.BSDS500(name, shuffle=True)
class CropMultiple16(imgaug.ImageAugmentor): class CropMultiple16(imgaug.ImageAugmentor):
def _get_augment_params(self, img): def _get_augment_params(self, img):
newh = img.shape[0] // 16 * 16 newh = img.shape[0] // 16 * 16
neww = img.shape[1] // 16 * 16 neww = img.shape[1] // 16 * 16
...@@ -110,11 +115,11 @@ def get_data(name): ...@@ -110,11 +115,11 @@ def get_data(name):
def _augment(self, img, param): def _augment(self, img, param):
h0, w0, newh, neww = param h0, w0, newh, neww = param
return img[h0:h0+newh,w0:w0+neww] return img[h0:h0 + newh, w0:w0 + neww]
if isTrain: if isTrain:
shape_aug = [ shape_aug = [
imgaug.RandomResize(xrange=(0.7,1.5), yrange=(0.7,1.5), imgaug.RandomResize(xrange=(0.7, 1.5), yrange=(0.7, 1.5),
aspect_ratio_thres=0.15), aspect_ratio_thres=0.15),
imgaug.RotationAndCropValid(90), imgaug.RotationAndCropValid(90),
CropMultiple16(), CropMultiple16(),
...@@ -128,15 +133,15 @@ def get_data(name): ...@@ -128,15 +133,15 @@ def get_data(name):
ds = AugmentImageComponents(ds, shape_aug, (0, 1)) ds = AugmentImageComponents(ds, shape_aug, (0, 1))
def f(m): def f(m):
m[m>=0.50] = 1 m[m >= 0.50] = 1
m[m<0.50] = 0 m[m < 0.50] = 0
return m return m
ds = MapDataComponent(ds, f, 1) ds = MapDataComponent(ds, f, 1)
if isTrain: if isTrain:
augmentors = [ augmentors = [
imgaug.Brightness(63, clip=False), imgaug.Brightness(63, clip=False),
imgaug.Contrast((0.4,1.5)), imgaug.Contrast((0.4, 1.5)),
] ]
ds = AugmentImageComponent(ds, augmentors) ds = AugmentImageComponent(ds, augmentors)
ds = BatchDataByShape(ds, 8, idx=0) ds = BatchDataByShape(ds, 8, idx=0)
...@@ -145,6 +150,7 @@ def get_data(name): ...@@ -145,6 +150,7 @@ def get_data(name):
ds = BatchData(ds, 1) ds = BatchData(ds, 1)
return ds return ds
def view_data(): def view_data():
ds = RepeatedData(get_data('train'), -1) ds = RepeatedData(get_data('train'), -1)
ds.reset_state() ds.reset_state()
...@@ -156,6 +162,7 @@ def view_data(): ...@@ -156,6 +162,7 @@ def view_data():
cv2.imshow("edge", edgemap) cv2.imshow("edge", edgemap)
cv2.waitKey(1000) cv2.waitKey(1000)
def get_config(): def get_config():
logger.auto_set_dir() logger.auto_set_dir()
dataset_train = get_data('train') dataset_train = get_data('train')
...@@ -178,6 +185,7 @@ def get_config(): ...@@ -178,6 +185,7 @@ def get_config():
max_epoch=100, max_epoch=100,
) )
def run(model_path, image_path, output): def run(model_path, image_path, output):
pred_config = PredictConfig( pred_config = PredictConfig(
model=Model(), model=Model(),
...@@ -193,7 +201,7 @@ def run(model_path, image_path, output): ...@@ -193,7 +201,7 @@ def run(model_path, image_path, output):
for k in range(6): for k in range(6):
pred = outputs[k][0] pred = outputs[k][0]
cv2.imwrite("out{}.png".format( cv2.imwrite("out{}.png".format(
'-fused' if k == 5 else str(k+1)), pred * 255) '-fused' if k == 5 else str(k + 1)), pred * 255)
else: else:
pred = outputs[5][0] pred = outputs[5][0]
cv2.imwrite(output, pred * 255) cv2.imwrite(output, pred * 255)
......
...@@ -27,10 +27,12 @@ This config reaches 71% single-crop validation accuracy after 150k steps with 6 ...@@ -27,10 +27,12 @@ This config reaches 71% single-crop validation accuracy after 150k steps with 6
Learning rate may need a different schedule for different number of GPUs (because batch size will be different). Learning rate may need a different schedule for different number of GPUs (because batch size will be different).
""" """
class Model(ModelDesc): class Model(ModelDesc):
def _get_input_vars(self): def _get_input_vars(self):
return [InputVar(tf.float32, [None, INPUT_SHAPE, INPUT_SHAPE, 3], 'input'), return [InputVar(tf.float32, [None, INPUT_SHAPE, INPUT_SHAPE, 3], 'input'),
InputVar(tf.int32, [None], 'label') ] InputVar(tf.int32, [None], 'label')]
def _build_graph(self, input_vars): def _build_graph(self, input_vars):
image, label = input_vars image, label = input_vars
...@@ -117,6 +119,7 @@ class Model(ModelDesc): ...@@ -117,6 +119,7 @@ class Model(ModelDesc):
add_param_summary([('.*/W', ['histogram'])]) # monitor W add_param_summary([('.*/W', ['histogram'])]) # monitor W
self.cost = tf.add_n([cost, wd_cost], name='cost') self.cost = tf.add_n([cost, wd_cost], name='cost')
def get_data(train_or_test): def get_data(train_or_test):
isTrain = train_or_test == 'train' isTrain = train_or_test == 'train'
ds = dataset.ILSVRC12(args.data, train_or_test, shuffle=True if isTrain else False) ds = dataset.ILSVRC12(args.data, train_or_test, shuffle=True if isTrain else False)
...@@ -128,7 +131,7 @@ def get_data(train_or_test): ...@@ -128,7 +131,7 @@ def get_data(train_or_test):
augmentors = [ augmentors = [
imgaug.Resize((256, 256)), imgaug.Resize((256, 256)),
imgaug.Brightness(30, False), imgaug.Brightness(30, False),
imgaug.Contrast((0.8,1.2), True), imgaug.Contrast((0.8, 1.2), True),
imgaug.MapImage(lambda x: x - pp_mean), imgaug.MapImage(lambda x: x - pp_mean),
imgaug.RandomCrop((224, 224)), imgaug.RandomCrop((224, 224)),
imgaug.Flip(horiz=True), imgaug.Flip(horiz=True),
...@@ -166,7 +169,7 @@ def get_config(): ...@@ -166,7 +169,7 @@ def get_config():
ScheduledHyperParamSetter('learning_rate', ScheduledHyperParamSetter('learning_rate',
[(8, 0.03), (14, 0.02), (17, 5e-3), [(8, 0.03), (14, 0.02), (17, 5e-3),
(19, 3e-3), (24, 1e-3), (26, 2e-4), (19, 3e-3), (24, 1e-3), (26, 2e-4),
(30, 5e-5) ]) (30, 5e-5)])
]), ]),
session_config=get_default_sess_config(0.99), session_config=get_default_sess_config(0.99),
model=Model(), model=Model(),
......
...@@ -32,10 +32,12 @@ NR_GPU = 8 ...@@ -32,10 +32,12 @@ NR_GPU = 8
BATCH_SIZE = TOTAL_BATCH_SIZE // NR_GPU BATCH_SIZE = TOTAL_BATCH_SIZE // NR_GPU
INPUT_SHAPE = 299 INPUT_SHAPE = 299
class Model(ModelDesc): class Model(ModelDesc):
def _get_input_vars(self): def _get_input_vars(self):
return [InputVar(tf.float32, [None, INPUT_SHAPE, INPUT_SHAPE, 3], 'input'), return [InputVar(tf.float32, [None, INPUT_SHAPE, INPUT_SHAPE, 3], 'input'),
InputVar(tf.int32, [None], 'label') ] InputVar(tf.int32, [None], 'label')]
def _build_graph(self, input_vars): def _build_graph(self, input_vars):
image, label = input_vars image, label = input_vars
...@@ -62,22 +64,22 @@ class Model(ModelDesc): ...@@ -62,22 +64,22 @@ class Model(ModelDesc):
def proj_77(l, ch_r, ch): def proj_77(l, ch_r, ch):
return (LinearWrap(l) return (LinearWrap(l)
.Conv2D('conv77r', ch_r, 1) .Conv2D('conv77r', ch_r, 1)
.Conv2D('conv77a', ch_r, [1,7]) .Conv2D('conv77a', ch_r, [1, 7])
.Conv2D('conv77b', ch, [7,1])()) .Conv2D('conv77b', ch, [7, 1])())
def proj_277(l, ch_r, ch): def proj_277(l, ch_r, ch):
return (LinearWrap(l) return (LinearWrap(l)
.Conv2D('conv277r', ch_r, 1) .Conv2D('conv277r', ch_r, 1)
.Conv2D('conv277aa', ch_r, [7,1]) .Conv2D('conv277aa', ch_r, [7, 1])
.Conv2D('conv277ab', ch_r, [1,7]) .Conv2D('conv277ab', ch_r, [1, 7])
.Conv2D('conv277ba', ch_r, [7,1]) .Conv2D('conv277ba', ch_r, [7, 1])
.Conv2D('conv277bb', ch, [1,7])()) .Conv2D('conv277bb', ch, [1, 7])())
with argscope(Conv2D, nl=BNReLU, use_bias=False),\ with argscope(Conv2D, nl=BNReLU, use_bias=False),\
argscope(BatchNorm, decay=0.9997, epsilon=1e-3): argscope(BatchNorm, decay=0.9997, epsilon=1e-3):
l = (LinearWrap(image) l = (LinearWrap(image)
.Conv2D('conv0', 32, 3, stride=2, padding='VALID') #299 .Conv2D('conv0', 32, 3, stride=2, padding='VALID') # 299
.Conv2D('conv1', 32, 3, padding='VALID') #149 .Conv2D('conv1', 32, 3, padding='VALID') # 149
.Conv2D('conv2', 64, 3, padding='SAME') # 147 .Conv2D('conv2', 64, 3, padding='SAME') # 147
.MaxPooling('pool2', 3, 2) .MaxPooling('pool2', 3, 2)
.Conv2D('conv3', 80, 1, padding='SAME') # 73 .Conv2D('conv3', 80, 1, padding='SAME') # 73
...@@ -194,6 +196,7 @@ class Model(ModelDesc): ...@@ -194,6 +196,7 @@ class Model(ModelDesc):
self.cost = tf.add_n([0.4 * loss1, loss2, wd_cost], name='cost') self.cost = tf.add_n([0.4 * loss1, loss2, wd_cost], name='cost')
def get_data(train_or_test): def get_data(train_or_test):
isTrain = train_or_test == 'train' isTrain = train_or_test == 'train'
...@@ -205,15 +208,17 @@ def get_data(train_or_test): ...@@ -205,15 +208,17 @@ def get_data(train_or_test):
if isTrain: if isTrain:
class Resize(imgaug.ImageAugmentor): class Resize(imgaug.ImageAugmentor):
def __init__(self): def __init__(self):
self._init(locals()) self._init(locals())
def _augment(self, img, _): def _augment(self, img, _):
h, w = img.shape[:2] h, w = img.shape[:2]
size = 299 size = 299
scale = self.rng.randint(size, 340) * 1.0 / min(h, w) scale = self.rng.randint(size, 340) * 1.0 / min(h, w)
scaleX = scale * self.rng.uniform(0.85, 1.15) scaleX = scale * self.rng.uniform(0.85, 1.15)
scaleY = scale * self.rng.uniform(0.85, 1.15) scaleY = scale * self.rng.uniform(0.85, 1.15)
desSize = map(int, (max(size, min(w, scaleX * w)),\ desSize = map(int, (max(size, min(w, scaleX * w)),
max(size, min(h, scaleY * h)))) max(size, min(h, scaleY * h))))
dst = cv2.resize(img, tuple(desSize), interpolation=cv2.INTER_CUBIC) dst = cv2.resize(img, tuple(desSize), interpolation=cv2.INTER_CUBIC)
return dst return dst
...@@ -224,11 +229,11 @@ def get_data(train_or_test): ...@@ -224,11 +229,11 @@ def get_data(train_or_test):
imgaug.RandomApplyAug(imgaug.GaussianBlur(3), 0.5), imgaug.RandomApplyAug(imgaug.GaussianBlur(3), 0.5),
imgaug.Brightness(30, True), imgaug.Brightness(30, True),
imgaug.Gamma(), imgaug.Gamma(),
imgaug.Contrast((0.8,1.2), True), imgaug.Contrast((0.8, 1.2), True),
imgaug.RandomCrop((299, 299)), imgaug.RandomCrop((299, 299)),
imgaug.RandomApplyAug(imgaug.JpegNoise(), 0.8), imgaug.RandomApplyAug(imgaug.JpegNoise(), 0.8),
imgaug.RandomApplyAug(imgaug.GaussianDeform( imgaug.RandomApplyAug(imgaug.GaussianDeform(
[(0.2, 0.2), (0.2, 0.8), (0.8,0.8), (0.8,0.2)], [(0.2, 0.2), (0.2, 0.8), (0.8, 0.8), (0.8, 0.2)],
(299, 299), 0.2, 3), 0.1), (299, 299), 0.2, 3), 0.1),
imgaug.Flip(horiz=True), imgaug.Flip(horiz=True),
imgaug.MapImage(lambda x: x - pp_mean_299), imgaug.MapImage(lambda x: x - pp_mean_299),
...@@ -237,7 +242,7 @@ def get_data(train_or_test): ...@@ -237,7 +242,7 @@ def get_data(train_or_test):
def resize_func(im): def resize_func(im):
h, w = im.shape[:2] h, w = im.shape[:2]
scale = 340.0 / min(h, w) scale = 340.0 / min(h, w)
desSize = map(int, (max(299, min(w, scale * w)),\ desSize = map(int, (max(299, min(w, scale * w)),
max(299, min(h, scale * h)))) max(299, min(h, scale * h))))
im = cv2.resize(im, tuple(desSize), interpolation=cv2.INTER_CUBIC) im = cv2.resize(im, tuple(desSize), interpolation=cv2.INTER_CUBIC)
return im return im
......
...@@ -5,7 +5,10 @@ ...@@ -5,7 +5,10 @@
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
import os, sys, re, time import os
import sys
import re
import time
import random import random
import argparse import argparse
import six import six
...@@ -23,6 +26,7 @@ ENV_NAME = None ...@@ -23,6 +26,7 @@ ENV_NAME = None
from common import play_one_episode from common import play_one_episode
def get_player(dumpdir=None): def get_player(dumpdir=None):
pl = GymEnv(ENV_NAME, dumpdir=dumpdir, auto_restart=False) pl = GymEnv(ENV_NAME, dumpdir=dumpdir, auto_restart=False)
pl = MapPlayerState(pl, lambda img: cv2.resize(img, IMAGE_SIZE[::-1])) pl = MapPlayerState(pl, lambda img: cv2.resize(img, IMAGE_SIZE[::-1]))
...@@ -33,12 +37,14 @@ def get_player(dumpdir=None): ...@@ -33,12 +37,14 @@ def get_player(dumpdir=None):
pl = HistoryFramePlayer(pl, FRAME_HISTORY) pl = HistoryFramePlayer(pl, FRAME_HISTORY)
return pl return pl
class Model(ModelDesc): class Model(ModelDesc):
def _get_input_vars(self): def _get_input_vars(self):
assert NUM_ACTIONS is not None assert NUM_ACTIONS is not None
return [InputVar(tf.float32, (None,) + IMAGE_SHAPE3, 'state'), return [InputVar(tf.float32, (None,) + IMAGE_SHAPE3, 'state'),
InputVar(tf.int32, (None,), 'action'), InputVar(tf.int32, (None,), 'action'),
InputVar(tf.float32, (None,), 'futurereward') ] InputVar(tf.float32, (None,), 'futurereward')]
def _get_NN_prediction(self, image): def _get_NN_prediction(self, image):
image = image / 255.0 image = image / 255.0
...@@ -61,6 +67,7 @@ class Model(ModelDesc): ...@@ -61,6 +67,7 @@ class Model(ModelDesc):
policy = self._get_NN_prediction(state) policy = self._get_NN_prediction(state)
self.logits = tf.nn.softmax(policy, name='logits') self.logits = tf.nn.softmax(policy, name='logits')
def run_submission(cfg, output, nr): def run_submission(cfg, output, nr):
player = get_player(dumpdir=output) player = get_player(dumpdir=output)
predfunc = get_predict_func(cfg) predfunc = get_predict_func(cfg)
...@@ -71,6 +78,7 @@ def run_submission(cfg, output, nr): ...@@ -71,6 +78,7 @@ def run_submission(cfg, output, nr):
score = play_one_episode(player, predfunc) score = play_one_episode(player, predfunc)
print("Score:", score) print("Score:", score)
def do_submit(output): def do_submit(output):
gym.upload(output, api_key='xxx') gym.upload(output, api_key='xxx')
...@@ -87,7 +95,8 @@ if __name__ == '__main__': ...@@ -87,7 +95,8 @@ if __name__ == '__main__':
ENV_NAME = args.env ENV_NAME = args.env
assert ENV_NAME assert ENV_NAME
logger.info("Environment Name: {}".format(ENV_NAME)) logger.info("Environment Name: {}".format(ENV_NAME))
p = get_player(); del p # set NUM_ACTIONS p = get_player()
del p # set NUM_ACTIONS
if args.gpu: if args.gpu:
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
......
...@@ -5,11 +5,15 @@ ...@@ -5,11 +5,15 @@
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
import os, sys, re, time import os
import sys
import re
import time
import random import random
import uuid import uuid
import argparse import argparse
import multiprocessing, threading import multiprocessing
import threading
from collections import deque from collections import deque
import six import six
from six.moves import queue from six.moves import queue
...@@ -42,8 +46,10 @@ EVALUATE_PROC = min(multiprocessing.cpu_count() // 2, 20) ...@@ -42,8 +46,10 @@ EVALUATE_PROC = min(multiprocessing.cpu_count() // 2, 20)
NUM_ACTIONS = None NUM_ACTIONS = None
ENV_NAME = None ENV_NAME = None
def get_player(viz=False, train=False, dumpdir=None): def get_player(viz=False, train=False, dumpdir=None):
pl = GymEnv(ENV_NAME, dumpdir=dumpdir) pl = GymEnv(ENV_NAME, dumpdir=dumpdir)
def func(img): def func(img):
return cv2.resize(img, IMAGE_SIZE[::-1]) return cv2.resize(img, IMAGE_SIZE[::-1])
pl = MapPlayerState(pl, func) pl = MapPlayerState(pl, func)
...@@ -58,16 +64,20 @@ def get_player(viz=False, train=False, dumpdir=None): ...@@ -58,16 +64,20 @@ def get_player(viz=False, train=False, dumpdir=None):
return pl return pl
common.get_player = get_player common.get_player = get_player
class MySimulatorWorker(SimulatorProcess): class MySimulatorWorker(SimulatorProcess):
def _build_player(self): def _build_player(self):
return get_player(train=True) return get_player(train=True)
class Model(ModelDesc): class Model(ModelDesc):
def _get_input_vars(self): def _get_input_vars(self):
assert NUM_ACTIONS is not None assert NUM_ACTIONS is not None
return [InputVar(tf.float32, (None,) + IMAGE_SHAPE3, 'state'), return [InputVar(tf.float32, (None,) + IMAGE_SHAPE3, 'state'),
InputVar(tf.int64, (None,), 'action'), InputVar(tf.int64, (None,), 'action'),
InputVar(tf.float32, (None,), 'futurereward') ] InputVar(tf.float32, (None,), 'futurereward')]
def _get_NN_prediction(self, image): def _get_NN_prediction(self, image):
image = image / 255.0 image = image / 255.0
...@@ -122,11 +132,13 @@ class Model(ModelDesc): ...@@ -122,11 +132,13 @@ class Model(ModelDesc):
return [MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.1)), return [MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.1)),
SummaryGradient()] SummaryGradient()]
class MySimulatorMaster(SimulatorMaster, Callback): class MySimulatorMaster(SimulatorMaster, Callback):
def __init__(self, pipe_c2s, pipe_s2c, model): def __init__(self, pipe_c2s, pipe_s2c, model):
super(MySimulatorMaster, self).__init__(pipe_c2s, pipe_s2c) super(MySimulatorMaster, self).__init__(pipe_c2s, pipe_s2c)
self.M = model self.M = model
self.queue = queue.Queue(maxsize=BATCH_SIZE*8*2) self.queue = queue.Queue(maxsize=BATCH_SIZE * 8 * 2)
def _setup_graph(self): def _setup_graph(self):
self.sess = self.trainer.sess self.sess = self.trainer.sess
...@@ -172,6 +184,7 @@ class MySimulatorMaster(SimulatorMaster, Callback): ...@@ -172,6 +184,7 @@ class MySimulatorMaster(SimulatorMaster, Callback):
else: else:
client.memory = [] client.memory = []
def get_config(): def get_config():
logger.auto_set_dir() logger.auto_set_dir()
M = Model() M = Model()
...@@ -218,7 +231,8 @@ if __name__ == '__main__': ...@@ -218,7 +231,8 @@ if __name__ == '__main__':
ENV_NAME = args.env ENV_NAME = args.env
assert ENV_NAME assert ENV_NAME
p = get_player(); del p # set NUM_ACTIONS p = get_player()
del p # set NUM_ACTIONS
if args.gpu: if args.gpu:
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
...@@ -239,11 +253,11 @@ if __name__ == '__main__': ...@@ -239,11 +253,11 @@ if __name__ == '__main__':
if args.gpu: if args.gpu:
nr_gpu = get_nr_gpu() nr_gpu = get_nr_gpu()
if nr_gpu > 1: if nr_gpu > 1:
predict_tower = range(nr_gpu)[-nr_gpu//2:] predict_tower = range(nr_gpu)[-nr_gpu // 2:]
else: else:
predict_tower = [0] predict_tower = [0]
PREDICTOR_THREAD = len(predict_tower) * PREDICTOR_THREAD_PER_GPU PREDICTOR_THREAD = len(predict_tower) * PREDICTOR_THREAD_PER_GPU
train_tower = range(nr_gpu)[:-nr_gpu//2] or [0] train_tower = range(nr_gpu)[:-nr_gpu // 2] or [0]
logger.info("[BA3C] Train on gpu {} and infer on gpu {}".format( logger.info("[BA3C] Train on gpu {} and infer on gpu {}".format(
','.join(map(str, train_tower)), ','.join(map(str, predict_tower)))) ','.join(map(str, train_tower)), ','.join(map(str, predict_tower))))
trainer = AsyncMultiGPUTrainer trainer = AsyncMultiGPUTrainer
......
...@@ -30,14 +30,16 @@ This model uses the whole training set instead of a train-val split. ...@@ -30,14 +30,16 @@ This model uses the whole training set instead of a train-val split.
BATCH_SIZE = 128 BATCH_SIZE = 128
NUM_UNITS = None NUM_UNITS = None
class Model(ModelDesc): class Model(ModelDesc):
def __init__(self, n): def __init__(self, n):
super(Model, self).__init__() super(Model, self).__init__()
self.n = n self.n = n
def _get_input_vars(self): def _get_input_vars(self):
return [InputVar(tf.float32, [None, 32, 32, 3], 'input'), return [InputVar(tf.float32, [None, 32, 32, 3], 'input'),
InputVar(tf.int32, [None], 'label') ] InputVar(tf.int32, [None], 'label')]
def _build_graph(self, input_vars): def _build_graph(self, input_vars):
image, label = input_vars image, label = input_vars
...@@ -60,7 +62,7 @@ class Model(ModelDesc): ...@@ -60,7 +62,7 @@ class Model(ModelDesc):
c2 = Conv2D('conv2', c1, out_channel) c2 = Conv2D('conv2', c1, out_channel)
if increase_dim: if increase_dim:
l = AvgPooling('pool', l, 2) l = AvgPooling('pool', l, 2)
l = tf.pad(l, [[0,0], [0,0], [0,0], [in_channel//2, in_channel//2]]) l = tf.pad(l, [[0, 0], [0, 0], [0, 0], [in_channel // 2, in_channel // 2]])
l = c2 + l l = c2 + l
return l return l
...@@ -104,6 +106,7 @@ class Model(ModelDesc): ...@@ -104,6 +106,7 @@ class Model(ModelDesc):
add_param_summary([('.*/W', ['histogram'])]) # monitor W add_param_summary([('.*/W', ['histogram'])]) # monitor W
self.cost = tf.add_n([cost, wd_cost], name='cost') self.cost = tf.add_n([cost, wd_cost], name='cost')
def get_data(train_or_test): def get_data(train_or_test):
isTrain = train_or_test == 'train' isTrain = train_or_test == 'train'
ds = dataset.Cifar10(train_or_test) ds = dataset.Cifar10(train_or_test)
...@@ -125,6 +128,7 @@ def get_data(train_or_test): ...@@ -125,6 +128,7 @@ def get_data(train_or_test):
ds = PrefetchData(ds, 3, 2) ds = PrefetchData(ds, 3, 2)
return ds return ds
def get_config(): def get_config():
logger.auto_set_dir() logger.auto_set_dir()
......
...@@ -26,10 +26,12 @@ TOTAL_BATCH_SIZE = 256 ...@@ -26,10 +26,12 @@ TOTAL_BATCH_SIZE = 256
INPUT_SHAPE = 224 INPUT_SHAPE = 224
DEPTH = None DEPTH = None
class Model(ModelDesc): class Model(ModelDesc):
def _get_input_vars(self): def _get_input_vars(self):
return [InputVar(tf.float32, [None, INPUT_SHAPE, INPUT_SHAPE, 3], 'input'), return [InputVar(tf.float32, [None, INPUT_SHAPE, INPUT_SHAPE, 3], 'input'),
InputVar(tf.int32, [None], 'label') ] InputVar(tf.int32, [None], 'label')]
def _build_graph(self, input_vars): def _build_graph(self, input_vars):
image, label = input_vars image, label = input_vars
...@@ -80,10 +82,10 @@ class Model(ModelDesc): ...@@ -80,10 +82,10 @@ class Model(ModelDesc):
return l return l
cfg = { cfg = {
18: ([2,2,2,2], basicblock), 18: ([2, 2, 2, 2], basicblock),
34: ([3,4,6,3], basicblock), 34: ([3, 4, 6, 3], basicblock),
50: ([3,4,6,3], bottleneck), 50: ([3, 4, 6, 3], bottleneck),
101: ([3,4,23,3], bottleneck) 101: ([3, 4, 23, 3], bottleneck)
} }
defs, block_func = cfg[DEPTH] defs, block_func = cfg[DEPTH]
...@@ -113,6 +115,7 @@ class Model(ModelDesc): ...@@ -113,6 +115,7 @@ class Model(ModelDesc):
add_moving_summary(loss, wd_cost) add_moving_summary(loss, wd_cost)
self.cost = tf.add_n([loss, wd_cost], name='cost') self.cost = tf.add_n([loss, wd_cost], name='cost')
def get_data(train_or_test): def get_data(train_or_test):
isTrain = train_or_test == 'train' isTrain = train_or_test == 'train'
...@@ -128,12 +131,13 @@ def get_data(train_or_test): ...@@ -128,12 +131,13 @@ def get_data(train_or_test):
crop 8%~100% of the original image crop 8%~100% of the original image
See `Going Deeper with Convolutions` by Google. See `Going Deeper with Convolutions` by Google.
""" """
def _augment(self, img, _): def _augment(self, img, _):
h, w = img.shape[:2] h, w = img.shape[:2]
area = h * w area = h * w
for _ in range(10): for _ in range(10):
targetArea = self.rng.uniform(0.08, 1.0) * area targetArea = self.rng.uniform(0.08, 1.0) * area
aspectR = self.rng.uniform(0.75,1.333) aspectR = self.rng.uniform(0.75, 1.333)
ww = int(np.sqrt(targetArea * aspectR)) ww = int(np.sqrt(targetArea * aspectR))
hh = int(np.sqrt(targetArea / aspectR)) hh = int(np.sqrt(targetArea / aspectR))
if self.rng.uniform() < 0.5: if self.rng.uniform() < 0.5:
...@@ -141,10 +145,10 @@ def get_data(train_or_test): ...@@ -141,10 +145,10 @@ def get_data(train_or_test):
if hh <= h and ww <= w: if hh <= h and ww <= w:
x1 = 0 if w == ww else self.rng.randint(0, w - ww) x1 = 0 if w == ww else self.rng.randint(0, w - ww)
y1 = 0 if h == hh else self.rng.randint(0, h - hh) y1 = 0 if h == hh else self.rng.randint(0, h - hh)
out = img[y1:y1+hh,x1:x1+ww] out = img[y1:y1 + hh, x1:x1 + ww]
out = cv2.resize(out, (224,224), interpolation=cv2.INTER_CUBIC) out = cv2.resize(out, (224, 224), interpolation=cv2.INTER_CUBIC)
return out return out
out = cv2.resize(img, (224,224), interpolation=cv2.INTER_CUBIC) out = cv2.resize(img, (224, 224), interpolation=cv2.INTER_CUBIC)
return out return out
augmentors = [ augmentors = [
...@@ -155,9 +159,9 @@ def get_data(train_or_test): ...@@ -155,9 +159,9 @@ def get_data(train_or_test):
imgaug.Saturation(0.4), imgaug.Saturation(0.4),
imgaug.Lighting(0.1, imgaug.Lighting(0.1,
eigval=[0.2175, 0.0188, 0.0045], eigval=[0.2175, 0.0188, 0.0045],
eigvec=[[ -0.5675, 0.7192, 0.4009], eigvec=[[-0.5675, 0.7192, 0.4009],
[ -0.5808, -0.0045, -0.8140], [-0.5808, -0.0045, -0.8140],
[ -0.5836, -0.6948, 0.4203]] [-0.5836, -0.6948, 0.4203]]
)]), )]),
imgaug.Clip(), imgaug.Clip(),
imgaug.Flip(horiz=True), imgaug.Flip(horiz=True),
...@@ -175,6 +179,7 @@ def get_data(train_or_test): ...@@ -175,6 +179,7 @@ def get_data(train_or_test):
ds = PrefetchDataZMQ(ds, min(12, multiprocessing.cpu_count())) ds = PrefetchDataZMQ(ds, min(12, multiprocessing.cpu_count()))
return ds return ds
def get_config(): def get_config():
# prepare dataset # prepare dataset
dataset_train = get_data('train') dataset_train = get_data('train')
...@@ -198,6 +203,7 @@ def get_config(): ...@@ -198,6 +203,7 @@ def get_config():
max_epoch=110, max_epoch=110,
) )
def eval_on_ILSVRC12(model_file, data_dir): def eval_on_ILSVRC12(model_file, data_dir):
ds = get_data('val') ds = get_data('val')
pred_config = PredictConfig( pred_config = PredictConfig(
......
...@@ -7,7 +7,8 @@ ...@@ -7,7 +7,8 @@
import cv2 import cv2
import tensorflow as tf import tensorflow as tf
import argparse import argparse
import os, re import os
import re
import numpy as np import numpy as np
import six import six
from six.moves import zip from six.moves import zip
...@@ -22,7 +23,9 @@ from tensorpack.dataflow.dataset import ILSVRCMeta ...@@ -22,7 +23,9 @@ from tensorpack.dataflow.dataset import ILSVRCMeta
MODEL_DEPTH = None MODEL_DEPTH = None
class Model(ModelDesc): class Model(ModelDesc):
def _get_input_vars(self): def _get_input_vars(self):
return [InputVar(tf.float32, [None, 224, 224, 3], 'input'), return [InputVar(tf.float32, [None, 224, 224, 3], 'input'),
InputVar(tf.int32, [None], 'label')] InputVar(tf.int32, [None], 'label')]
...@@ -64,9 +67,9 @@ class Model(ModelDesc): ...@@ -64,9 +67,9 @@ class Model(ModelDesc):
return l return l
cfg = { cfg = {
50: ([3,4,6,3]), 50: ([3, 4, 6, 3]),
101: ([3,4,23,3]), 101: ([3, 4, 23, 3]),
152: ([3,8,36,3]) 152: ([3, 8, 36, 3])
} }
defs = cfg[MODEL_DEPTH] defs = cfg[MODEL_DEPTH]
...@@ -74,7 +77,7 @@ class Model(ModelDesc): ...@@ -74,7 +77,7 @@ class Model(ModelDesc):
W_init=variance_scaling_initializer(mode='FAN_OUT')): W_init=variance_scaling_initializer(mode='FAN_OUT')):
# tensorflow with padding=SAME will by default pad [2,3] here. # tensorflow with padding=SAME will by default pad [2,3] here.
# but caffe conv with stride will pad [3,3] # but caffe conv with stride will pad [3,3]
image = tf.pad(image, [[0,0],[3,3],[3,3],[0,0]]) image = tf.pad(image, [[0, 0], [3, 3], [3, 3], [0, 0]])
fc1000 = (LinearWrap(image) fc1000 = (LinearWrap(image)
.Conv2D('conv0', 64, 7, stride=2, nl=BNReLU, padding='VALID') .Conv2D('conv0', 64, 7, stride=2, nl=BNReLU, padding='VALID')
.MaxPooling('pool0', shape=3, stride=2, padding='SAME') .MaxPooling('pool0', shape=3, stride=2, padding='SAME')
...@@ -89,16 +92,17 @@ class Model(ModelDesc): ...@@ -89,16 +92,17 @@ class Model(ModelDesc):
nr_wrong = prediction_incorrect(fc1000, label, name='wrong-top1') nr_wrong = prediction_incorrect(fc1000, label, name='wrong-top1')
nr_wrong = prediction_incorrect(fc1000, label, 5, name='wrong-top5') nr_wrong = prediction_incorrect(fc1000, label, 5, name='wrong-top5')
def get_inference_augmentor(): def get_inference_augmentor():
# load ResNet mean from Kaiming: # load ResNet mean from Kaiming:
#from tensorpack.utils.loadcaffe import get_caffe_pb #from tensorpack.utils.loadcaffe import get_caffe_pb
#obj = get_caffe_pb().BlobProto() #obj = get_caffe_pb().BlobProto()
#obj.ParseFromString(open('ResNet_mean.binaryproto').read()) # obj.ParseFromString(open('ResNet_mean.binaryproto').read())
#pp_mean_224 = np.array(obj.data).reshape(3, 224, 224).transpose(1,2,0) #pp_mean_224 = np.array(obj.data).reshape(3, 224, 224).transpose(1,2,0)
meta = ILSVRCMeta() meta = ILSVRCMeta()
pp_mean = meta.get_per_pixel_mean() pp_mean = meta.get_per_pixel_mean()
pp_mean_224 = pp_mean[16:-16,16:-16,:] pp_mean_224 = pp_mean[16:-16, 16:-16, :]
transformers = imgaug.AugmentorList([ transformers = imgaug.AugmentorList([
imgaug.ResizeShortestEdge(256), imgaug.ResizeShortestEdge(256),
...@@ -107,6 +111,7 @@ def get_inference_augmentor(): ...@@ -107,6 +111,7 @@ def get_inference_augmentor():
]) ])
return transformers return transformers
def run_test(params, input): def run_test(params, input):
pred_config = PredictConfig( pred_config = PredictConfig(
model=Model(), model=Model(),
...@@ -119,7 +124,7 @@ def run_test(params, input): ...@@ -119,7 +124,7 @@ def run_test(params, input):
prepro = get_inference_augmentor() prepro = get_inference_augmentor()
im = cv2.imread(input).astype('float32') im = cv2.imread(input).astype('float32')
im = prepro.augment(im) im = prepro.augment(im)
im = np.reshape( im, (1, 224, 224, 3)) im = np.reshape(im, (1, 224, 224, 3))
outputs = predict_func([im]) outputs = predict_func([im])
prob = outputs[0] prob = outputs[0]
...@@ -128,6 +133,7 @@ def run_test(params, input): ...@@ -128,6 +133,7 @@ def run_test(params, input):
meta = ILSVRCMeta().get_synset_words_1000() meta = ILSVRCMeta().get_synset_words_1000()
print([meta[k] for k in ret]) print([meta[k] for k in ret])
def eval_on_ILSVRC12(params, data_dir): def eval_on_ILSVRC12(params, data_dir):
ds = dataset.ILSVRC12(data_dir, 'val', shuffle=False, dir_structure='train') ds = dataset.ILSVRC12(data_dir, 'val', shuffle=False, dir_structure='train')
ds = AugmentImageComponent(ds, get_inference_augmentor()) ds = AugmentImageComponent(ds, get_inference_augmentor())
...@@ -147,6 +153,7 @@ def eval_on_ILSVRC12(params, data_dir): ...@@ -147,6 +153,7 @@ def eval_on_ILSVRC12(params, data_dir):
print("Top1 Error: {}".format(acc1.ratio)) print("Top1 Error: {}".format(acc1.ratio))
print("Top5 Error: {}".format(acc5.ratio)) print("Top5 Error: {}".format(acc5.ratio))
def name_conversion(caffe_layer_name): def name_conversion(caffe_layer_name):
""" Convert a caffe parameter name to a tensorflow parameter name as """ Convert a caffe parameter name to a tensorflow parameter name as
defined in the above model """ defined in the above model """
...@@ -178,7 +185,7 @@ def name_conversion(caffe_layer_name): ...@@ -178,7 +185,7 @@ def name_conversion(caffe_layer_name):
layer_id = re.search('_branch[0-9]([a-z])/', caffe_layer_name).group(1) layer_id = re.search('_branch[0-9]([a-z])/', caffe_layer_name).group(1)
layer_id = ord(layer_id) - ord('a') + 1 layer_id = ord(layer_id) - ord('a') + 1
TYPE_DICT = {'res':'conv', 'bn':'bn'} TYPE_DICT = {'res': 'conv', 'bn': 'bn'}
tf_name = caffe_layer_name[caffe_layer_name.index('/'):] tf_name = caffe_layer_name[caffe_layer_name.index('/'):]
layer_type = TYPE_DICT[layer_type] + \ layer_type = TYPE_DICT[layer_type] + \
......
...@@ -25,6 +25,7 @@ Model = cifar_example.Model ...@@ -25,6 +25,7 @@ Model = cifar_example.Model
BATCH_SIZE = 128 BATCH_SIZE = 128
def get_data(train_or_test): def get_data(train_or_test):
isTrain = train_or_test == 'train' isTrain = train_or_test == 'train'
pp_mean = dataset.SVHNDigit.get_per_pixel_mean() pp_mean = dataset.SVHNDigit.get_per_pixel_mean()
...@@ -39,9 +40,9 @@ def get_data(train_or_test): ...@@ -39,9 +40,9 @@ def get_data(train_or_test):
augmentors = [ augmentors = [
imgaug.CenterPaste((40, 40)), imgaug.CenterPaste((40, 40)),
imgaug.Brightness(10), imgaug.Brightness(10),
imgaug.Contrast((0.8,1.2)), imgaug.Contrast((0.8, 1.2)),
imgaug.GaussianDeform( # this is slow. without it, can only reach 1.9% error imgaug.GaussianDeform( # this is slow. without it, can only reach 1.9% error
[(0.2, 0.2), (0.2, 0.8), (0.8,0.8), (0.8,0.2)], [(0.2, 0.2), (0.2, 0.8), (0.8, 0.8), (0.8, 0.2)],
(40, 40), 0.2, 3), (40, 40), 0.2, 3),
imgaug.RandomCrop((32, 32)), imgaug.RandomCrop((32, 32)),
imgaug.MapImage(lambda x: x - pp_mean), imgaug.MapImage(lambda x: x - pp_mean),
...@@ -56,6 +57,7 @@ def get_data(train_or_test): ...@@ -56,6 +57,7 @@ def get_data(train_or_test):
ds = PrefetchData(ds, 5, 5) ds = PrefetchData(ds, 5, 5)
return ds return ds
def get_config(): def get_config():
logger.auto_set_dir() logger.auto_set_dir()
...@@ -72,7 +74,7 @@ def get_config(): ...@@ -72,7 +74,7 @@ def get_config():
StatPrinter(), StatPrinter(),
ModelSaver(), ModelSaver(),
InferenceRunner(dataset_test, InferenceRunner(dataset_test,
[ScalarStats('cost'), ClassificationError() ]), [ScalarStats('cost'), ClassificationError()]),
ScheduledHyperParamSetter('learning_rate', ScheduledHyperParamSetter('learning_rate',
[(1, 0.1), (20, 0.01), (28, 0.001), (50, 0.0001)]) [(1, 0.1), (20, 0.01), (28, 0.001), (50, 0.0001)])
]), ]),
......
...@@ -5,7 +5,8 @@ ...@@ -5,7 +5,8 @@
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
import os, sys import os
import sys
import argparse import argparse
from tensorpack import * from tensorpack import *
...@@ -15,13 +16,15 @@ IMAGE_SIZE = 42 ...@@ -15,13 +16,15 @@ IMAGE_SIZE = 42
WARP_TARGET_SIZE = 28 WARP_TARGET_SIZE = 28
HALF_DIFF = (IMAGE_SIZE - WARP_TARGET_SIZE) // 2 HALF_DIFF = (IMAGE_SIZE - WARP_TARGET_SIZE) // 2
class Model(ModelDesc): class Model(ModelDesc):
def _get_input_vars(self): def _get_input_vars(self):
return [InputVar(tf.float32, (None, IMAGE_SIZE, IMAGE_SIZE, 2), 'input'), return [InputVar(tf.float32, (None, IMAGE_SIZE, IMAGE_SIZE, 2), 'input'),
InputVar(tf.int32, (None,), 'label') ] InputVar(tf.int32, (None,), 'label')]
def _build_graph(self, input_vars): def _build_graph(self, input_vars):
xys = np.array([(y,x,1) for y in range(WARP_TARGET_SIZE) xys = np.array([(y, x, 1) for y in range(WARP_TARGET_SIZE)
for x in range(WARP_TARGET_SIZE)], dtype='float32') for x in range(WARP_TARGET_SIZE)], dtype='float32')
xys = tf.constant(xys, dtype=tf.float32, name='xys') # p x 3 xys = tf.constant(xys, dtype=tf.float32, name='xys') # p x 3
...@@ -55,11 +58,11 @@ class Model(ModelDesc): ...@@ -55,11 +58,11 @@ class Model(ModelDesc):
sampled2 = get_stn(image) sampled2 = get_stn(image)
# For visualization in tensorboard # For visualization in tensorboard
padded1 = tf.pad(sampled1, [[0,0],[HALF_DIFF,HALF_DIFF],[HALF_DIFF,HALF_DIFF],[0,0]]) padded1 = tf.pad(sampled1, [[0, 0], [HALF_DIFF, HALF_DIFF], [HALF_DIFF, HALF_DIFF], [0, 0]])
padded2 = tf.pad(sampled2, [[0,0],[HALF_DIFF,HALF_DIFF],[HALF_DIFF,HALF_DIFF],[0,0]]) padded2 = tf.pad(sampled2, [[0, 0], [HALF_DIFF, HALF_DIFF], [HALF_DIFF, HALF_DIFF], [0, 0]])
img_orig = tf.concat(1, [image[:,:,:,0], image[:,:,:,1]]) #b x 2h x w img_orig = tf.concat(1, [image[:, :, :, 0], image[:, :, :, 1]]) # b x 2h x w
transform1 = tf.concat(1, [padded1[:,:,:,0], padded1[:,:,:,1]]) transform1 = tf.concat(1, [padded1[:, :, :, 0], padded1[:, :, :, 1]])
transform2 = tf.concat(1, [padded2[:,:,:,0], padded2[:,:,:,1]]) transform2 = tf.concat(1, [padded2[:, :, :, 0], padded2[:, :, :, 1]])
stacked = tf.concat(2, [img_orig, transform1, transform2], 'viz') stacked = tf.concat(2, [img_orig, transform1, transform2], 'viz')
tf.summary.image('visualize', tf.summary.image('visualize',
tf.expand_dims(stacked, -1), max_images=30) tf.expand_dims(stacked, -1), max_images=30)
...@@ -87,6 +90,7 @@ class Model(ModelDesc): ...@@ -87,6 +90,7 @@ class Model(ModelDesc):
return [MapGradient(lambda grad: tf.clip_by_global_norm([grad], 5)[0][0]), return [MapGradient(lambda grad: tf.clip_by_global_norm([grad], 5)[0][0]),
ScaleGradient([('STN.*', 0.1)]), SummaryGradient()] ScaleGradient([('STN.*', 0.1)]), SummaryGradient()]
def get_data(isTrain): def get_data(isTrain):
ds = dataset.Mnist('train' if isTrain else 'test') ds = dataset.Mnist('train' if isTrain else 'test')
# create augmentation for both training and testing # create augmentation for both training and testing
...@@ -105,6 +109,7 @@ def get_data(isTrain): ...@@ -105,6 +109,7 @@ def get_data(isTrain):
ds = BatchData(ds, 128) ds = BatchData(ds, 128)
return ds return ds
def view_warp(modelpath): def view_warp(modelpath):
pred = OfflinePredictor(PredictConfig( pred = OfflinePredictor(PredictConfig(
session_init=get_model_loader(modelpath), session_init=get_model_loader(modelpath),
...@@ -117,8 +122,8 @@ def view_warp(modelpath): ...@@ -117,8 +122,8 @@ def view_warp(modelpath):
[WARP_TARGET_SIZE, WARP_TARGET_SIZE, 1], [WARP_TARGET_SIZE, WARP_TARGET_SIZE, 1],
[0, WARP_TARGET_SIZE, 1]], dtype='float32') [0, WARP_TARGET_SIZE, 1]], dtype='float32')
def draw_rect(img, affine, c, offset=[0,0]): def draw_rect(img, affine, c, offset=[0, 0]):
a = np.transpose(affine) #3x2 a = np.transpose(affine) # 3x2
a = (np.matmul(xys, a) + offset).astype('int32') a = (np.matmul(xys, a) + offset).astype('int32')
cv2.line(img, tuple(a[0][::-1]), tuple(a[1][::-1]), c) cv2.line(img, tuple(a[0][::-1]), tuple(a[1][::-1]), c)
cv2.line(img, tuple(a[1][::-1]), tuple(a[2][::-1]), c) cv2.line(img, tuple(a[1][::-1]), tuple(a[2][::-1]), c)
...@@ -133,11 +138,12 @@ def view_warp(modelpath): ...@@ -133,11 +138,12 @@ def view_warp(modelpath):
for idx, viz in enumerate(outputs): for idx, viz in enumerate(outputs):
viz = cv2.cvtColor(viz, cv2.COLOR_GRAY2BGR) viz = cv2.cvtColor(viz, cv2.COLOR_GRAY2BGR)
# Here we assume the second branch focuses on the first digit # Here we assume the second branch focuses on the first digit
draw_rect(viz, affine2[idx], (0,0,255)) draw_rect(viz, affine2[idx], (0, 0, 255))
draw_rect(viz, affine1[idx], (0,0,255), offset=[IMAGE_SIZE, 0]) draw_rect(viz, affine1[idx], (0, 0, 255), offset=[IMAGE_SIZE, 0])
cv2.imwrite('{:03d}.png'.format(idx), (viz + 0.5) * 255) cv2.imwrite('{:03d}.png'.format(idx), (viz + 0.5) * 255)
break break
def get_config(): def get_config():
logger.auto_set_dir() logger.auto_set_dir()
...@@ -152,7 +158,7 @@ def get_config(): ...@@ -152,7 +158,7 @@ def get_config():
callbacks=Callbacks([ callbacks=Callbacks([
StatPrinter(), ModelSaver(), StatPrinter(), ModelSaver(),
InferenceRunner(dataset_test, InferenceRunner(dataset_test,
[ScalarStats('cost'), ClassificationError() ]), [ScalarStats('cost'), ClassificationError()]),
ScheduledHyperParamSetter('learning_rate', [(200, 1e-4)]) ScheduledHyperParamSetter('learning_rate', [(200, 1e-4)])
]), ]),
session_config=get_default_sess_config(0.5), session_config=get_default_sess_config(0.5),
...@@ -176,4 +182,3 @@ if __name__ == '__main__': ...@@ -176,4 +182,3 @@ if __name__ == '__main__':
if args.load: if args.load:
config.session_init = SaverRestore(args.load) config.session_init = SaverRestore(args.load)
SimpleTrainer(config).train() SimpleTrainer(config).train()
...@@ -2,7 +2,8 @@ ...@@ -2,7 +2,8 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# File: create-lmdb.py # File: create-lmdb.py
# Author: Yuxin Wu <ppwwyyxxc@gmail.com> # Author: Yuxin Wu <ppwwyyxxc@gmail.com>
import sys, os import sys
import os
import scipy.io.wavfile as wavfile import scipy.io.wavfile as wavfile
import string import string
import numpy as np import numpy as np
...@@ -14,10 +15,12 @@ from tensorpack.utils.stats import OnlineMoments ...@@ -14,10 +15,12 @@ from tensorpack.utils.stats import OnlineMoments
import bob.ap import bob.ap
CHARSET = set(string.ascii_lowercase + ' ') CHARSET = set(string.ascii_lowercase + ' ')
PHONEME_LIST = "aa,ae,ah,ao,aw,ax,ax-h,axr,ay,b,bcl,ch,d,dcl,dh,dx,eh,el,em,en,eng,epi,er,ey,f,g,gcl,h#,hh,hv,ih,ix,iy,jh,k,kcl,l,m,n,ng,nx,ow,oy,p,pau,pcl,q,r,s,sh,t,tcl,th,uh,uw,ux,v,w,y,z,zh".split(',') PHONEME_LIST = "aa,ae,ah,ao,aw,ax,ax-h,axr,ay,b,bcl,ch,d,dcl,dh,dx,eh,el,em,en,eng,epi,er,ey,f,g,gcl,h#,hh,hv,ih,ix,iy,jh,k,kcl,l,m,n,ng,nx,ow,oy,p,pau,pcl,q,r,s,sh,t,tcl,th,uh,uw,ux,v,w,y,z,zh".split(
',')
PHONEME_DIC = {v: k for k, v in enumerate(PHONEME_LIST)} PHONEME_DIC = {v: k for k, v in enumerate(PHONEME_LIST)}
WORD_DIC = {v: k for k, v in enumerate(string.ascii_lowercase + ' ')} WORD_DIC = {v: k for k, v in enumerate(string.ascii_lowercase + ' ')}
def read_timit_txt(f): def read_timit_txt(f):
f = open(f) f = open(f)
line = f.readlines()[0].strip().split(' ') line = f.readlines()[0].strip().split(' ')
...@@ -30,6 +33,7 @@ def read_timit_txt(f): ...@@ -30,6 +33,7 @@ def read_timit_txt(f):
ret.append(WORD_DIC[c]) ret.append(WORD_DIC[c])
return np.asarray(ret) return np.asarray(ret)
def read_timit_phoneme(f): def read_timit_phoneme(f):
f = open(f) f = open(f)
pho = [] pho = []
...@@ -39,6 +43,7 @@ def read_timit_phoneme(f): ...@@ -39,6 +43,7 @@ def read_timit_phoneme(f):
f.close() f.close()
return np.asarray(pho) return np.asarray(pho)
@memoized @memoized
def get_bob_extractor(fs, win_length_ms=10, win_shift_ms=5, def get_bob_extractor(fs, win_length_ms=10, win_shift_ms=5,
n_filters=55, n_ceps=15, f_min=0., f_max=6000, n_filters=55, n_ceps=15, f_min=0., f_max=6000,
...@@ -48,6 +53,7 @@ def get_bob_extractor(fs, win_length_ms=10, win_shift_ms=5, ...@@ -48,6 +53,7 @@ def get_bob_extractor(fs, win_length_ms=10, win_shift_ms=5,
f_max, delta_win, pre_emphasis_coef, mel_scale, dct_norm) f_max, delta_win, pre_emphasis_coef, mel_scale, dct_norm)
return ret return ret
def diff_feature(feat, nd=1): def diff_feature(feat, nd=1):
diff = feat[1:] - feat[:-1] diff = feat[1:] - feat[:-1]
feat = feat[1:] feat = feat[1:]
...@@ -57,6 +63,7 @@ def diff_feature(feat, nd=1): ...@@ -57,6 +63,7 @@ def diff_feature(feat, nd=1):
d2 = diff[1:] - diff[:-1] d2 = diff[1:] - diff[:-1]
return np.concatenate((feat[1:], diff[1:], d2), axis=1) return np.concatenate((feat[1:], diff[1:], d2), axis=1)
def get_feature(f): def get_feature(f):
fs, signal = wavfile.read(f) fs, signal = wavfile.read(f)
signal = signal.astype('float64') signal = signal.astype('float64')
...@@ -64,7 +71,9 @@ def get_feature(f): ...@@ -64,7 +71,9 @@ def get_feature(f):
feat = diff_feature(feat, nd=2) feat = diff_feature(feat, nd=2)
return feat return feat
class RawTIMIT(DataFlow): class RawTIMIT(DataFlow):
def __init__(self, dirname, label='phoneme'): def __init__(self, dirname, label='phoneme'):
self.dirname = dirname self.dirname = dirname
assert os.path.isdir(dirname), dirname assert os.path.isdir(dirname), dirname
...@@ -87,12 +96,13 @@ class RawTIMIT(DataFlow): ...@@ -87,12 +96,13 @@ class RawTIMIT(DataFlow):
label = read_timit_txt(f[:-4] + '.TXT') label = read_timit_txt(f[:-4] + '.TXT')
yield [feat, label] yield [feat, label]
def compute_mean_std(db, fname): def compute_mean_std(db, fname):
ds = LMDBDataPoint(db, shuffle=False) ds = LMDBDataPoint(db, shuffle=False)
o = OnlineMoments() o = OnlineMoments()
with get_tqdm(total=ds.size()) as bar: with get_tqdm(total=ds.size()) as bar:
for dp in ds.get_data(): for dp in ds.get_data():
feat = dp[0] #len x dim feat = dp[0] # len x dim
for f in feat: for f in feat:
o.feed(f) o.feed(f)
bar.update() bar.update()
...@@ -119,4 +129,3 @@ if __name__ == '__main__': ...@@ -119,4 +129,3 @@ if __name__ == '__main__':
dftools.dump_dataflow_to_lmdb(ds, args.db) dftools.dump_dataflow_to_lmdb(ds, args.db)
elif args.command == 'stat': elif args.command == 'stat':
compute_mean_std(args.db, args.output) compute_mean_std(args.db, args.output)
...@@ -9,15 +9,17 @@ from six.moves import range ...@@ -9,15 +9,17 @@ from six.moves import range
__all__ = ['TIMITBatch'] __all__ = ['TIMITBatch']
def batch_feature(feats):
    """Stack variable-length feature matrices into one zero-padded array.

    :param feats: non-empty sequence of 2-D arrays, each of shape
        ``(length_i, dim)`` with a common ``dim``.
    :returns: array of shape ``(len(feats), max(length_i), dim)`` with numpy's
        default float dtype; entry ``i`` holds ``feats[i]`` followed by zeros.
    :raises ValueError: if ``feats`` is empty.
    """
    if len(feats) == 0:
        # Fail with a clear message instead of the opaque "max() arg is an
        # empty sequence" error the padding computation would produce.
        raise ValueError("batch_feature() requires at least one feature array")
    # pad to the longest in the batch
    maxlen = max(k.shape[0] for k in feats)
    ret = np.zeros((len(feats), maxlen, feats[0].shape[1]))
    for idx, feat in enumerate(feats):
        # Copy the real frames; the tail of each row stays zero as padding.
        ret[idx, :feat.shape[0], :] = feat
    return ret
def sparse_label(labels): def sparse_label(labels):
maxlen = max([k.shape[0] for k in labels]) maxlen = max([k.shape[0] for k in labels])
shape = [len(labels), maxlen] # bxt shape = [len(labels), maxlen] # bxt
...@@ -31,7 +33,9 @@ def sparse_label(labels): ...@@ -31,7 +33,9 @@ def sparse_label(labels):
values = np.asarray(values) values = np.asarray(values)
return (indices, values, shape) return (indices, values, shape)
class TIMITBatch(ProxyDataFlow): class TIMITBatch(ProxyDataFlow):
def __init__(self, ds, batch): def __init__(self, ds, batch):
self.batch = batch self.batch = batch
self.ds = ds self.ds = ds
...@@ -52,4 +56,3 @@ class TIMITBatch(ProxyDataFlow): ...@@ -52,4 +56,3 @@ class TIMITBatch(ProxyDataFlow):
batchlab = sparse_label(labs) batchlab = sparse_label(labs)
seqlen = np.asarray([k.shape[0] for k in feats]) seqlen = np.asarray([k.shape[0] for k in feats])
yield [batchfeat, batchlab[0], batchlab[1], batchlab[2], seqlen] yield [batchfeat, batchlab[0], batchlab[1], batchlab[2], seqlen]
...@@ -5,7 +5,8 @@ ...@@ -5,7 +5,8 @@
import tensorflow as tf import tensorflow as tf
import numpy as np import numpy as np
import os, sys import os
import sys
import argparse import argparse
from collections import Counter from collections import Counter
import operator import operator
...@@ -24,10 +25,12 @@ HIDDEN = 128 ...@@ -24,10 +25,12 @@ HIDDEN = 128
NR_CLASS = 61 + 1 NR_CLASS = 61 + 1
FEATUREDIM = 39 FEATUREDIM = 39
class Model(ModelDesc): class Model(ModelDesc):
def _get_input_vars(self): def _get_input_vars(self):
return [InputVar(tf.float32, [None, None, FEATUREDIM], 'feat'), # bxmaxseqx39 return [InputVar(tf.float32, [None, None, FEATUREDIM], 'feat'), # bxmaxseqx39
InputVar(tf.int64, None, 'labelidx'), #label is b x maxlen, sparse InputVar(tf.int64, None, 'labelidx'), # label is b x maxlen, sparse
InputVar(tf.int32, None, 'labelvalue'), InputVar(tf.int32, None, 'labelvalue'),
InputVar(tf.int64, None, 'labelshape'), InputVar(tf.int64, None, 'labelshape'),
InputVar(tf.int32, [None], 'seqlen'), # b InputVar(tf.int32, [None], 'seqlen'), # b
...@@ -56,7 +59,7 @@ class Model(ModelDesc): ...@@ -56,7 +59,7 @@ class Model(ModelDesc):
self.cost = tf.reduce_mean(loss, name='cost') self.cost = tf.reduce_mean(loss, name='cost')
logits = tf.transpose(logits, [1,0,2]) logits = tf.transpose(logits, [1, 0, 2])
isTrain = get_current_tower_context().is_training isTrain = get_current_tower_context().is_training
if isTrain: if isTrain:
...@@ -72,7 +75,8 @@ class Model(ModelDesc): ...@@ -72,7 +75,8 @@ class Model(ModelDesc):
summary.add_moving_summary(err) summary.add_moving_summary(err)
def get_gradient_processor(self): def get_gradient_processor(self):
return [GlobalNormClip(5), SummaryGradient() ] return [GlobalNormClip(5), SummaryGradient()]
def get_data(path, isTrain, stat_file): def get_data(path, isTrain, stat_file):
ds = LMDBDataPoint(path, shuffle=isTrain) ds = LMDBDataPoint(path, shuffle=isTrain)
...@@ -83,6 +87,7 @@ def get_data(path, isTrain, stat_file): ...@@ -83,6 +87,7 @@ def get_data(path, isTrain, stat_file):
ds = PrefetchDataZMQ(ds, 1) ds = PrefetchDataZMQ(ds, 1)
return ds return ds
def get_config(ds_train, ds_test): def get_config(ds_train, ds_test):
step_per_epoch = ds_train.size() step_per_epoch = ds_train.size()
...@@ -124,4 +129,3 @@ if __name__ == '__main__': ...@@ -124,4 +129,3 @@ if __name__ == '__main__':
if args.load: if args.load:
config.session_init = SaverRestore(args.load) config.session_init = SaverRestore(args.load)
QueueInputTrainer(config).train() QueueInputTrainer(config).train()
...@@ -5,7 +5,8 @@ ...@@ -5,7 +5,8 @@
import tensorflow as tf import tensorflow as tf
import numpy as np import numpy as np
import os, sys import os
import sys
import argparse import argparse
from collections import Counter from collections import Counter
import operator import operator
...@@ -27,7 +28,9 @@ param.vocab_size = None ...@@ -27,7 +28,9 @@ param.vocab_size = None
param.softmax_temprature = 1 param.softmax_temprature = 1
param.corpus = 'input.txt' param.corpus = 'input.txt'
class CharRNNData(RNGDataFlow): class CharRNNData(RNGDataFlow):
def __init__(self, input_file, size): def __init__(self, input_file, size):
self.seq_length = param.seq_len self.seq_length = param.seq_len
self._size = size self._size = size
...@@ -58,9 +61,10 @@ class CharRNNData(RNGDataFlow): ...@@ -58,9 +61,10 @@ class CharRNNData(RNGDataFlow):
class Model(ModelDesc): class Model(ModelDesc):
def _get_input_vars(self): def _get_input_vars(self):
return [InputVar(tf.int32, (None, param.seq_len), 'input'), return [InputVar(tf.int32, (None, param.seq_len), 'input'),
InputVar(tf.int32, (None, param.seq_len), 'nextinput') ] InputVar(tf.int32, (None, param.seq_len), 'nextinput')]
def _build_graph(self, input_vars): def _build_graph(self, input_vars):
input, nextinput = input_vars input, nextinput = input_vars
...@@ -73,7 +77,7 @@ class Model(ModelDesc): ...@@ -73,7 +77,7 @@ class Model(ModelDesc):
embeddingW = tf.get_variable('embedding', [param.vocab_size, param.rnn_size]) embeddingW = tf.get_variable('embedding', [param.vocab_size, param.rnn_size])
input_feature = tf.nn.embedding_lookup(embeddingW, input) # B x seqlen x rnnsize input_feature = tf.nn.embedding_lookup(embeddingW, input) # B x seqlen x rnnsize
input_list = tf.unstack(input_feature, axis=1) #seqlen x (Bxrnnsize) input_list = tf.unstack(input_feature, axis=1) # seqlen x (Bxrnnsize)
# seqlen is 1 in inference. don't need loop_function # seqlen is 1 in inference. don't need loop_function
outputs, last_state = tf.nn.rnn(cell, input_list, initial, scope='rnnlm') outputs, last_state = tf.nn.rnn(cell, input_list, initial, scope='rnnlm')
...@@ -92,6 +96,7 @@ class Model(ModelDesc): ...@@ -92,6 +96,7 @@ class Model(ModelDesc):
def get_gradient_processor(self): def get_gradient_processor(self):
return [GlobalNormClip(5)] return [GlobalNormClip(5)]
def get_config(): def get_config():
logger.auto_set_dir() logger.auto_set_dir()
...@@ -114,6 +119,8 @@ def get_config(): ...@@ -114,6 +119,8 @@ def get_config():
) )
# TODO rewrite using Predictor interface # TODO rewrite using Predictor interface
def sample(path, start, length): def sample(path, start, length):
""" """
:param path: path to the model :param path: path to the model
...@@ -130,7 +137,7 @@ def sample(path, start, length): ...@@ -130,7 +137,7 @@ def sample(path, start, length):
sess = tf.Session() sess = tf.Session()
tfutils.SaverRestore(path).init(sess) tfutils.SaverRestore(path).init(sess)
dummy_input = np.zeros((1,1), dtype='int32') dummy_input = np.zeros((1, 1), dtype='int32')
with sess.as_default(): with sess.as_default():
# feed the starting sentence # feed the starting sentence
state = model.initial.eval({input_vars[0]: dummy_input}) state = model.initial.eval({input_vars[0]: dummy_input})
...@@ -181,4 +188,3 @@ if __name__ == '__main__': ...@@ -181,4 +188,3 @@ if __name__ == '__main__':
if args.load: if args.load:
config.session_init = SaverRestore(args.load) config.session_init = SaverRestore(args.load)
QueueInputTrainer(config).train() QueueInputTrainer(config).train()
...@@ -22,7 +22,9 @@ Cifar10: ...@@ -22,7 +22,9 @@ Cifar10:
Not a good model for Cifar100, just for demonstration. Not a good model for Cifar100, just for demonstration.
""" """
class Model(ModelDesc): class Model(ModelDesc):
def __init__(self, cifar_classnum): def __init__(self, cifar_classnum):
super(Model, self).__init__() super(Model, self).__init__()
self.cifar_classnum = cifar_classnum self.cifar_classnum = cifar_classnum
...@@ -72,6 +74,7 @@ class Model(ModelDesc): ...@@ -72,6 +74,7 @@ class Model(ModelDesc):
add_param_summary([('.*/W', ['histogram'])]) # monitor W add_param_summary([('.*/W', ['histogram'])]) # monitor W
self.cost = tf.add_n([cost, wd_cost], name='cost') self.cost = tf.add_n([cost, wd_cost], name='cost')
def get_data(train_or_test, cifar_classnum): def get_data(train_or_test, cifar_classnum):
isTrain = train_or_test == 'train' isTrain = train_or_test == 'train'
if cifar_classnum == 10: if cifar_classnum == 10:
...@@ -83,10 +86,10 @@ def get_data(train_or_test, cifar_classnum): ...@@ -83,10 +86,10 @@ def get_data(train_or_test, cifar_classnum):
imgaug.RandomCrop((30, 30)), imgaug.RandomCrop((30, 30)),
imgaug.Flip(horiz=True), imgaug.Flip(horiz=True),
imgaug.Brightness(63), imgaug.Brightness(63),
imgaug.Contrast((0.2,1.8)), imgaug.Contrast((0.2, 1.8)),
imgaug.GaussianDeform( imgaug.GaussianDeform(
[(0.2, 0.2), (0.2, 0.8), (0.8,0.8), (0.8,0.2)], [(0.2, 0.2), (0.2, 0.8), (0.8, 0.8), (0.8, 0.2)],
(30,30), 0.2, 3), (30, 30), 0.2, 3),
imgaug.MeanVarianceNormalize(all_channel=True) imgaug.MeanVarianceNormalize(all_channel=True)
] ]
else: else:
...@@ -100,6 +103,7 @@ def get_data(train_or_test, cifar_classnum): ...@@ -100,6 +103,7 @@ def get_data(train_or_test, cifar_classnum):
ds = PrefetchData(ds, 3, 2) ds = PrefetchData(ds, 3, 2)
return ds return ds
def get_config(cifar_classnum): def get_config(cifar_classnum):
logger.auto_set_dir() logger.auto_set_dir()
...@@ -111,6 +115,7 @@ def get_config(cifar_classnum): ...@@ -111,6 +115,7 @@ def get_config(cifar_classnum):
sess_config = get_default_sess_config(0.5) sess_config = get_default_sess_config(0.5)
lr = symbf.get_scalar_var('learning_rate', 1e-2, summary=True) lr = symbf.get_scalar_var('learning_rate', 1e-2, summary=True)
def lr_func(lr): def lr_func(lr):
if lr < 3e-5: if lr < 3e-5:
raise StopTraining() raise StopTraining()
......
...@@ -6,7 +6,9 @@ ...@@ -6,7 +6,9 @@
from __future__ import print_function from __future__ import print_function
import tensorflow as tf import tensorflow as tf
import numpy as np import numpy as np
import os, cv2, argparse import os
import cv2
import argparse
from tensorpack import * from tensorpack import *
from tensorpack.tfutils.symbolic_functions import * from tensorpack.tfutils.symbolic_functions import *
...@@ -19,9 +21,11 @@ Usage: ...@@ -19,9 +21,11 @@ Usage:
./load-alexnet.py --load alexnet.npy --input cat.png ./load-alexnet.py --load alexnet.npy --input cat.png
""" """
class Model(ModelDesc): class Model(ModelDesc):
def _get_input_vars(self): def _get_input_vars(self):
return [InputVar(tf.float32, (None, 227, 227, 3), 'input') ] return [InputVar(tf.float32, (None, 227, 227, 3), 'input')]
def _build_graph(self, inputs): def _build_graph(self, inputs):
# img: 227x227x3 # img: 227x227x3
...@@ -48,6 +52,7 @@ class Model(ModelDesc): ...@@ -48,6 +52,7 @@ class Model(ModelDesc):
logits = FullyConnected('fc8', l, out_dim=1000, nl=tf.identity) logits = FullyConnected('fc8', l, out_dim=1000, nl=tf.identity)
prob = tf.nn.softmax(logits, name='prob') prob = tf.nn.softmax(logits, name='prob')
def run_test(path, input): def run_test(path, input):
param_dict = np.load(path, encoding='latin1').item() param_dict = np.load(path, encoding='latin1').item()
predict_func = OfflinePredictor(PredictConfig( predict_func = OfflinePredictor(PredictConfig(
...@@ -59,8 +64,8 @@ def run_test(path, input): ...@@ -59,8 +64,8 @@ def run_test(path, input):
im = cv2.imread(input) im = cv2.imread(input)
assert im is not None, input assert im is not None, input
im = cv2.resize(im, (227, 227))[:,:,::-1].reshape( im = cv2.resize(im, (227, 227))[:, :, ::-1].reshape(
(1,227,227,3)).astype('float32') - 110 (1, 227, 227, 3)).astype('float32') - 110
outputs = predict_func([im])[0] outputs = predict_func([im])[0]
prob = outputs[0] prob = outputs[0]
ret = prob.argsort()[-10:][::-1] ret = prob.argsort()[-10:][::-1]
......
...@@ -7,7 +7,8 @@ from __future__ import print_function ...@@ -7,7 +7,8 @@ from __future__ import print_function
import cv2 import cv2
import tensorflow as tf import tensorflow as tf
import numpy as np import numpy as np
import os, argparse import os
import argparse
from tensorpack import * from tensorpack import *
from tensorpack.tfutils.symbolic_functions import * from tensorpack.tfutils.symbolic_functions import *
...@@ -20,9 +21,11 @@ Usage: ...@@ -20,9 +21,11 @@ Usage:
./load-vgg16.py --load vgg16.npy --input cat.png ./load-vgg16.py --load vgg16.npy --input cat.png
""" """
class Model(ModelDesc): class Model(ModelDesc):
def _get_input_vars(self): def _get_input_vars(self):
return [InputVar(tf.float32, (None, 224, 224, 3), 'input') ] return [InputVar(tf.float32, (None, 224, 224, 3), 'input')]
def _build_graph(self, inputs): def _build_graph(self, inputs):
image = inputs[0] image = inputs[0]
...@@ -58,6 +61,7 @@ class Model(ModelDesc): ...@@ -58,6 +61,7 @@ class Model(ModelDesc):
.FullyConnected('fc8', out_dim=1000, nl=tf.identity)()) .FullyConnected('fc8', out_dim=1000, nl=tf.identity)())
prob = tf.nn.softmax(logits, name='prob') prob = tf.nn.softmax(logits, name='prob')
def run_test(path, input): def run_test(path, input):
param_dict = np.load(path, encoding='latin1').item() param_dict = np.load(path, encoding='latin1').item()
predict_func = OfflinePredictor(PredictConfig( predict_func = OfflinePredictor(PredictConfig(
...@@ -70,7 +74,7 @@ def run_test(path, input): ...@@ -70,7 +74,7 @@ def run_test(path, input):
im = cv2.imread(input) im = cv2.imread(input)
assert im is not None, input assert im is not None, input
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
im = cv2.resize(im, (224, 224)).reshape((1,224,224,3)).astype('float32') im = cv2.resize(im, (224, 224)).reshape((1, 224, 224, 3)).astype('float32')
im = im - 110 im = im - 110
outputs = predict_func([im])[0] outputs = predict_func([im])[0]
prob = outputs[0] prob = outputs[0]
......
...@@ -5,7 +5,8 @@ ...@@ -5,7 +5,8 @@
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
import os, sys import os
import sys
import argparse import argparse
""" """
...@@ -18,12 +19,14 @@ from tensorpack import * ...@@ -18,12 +19,14 @@ from tensorpack import *
IMAGE_SIZE = 28 IMAGE_SIZE = 28
class Model(ModelDesc): class Model(ModelDesc):
def _get_input_vars(self): def _get_input_vars(self):
"""Define all the input variables (with type, shape, name) that'll be """Define all the input variables (with type, shape, name) that'll be
fed into the graph to produce a cost. """ fed into the graph to produce a cost. """
return [InputVar(tf.float32, (None, IMAGE_SIZE, IMAGE_SIZE), 'input'), return [InputVar(tf.float32, (None, IMAGE_SIZE, IMAGE_SIZE), 'input'),
InputVar(tf.int32, (None,), 'label') ] InputVar(tf.int32, (None,), 'label')]
def _build_graph(self, input_vars): def _build_graph(self, input_vars):
"""This function should build the model which takes the input variables """This function should build the model which takes the input variables
...@@ -59,7 +62,8 @@ class Model(ModelDesc): ...@@ -59,7 +62,8 @@ class Model(ModelDesc):
.FullyConnected('fc1', out_dim=10, nl=tf.identity)()) .FullyConnected('fc1', out_dim=10, nl=tf.identity)())
prob = tf.nn.softmax(logits, name='prob') # a Bx10 with probabilities prob = tf.nn.softmax(logits, name='prob') # a Bx10 with probabilities
cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, label) # a vector of length B with loss of each sample cost = tf.nn.sparse_softmax_cross_entropy_with_logits(
logits, label) # a vector of length B with loss of each sample
cost = tf.reduce_mean(cost, name='cross_entropy_loss') # the average cross-entropy loss cost = tf.reduce_mean(cost, name='cross_entropy_loss') # the average cross-entropy loss
# compute the "incorrect vector", for the callback ClassificationError to use at validation time # compute the "incorrect vector", for the callback ClassificationError to use at validation time
...@@ -83,11 +87,13 @@ class Model(ModelDesc): ...@@ -83,11 +87,13 @@ class Model(ModelDesc):
summary.add_param_summary([('.*/W', ['histogram'])]) summary.add_param_summary([('.*/W', ['histogram'])])
self.cost = tf.add_n([wd_cost, cost], name='cost') self.cost = tf.add_n([wd_cost, cost], name='cost')
def get_data(): def get_data():
train = BatchData(dataset.Mnist('train'), 128) train = BatchData(dataset.Mnist('train'), 128)
test = BatchData(dataset.Mnist('test'), 256, remainder=True) test = BatchData(dataset.Mnist('test'), 256, remainder=True)
return train, test return train, test
def get_config(): def get_config():
# automatically setup the directory train_log/mnist-convnet for logging # automatically setup the directory train_log/mnist-convnet for logging
logger.auto_set_dir() logger.auto_set_dir()
...@@ -135,4 +141,3 @@ if __name__ == '__main__': ...@@ -135,4 +141,3 @@ if __name__ == '__main__':
if args.load: if args.load:
config.session_init = SaverRestore(args.load) config.session_init = SaverRestore(args.load)
SimpleTrainer(config).train() SimpleTrainer(config).train()
...@@ -20,10 +20,12 @@ Each epoch iterates over the whole training set (4721 iterations). ...@@ -20,10 +20,12 @@ Each epoch iterates over the whole training set (4721 iterations).
Speed is about 43 it/s on TitanX. Speed is about 43 it/s on TitanX.
""" """
class Model(ModelDesc): class Model(ModelDesc):
def _get_input_vars(self): def _get_input_vars(self):
return [InputVar(tf.float32, [None, 40, 40, 3], 'input'), return [InputVar(tf.float32, [None, 40, 40, 3], 'input'),
InputVar(tf.int32, [None], 'label') ] InputVar(tf.int32, [None], 'label')]
def _build_graph(self, input_vars): def _build_graph(self, input_vars):
image, label = input_vars image, label = input_vars
...@@ -58,6 +60,7 @@ class Model(ModelDesc): ...@@ -58,6 +60,7 @@ class Model(ModelDesc):
add_param_summary([('.*/W', ['histogram', 'rms'])]) # monitor W add_param_summary([('.*/W', ['histogram', 'rms'])]) # monitor W
self.cost = tf.add_n([cost, wd_cost], name='cost') self.cost = tf.add_n([cost, wd_cost], name='cost')
def get_data(): def get_data():
d1 = dataset.SVHNDigit('train') d1 = dataset.SVHNDigit('train')
d2 = dataset.SVHNDigit('extra') d2 = dataset.SVHNDigit('extra')
...@@ -67,20 +70,21 @@ def get_data(): ...@@ -67,20 +70,21 @@ def get_data():
augmentors = [ augmentors = [
imgaug.Resize((40, 40)), imgaug.Resize((40, 40)),
imgaug.Brightness(30), imgaug.Brightness(30),
imgaug.Contrast((0.5,1.5)), imgaug.Contrast((0.5, 1.5)),
imgaug.GaussianDeform( # this is slow. only use it when you have lots of cpus imgaug.GaussianDeform( # this is slow. only use it when you have lots of cpus
[(0.2, 0.2), (0.2, 0.8), (0.8,0.8), (0.8,0.2)], [(0.2, 0.2), (0.2, 0.8), (0.8, 0.8), (0.8, 0.2)],
(40,40), 0.2, 3), (40, 40), 0.2, 3),
] ]
data_train = AugmentImageComponent(data_train, augmentors) data_train = AugmentImageComponent(data_train, augmentors)
data_train = BatchData(data_train, 128) data_train = BatchData(data_train, 128)
data_train = PrefetchData(data_train, 5, 5) data_train = PrefetchData(data_train, 5, 5)
augmentors = [ imgaug.Resize((40, 40)) ] augmentors = [imgaug.Resize((40, 40))]
data_test = AugmentImageComponent(data_test, augmentors) data_test = AugmentImageComponent(data_test, augmentors)
data_test = BatchData(data_test, 128, remainder=True) data_test = BatchData(data_test, 128, remainder=True)
return data_train, data_test return data_train, data_test
def get_config(): def get_config():
logger.auto_set_dir() logger.auto_set_dir()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment