Commit 233b3b90 authored by Yuxin Wu

run autopep8 over examples

parent fb2a051c
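The hunks below are mechanical PEP 8 cleanups: comma-separated imports are split onto one line each, spaces are added around binary operators and after commas, inline comments get two spaces before the `#`, and top-level definitions gain surrounding blank lines. As a hypothetical illustration (not code from this repository; the exact autopep8 invocation is not recorded in the commit, but something like `autopep8 --in-place --recursive examples/` produces changes of this kind):

    # before
    import os, sys, re, time
    def area(w, h): return w*h

    # after autopep8
    import os
    import sys
    import re
    import time


    def area(w, h):
        return w * h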
@@ -6,11 +6,15 @@
import numpy as np
import tensorflow as tf
import os
import sys
import re
import time
import random
import argparse
import subprocess
import multiprocessing
import threading
from collections import deque
from tensorpack import *

@@ -47,9 +51,10 @@ NUM_ACTIONS = None
ROM_FILE = None
METHOD = None


def get_player(viz=False, train=False):
    pl = AtariPlayer(ROM_FILE, frame_skip=ACTION_REPEAT,
                     image_shape=IMAGE_SIZE[::-1], viz=viz, live_lost_as_eoe=train)
    global NUM_ACTIONS
    NUM_ACTIONS = pl.get_action_space().num_actions()
    if not train:

@@ -59,15 +64,18 @@ def get_player(viz=False, train=False):
    return pl
common.get_player = get_player  # so that eval functions in common can use the player


class Model(ModelDesc):
    def _get_input_vars(self):
        if NUM_ACTIONS is None:
            p = get_player()
            del p
        return [InputVar(tf.float32, (None,) + IMAGE_SHAPE3, 'state'),
                InputVar(tf.int64, (None,), 'action'),
                InputVar(tf.float32, (None,), 'reward'),
                InputVar(tf.float32, (None,) + IMAGE_SHAPE3, 'next_state'),
                InputVar(tf.bool, (None,), 'isOver')]

    def _get_DQN_prediction(self, image):
        """ image: [0,255]"""

@@ -75,20 +83,20 @@ class Model(ModelDesc):
        with argscope(Conv2D, nl=PReLU.f, use_bias=True), \
                argscope(LeakyReLU, alpha=0.01):
            l = (LinearWrap(image)
                 .Conv2D('conv0', out_channel=32, kernel_shape=5)
                 .MaxPooling('pool0', 2)
                 .Conv2D('conv1', out_channel=32, kernel_shape=5)
                 .MaxPooling('pool1', 2)
                 .Conv2D('conv2', out_channel=64, kernel_shape=4)
                 .MaxPooling('pool2', 2)
                 .Conv2D('conv3', out_channel=64, kernel_shape=3)
                 # the original arch
                 #.Conv2D('conv0', image, out_channel=32, kernel_shape=8, stride=4)
                 #.Conv2D('conv1', out_channel=64, kernel_shape=4, stride=2)
                 #.Conv2D('conv2', out_channel=64, kernel_shape=3)
                 .FullyConnected('fc0', 512, nl=LeakyReLU)())
        if METHOD != 'Dueling':
            Q = FullyConnected('fct', l, NUM_ACTIONS, nl=tf.identity)
        else:

@@ -101,7 +109,7 @@ class Model(ModelDesc):
        state, action, reward, next_state, isOver = inputs
        self.predict_value = self._get_DQN_prediction(state)
        action_onehot = tf.one_hot(action, NUM_ACTIONS, 1.0, 0.0)
        pred_action_value = tf.reduce_sum(self.predict_value * action_onehot, 1)  # N,
        max_pred_reward = tf.reduce_mean(tf.reduce_max(
            self.predict_value, 1), name='predict_reward')
        add_moving_summary(max_pred_reward)

@@ -125,7 +133,7 @@ class Model(ModelDesc):
        self.cost = tf.truediv(symbf.huber_loss(target - pred_action_value),
                               tf.cast(BATCH_SIZE, tf.float32), name='cost')
        summary.add_param_summary([('conv.*/W', ['histogram', 'rms']),
                                   ('fc.*/W', ['histogram', 'rms'])])   # monitor all W

    def update_target_param(self):
        vars = tf.trainable_variables()

@@ -142,22 +150,23 @@ class Model(ModelDesc):
        return [MapGradient(lambda grad: tf.clip_by_global_norm([grad], 5)[0][0]),
                SummaryGradient()]


def get_config():
    logger.auto_set_dir()
    M = Model()

    dataset_train = ExpReplay(
        predictor_io_names=(['state'], ['Qvalue']),
        player=get_player(train=True),
        batch_size=BATCH_SIZE,
        memory_size=MEMORY_SIZE,
        init_memory_size=INIT_MEMORY_SIZE,
        exploration=INIT_EXPLORATION,
        end_exploration=END_EXPLORATION,
        exploration_epoch_anneal=EXPLORATION_EPOCH_ANNEAL,
        update_frequency=4,
        reward_clip=(-1, 1),
        history_len=FRAME_HISTORY)

    lr = symbf.get_scalar_var('learning_rate', 1e-3, summary=True)

@@ -167,7 +176,7 @@ def get_config():
        callbacks=Callbacks([
            StatPrinter(), ModelSaver(),
            ScheduledHyperParamSetter('learning_rate',
                                      [(150, 4e-4), (250, 1e-4), (350, 5e-5)]),
            RunOp(lambda: M.update_target_param()),
            dataset_train,
            PeriodicCallback(Evaluator(EVAL_EPISODE, ['state'], ['Qvalue']), 3),

@@ -185,10 +194,10 @@ if __name__ == '__main__':
    parser.add_argument('--gpu', help='comma separated list of GPU(s) to use.')
    parser.add_argument('--load', help='load model')
    parser.add_argument('--task', help='task to perform',
                        choices=['play', 'eval', 'train'], default='train')
    parser.add_argument('--rom', help='atari rom', required=True)
    parser.add_argument('--algo', help='algorithm',
                        choices=['DQN', 'Double', 'Dueling'], default='Double')
    args = parser.parse_args()
    if args.gpu:

@@ -200,10 +209,10 @@ if __name__ == '__main__':
    if args.task != 'train':
        cfg = PredictConfig(
            model=Model(),
            session_init=SaverRestore(args.load),
            input_names=['state'],
            output_names=['Qvalue'])
        if args.task == 'play':
            play_model(cfg)
        elif args.task == 'eval':

@@ -213,4 +222,3 @@ if __name__ == '__main__':
        if args.load:
            config.session_init = SaverRestore(args.load)
        QueueInputTrainer(config).train()
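In the DQN cost above, `pred_action_value` masks the network's Q-values with a one-hot encoding of the chosen action, and the temporal-difference error is fed through a Huber loss averaged over the batch. A rough NumPy sketch of that arithmetic, with made-up numbers and assuming the common delta=1 Huber form (the exact definition of tensorpack's symbf.huber_loss may differ in detail):

    import numpy as np

    predict_value = np.array([[1.0, 2.0, 0.5],      # Q(s, a) for 3 actions, batch of 2
                              [0.3, -1.0, 4.0]])
    action = np.array([1, 2])                       # chosen actions
    target = np.array([2.5, 3.0])                   # r + gamma * max_a' Q_target(s', a')

    action_onehot = np.eye(predict_value.shape[1])[action]
    pred_action_value = (predict_value * action_onehot).sum(axis=1)   # [2.0, 4.0]

    err = target - pred_action_value
    huber = np.where(np.abs(err) < 1.0, 0.5 * err ** 2, np.abs(err) - 0.5)
    cost = huber.sum() / len(err)   # analogous to truediv(huber_loss(...), BATCH_SIZE)
    print(pred_action_value, cost)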
@@ -4,7 +4,8 @@
# Author: Yuxin Wu <ppwwyyxxc@gmail.com>
import numpy as np
import time
import os
import cv2
from collections import deque
import threading

@@ -22,15 +23,17 @@ __all__ = ['AtariPlayer']
ROM_URL = "https://github.com/openai/atari-py/tree/master/atari_py/atari_roms"
_ALE_LOCK = threading.Lock()


class AtariPlayer(RLEnvironment):
    """
    A wrapper for atari emulator.
    Will automatically restart when a real episode ends (isOver might be just
    lost of lives but not game over).
    """

    def __init__(self, rom_file, viz=0, height_range=(None, None),
                 frame_skip=4, image_shape=(84, 84), nullop_start=30,
                 live_lost_as_eoe=True):
        """
        :param rom_file: path to the rom
        :param frame_skip: skip every k frames and repeat the action

@@ -47,7 +50,7 @@ class AtariPlayer(RLEnvironment):
        if not os.path.isfile(rom_file) and '/' not in rom_file:
            rom_file = get_dataset_path('atari_rom', rom_file)
        assert os.path.isfile(rom_file), \
            "rom {} not found. Please download at {}".format(rom_file, ROM_URL)
        try:
            ALEInterface.setLoggerMode(ALEInterface.Logger.Warning)

@@ -84,7 +87,6 @@ class AtariPlayer(RLEnvironment):
        self.width, self.height = self.ale.getScreenDims()
        self.actions = self.ale.getMinimalActionSet()
        self.live_lost_as_eoe = live_lost_as_eoe
        self.frame_skip = frame_skip
        self.nullop_start = nullop_start

@@ -112,7 +114,7 @@ class AtariPlayer(RLEnvironment):
        if isinstance(self.viz, float):
            cv2.imshow(self.windowname, ret)
            time.sleep(self.viz)
        ret = ret[self.height_range[0]:self.height_range[1], :].astype('float32')
        # 0.299,0.587.0.114. same as rgb2y in torch/image
        ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)
        ret = cv2.resize(ret, self.image_shape)

@@ -169,7 +171,7 @@ if __name__ == '__main__':
    import time

    def benchmark():
        a = AtariPlayer(sys.argv[1], viz=False, height_range=(28, -8))
        num = a.get_action_space().num_actions()
        rng = get_rng(num)
        start = time.time()

@@ -184,7 +186,8 @@ if __name__ == '__main__':
        print(time.time() - start)
    if len(sys.argv) == 3 and sys.argv[2] == 'benchmark':
        import threading
        import multiprocessing
        for k in range(3):
            #th = multiprocessing.Process(target=benchmark)
            th = threading.Thread(target=benchmark)

@@ -193,7 +196,7 @@ if __name__ == '__main__':
        benchmark()
    else:
        a = AtariPlayer(sys.argv[1],
                        viz=0.03, height_range=(28, -8))
        num = a.get_action_space().num_actions()
        rng = get_rng(num)
        import time

@@ -204,6 +207,5 @@ if __name__ == '__main__':
            print(act)
            r, o = a.action(act)
            a.current_state()
            # time.sleep(0.1)
            print(r, o)
@@ -2,8 +2,10 @@
# -*- coding: utf-8 -*-
# File: common.py
# Author: Yuxin Wu <ppwwyyxxc@gmail.com>
import random
import time
import threading
import multiprocessing
import numpy as np
from tqdm import tqdm
from six.moves import queue

@@ -11,11 +13,12 @@ from six.moves import queue
from tensorpack import *
from tensorpack.predict import get_predict_func
from tensorpack.utils.concurrency import *
from tensorpack.utils.stats import *

global get_player
get_player = None


def play_one_episode(player, func, verbose=False):
    def f(s):
        spc = player.get_action_space()

@@ -27,6 +30,7 @@ def play_one_episode(player, func, verbose=False):
        return act
    return np.mean(player.play_one_episode(f))


def play_model(cfg):
    player = get_player(viz=0.01)
    predfunc = get_predict_func(cfg)

@@ -34,8 +38,10 @@ def play_model(cfg):
        score = play_one_episode(player, predfunc)
        print("Total:", score)


def eval_with_funcs(predict_funcs, nr_eval):
    class Worker(StoppableThread):
        def __init__(self, func, queue):
            super(Worker, self).__init__()
            self._func = func

@@ -51,7 +57,7 @@ def eval_with_funcs(predict_funcs, nr_eval):
            while not self.stopped():
                try:
                    score = play_one_episode(player, self.func)
                    # print "Score, ", score
                except RuntimeError:
                    return
                self.queue_put_stoppable(self.q, score)

@@ -61,15 +67,17 @@ def eval_with_funcs(predict_funcs, nr_eval):
    for k in threads:
        k.start()
        time.sleep(0.1)  # avoid simulator bugs
    stat = StatCounter()
    try:
        for _ in tqdm(range(nr_eval), **get_tqdm_kwargs()):
            r = q.get()
            stat.feed(r)
        logger.info("Waiting for all the workers to finish the last run...")
        for k in threads:
            k.stop()
        for k in threads:
            k.join()
        while q.qsize():
            r = q.get()
            stat.feed(r)

@@ -80,13 +88,16 @@ def eval_with_funcs(predict_funcs, nr_eval):
        return (stat.average, stat.max)
    return (0, 0)


def eval_model_multithread(cfg, nr_eval):
    func = get_predict_func(cfg)
    NR_PROC = min(multiprocessing.cpu_count() // 2, 8)
    mean, max = eval_with_funcs([func] * NR_PROC, nr_eval)
    logger.info("Average Score: {}; Max Score: {}".format(mean, max))


class Evaluator(Callback):
    def __init__(self, nr_eval, input_names, output_names):
        self.eval_episode = nr_eval
        self.input_names = input_names
...
@@ -13,26 +13,31 @@ from tensorpack.utils.argtools import memoized
import matplotlib.pyplot as plt
_CM = plt.get_cmap('jet')


def colorize(img, heatmap):
    """ img: bgr, [0,255]
        heatmap: [0,1]
    """
    heatmap = _CM(heatmap)[:, :, [2, 1, 0]] * 255.0
    return img * 0.5 + heatmap * 0.5


@memoized
def get_gaussian_map():
    sigma = 21
    gaussian_map = np.zeros((368, 368), dtype='float32')
    for x_p in range(368):
        for y_p in range(368):
            dist_sq = (x_p - 368 / 2) * (x_p - 368 / 2) + \
                (y_p - 368 / 2) * (y_p - 368 / 2)
            exponent = dist_sq / 2.0 / (21**2)
            gaussian_map[y_p, x_p] = np.exp(-exponent)
    return gaussian_map.reshape((1, 368, 368, 1))
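The nested loops above fill a 368x368 map with exp(-d^2 / (2 * 21^2)), where d is the distance of each pixel from the image center. A vectorized NumPy equivalent (a sketch, not code from this repository) computes the same map without the Python loops:

    import numpy as np

    def get_gaussian_map_vectorized(size=368, sigma=21):
        # squared distance of every pixel from the center (size/2, size/2)
        ys, xs = np.meshgrid(np.arange(size), np.arange(size), indexing='ij')
        dist_sq = (xs - size / 2) ** 2 + (ys - size / 2) ** 2
        gaussian_map = np.exp(-dist_sq / 2.0 / (sigma ** 2)).astype('float32')
        return gaussian_map.reshape((1, size, size, 1))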
class Model(ModelDesc):
    def _get_input_vars(self):
        return [InputVar(tf.float32, (None, 368, 368, 3), 'input'),
                InputVar(tf.float32, (None, 368, 368, 15), 'label'),

@@ -43,32 +48,32 @@ class Model(ModelDesc):
        image = image / 256.0 - 0.5

        gmap = tf.constant(get_gaussian_map())
        gmap = tf.pad(gmap, [[0, 0], [0, 1], [0, 1], [0, 0]])
        pool_center = AvgPooling('mappool', gmap, 9, stride=8, padding='VALID')
        with argscope(Conv2D, kernel_shape=3, nl=tf.nn.relu,
                      W_init=tf.random_normal_initializer(stddev=0.01)):
            shared = (LinearWrap(image)
                      .Conv2D('conv1_1', 64)
                      .Conv2D('conv1_2', 64)
                      .MaxPooling('pool1', 2)
                      # 184
                      .Conv2D('conv2_1', 128)
                      .Conv2D('conv2_2', 128)
                      .MaxPooling('pool2', 2)
                      # 92
                      .Conv2D('conv3_1', 256)
                      .Conv2D('conv3_2', 256)
                      .Conv2D('conv3_3', 256)
                      .Conv2D('conv3_4', 256)
                      .MaxPooling('pool3', 2)
                      # 46
                      .Conv2D('conv4_1', 512)
                      .Conv2D('conv4_2', 512)
                      .Conv2D('conv4_3_CPM', 256)
                      .Conv2D('conv4_4_CPM', 256)
                      .Conv2D('conv4_5_CPM', 256)
                      .Conv2D('conv4_6_CPM', 256)
                      .Conv2D('conv4_7_CPM', 128)())

        def add_stage(stage, l):
            l = tf.concat(3, [l, shared, pool_center], name='concat_stage{}'.format(stage))

@@ -76,20 +81,21 @@ class Model(ModelDesc):
                l = Conv2D('Mconv{}_stage{}'.format(i, stage), l, 128)
            l = Conv2D('Mconv6_stage{}'.format(stage), l, 128, kernel_shape=1)
            l = Conv2D('Mconv7_stage{}'.format(stage),
                       l, 15, kernel_shape=1, nl=tf.identity)
            return l

        with argscope(Conv2D, kernel_shape=7, nl=tf.nn.relu):
            out1 = (LinearWrap(shared)
                    .Conv2D('conv5_1_CPM', 512, kernel_shape=1)
                    .Conv2D('conv5_2_CPM', 15, kernel_shape=1, nl=tf.identity)())
            out2 = add_stage(2, out1)
            out3 = add_stage(3, out2)
            out4 = add_stage(4, out3)
            out5 = add_stage(5, out4)
            out6 = add_stage(6, out4)
            resized_map = tf.image.resize_bilinear(out6,
                                                   [368, 368], name='resized_map')


def run_test(model_path, img_file):
    param_dict = np.load(model_path, encoding='latin1').item()

@@ -101,9 +107,9 @@ def run_test(model_path, img_file):
    ))
    im = cv2.imread(img_file, cv2.IMREAD_COLOR).astype('float32')
    im = cv2.resize(im, (368, 368))
    out = predict_func([[im]])[0][0]
    hm = out[:, :, :14].sum(axis=2)
    viz = colorize(im, hm)
    cv2.imwrite("output.jpg", viz)
...
@@ -5,7 +5,9 @@
from tensorpack import ProxyDataFlow, get_rng


class DisturbLabel(ProxyDataFlow):
    def __init__(self, ds, prob):
        super(DisturbLabel, self).__init__(ds)
        self.prob = prob

@@ -19,4 +21,3 @@ class DisturbLabel(ProxyDataFlow):
            if self.rng.rand() < self.prob:
                l = self.rng.choice(10)
            yield [img, l]
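DisturbLabel above replaces a sample's label with a uniformly drawn class with probability `prob`, and leaves it unchanged otherwise. A minimal standalone sketch of the per-sample rule, assuming 10 classes as in the code:

    import numpy as np

    def disturb_label(label, prob, num_classes=10, rng=np.random):
        # with probability `prob`, overwrite the label with a random class
        # (which may coincidentally equal the original, as in the DataFlow above)
        if rng.rand() < prob:
            return rng.choice(num_classes)
        return label

    print([disturb_label(3, 0.5) for _ in range(10)])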
@@ -5,7 +5,8 @@
import numpy as np
import tensorflow as tf
import os
import sys
import argparse
from tensorpack import *

@@ -13,9 +14,10 @@ from disturb import DisturbLabel
import imp
mnist_example = imp.load_source('mnist_example',
                                os.path.join(os.path.dirname(__file__), '..', 'mnist-convnet.py'))
get_config = mnist_example.get_config


def get_data():
    dataset_train = BatchData(DisturbLabel(dataset.Mnist('train'), args.prob), 128)
    dataset_test = BatchData(dataset.Mnist('test'), 256, remainder=True)

@@ -24,19 +26,21 @@ mnist_example.get_data = get_data
IMAGE_SIZE = 28


class Model(mnist_example.Model):
    def _build_graph(self, input_vars):
        image, label = input_vars
        image = tf.expand_dims(image, 3)

        with argscope(Conv2D, kernel_shape=5, nl=tf.nn.relu):
            logits = (LinearWrap(image)  # the starting brace is only for line-breaking
                      .Conv2D('conv0', out_channel=32, padding='VALID')
                      .MaxPooling('pool0', 2)
                      .Conv2D('conv1', out_channel=64, padding='VALID')
                      .MaxPooling('pool1', 2)
                      .FullyConnected('fc0', 512, nl=tf.nn.relu)
                      .FullyConnected('fc1', out_dim=10, nl=tf.identity)())
        prob = tf.nn.softmax(logits, name='prob')
        wrong = symbolic_functions.prediction_incorrect(logits, label)

@@ -63,4 +67,3 @@ if __name__ == '__main__':
    if args.load:
        config.session_init = SaverRestore(args.load)
    QueueInputTrainer(config).train()
@@ -16,20 +16,20 @@ import imp
svhn_example = imp.load_source('svhn_example',
                               os.path.join(os.path.dirname(__file__), '..', 'svhn-digit-convnet.py'))
Model = svhn_example.Model
get_config = svhn_example.get_config


def get_data():
    d1 = dataset.SVHNDigit('train')
    d2 = dataset.SVHNDigit('extra')
    data_train = RandomMixData([d1, d2])
    data_train = DisturbLabel(data_train, args.prob)
    data_test = dataset.SVHNDigit('test')

    augmentors = [
        imgaug.Resize((40, 40)),
        imgaug.Brightness(30),
        imgaug.Contrast((0.5, 1.5)),
    ]
    data_train = AugmentImageComponent(data_train, augmentors)
    data_train = BatchData(data_train, 128)
...
@@ -9,7 +9,8 @@ import argparse
import numpy as np
import multiprocessing
import msgpack
import os
import sys

from tensorpack import *
from tensorpack.tfutils.symbolic_functions import *

@@ -69,10 +70,12 @@ BITG = 6
TOTAL_BATCH_SIZE = 128
BATCH_SIZE = 64


class Model(ModelDesc):
    def _get_input_vars(self):
        return [InputVar(tf.float32, [None, 224, 224, 3], 'input'),
                InputVar(tf.int32, [None], 'label')]

    def _build_graph(self, input_vars):
        image, label = input_vars

@@ -81,6 +84,7 @@ class Model(ModelDesc):
        fw, fa, fg = get_dorefa(BITW, BITA, BITG)
        # monkey-patch tf.get_variable to apply fw
        old_get_variable = tf.get_variable

        def new_get_variable(name, shape=None, **kwargs):
            v = old_get_variable(name, shape, **kwargs)
            # don't binarize first and last layer

@@ -102,41 +106,41 @@ class Model(ModelDesc):
        with argscope(BatchNorm, decay=0.9, epsilon=1e-4), \
                argscope([Conv2D, FullyConnected], use_bias=False, nl=tf.identity):
            logits = (LinearWrap(image)
                      .Conv2D('conv0', 96, 12, stride=4, padding='VALID')
                      .apply(activate)
                      .Conv2D('conv1', 256, 5, padding='SAME', split=2)
                      .apply(fg)
                      .BatchNorm('bn1')
                      .MaxPooling('pool1', 3, 2, padding='SAME')
                      .apply(activate)
                      .Conv2D('conv2', 384, 3)
                      .apply(fg)
                      .BatchNorm('bn2')
                      .MaxPooling('pool2', 3, 2, padding='SAME')
                      .apply(activate)
                      .Conv2D('conv3', 384, 3, split=2)
                      .apply(fg)
                      .BatchNorm('bn3')
                      .apply(activate)
                      .Conv2D('conv4', 256, 3, split=2)
                      .apply(fg)
                      .BatchNorm('bn4')
                      .MaxPooling('pool4', 3, 2, padding='VALID')
                      .apply(activate)
                      .FullyConnected('fc0', 4096)
                      .apply(fg)
                      .BatchNorm('bnfc0')
                      .apply(activate)
                      .FullyConnected('fc1', 4096)
                      .apply(fg)
                      .BatchNorm('bnfc1')
                      .apply(nonlin)
                      .FullyConnected('fct', 1000, use_bias=True)())
        tf.get_variable = old_get_variable

        prob = tf.nn.softmax(logits, name='output')

@@ -156,28 +160,31 @@ class Model(ModelDesc):
        add_param_summary([('.*/W', ['histogram', 'rms'])])
        self.cost = tf.add_n([cost, wd_cost], name='cost')


def get_data(dataset_name):
    isTrain = dataset_name == 'train'
    ds = dataset.ILSVRC12(args.data, dataset_name, shuffle=isTrain)
    meta = dataset.ILSVRCMeta()
    pp_mean = meta.get_per_pixel_mean()
    pp_mean_224 = pp_mean[16:-16, 16:-16, :]

    if isTrain:
        class Resize(imgaug.ImageAugmentor):
            def __init__(self):
                self._init(locals())

            def _augment(self, img, _):
                h, w = img.shape[:2]
                size = 224
                scale = self.rng.randint(size, 308) * 1.0 / min(h, w)
                scaleX = scale * self.rng.uniform(0.85, 1.15)
                scaleY = scale * self.rng.uniform(0.85, 1.15)
                desSize = map(int, (max(size, min(w, scaleX * w)),
                                    max(size, min(h, scaleY * h))))
                dst = cv2.resize(img, tuple(desSize),
                                 interpolation=cv2.INTER_CUBIC)
                return dst

        augmentors = [

@@ -186,11 +193,11 @@ def get_data(dataset_name):
            imgaug.RandomApplyAug(imgaug.GaussianBlur(3), 0.5),
            imgaug.Brightness(30, True),
            imgaug.Gamma(),
            imgaug.Contrast((0.8, 1.2), True),
            imgaug.RandomCrop((224, 224)),
            imgaug.RandomApplyAug(imgaug.JpegNoise(), 0.8),
            imgaug.RandomApplyAug(imgaug.GaussianDeform(
                [(0.2, 0.2), (0.2, 0.8), (0.8, 0.8), (0.8, 0.2)],
                (224, 224), 0.2, 3), 0.1),
            imgaug.Flip(horiz=True),
            imgaug.MapImage(lambda x: x - pp_mean_224),

@@ -199,7 +206,7 @@ def get_data(dataset_name):
        def resize_func(im):
            h, w = im.shape[:2]
            scale = 256.0 / min(h, w)
            desSize = map(int, (max(224, min(w, scale * w)),
                                max(224, min(h, scale * h))))
            im = cv2.resize(im, tuple(desSize), interpolation=cv2.INTER_CUBIC)
            return im

@@ -214,6 +221,7 @@ def get_data(dataset_name):
    ds = PrefetchDataZMQ(ds, min(12, multiprocessing.cpu_count()))
    return ds


def get_config():
    logger.auto_set_dir()

@@ -228,19 +236,20 @@ def get_config():
        optimizer=tf.train.AdamOptimizer(lr, epsilon=1e-5),
        callbacks=Callbacks([
            StatPrinter(), ModelSaver(),
            # HumanHyperParamSetter('learning_rate'),
            ScheduledHyperParamSetter(
                'learning_rate', [(56, 2e-5), (64, 4e-6)]),
            InferenceRunner(data_test,
                            [ScalarStats('cost'),
                             ClassificationError('wrong-top1', 'val-error-top1'),
                             ClassificationError('wrong-top5', 'val-error-top5')])
        ]),
        model=Model(),
        step_per_epoch=10000,
        max_epoch=100,
    )


def run_image(model, sess_init, inputs):
    pred_config = PredictConfig(
        model=model,

@@ -252,13 +261,13 @@ def run_image(model, sess_init, inputs):
    predict_func = get_predict_func(pred_config)
    meta = dataset.ILSVRCMeta()
    pp_mean = meta.get_per_pixel_mean()
    pp_mean_224 = pp_mean[16:-16, 16:-16, :]
    words = meta.get_synset_words_1000()

    def resize_func(im):
        h, w = im.shape[:2]
        scale = 256.0 / min(h, w)
        desSize = map(int, (max(224, min(w, scale * w)),
                            max(224, min(h, scale * h))))
        im = cv2.resize(im, tuple(desSize), interpolation=cv2.INTER_CUBIC)
        return im

@@ -272,7 +281,7 @@ def run_image(model, sess_init, inputs):
        img = cv2.imread(f).astype('float32')
        assert img is not None
        img = transformers.augment(img)[np.newaxis, :, :, :]
        outputs = predict_func([img])[0]
        prob = outputs[0]
        ret = prob.argsort()[-10:][::-1]

@@ -287,7 +296,7 @@ if __name__ == '__main__':
    parser.add_argument('--load', help='load a checkpoint, or a npy (given as the pretrained model)')
    parser.add_argument('--data', help='ILSVRC dataset dir')
    parser.add_argument('--dorefa',
                        help='number of bits for W,A,G, separated by comma', required=True)
    parser.add_argument('--run', help='run on a list of images with the pretrained model', nargs='*')
    args = parser.parse_args()
...
@@ -6,6 +6,7 @@
import tensorflow as tf
from tensorpack.utils.argtools import memoized


@memoized
def get_dorefa(bitW, bitA, bitG):
    """

@@ -15,7 +16,7 @@ def get_dorefa(bitW, bitA, bitG):
    G = tf.get_default_graph()

    def quantize(x, k):
        n = float(2**k - 1)
        with G.gradient_override_map({"Floor": "Identity"}):
            return tf.floor(x * n + 0.5) / n

@@ -39,11 +40,11 @@ def get_dorefa(bitW, bitA, bitG):
    def grad_fg(op, x):
        rank = x.get_shape().ndims
        assert rank is not None
        maxx = tf.reduce_max(tf.abs(x), list(range(1, rank)), keep_dims=True)
        x = x / maxx
        n = float(2**bitG - 1)
        x = x * 0.5 + 0.5 + tf.random_uniform(
            tf.shape(x), minval=-0.5 / n, maxval=0.5 / n)
        x = tf.clip_by_value(x, 0.0, 1.0)
        x = quantize(x, bitG) - 0.5
        return x * maxx * 2

@@ -54,4 +55,3 @@ def get_dorefa(bitW, bitA, bitG):
        with G.gradient_override_map({"Identity": "FGGrad"}):
            return tf.identity(x)
    return fw, fa, fg
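The `quantize` helper above maps a value in [0, 1] onto the 2**k evenly spaced levels {0, 1/n, ..., 1} with n = 2**k - 1, and the gradient_override_map makes Floor act as identity in the backward pass (a straight-through estimator). A plain NumPy sketch of just the forward rounding:

    import numpy as np

    def quantize_np(x, k):
        # round x (assumed in [0, 1]) to the nearest of 2**k levels
        n = float(2 ** k - 1)
        return np.floor(x * n + 0.5) / n

    x = np.array([0.0, 0.1, 0.4, 0.77, 1.0])
    print(quantize_np(x, 2))   # -> 0, 0, 1/3, 2/3, 1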
@@ -40,10 +40,12 @@ BITW = 1
BITA = 2
BITG = 4


class Model(ModelDesc):
    def _get_input_vars(self):
        return [InputVar(tf.float32, [None, 40, 40, 3], 'input'),
                InputVar(tf.int32, [None], 'label')]

    def _build_graph(self, input_vars):
        image, label = input_vars

@@ -52,6 +54,7 @@ class Model(ModelDesc):
        fw, fa, fg = get_dorefa(BITW, BITA, BITG)
        # monkey-patch tf.get_variable to apply fw
        old_get_variable = tf.get_variable

        def new_get_variable(name, shape=None, **kwargs):
            v = old_get_variable(name, shape, **kwargs)
            # don't binarize first and last layer

@@ -62,9 +65,9 @@ class Model(ModelDesc):
            return fw(v)
        tf.get_variable = new_get_variable

        def cabs(x):
            return tf.minimum(1.0, tf.abs(x), name='cabs')

        def activate(x):
            return fa(cabs(x))

@@ -73,38 +76,38 @@ class Model(ModelDesc):
        with argscope(BatchNorm, decay=0.9, epsilon=1e-4), \
                argscope(Conv2D, use_bias=False, nl=tf.identity):
            logits = (LinearWrap(image)
                      .Conv2D('conv0', 48, 5, padding='VALID', use_bias=True)
                      .MaxPooling('pool0', 2, padding='SAME')
                      .apply(activate)
                      # 18
                      .Conv2D('conv1', 64, 3, padding='SAME')
                      .apply(fg)
                      .BatchNorm('bn1').apply(activate)
                      .Conv2D('conv2', 64, 3, padding='SAME')
                      .apply(fg)
                      .BatchNorm('bn2')
                      .MaxPooling('pool1', 2, padding='SAME')
                      .apply(activate)
                      # 9
                      .Conv2D('conv3', 128, 3, padding='VALID')
                      .apply(fg)
                      .BatchNorm('bn3').apply(activate)
                      # 7
                      .Conv2D('conv4', 128, 3, padding='SAME')
                      .apply(fg)
                      .BatchNorm('bn4').apply(activate)
                      .Conv2D('conv5', 128, 3, padding='VALID')
                      .apply(fg)
                      .BatchNorm('bn5').apply(activate)
                      # 5
                      .tf.nn.dropout(0.5 if is_training else 1.0)
                      .Conv2D('conv6', 512, 5, padding='VALID')
                      .apply(fg).BatchNorm('bn6')
                      .apply(cabs)
                      .FullyConnected('fc1', 10, nl=tf.identity)())
        tf.get_variable = old_get_variable

        prob = tf.nn.softmax(logits, name='output')

@@ -122,6 +125,7 @@ class Model(ModelDesc):
        add_param_summary([('.*/W', ['histogram', 'rms'])])
        self.cost = tf.add_n([cost, wd_cost], name='cost')


def get_config():
    logger.auto_set_dir()

@@ -134,17 +138,17 @@ def get_config():
    augmentors = [
        imgaug.Resize((40, 40)),
        imgaug.Brightness(30),
        imgaug.Contrast((0.5, 1.5)),
        # imgaug.GaussianDeform(  # this is slow but helpful. only use it when you have lots of cpus
        #[(0.2, 0.2), (0.2, 0.8), (0.8,0.8), (0.8,0.2)],
        #(40,40), 0.2, 3),
    ]
    data_train = AugmentImageComponent(data_train, augmentors)
    data_train = BatchData(data_train, 128)
    data_train = PrefetchDataZMQ(data_train, 5)
    step_per_epoch = data_train.size()

    augmentors = [imgaug.Resize((40, 40))]
    data_test = AugmentImageComponent(data_test, augmentors)
    data_test = BatchData(data_test, 128, remainder=True)

@@ -162,7 +166,7 @@ def get_config():
            StatPrinter(),
            ModelSaver(),
            InferenceRunner(data_test,
                            [ScalarStats('cost'), ClassificationError()])
        ]),
        model=Model(),
        step_per_epoch=step_per_epoch,

@@ -174,8 +178,8 @@ if __name__ == '__main__':
    parser.add_argument('--gpu', help='the GPU to use')
    parser.add_argument('--load', help='load a checkpoint')
    parser.add_argument('--dorefa',
                        help='number of bits for W,A,G, separated by comma. Defaults to \'1,2,4\'',
                        default='1,2,4')
    args = parser.parse_args()
    BITW, BITA, BITG = map(int, args.dorefa.split(','))
...
@@ -5,8 +5,10 @@
import numpy as np
import tensorflow as tf
import glob
import pickle
import os
import sys
import argparse
import cv2

@@ -32,15 +34,17 @@ CFG.SHAPE = 64
CFG.BATCH = 128
CFG.Z_DIM = 100


class Model(ModelDesc):
    def _get_input_vars(self):
        return [InputVar(tf.float32, (None, CFG.SHAPE, CFG.SHAPE, 3), 'input')]

    def generator(self, z):
        """ return a image generated from z"""
        nf = 64
        l = FullyConnected('fc0', z, nf * 8 * 4 * 4, nl=tf.identity)
        l = tf.reshape(l, [-1, 4, 4, nf * 8])
        l = BNReLU(l)
        with argscope(Deconv2D, nl=BNReLU, kernel_shape=4, stride=2):
            l = Deconv2D('deconv1', l, [8, 8, nf * 4])

@@ -56,14 +60,14 @@ class Model(ModelDesc):
        with argscope(Conv2D, nl=tf.identity, kernel_shape=4, stride=2), \
                argscope(LeakyReLU, alpha=0.2):
            l = (LinearWrap(imgs)
                 .Conv2D('conv0', nf, nl=LeakyReLU)
                 .Conv2D('conv1', nf * 2)
                 .BatchNorm('bn1').LeakyReLU()
                 .Conv2D('conv2', nf * 4)
                 .BatchNorm('bn2').LeakyReLU()
                 .Conv2D('conv3', nf * 8)
                 .BatchNorm('bn3').LeakyReLU()
                 .FullyConnected('fct', 1, nl=tf.identity)())
        return l

    def _build_graph(self, input_vars):

@@ -74,7 +78,7 @@ class Model(ModelDesc):
        z = tf.placeholder_with_default(z, [None, CFG.Z_DIM], name='z')

        with argscope([Conv2D, Deconv2D, FullyConnected],
                      W_init=tf.truncated_normal_initializer(stddev=0.02)):
            with tf.variable_scope('gen'):
                image_gen = self.generator(z)
                tf.summary.image('gen', image_gen, max_outputs=30)

@@ -88,16 +92,18 @@ class Model(ModelDesc):
        self.g_vars = [v for v in all_vars if v.name.startswith('gen/')]
        self.d_vars = [v for v in all_vars if v.name.startswith('discrim/')]


def get_data():
    datadir = CFG.data
    imgs = glob.glob(datadir + '/*.jpg')
    ds = ImageFromFile(imgs, channel=3, shuffle=True)
    augs = [imgaug.CenterCrop(140), imgaug.Resize(64)]
    ds = AugmentImageComponent(ds, augs)
    ds = BatchData(ds, CFG.BATCH)
    ds = PrefetchDataZMQ(ds, 1)
    return ds


def get_config():
    logger.auto_set_dir()
    dataset = get_data()

@@ -114,17 +120,18 @@ def get_config():
        max_epoch=200,
    )


def sample(model_path):
    pred = PredictConfig(
        session_init=get_model_loader(model_path),
        model=Model(),
        input_names=['z'],
        output_names=['gen/gen', 'z'])
    pred = SimpleDatasetPredictor(pred, RandomZData((100, 100)))
    for o in pred.get_result():
        o, zs = o[0] + 1, o[1]
        o = o * 128.0
        o = o[:, :, :, ::-1]
        viz = next(build_patch_list(o, nr_row=10, nr_col=10, viz=True))

if __name__ == '__main__':
...
@@ -7,11 +7,13 @@ import tensorflow as tf
import numpy as np
import time
from tensorpack import (FeedfreeTrainer, TowerContext,
                        get_global_step_var, QueueInput)
from tensorpack.tfutils.summary import summary_moving_average, add_moving_summary
from tensorpack.dataflow import DataFlow


class GANTrainer(FeedfreeTrainer):
    def __init__(self, config):
        self._input_method = QueueInput(config.dataset)
        super(GANTrainer, self).__init__(config)

@@ -22,10 +24,10 @@ class GANTrainer(FeedfreeTrainer):
        actual_inputs = self._get_input_tensors()
        self.model.build_graph(actual_inputs)
        self.g_min = self.config.optimizer.minimize(self.model.g_loss,
                                                    var_list=self.model.g_vars, name='g_op')
        with tf.control_dependencies([self.g_min]):
            self.d_min = self.config.optimizer.minimize(self.model.d_loss,
                                                        var_list=self.model.d_vars, name='d_op')
        self.gs_incr = tf.assign_add(get_global_step_var(), 1, name='global_step_incr')
        self.summary_op = summary_moving_average()
        self.train_op = tf.group(self.d_min, self.summary_op, self.gs_incr)

@@ -33,14 +35,18 @@ class GANTrainer(FeedfreeTrainer):
    def run_step(self):
        self.sess.run(self.train_op)


class RandomZData(DataFlow):
    def __init__(self, shape):
        super(RandomZData, self).__init__()
        self.shape = shape

    def get_data(self):
        while True:
            yield [np.random.uniform(-1, 1, size=self.shape)]


def build_GAN_losses(vecpos, vecneg):
    """
    :param vecpos, vecneg: output of the discriminator (logits) for real

@@ -64,6 +70,6 @@ def build_GAN_losses(vecpos, vecneg):
        vecneg, tf.ones_like(vecneg)), name='g_CE_loss')
    d_loss = tf.add(d_loss_pos, d_loss_neg, name='d_CE_loss')
    add_moving_summary(d_loss_pos, d_loss_neg,
                       g_loss, d_loss,
                       d_pos_acc, d_neg_acc)
    return g_loss, d_loss
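build_GAN_losses above uses the standard sigmoid cross-entropy objectives: the discriminator is trained to push real logits toward label 1 and generated logits toward label 0, while the generator is trained to push the generated logits toward 1. A NumPy sketch of what those per-example cross-entropy terms evaluate to (made-up logits, and assuming a mean reduction over the batch):

    import numpy as np

    def sigmoid(z):
        return 1.0 / (1.0 + np.exp(-z))

    vecpos = np.array([2.0, 0.5])    # discriminator logits on real data
    vecneg = np.array([-1.0, 1.5])   # discriminator logits on generated data

    d_loss_pos = -np.log(sigmoid(vecpos)).mean()      # real classified as real
    d_loss_neg = -np.log(1 - sigmoid(vecneg)).mean()  # fake classified as fake
    d_loss = d_loss_pos + d_loss_neg
    g_loss = -np.log(sigmoid(vecneg)).mean()          # generator wants fakes judged real
    print(d_loss, g_loss)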
@@ -5,8 +5,10 @@
import numpy as np
import tensorflow as tf
import glob
import pickle
import os
import sys
import argparse
import cv2

@@ -36,12 +38,14 @@ BATCH = 1
IN_CH = 3
OUT_CH = 3
LAMBDA = 100
NF = 64  # number of filter


class Model(ModelDesc):
    def _get_input_vars(self):
        return [InputVar(tf.float32, (None, SHAPE, SHAPE, IN_CH), 'input'),
                InputVar(tf.float32, (None, SHAPE, SHAPE, OUT_CH), 'output')]

    def generator(self, imgs):
        # imgs: input: 256x256xch

@@ -50,49 +54,49 @@ class Model(ModelDesc):
                argscope(Dropout, is_training=True):
            # always use local stat for BN, and apply dropout even in testing
            with argscope(Conv2D, kernel_shape=4, stride=2,
                          nl=lambda x, name: LeakyReLU(BatchNorm('bn', x), name=name)):
                e1 = Conv2D('conv1', imgs, NF, nl=LeakyReLU)
                e2 = Conv2D('conv2', e1, NF * 2)
                e3 = Conv2D('conv3', e2, NF * 4)
                e4 = Conv2D('conv4', e3, NF * 8)
                e5 = Conv2D('conv5', e4, NF * 8)
                e6 = Conv2D('conv6', e5, NF * 8)
                e7 = Conv2D('conv7', e6, NF * 8)
                e8 = Conv2D('conv8', e7, NF * 8, nl=BNReLU)  # 1x1
            with argscope(Deconv2D, nl=BNReLU, kernel_shape=4, stride=2):
                return (LinearWrap(e8)
                        .Deconv2D('deconv1', NF * 8)
                        .Dropout()
                        .ConcatWith(3, e7)
                        .Deconv2D('deconv2', NF * 8)
                        .Dropout()
                        .ConcatWith(3, e6)
                        .Deconv2D('deconv3', NF * 8)
                        .Dropout()
                        .ConcatWith(3, e5)
                        .Deconv2D('deconv4', NF * 8)
                        .ConcatWith(3, e4)
                        .Deconv2D('deconv5', NF * 4)
                        .ConcatWith(3, e3)
                        .Deconv2D('deconv6', NF * 2)
                        .ConcatWith(3, e2)
                        .Deconv2D('deconv7', NF * 1)
                        .ConcatWith(3, e1)
                        .Deconv2D('deconv8', OUT_CH, nl=tf.tanh)())

    def discriminator(self, inputs, outputs):
        """ return a (b, 1) logits"""
        l = tf.concat(3, [inputs, outputs])
        with argscope(Conv2D, nl=tf.identity, kernel_shape=4, stride=2):
            l = (LinearWrap(l)
                 .Conv2D('conv0', NF, nl=LeakyReLU)
                 .Conv2D('conv1', NF * 2)
                 .BatchNorm('bn1').LeakyReLU()
                 .Conv2D('conv2', NF * 4)
                 .BatchNorm('bn2').LeakyReLU()
                 .Conv2D('conv3', NF * 8, stride=1, padding='VALID')
                 .BatchNorm('bn3').LeakyReLU()
                 .Conv2D('convlast', 1, stride=1, padding='VALID')())
        return l

    def _build_graph(self, input_vars):

@@ -100,7 +104,7 @@ class Model(ModelDesc):
        input, output = input / 128.0 - 1, output / 128.0 - 1

        with argscope([Conv2D, Deconv2D],
                      W_init=tf.truncated_normal_initializer(stddev=0.02)), \
                argscope(LeakyReLU, alpha=0.2):
            with tf.variable_scope('gen'):
                fake_output = self.generator(input)

@@ -128,33 +132,36 @@ class Model(ModelDesc):
        self.g_vars = [v for v in all_vars if v.name.startswith('gen/')]
        self.d_vars = [v for v in all_vars if v.name.startswith('discrim/')]


def split_input(img):
    """
    img: an image with shape (s, 2s, 3)
    :return: [input, output]
    """
    s = img.shape[0]
    input, output = img[:, :s, :], img[:, s:, :]
    if args.mode == 'BtoA':
        input, output = output, input
    if IN_CH == 1:
        input = cv2.cvtColor(input, cv2.COLOR_RGB2GRAY)[:, :, np.newaxis]
if OUT_CH == 1: if OUT_CH == 1:
output = cv2.cvtColor(output, cv2.COLOR_RGB2GRAY)[:,:,np.newaxis] output = cv2.cvtColor(output, cv2.COLOR_RGB2GRAY)[:, :, np.newaxis]
return [input, output] return [input, output]
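
As a quick illustration of split_input, a 256x512 paired image separates into two 256x256 halves (array values here are placeholders):

import numpy as np

img = np.zeros((256, 512, 3), dtype=np.uint8)   # left half = input, right half = output
s = img.shape[0]
left, right = img[:, :s, :], img[:, s:, :]
assert left.shape == right.shape == (256, 256, 3)
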
def get_data(): def get_data():
datadir = args.data datadir = args.data
    # assume each image is 512x256, split into left and right halves # assume each image is 512x256, split into left and right halves
imgs = glob.glob(os.path.join(datadir, '*.jpg')) imgs = glob.glob(os.path.join(datadir, '*.jpg'))
ds = ImageFromFile(imgs, channel=3, shuffle=True) ds = ImageFromFile(imgs, channel=3, shuffle=True)
ds = MapData(ds, lambda dp: split_input(dp[0])) ds = MapData(ds, lambda dp: split_input(dp[0]))
augs = [ imgaug.Resize(286), imgaug.RandomCrop(256) ] augs = [imgaug.Resize(286), imgaug.RandomCrop(256)]
ds = AugmentImageComponents(ds, augs, (0, 1)) ds = AugmentImageComponents(ds, augs, (0, 1))
ds = BatchData(ds, BATCH) ds = BatchData(ds, BATCH)
ds = PrefetchDataZMQ(ds, 1) ds = PrefetchDataZMQ(ds, 1)
return ds return ds
def get_config(): def get_config():
logger.auto_set_dir() logger.auto_set_dir()
dataset = get_data() dataset = get_data()
...@@ -171,12 +178,13 @@ def get_config(): ...@@ -171,12 +178,13 @@ def get_config():
max_epoch=300, max_epoch=300,
) )
def sample(datadir, model_path): def sample(datadir, model_path):
pred = PredictConfig( pred = PredictConfig(
session_init=get_model_loader(model_path), session_init=get_model_loader(model_path),
model=Model(), model=Model(),
input_names=['input', 'output'], input_names=['input', 'output'],
output_names=['viz']) output_names=['viz'])
imgs = glob.glob(os.path.join(datadir, '*.jpg')) imgs = glob.glob(os.path.join(datadir, '*.jpg'))
ds = ImageFromFile(imgs, channel=3, shuffle=True) ds = ImageFromFile(imgs, channel=3, shuffle=True)
...@@ -184,7 +192,7 @@ def sample(datadir, model_path): ...@@ -184,7 +192,7 @@ def sample(datadir, model_path):
pred = SimpleDatasetPredictor(pred, ds) pred = SimpleDatasetPredictor(pred, ds)
for o in pred.get_result(): for o in pred.get_result():
o = o[0][:,:,:,::-1] o = o[0][:, :, :, ::-1]
viz = next(build_patch_list(o, nr_row=3, nr_col=2, viz=True)) viz = next(build_patch_list(o, nr_row=3, nr_col=2, viz=True))
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -5,7 +5,8 @@ ...@@ -5,7 +5,8 @@
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
import os, sys import os
import sys
import cv2 import cv2
import argparse import argparse
...@@ -16,9 +17,11 @@ from GAN import GANTrainer, build_GAN_losses ...@@ -16,9 +17,11 @@ from GAN import GANTrainer, build_GAN_losses
BATCH = 128 BATCH = 128
class Model(ModelDesc): class Model(ModelDesc):
def _get_input_vars(self): def _get_input_vars(self):
return [InputVar(tf.float32, (None, 28, 28), 'input') ] return [InputVar(tf.float32, (None, 28, 28), 'input')]
def generator(self, z): def generator(self, z):
l = FullyConnected('fc0', z, 1024, nl=BNReLU) l = FullyConnected('fc0', z, 1024, nl=BNReLU)
...@@ -34,18 +37,18 @@ class Model(ModelDesc): ...@@ -34,18 +37,18 @@ class Model(ModelDesc):
with argscope(Conv2D, nl=tf.identity, kernel_shape=4, stride=2), \ with argscope(Conv2D, nl=tf.identity, kernel_shape=4, stride=2), \
argscope(LeakyReLU, alpha=0.2): argscope(LeakyReLU, alpha=0.2):
l = (LinearWrap(imgs) l = (LinearWrap(imgs)
.Conv2D('conv0', 64) .Conv2D('conv0', 64)
.LeakyReLU() .LeakyReLU()
.Conv2D('conv1', 128) .Conv2D('conv1', 128)
.BatchNorm('bn1').LeakyReLU() .BatchNorm('bn1').LeakyReLU()
.FullyConnected('fc1', 1024, nl=tf.identity) .FullyConnected('fc1', 1024, nl=tf.identity)
.BatchNorm('bn2').LeakyReLU()()) .BatchNorm('bn2').LeakyReLU()())
logits = FullyConnected('fct', l, 1, nl=tf.identity) logits = FullyConnected('fct', l, 1, nl=tf.identity)
encoder = (LinearWrap(l) encoder = (LinearWrap(l)
.FullyConnected('fce1', 128, nl=tf.identity) .FullyConnected('fce1', 128, nl=tf.identity)
.BatchNorm('bne').LeakyReLU() .BatchNorm('bne').LeakyReLU()
.FullyConnected('fce-out', 10, nl=tf.identity)()) .FullyConnected('fce-out', 10, nl=tf.identity)())
return logits, encoder return logits, encoder
def _build_graph(self, input_vars): def _build_graph(self, input_vars):
...@@ -54,7 +57,7 @@ class Model(ModelDesc): ...@@ -54,7 +57,7 @@ class Model(ModelDesc):
prior_prob = tf.constant([0.1] * 10, name='prior_prob') prior_prob = tf.constant([0.1] * 10, name='prior_prob')
# assume first 10 is categorical # assume first 10 is categorical
ids = tf.multinomial(tf.zeros([BATCH, 10]), num_samples=1)[:,0] ids = tf.multinomial(tf.zeros([BATCH, 10]), num_samples=1)[:, 0]
zc = tf.one_hot(ids, 10, name='zc_train') zc = tf.one_hot(ids, 10, name='zc_train')
zc = tf.placeholder_with_default(zc, [None, 10], name='zc') zc = tf.placeholder_with_default(zc, [None, 10], name='zc')
...@@ -63,7 +66,7 @@ class Model(ModelDesc): ...@@ -63,7 +66,7 @@ class Model(ModelDesc):
z = tf.concat(1, [zc, z], name='fullz') z = tf.concat(1, [zc, z], name='fullz')
with argscope([Conv2D, Deconv2D, FullyConnected], with argscope([Conv2D, Deconv2D, FullyConnected],
W_init=tf.truncated_normal_initializer(stddev=0.02)): W_init=tf.truncated_normal_initializer(stddev=0.02)):
with tf.variable_scope('gen'): with tf.variable_scope('gen'):
image_gen = self.generator(z) image_gen = self.generator(z)
tf.summary.image('gen', image_gen, max_outputs=30) tf.summary.image('gen', image_gen, max_outputs=30)
...@@ -71,10 +74,10 @@ class Model(ModelDesc): ...@@ -71,10 +74,10 @@ class Model(ModelDesc):
vecpos, _ = self.discriminator(image_pos) vecpos, _ = self.discriminator(image_pos)
with tf.variable_scope('discrim', reuse=True): with tf.variable_scope('discrim', reuse=True):
vecneg, dist_param = self.discriminator(image_gen) vecneg, dist_param = self.discriminator(image_gen)
logprob = tf.nn.log_softmax(dist_param) # log prob of each category logprob = tf.nn.log_softmax(dist_param) # log prob of each category
# Q(c|x) = Q(zc | image_gen) # Q(c|x) = Q(zc | image_gen)
log_qc_given_x = tf.reduce_sum(logprob * zc, 1, name='logQc_x') # bx1 log_qc_given_x = tf.reduce_sum(logprob * zc, 1, name='logQc_x') # bx1
log_qc = tf.reduce_sum(prior_prob * zc, 1, name='logQc') log_qc = tf.reduce_sum(prior_prob * zc, 1, name='logQc')
Elog_qc_given_x = tf.reduce_mean(log_qc_given_x, name='ElogQc_x') Elog_qc_given_x = tf.reduce_mean(log_qc_given_x, name='ElogQc_x')
Hc = tf.reduce_mean(-log_qc, name='Hc') Hc = tf.reduce_mean(-log_qc, name='Hc')
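
The two averages above feed the InfoGAN mutual-information term I(c; G(z, c)) >= H(c) + E[log Q(c|x)]; a standalone numpy sketch of that textbook bound, with random placeholder values:

import numpy as np

batch, ncat = 4, 10
logits = np.random.randn(batch, ncat)                     # Q(c|x) logits from the encoder head
logprob = logits - np.log(np.exp(logits).sum(1, keepdims=True))
zc = np.eye(ncat)[np.random.randint(0, ncat, batch)]      # sampled one-hot latent codes
prior = np.full(ncat, 1.0 / ncat)

Hc = -np.sum(prior * np.log(prior))                       # entropy of the uniform prior, ~2.30
Elog_qc_given_x = np.mean(np.sum(logprob * zc, axis=1))   # E[log Q(c|x)]
mi_lower_bound = Hc + Elog_qc_given_x
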
...@@ -89,11 +92,13 @@ class Model(ModelDesc): ...@@ -89,11 +92,13 @@ class Model(ModelDesc):
self.g_vars = [v for v in all_vars if v.name.startswith('gen/')] self.g_vars = [v for v in all_vars if v.name.startswith('gen/')]
self.d_vars = [v for v in all_vars if v.name.startswith('discrim/')] self.d_vars = [v for v in all_vars if v.name.startswith('discrim/')]
def get_data(): def get_data():
ds = ConcatData([dataset.Mnist('train'), dataset.Mnist('test')]) ds = ConcatData([dataset.Mnist('train'), dataset.Mnist('test')])
ds = BatchData(ds, BATCH) ds = BatchData(ds, BATCH)
return ds return ds
def get_config(): def get_config():
logger.auto_set_dir() logger.auto_set_dir()
dataset = get_data() dataset = get_data()
...@@ -110,12 +115,13 @@ def get_config(): ...@@ -110,12 +115,13 @@ def get_config():
max_epoch=100, max_epoch=100,
) )
def sample(model_path): def sample(model_path):
pred = OfflinePredictor(PredictConfig( pred = OfflinePredictor(PredictConfig(
session_init=get_model_loader(model_path), session_init=get_model_loader(model_path),
model=Model(), model=Model(),
input_names=['zc'], input_names=['zc'],
output_names=['gen/gen'])) output_names=['gen/gen']))
eye = [] eye = []
for k in np.eye(10): for k in np.eye(10):
...@@ -143,4 +149,3 @@ if __name__ == '__main__': ...@@ -143,4 +149,3 @@ if __name__ == '__main__':
if args.load: if args.load:
config.session_init = SaverRestore(args.load) config.session_init = SaverRestore(args.load)
GANTrainer(config).train() GANTrainer(config).train()
...@@ -8,16 +8,19 @@ import tensorflow as tf ...@@ -8,16 +8,19 @@ import tensorflow as tf
import argparse import argparse
import numpy as np import numpy as np
from six.moves import zip from six.moves import zip
import os, sys import os
import sys
from tensorpack import * from tensorpack import *
from tensorpack.tfutils.symbolic_functions import * from tensorpack.tfutils.symbolic_functions import *
from tensorpack.tfutils.summary import * from tensorpack.tfutils.summary import *
class Model(ModelDesc): class Model(ModelDesc):
def _get_input_vars(self): def _get_input_vars(self):
return [InputVar(tf.float32, [None, None, None, 3], 'image'), return [InputVar(tf.float32, [None, None, None, 3], 'image'),
InputVar(tf.int32, [None, None, None], 'edgemap') ] InputVar(tf.int32, [None, None, None], 'edgemap')]
def _build_graph(self, input_vars): def _build_graph(self, input_vars):
image, edgemap = input_vars image, edgemap = input_vars
...@@ -27,9 +30,9 @@ class Model(ModelDesc): ...@@ -27,9 +30,9 @@ class Model(ModelDesc):
def branch(name, l, up): def branch(name, l, up):
with tf.variable_scope(name) as scope: with tf.variable_scope(name) as scope:
l = Conv2D('convfc', l, 1, kernel_shape=1, nl=tf.identity, l = Conv2D('convfc', l, 1, kernel_shape=1, nl=tf.identity,
use_bias=True, use_bias=True,
W_init=tf.constant_initializer(), W_init=tf.constant_initializer(),
b_init=tf.constant_initializer()) b_init=tf.constant_initializer())
while up != 1: while up != 1:
l = BilinearUpSample('upsample{}'.format(up), l, 2) l = BilinearUpSample('upsample{}'.format(up), l, 2)
up = up / 2 up = up / 2
...@@ -64,15 +67,15 @@ class Model(ModelDesc): ...@@ -64,15 +67,15 @@ class Model(ModelDesc):
b5 = branch('branch5', l, 16) b5 = branch('branch5', l, 16)
final_map = Conv2D('convfcweight', final_map = Conv2D('convfcweight',
tf.concat(3, [b1, b2, b3, b4, b5]), 1, 1, tf.concat(3, [b1, b2, b3, b4, b5]), 1, 1,
W_init=tf.constant_initializer(0.2), W_init=tf.constant_initializer(0.2),
use_bias=False, nl=tf.identity) use_bias=False, nl=tf.identity)
costs = [] costs = []
for idx, b in enumerate([b1, b2, b3, b4, b5, final_map]): for idx, b in enumerate([b1, b2, b3, b4, b5, final_map]):
output = tf.nn.sigmoid(b, name='output{}'.format(idx+1)) output = tf.nn.sigmoid(b, name='output{}'.format(idx + 1))
xentropy = class_balanced_sigmoid_cross_entropy( xentropy = class_balanced_sigmoid_cross_entropy(
b, edgemap, b, edgemap,
name='xentropy{}'.format(idx+1)) name='xentropy{}'.format(idx + 1))
costs.append(xentropy) costs.append(xentropy)
# some magic threshold # some magic threshold
...@@ -91,13 +94,15 @@ class Model(ModelDesc): ...@@ -91,13 +94,15 @@ class Model(ModelDesc):
self.cost = tf.add_n(costs, name='cost') self.cost = tf.add_n(costs, name='cost')
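
class_balanced_sigmoid_cross_entropy re-weights edge and non-edge pixels by their inverse frequency (HED-style); a rough numpy sketch of that idea, not the tensorpack implementation:

import numpy as np

def class_balanced_xent(logits, labels, eps=1e-8):
    # labels: binary edge map in {0, 1}; logits: raw predictions of the same shape
    prob = 1.0 / (1.0 + np.exp(-logits))
    beta = 1.0 - labels.mean()                     # fraction of non-edge pixels
    pos = -beta * labels * np.log(prob + eps)
    neg = -(1.0 - beta) * (1.0 - labels) * np.log(1.0 - prob + eps)
    return (pos + neg).mean()
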
def get_gradient_processor(self): def get_gradient_processor(self):
return [ScaleGradient([('convfcweight.*', 0.1), ('conv5_.*', 5)]) ] return [ScaleGradient([('convfcweight.*', 0.1), ('conv5_.*', 5)])]
def get_data(name): def get_data(name):
isTrain = name == 'train' isTrain = name == 'train'
ds = dataset.BSDS500(name, shuffle=True) ds = dataset.BSDS500(name, shuffle=True)
class CropMultiple16(imgaug.ImageAugmentor): class CropMultiple16(imgaug.ImageAugmentor):
def _get_augment_params(self, img): def _get_augment_params(self, img):
newh = img.shape[0] // 16 * 16 newh = img.shape[0] // 16 * 16
neww = img.shape[1] // 16 * 16 neww = img.shape[1] // 16 * 16
...@@ -110,12 +115,12 @@ def get_data(name): ...@@ -110,12 +115,12 @@ def get_data(name):
def _augment(self, img, param): def _augment(self, img, param):
h0, w0, newh, neww = param h0, w0, newh, neww = param
return img[h0:h0+newh,w0:w0+neww] return img[h0:h0 + newh, w0:w0 + neww]
if isTrain: if isTrain:
shape_aug = [ shape_aug = [
imgaug.RandomResize(xrange=(0.7,1.5), yrange=(0.7,1.5), imgaug.RandomResize(xrange=(0.7, 1.5), yrange=(0.7, 1.5),
aspect_ratio_thres=0.15), aspect_ratio_thres=0.15),
imgaug.RotationAndCropValid(90), imgaug.RotationAndCropValid(90),
CropMultiple16(), CropMultiple16(),
imgaug.Flip(horiz=True), imgaug.Flip(horiz=True),
...@@ -128,15 +133,15 @@ def get_data(name): ...@@ -128,15 +133,15 @@ def get_data(name):
ds = AugmentImageComponents(ds, shape_aug, (0, 1)) ds = AugmentImageComponents(ds, shape_aug, (0, 1))
def f(m): def f(m):
m[m>=0.50] = 1 m[m >= 0.50] = 1
m[m<0.50] = 0 m[m < 0.50] = 0
return m return m
ds = MapDataComponent(ds, f, 1) ds = MapDataComponent(ds, f, 1)
if isTrain: if isTrain:
augmentors = [ augmentors = [
imgaug.Brightness(63, clip=False), imgaug.Brightness(63, clip=False),
imgaug.Contrast((0.4,1.5)), imgaug.Contrast((0.4, 1.5)),
] ]
ds = AugmentImageComponent(ds, augmentors) ds = AugmentImageComponent(ds, augmentors)
ds = BatchDataByShape(ds, 8, idx=0) ds = BatchDataByShape(ds, 8, idx=0)
...@@ -145,6 +150,7 @@ def get_data(name): ...@@ -145,6 +150,7 @@ def get_data(name):
ds = BatchData(ds, 1) ds = BatchData(ds, 1)
return ds return ds
def view_data(): def view_data():
ds = RepeatedData(get_data('train'), -1) ds = RepeatedData(get_data('train'), -1)
ds.reset_state() ds.reset_state()
...@@ -156,6 +162,7 @@ def view_data(): ...@@ -156,6 +162,7 @@ def view_data():
cv2.imshow("edge", edgemap) cv2.imshow("edge", edgemap)
cv2.waitKey(1000) cv2.waitKey(1000)
def get_config(): def get_config():
logger.auto_set_dir() logger.auto_set_dir()
dataset_train = get_data('train') dataset_train = get_data('train')
...@@ -178,12 +185,13 @@ def get_config(): ...@@ -178,12 +185,13 @@ def get_config():
max_epoch=100, max_epoch=100,
) )
def run(model_path, image_path, output): def run(model_path, image_path, output):
pred_config = PredictConfig( pred_config = PredictConfig(
model=Model(), model=Model(),
session_init=get_model_loader(model_path), session_init=get_model_loader(model_path),
input_names=['image'], input_names=['image'],
output_names=['output' + str(k) for k in range(1, 7)]) output_names=['output' + str(k) for k in range(1, 7)])
predict_func = get_predict_func(pred_config) predict_func = get_predict_func(pred_config)
im = cv2.imread(image_path) im = cv2.imread(image_path)
assert im is not None assert im is not None
...@@ -193,7 +201,7 @@ def run(model_path, image_path, output): ...@@ -193,7 +201,7 @@ def run(model_path, image_path, output):
for k in range(6): for k in range(6):
pred = outputs[k][0] pred = outputs[k][0]
cv2.imwrite("out{}.png".format( cv2.imwrite("out{}.png".format(
'-fused' if k == 5 else str(k+1)), pred * 255) '-fused' if k == 5 else str(k + 1)), pred * 255)
else: else:
pred = outputs[5][0] pred = outputs[5][0]
cv2.imwrite(output, pred * 255) cv2.imwrite(output, pred * 255)
......
...@@ -27,10 +27,12 @@ This config reaches 71% single-crop validation accuracy after 150k steps with 6 ...@@ -27,10 +27,12 @@ This config reaches 71% single-crop validation accuracy after 150k steps with 6
Learning rate may need a different schedule for a different number of GPUs (because the batch size will be different). Learning rate may need a different schedule for a different number of GPUs (because the batch size will be different).
""" """
class Model(ModelDesc): class Model(ModelDesc):
def _get_input_vars(self): def _get_input_vars(self):
return [InputVar(tf.float32, [None, INPUT_SHAPE, INPUT_SHAPE, 3], 'input'), return [InputVar(tf.float32, [None, INPUT_SHAPE, INPUT_SHAPE, 3], 'input'),
InputVar(tf.int32, [None], 'label') ] InputVar(tf.int32, [None], 'label')]
def _build_graph(self, input_vars): def _build_graph(self, input_vars):
image, label = input_vars image, label = input_vars
...@@ -117,6 +119,7 @@ class Model(ModelDesc): ...@@ -117,6 +119,7 @@ class Model(ModelDesc):
add_param_summary([('.*/W', ['histogram'])]) # monitor W add_param_summary([('.*/W', ['histogram'])]) # monitor W
self.cost = tf.add_n([cost, wd_cost], name='cost') self.cost = tf.add_n([cost, wd_cost], name='cost')
def get_data(train_or_test): def get_data(train_or_test):
isTrain = train_or_test == 'train' isTrain = train_or_test == 'train'
ds = dataset.ILSVRC12(args.data, train_or_test, shuffle=True if isTrain else False) ds = dataset.ILSVRC12(args.data, train_or_test, shuffle=True if isTrain else False)
...@@ -128,7 +131,7 @@ def get_data(train_or_test): ...@@ -128,7 +131,7 @@ def get_data(train_or_test):
augmentors = [ augmentors = [
imgaug.Resize((256, 256)), imgaug.Resize((256, 256)),
imgaug.Brightness(30, False), imgaug.Brightness(30, False),
imgaug.Contrast((0.8,1.2), True), imgaug.Contrast((0.8, 1.2), True),
imgaug.MapImage(lambda x: x - pp_mean), imgaug.MapImage(lambda x: x - pp_mean),
imgaug.RandomCrop((224, 224)), imgaug.RandomCrop((224, 224)),
imgaug.Flip(horiz=True), imgaug.Flip(horiz=True),
...@@ -166,7 +169,7 @@ def get_config(): ...@@ -166,7 +169,7 @@ def get_config():
ScheduledHyperParamSetter('learning_rate', ScheduledHyperParamSetter('learning_rate',
[(8, 0.03), (14, 0.02), (17, 5e-3), [(8, 0.03), (14, 0.02), (17, 5e-3),
(19, 3e-3), (24, 1e-3), (26, 2e-4), (19, 3e-3), (24, 1e-3), (26, 2e-4),
(30, 5e-5) ]) (30, 5e-5)])
]), ]),
session_config=get_default_sess_config(0.99), session_config=get_default_sess_config(0.99),
model=Model(), model=Model(),
......
...@@ -32,10 +32,12 @@ NR_GPU = 8 ...@@ -32,10 +32,12 @@ NR_GPU = 8
BATCH_SIZE = TOTAL_BATCH_SIZE // NR_GPU BATCH_SIZE = TOTAL_BATCH_SIZE // NR_GPU
INPUT_SHAPE = 299 INPUT_SHAPE = 299
class Model(ModelDesc): class Model(ModelDesc):
def _get_input_vars(self): def _get_input_vars(self):
return [InputVar(tf.float32, [None, INPUT_SHAPE, INPUT_SHAPE, 3], 'input'), return [InputVar(tf.float32, [None, INPUT_SHAPE, INPUT_SHAPE, 3], 'input'),
InputVar(tf.int32, [None], 'label') ] InputVar(tf.int32, [None], 'label')]
def _build_graph(self, input_vars): def _build_graph(self, input_vars):
image, label = input_vars image, label = input_vars
...@@ -61,28 +63,28 @@ class Model(ModelDesc): ...@@ -61,28 +63,28 @@ class Model(ModelDesc):
def proj_77(l, ch_r, ch): def proj_77(l, ch_r, ch):
return (LinearWrap(l) return (LinearWrap(l)
.Conv2D('conv77r', ch_r, 1) .Conv2D('conv77r', ch_r, 1)
.Conv2D('conv77a', ch_r, [1,7]) .Conv2D('conv77a', ch_r, [1, 7])
.Conv2D('conv77b', ch, [7,1])()) .Conv2D('conv77b', ch, [7, 1])())
def proj_277(l, ch_r, ch): def proj_277(l, ch_r, ch):
return (LinearWrap(l) return (LinearWrap(l)
.Conv2D('conv277r', ch_r, 1) .Conv2D('conv277r', ch_r, 1)
.Conv2D('conv277aa', ch_r, [7,1]) .Conv2D('conv277aa', ch_r, [7, 1])
.Conv2D('conv277ab', ch_r, [1,7]) .Conv2D('conv277ab', ch_r, [1, 7])
.Conv2D('conv277ba', ch_r, [7,1]) .Conv2D('conv277ba', ch_r, [7, 1])
.Conv2D('conv277bb', ch, [1,7])()) .Conv2D('conv277bb', ch, [1, 7])())
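
proj_77 and proj_277 factorize a 7x7 convolution into stacked 1x7 and 7x1 convolutions (Inception-v3 style), which saves roughly 3.5x weights per replaced layer; a quick arithmetic check with illustrative channel counts:

c = 192                                       # channels kept constant for the comparison
full_7x7 = 7 * 7 * c * c                      # 1,806,336 weights
factorized = 7 * 1 * c * c + 1 * 7 * c * c    # 516,096 weights, ~3.5x fewer
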
with argscope(Conv2D, nl=BNReLU, use_bias=False),\ with argscope(Conv2D, nl=BNReLU, use_bias=False),\
argscope(BatchNorm, decay=0.9997, epsilon=1e-3): argscope(BatchNorm, decay=0.9997, epsilon=1e-3):
l = (LinearWrap(image) l = (LinearWrap(image)
.Conv2D('conv0', 32, 3, stride=2, padding='VALID') #299 .Conv2D('conv0', 32, 3, stride=2, padding='VALID') # 299
.Conv2D('conv1', 32, 3, padding='VALID') #149 .Conv2D('conv1', 32, 3, padding='VALID') # 149
.Conv2D('conv2', 64, 3, padding='SAME') # 147 .Conv2D('conv2', 64, 3, padding='SAME') # 147
.MaxPooling('pool2', 3, 2) .MaxPooling('pool2', 3, 2)
.Conv2D('conv3', 80, 1, padding='SAME') # 73 .Conv2D('conv3', 80, 1, padding='SAME') # 73
.Conv2D('conv4', 192, 3, padding='VALID') # 71 .Conv2D('conv4', 192, 3, padding='VALID') # 71
.MaxPooling('pool4', 3, 2)()) # 35 .MaxPooling('pool4', 3, 2)()) # 35
with tf.variable_scope('incep-35-256a'): with tf.variable_scope('incep-35-256a'):
l = tf.concat(3, [ l = tf.concat(3, [
...@@ -140,7 +142,7 @@ class Model(ModelDesc): ...@@ -140,7 +142,7 @@ class Model(ModelDesc):
br1 = AvgPooling('avgpool', l, 5, 3, padding='VALID') br1 = AvgPooling('avgpool', l, 5, 3, padding='VALID')
br1 = Conv2D('conv11', br1, 128, 1) br1 = Conv2D('conv11', br1, 128, 1)
shape = br1.get_shape().as_list() shape = br1.get_shape().as_list()
br1 = Conv2D('convout', br1, 768, shape[1:3], padding='VALID') # TODO gauss, stddev=0.01 br1 = Conv2D('convout', br1, 768, shape[1:3], padding='VALID') # TODO gauss, stddev=0.01
br1 = FullyConnected('fc', br1, 1000, nl=tf.identity) br1 = FullyConnected('fc', br1, 1000, nl=tf.identity)
with tf.variable_scope('incep-17-1280a'): with tf.variable_scope('incep-17-1280a'):
...@@ -194,27 +196,30 @@ class Model(ModelDesc): ...@@ -194,27 +196,30 @@ class Model(ModelDesc):
self.cost = tf.add_n([0.4 * loss1, loss2, wd_cost], name='cost') self.cost = tf.add_n([0.4 * loss1, loss2, wd_cost], name='cost')
def get_data(train_or_test): def get_data(train_or_test):
isTrain = train_or_test == 'train' isTrain = train_or_test == 'train'
ds = dataset.ILSVRC12(args.data, train_or_test, ds = dataset.ILSVRC12(args.data, train_or_test,
shuffle=True if isTrain else False, dir_structure='train') shuffle=True if isTrain else False, dir_structure='train')
meta = dataset.ILSVRCMeta() meta = dataset.ILSVRCMeta()
pp_mean = meta.get_per_pixel_mean() pp_mean = meta.get_per_pixel_mean()
pp_mean_299 = cv2.resize(pp_mean, (299, 299)) pp_mean_299 = cv2.resize(pp_mean, (299, 299))
if isTrain: if isTrain:
class Resize(imgaug.ImageAugmentor): class Resize(imgaug.ImageAugmentor):
def __init__(self): def __init__(self):
self._init(locals()) self._init(locals())
def _augment(self, img, _): def _augment(self, img, _):
h, w = img.shape[:2] h, w = img.shape[:2]
size = 299 size = 299
scale = self.rng.randint(size, 340) * 1.0 / min(h, w) scale = self.rng.randint(size, 340) * 1.0 / min(h, w)
scaleX = scale * self.rng.uniform(0.85, 1.15) scaleX = scale * self.rng.uniform(0.85, 1.15)
scaleY = scale * self.rng.uniform(0.85, 1.15) scaleY = scale * self.rng.uniform(0.85, 1.15)
desSize = map(int, (max(size, min(w, scaleX * w)),\ desSize = map(int, (max(size, min(w, scaleX * w)),
max(size, min(h, scaleY * h)))) max(size, min(h, scaleY * h))))
dst = cv2.resize(img, tuple(desSize), interpolation=cv2.INTER_CUBIC) dst = cv2.resize(img, tuple(desSize), interpolation=cv2.INTER_CUBIC)
return dst return dst
...@@ -224,11 +229,11 @@ def get_data(train_or_test): ...@@ -224,11 +229,11 @@ def get_data(train_or_test):
imgaug.RandomApplyAug(imgaug.GaussianBlur(3), 0.5), imgaug.RandomApplyAug(imgaug.GaussianBlur(3), 0.5),
imgaug.Brightness(30, True), imgaug.Brightness(30, True),
imgaug.Gamma(), imgaug.Gamma(),
imgaug.Contrast((0.8,1.2), True), imgaug.Contrast((0.8, 1.2), True),
imgaug.RandomCrop((299, 299)), imgaug.RandomCrop((299, 299)),
imgaug.RandomApplyAug(imgaug.JpegNoise(), 0.8), imgaug.RandomApplyAug(imgaug.JpegNoise(), 0.8),
imgaug.RandomApplyAug(imgaug.GaussianDeform( imgaug.RandomApplyAug(imgaug.GaussianDeform(
[(0.2, 0.2), (0.2, 0.8), (0.8,0.8), (0.8,0.2)], [(0.2, 0.2), (0.2, 0.8), (0.8, 0.8), (0.8, 0.2)],
(299, 299), 0.2, 3), 0.1), (299, 299), 0.2, 3), 0.1),
imgaug.Flip(horiz=True), imgaug.Flip(horiz=True),
imgaug.MapImage(lambda x: x - pp_mean_299), imgaug.MapImage(lambda x: x - pp_mean_299),
...@@ -237,7 +242,7 @@ def get_data(train_or_test): ...@@ -237,7 +242,7 @@ def get_data(train_or_test):
def resize_func(im): def resize_func(im):
h, w = im.shape[:2] h, w = im.shape[:2]
scale = 340.0 / min(h, w) scale = 340.0 / min(h, w)
desSize = map(int, (max(299, min(w, scale * w)),\ desSize = map(int, (max(299, min(w, scale * w)),
max(299, min(h, scale * h)))) max(299, min(h, scale * h))))
im = cv2.resize(im, tuple(desSize), interpolation=cv2.INTER_CUBIC) im = cv2.resize(im, tuple(desSize), interpolation=cv2.INTER_CUBIC)
return im return im
......
...@@ -5,7 +5,10 @@ ...@@ -5,7 +5,10 @@
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
import os, sys, re, time import os
import sys
import re
import time
import random import random
import argparse import argparse
import six import six
...@@ -23,6 +26,7 @@ ENV_NAME = None ...@@ -23,6 +26,7 @@ ENV_NAME = None
from common import play_one_episode from common import play_one_episode
def get_player(dumpdir=None): def get_player(dumpdir=None):
pl = GymEnv(ENV_NAME, dumpdir=dumpdir, auto_restart=False) pl = GymEnv(ENV_NAME, dumpdir=dumpdir, auto_restart=False)
pl = MapPlayerState(pl, lambda img: cv2.resize(img, IMAGE_SIZE[::-1])) pl = MapPlayerState(pl, lambda img: cv2.resize(img, IMAGE_SIZE[::-1]))
...@@ -33,12 +37,14 @@ def get_player(dumpdir=None): ...@@ -33,12 +37,14 @@ def get_player(dumpdir=None):
pl = HistoryFramePlayer(pl, FRAME_HISTORY) pl = HistoryFramePlayer(pl, FRAME_HISTORY)
return pl return pl
class Model(ModelDesc): class Model(ModelDesc):
def _get_input_vars(self): def _get_input_vars(self):
assert NUM_ACTIONS is not None assert NUM_ACTIONS is not None
return [InputVar(tf.float32, (None,) + IMAGE_SHAPE3, 'state'), return [InputVar(tf.float32, (None,) + IMAGE_SHAPE3, 'state'),
InputVar(tf.int32, (None,), 'action'), InputVar(tf.int32, (None,), 'action'),
InputVar(tf.float32, (None,), 'futurereward') ] InputVar(tf.float32, (None,), 'futurereward')]
def _get_NN_prediction(self, image): def _get_NN_prediction(self, image):
image = image / 255.0 image = image / 255.0
...@@ -61,6 +67,7 @@ class Model(ModelDesc): ...@@ -61,6 +67,7 @@ class Model(ModelDesc):
policy = self._get_NN_prediction(state) policy = self._get_NN_prediction(state)
self.logits = tf.nn.softmax(policy, name='logits') self.logits = tf.nn.softmax(policy, name='logits')
def run_submission(cfg, output, nr): def run_submission(cfg, output, nr):
player = get_player(dumpdir=output) player = get_player(dumpdir=output)
predfunc = get_predict_func(cfg) predfunc = get_predict_func(cfg)
...@@ -71,6 +78,7 @@ def run_submission(cfg, output, nr): ...@@ -71,6 +78,7 @@ def run_submission(cfg, output, nr):
score = play_one_episode(player, predfunc) score = play_one_episode(player, predfunc)
print("Score:", score) print("Score:", score)
def do_submit(output): def do_submit(output):
gym.upload(output, api_key='xxx') gym.upload(output, api_key='xxx')
...@@ -80,21 +88,22 @@ if __name__ == '__main__': ...@@ -80,21 +88,22 @@ if __name__ == '__main__':
parser.add_argument('--load', help='load model', required=True) parser.add_argument('--load', help='load model', required=True)
parser.add_argument('--env', help='environment name', required=True) parser.add_argument('--env', help='environment name', required=True)
parser.add_argument('--episode', help='number of episodes to run', parser.add_argument('--episode', help='number of episodes to run',
type=int, default=100) type=int, default=100)
parser.add_argument('--output', help='output directory', default='gym-submit') parser.add_argument('--output', help='output directory', default='gym-submit')
args = parser.parse_args() args = parser.parse_args()
ENV_NAME = args.env ENV_NAME = args.env
assert ENV_NAME assert ENV_NAME
logger.info("Environment Name: {}".format(ENV_NAME)) logger.info("Environment Name: {}".format(ENV_NAME))
p = get_player(); del p # set NUM_ACTIONS p = get_player()
del p # set NUM_ACTIONS
if args.gpu: if args.gpu:
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
cfg = PredictConfig( cfg = PredictConfig(
model=Model(), model=Model(),
session_init=SaverRestore(args.load), session_init=SaverRestore(args.load),
input_names=['state'], input_names=['state'],
output_names=['logits']) output_names=['logits'])
run_submission(cfg, args.output, args.episode) run_submission(cfg, args.output, args.episode)
...@@ -5,11 +5,15 @@ ...@@ -5,11 +5,15 @@
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
import os, sys, re, time import os
import sys
import re
import time
import random import random
import uuid import uuid
import argparse import argparse
import multiprocessing, threading import multiprocessing
import threading
from collections import deque from collections import deque
import six import six
from six.moves import queue from six.moves import queue
...@@ -17,7 +21,7 @@ from six.moves import queue ...@@ -17,7 +21,7 @@ from six.moves import queue
from tensorpack import * from tensorpack import *
from tensorpack.utils.concurrency import * from tensorpack.utils.concurrency import *
from tensorpack.utils.serialize import * from tensorpack.utils.serialize import *
from tensorpack.utils.stats import * from tensorpack.utils.stats import *
from tensorpack.tfutils import symbolic_functions as symbf from tensorpack.tfutils import symbolic_functions as symbf
from tensorpack.RL import * from tensorpack.RL import *
...@@ -42,8 +46,10 @@ EVALUATE_PROC = min(multiprocessing.cpu_count() // 2, 20) ...@@ -42,8 +46,10 @@ EVALUATE_PROC = min(multiprocessing.cpu_count() // 2, 20)
NUM_ACTIONS = None NUM_ACTIONS = None
ENV_NAME = None ENV_NAME = None
def get_player(viz=False, train=False, dumpdir=None): def get_player(viz=False, train=False, dumpdir=None):
pl = GymEnv(ENV_NAME, dumpdir=dumpdir) pl = GymEnv(ENV_NAME, dumpdir=dumpdir)
def func(img): def func(img):
return cv2.resize(img, IMAGE_SIZE[::-1]) return cv2.resize(img, IMAGE_SIZE[::-1])
pl = MapPlayerState(pl, func) pl = MapPlayerState(pl, func)
...@@ -58,16 +64,20 @@ def get_player(viz=False, train=False, dumpdir=None): ...@@ -58,16 +64,20 @@ def get_player(viz=False, train=False, dumpdir=None):
return pl return pl
common.get_player = get_player common.get_player = get_player
class MySimulatorWorker(SimulatorProcess): class MySimulatorWorker(SimulatorProcess):
def _build_player(self): def _build_player(self):
return get_player(train=True) return get_player(train=True)
class Model(ModelDesc): class Model(ModelDesc):
def _get_input_vars(self): def _get_input_vars(self):
assert NUM_ACTIONS is not None assert NUM_ACTIONS is not None
return [InputVar(tf.float32, (None,) + IMAGE_SHAPE3, 'state'), return [InputVar(tf.float32, (None,) + IMAGE_SHAPE3, 'state'),
InputVar(tf.int64, (None,), 'action'), InputVar(tf.int64, (None,), 'action'),
InputVar(tf.float32, (None,), 'futurereward') ] InputVar(tf.float32, (None,), 'futurereward')]
def _get_NN_prediction(self, image): def _get_NN_prediction(self, image):
image = image / 255.0 image = image / 255.0
...@@ -89,11 +99,11 @@ class Model(ModelDesc): ...@@ -89,11 +99,11 @@ class Model(ModelDesc):
def _build_graph(self, inputs): def _build_graph(self, inputs):
state, action, futurereward = inputs state, action, futurereward = inputs
policy, self.value = self._get_NN_prediction(state) policy, self.value = self._get_NN_prediction(state)
self.value = tf.squeeze(self.value, [1], name='pred_value') # (B,) self.value = tf.squeeze(self.value, [1], name='pred_value') # (B,)
self.logits = tf.nn.softmax(policy, name='logits') self.logits = tf.nn.softmax(policy, name='logits')
expf = tf.get_variable('explore_factor', shape=[], expf = tf.get_variable('explore_factor', shape=[],
initializer=tf.constant_initializer(1), trainable=False) initializer=tf.constant_initializer(1), trainable=False)
logitsT = tf.nn.softmax(policy * expf, name='logitsT') logitsT = tf.nn.softmax(policy * expf, name='logitsT')
is_training = get_current_tower_context().is_training is_training = get_current_tower_context().is_training
if not is_training: if not is_training:
...@@ -101,38 +111,40 @@ class Model(ModelDesc): ...@@ -101,38 +111,40 @@ class Model(ModelDesc):
log_probs = tf.log(self.logits + 1e-6) log_probs = tf.log(self.logits + 1e-6)
log_pi_a_given_s = tf.reduce_sum( log_pi_a_given_s = tf.reduce_sum(
log_probs * tf.one_hot(action, NUM_ACTIONS), 1) log_probs * tf.one_hot(action, NUM_ACTIONS), 1)
advantage = tf.sub(tf.stop_gradient(self.value), futurereward, name='advantage') advantage = tf.sub(tf.stop_gradient(self.value), futurereward, name='advantage')
policy_loss = tf.reduce_sum(log_pi_a_given_s * advantage, name='policy_loss') policy_loss = tf.reduce_sum(log_pi_a_given_s * advantage, name='policy_loss')
xentropy_loss = tf.reduce_sum( xentropy_loss = tf.reduce_sum(
self.logits * log_probs, name='xentropy_loss') self.logits * log_probs, name='xentropy_loss')
value_loss = tf.nn.l2_loss(self.value - futurereward, name='value_loss') value_loss = tf.nn.l2_loss(self.value - futurereward, name='value_loss')
pred_reward = tf.reduce_mean(self.value, name='predict_reward') pred_reward = tf.reduce_mean(self.value, name='predict_reward')
advantage = symbf.rms(advantage, name='rms_advantage') advantage = symbf.rms(advantage, name='rms_advantage')
summary.add_moving_summary(policy_loss, xentropy_loss, value_loss, pred_reward, advantage) summary.add_moving_summary(policy_loss, xentropy_loss, value_loss, pred_reward, advantage)
entropy_beta = tf.get_variable('entropy_beta', shape=[], entropy_beta = tf.get_variable('entropy_beta', shape=[],
initializer=tf.constant_initializer(0.01), trainable=False) initializer=tf.constant_initializer(0.01), trainable=False)
self.cost = tf.add_n([policy_loss, xentropy_loss * entropy_beta, value_loss]) self.cost = tf.add_n([policy_loss, xentropy_loss * entropy_beta, value_loss])
self.cost = tf.truediv(self.cost, self.cost = tf.truediv(self.cost,
tf.cast(tf.shape(futurereward)[0], tf.float32), tf.cast(tf.shape(futurereward)[0], tf.float32),
name='cost') name='cost')
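
The cost above is the usual actor-critic combination of policy-gradient, entropy, and value terms; a toy numpy version of the same bookkeeping with made-up batch values:

import numpy as np

batch, nactions = 3, 4
logits = np.random.randn(batch, nactions)
probs = np.exp(logits) / np.exp(logits).sum(1, keepdims=True)
action = np.array([0, 2, 1])
value = np.random.randn(batch)            # critic prediction V(s)
futurereward = np.random.randn(batch)     # discounted empirical return R

log_pi_a = np.log(probs[np.arange(batch), action] + 1e-6)
advantage = value - futurereward          # same sign convention as the graph above
policy_loss = np.sum(log_pi_a * advantage)
xentropy_loss = np.sum(probs * np.log(probs + 1e-6))      # negative entropy
value_loss = 0.5 * np.sum((value - futurereward) ** 2)    # tf.nn.l2_loss convention
cost = (policy_loss + 0.01 * xentropy_loss + value_loss) / batch
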
def get_gradient_processor(self): def get_gradient_processor(self):
return [MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.1)), return [MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.1)),
SummaryGradient()] SummaryGradient()]
class MySimulatorMaster(SimulatorMaster, Callback): class MySimulatorMaster(SimulatorMaster, Callback):
def __init__(self, pipe_c2s, pipe_s2c, model): def __init__(self, pipe_c2s, pipe_s2c, model):
super(MySimulatorMaster, self).__init__(pipe_c2s, pipe_s2c) super(MySimulatorMaster, self).__init__(pipe_c2s, pipe_s2c)
self.M = model self.M = model
self.queue = queue.Queue(maxsize=BATCH_SIZE*8*2) self.queue = queue.Queue(maxsize=BATCH_SIZE * 8 * 2)
def _setup_graph(self): def _setup_graph(self):
self.sess = self.trainer.sess self.sess = self.trainer.sess
self.async_predictor = MultiThreadAsyncPredictor( self.async_predictor = MultiThreadAsyncPredictor(
self.trainer.get_predict_funcs(['state'], ['logitsT', 'pred_value'], self.trainer.get_predict_funcs(['state'], ['logitsT', 'pred_value'],
PREDICTOR_THREAD), batch_size=15) PREDICTOR_THREAD), batch_size=15)
self.async_predictor.run() self.async_predictor.run()
def _on_state(self, state, ident): def _on_state(self, state, ident):
...@@ -172,6 +184,7 @@ class MySimulatorMaster(SimulatorMaster, Callback): ...@@ -172,6 +184,7 @@ class MySimulatorMaster(SimulatorMaster, Callback):
else: else:
client.memory = [] client.memory = []
def get_config(): def get_config():
logger.auto_set_dir() logger.auto_set_dir()
M = Model() M = Model()
...@@ -196,7 +209,7 @@ def get_config(): ...@@ -196,7 +209,7 @@ def get_config():
ScheduledHyperParamSetter('learning_rate', [(80, 0.0003), (120, 0.0001)]), ScheduledHyperParamSetter('learning_rate', [(80, 0.0003), (120, 0.0001)]),
ScheduledHyperParamSetter('entropy_beta', [(80, 0.005)]), ScheduledHyperParamSetter('entropy_beta', [(80, 0.005)]),
ScheduledHyperParamSetter('explore_factor', ScheduledHyperParamSetter('explore_factor',
[(80, 2), (100, 3), (120, 4), (140, 5)]), [(80, 2), (100, 3), (120, 4), (140, 5)]),
master, master,
StartProcOrThread(master), StartProcOrThread(master),
PeriodicCallback(Evaluator(EVAL_EPISODE, ['state'], ['logits']), 2), PeriodicCallback(Evaluator(EVAL_EPISODE, ['state'], ['logits']), 2),
...@@ -213,12 +226,13 @@ if __name__ == '__main__': ...@@ -213,12 +226,13 @@ if __name__ == '__main__':
parser.add_argument('--load', help='load model') parser.add_argument('--load', help='load model')
parser.add_argument('--env', help='env', required=True) parser.add_argument('--env', help='env', required=True)
parser.add_argument('--task', help='task to perform', parser.add_argument('--task', help='task to perform',
choices=['play', 'eval', 'train'], default='train') choices=['play', 'eval', 'train'], default='train')
args = parser.parse_args() args = parser.parse_args()
ENV_NAME = args.env ENV_NAME = args.env
assert ENV_NAME assert ENV_NAME
p = get_player(); del p # set NUM_ACTIONS p = get_player()
del p # set NUM_ACTIONS
if args.gpu: if args.gpu:
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
...@@ -227,10 +241,10 @@ if __name__ == '__main__': ...@@ -227,10 +241,10 @@ if __name__ == '__main__':
if args.task != 'train': if args.task != 'train':
cfg = PredictConfig( cfg = PredictConfig(
model=Model(), model=Model(),
session_init=SaverRestore(args.load), session_init=SaverRestore(args.load),
input_names=['state'], input_names=['state'],
output_names=['logits']) output_names=['logits'])
if args.task == 'play': if args.task == 'play':
play_model(cfg) play_model(cfg)
elif args.task == 'eval': elif args.task == 'eval':
...@@ -239,11 +253,11 @@ if __name__ == '__main__': ...@@ -239,11 +253,11 @@ if __name__ == '__main__':
if args.gpu: if args.gpu:
nr_gpu = get_nr_gpu() nr_gpu = get_nr_gpu()
if nr_gpu > 1: if nr_gpu > 1:
predict_tower = range(nr_gpu)[-nr_gpu//2:] predict_tower = range(nr_gpu)[-nr_gpu // 2:]
else: else:
predict_tower = [0] predict_tower = [0]
PREDICTOR_THREAD = len(predict_tower) * PREDICTOR_THREAD_PER_GPU PREDICTOR_THREAD = len(predict_tower) * PREDICTOR_THREAD_PER_GPU
train_tower = range(nr_gpu)[:-nr_gpu//2] or [0] train_tower = range(nr_gpu)[:-nr_gpu // 2] or [0]
logger.info("[BA3C] Train on gpu {} and infer on gpu {}".format( logger.info("[BA3C] Train on gpu {} and infer on gpu {}".format(
','.join(map(str, train_tower)), ','.join(map(str, predict_tower)))) ','.join(map(str, train_tower)), ','.join(map(str, predict_tower))))
trainer = AsyncMultiGPUTrainer trainer = AsyncMultiGPUTrainer
......
...@@ -30,14 +30,16 @@ This model uses the whole training set instead of a train-val split. ...@@ -30,14 +30,16 @@ This model uses the whole training set instead of a train-val split.
BATCH_SIZE = 128 BATCH_SIZE = 128
NUM_UNITS = None NUM_UNITS = None
class Model(ModelDesc): class Model(ModelDesc):
def __init__(self, n): def __init__(self, n):
super(Model, self).__init__() super(Model, self).__init__()
self.n = n self.n = n
def _get_input_vars(self): def _get_input_vars(self):
return [InputVar(tf.float32, [None, 32, 32, 3], 'input'), return [InputVar(tf.float32, [None, 32, 32, 3], 'input'),
InputVar(tf.int32, [None], 'label') ] InputVar(tf.int32, [None], 'label')]
def _build_graph(self, input_vars): def _build_graph(self, input_vars):
image, label = input_vars image, label = input_vars
...@@ -60,13 +62,13 @@ class Model(ModelDesc): ...@@ -60,13 +62,13 @@ class Model(ModelDesc):
c2 = Conv2D('conv2', c1, out_channel) c2 = Conv2D('conv2', c1, out_channel)
if increase_dim: if increase_dim:
l = AvgPooling('pool', l, 2) l = AvgPooling('pool', l, 2)
l = tf.pad(l, [[0,0], [0,0], [0,0], [in_channel//2, in_channel//2]]) l = tf.pad(l, [[0, 0], [0, 0], [0, 0], [in_channel // 2, in_channel // 2]])
l = c2 + l l = c2 + l
return l return l
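
The tf.pad call implements the parameter-free "option A" shortcut from the ResNet paper: average-pool to halve the spatial size, then zero-pad channels to match the widened residual branch; in numpy terms (illustrative shapes):

import numpy as np

x = np.random.randn(1, 32, 32, 16)                           # NHWC feature map
pooled = x.reshape(1, 16, 2, 16, 2, 16).mean(axis=(2, 4))    # 2x2 average pooling
shortcut = np.pad(pooled, [(0, 0), (0, 0), (0, 0), (8, 8)])  # pad channels 16 -> 32
assert shortcut.shape == (1, 16, 16, 32)
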
with argscope(Conv2D, nl=tf.identity, use_bias=False, kernel_shape=3, with argscope(Conv2D, nl=tf.identity, use_bias=False, kernel_shape=3,
W_init=variance_scaling_initializer(mode='FAN_OUT')): W_init=variance_scaling_initializer(mode='FAN_OUT')):
l = Conv2D('conv0', image, 16, nl=BNReLU) l = Conv2D('conv0', image, 16, nl=BNReLU)
l = residual('res1.0', l, first=True) l = residual('res1.0', l, first=True)
for k in range(1, self.n): for k in range(1, self.n):
...@@ -104,6 +106,7 @@ class Model(ModelDesc): ...@@ -104,6 +106,7 @@ class Model(ModelDesc):
add_param_summary([('.*/W', ['histogram'])]) # monitor W add_param_summary([('.*/W', ['histogram'])]) # monitor W
self.cost = tf.add_n([cost, wd_cost], name='cost') self.cost = tf.add_n([cost, wd_cost], name='cost')
def get_data(train_or_test): def get_data(train_or_test):
isTrain = train_or_test == 'train' isTrain = train_or_test == 'train'
ds = dataset.Cifar10(train_or_test) ds = dataset.Cifar10(train_or_test)
...@@ -125,6 +128,7 @@ def get_data(train_or_test): ...@@ -125,6 +128,7 @@ def get_data(train_or_test):
ds = PrefetchData(ds, 3, 2) ds = PrefetchData(ds, 3, 2)
return ds return ds
def get_config(): def get_config():
logger.auto_set_dir() logger.auto_set_dir()
...@@ -140,7 +144,7 @@ def get_config(): ...@@ -140,7 +144,7 @@ def get_config():
callbacks=Callbacks([ callbacks=Callbacks([
StatPrinter(), ModelSaver(), StatPrinter(), ModelSaver(),
InferenceRunner(dataset_test, InferenceRunner(dataset_test,
[ScalarStats('cost'), ClassificationError()]), [ScalarStats('cost'), ClassificationError()]),
ScheduledHyperParamSetter('learning_rate', ScheduledHyperParamSetter('learning_rate',
[(1, 0.1), (82, 0.01), (123, 0.001), (300, 0.0002)]) [(1, 0.1), (82, 0.01), (123, 0.001), (300, 0.0002)])
]), ]),
...@@ -153,8 +157,8 @@ if __name__ == '__main__': ...@@ -153,8 +157,8 @@ if __name__ == '__main__':
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('--gpu', help='comma separated list of GPU(s) to use.') parser.add_argument('--gpu', help='comma separated list of GPU(s) to use.')
parser.add_argument('-n', '--num_units', parser.add_argument('-n', '--num_units',
help='number of units in each stage', help='number of units in each stage',
type=int, default=18) type=int, default=18)
parser.add_argument('--load', help='load model') parser.add_argument('--load', help='load model')
args = parser.parse_args() args = parser.parse_args()
NUM_UNITS = args.num_units NUM_UNITS = args.num_units
......
...@@ -26,10 +26,12 @@ TOTAL_BATCH_SIZE = 256 ...@@ -26,10 +26,12 @@ TOTAL_BATCH_SIZE = 256
INPUT_SHAPE = 224 INPUT_SHAPE = 224
DEPTH = None DEPTH = None
class Model(ModelDesc): class Model(ModelDesc):
def _get_input_vars(self): def _get_input_vars(self):
return [InputVar(tf.float32, [None, INPUT_SHAPE, INPUT_SHAPE, 3], 'input'), return [InputVar(tf.float32, [None, INPUT_SHAPE, INPUT_SHAPE, 3], 'input'),
InputVar(tf.int32, [None], 'label') ] InputVar(tf.int32, [None], 'label')]
def _build_graph(self, input_vars): def _build_graph(self, input_vars):
image, label = input_vars image, label = input_vars
...@@ -73,32 +75,32 @@ class Model(ModelDesc): ...@@ -73,32 +75,32 @@ class Model(ModelDesc):
with tf.variable_scope(layername): with tf.variable_scope(layername):
with tf.variable_scope('block0'): with tf.variable_scope('block0'):
l = block_func(l, features, stride, l = block_func(l, features, stride,
'no_preact' if first else 'both_preact') 'no_preact' if first else 'both_preact')
for i in range(1, count): for i in range(1, count):
with tf.variable_scope('block{}'.format(i)): with tf.variable_scope('block{}'.format(i)):
l = block_func(l, features, 1, 'default') l = block_func(l, features, 1, 'default')
return l return l
cfg = { cfg = {
18: ([2,2,2,2], basicblock), 18: ([2, 2, 2, 2], basicblock),
34: ([3,4,6,3], basicblock), 34: ([3, 4, 6, 3], basicblock),
50: ([3,4,6,3], bottleneck), 50: ([3, 4, 6, 3], bottleneck),
101: ([3,4,23,3], bottleneck) 101: ([3, 4, 23, 3], bottleneck)
} }
defs, block_func = cfg[DEPTH] defs, block_func = cfg[DEPTH]
with argscope(Conv2D, nl=tf.identity, use_bias=False, with argscope(Conv2D, nl=tf.identity, use_bias=False,
W_init=variance_scaling_initializer(mode='FAN_OUT')): W_init=variance_scaling_initializer(mode='FAN_OUT')):
logits = (LinearWrap(image) logits = (LinearWrap(image)
.Conv2D('conv0', 64, 7, stride=2, nl=BNReLU) .Conv2D('conv0', 64, 7, stride=2, nl=BNReLU)
.MaxPooling('pool0', shape=3, stride=2, padding='SAME') .MaxPooling('pool0', shape=3, stride=2, padding='SAME')
.apply(layer, 'group0', block_func, 64, defs[0], 1, first=True) .apply(layer, 'group0', block_func, 64, defs[0], 1, first=True)
.apply(layer, 'group1', block_func, 128, defs[1], 2) .apply(layer, 'group1', block_func, 128, defs[1], 2)
.apply(layer, 'group2', block_func, 256, defs[2], 2) .apply(layer, 'group2', block_func, 256, defs[2], 2)
.apply(layer, 'group3', block_func, 512, defs[3], 2) .apply(layer, 'group3', block_func, 512, defs[3], 2)
.BNReLU('bnlast') .BNReLU('bnlast')
.GlobalAvgPooling('gap') .GlobalAvgPooling('gap')
.FullyConnected('linear', 1000, nl=tf.identity)()) .FullyConnected('linear', 1000, nl=tf.identity)())
loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, label) loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, label)
loss = tf.reduce_mean(loss, name='xentropy-loss') loss = tf.reduce_mean(loss, name='xentropy-loss')
...@@ -113,12 +115,13 @@ class Model(ModelDesc): ...@@ -113,12 +115,13 @@ class Model(ModelDesc):
add_moving_summary(loss, wd_cost) add_moving_summary(loss, wd_cost)
self.cost = tf.add_n([loss, wd_cost], name='cost') self.cost = tf.add_n([loss, wd_cost], name='cost')
def get_data(train_or_test): def get_data(train_or_test):
isTrain = train_or_test == 'train' isTrain = train_or_test == 'train'
datadir = args.data datadir = args.data
ds = dataset.ILSVRC12(datadir, train_or_test, ds = dataset.ILSVRC12(datadir, train_or_test,
shuffle=True if isTrain else False, dir_structure='original') shuffle=True if isTrain else False, dir_structure='original')
image_mean = np.array([0.485, 0.456, 0.406], dtype='float32') image_mean = np.array([0.485, 0.456, 0.406], dtype='float32')
image_std = np.array([0.229, 0.224, 0.225], dtype='float32') image_std = np.array([0.229, 0.224, 0.225], dtype='float32')
...@@ -128,12 +131,13 @@ def get_data(train_or_test): ...@@ -128,12 +131,13 @@ def get_data(train_or_test):
crop 8%~100% of the original image crop 8%~100% of the original image
See `Going Deeper with Convolutions` by Google. See `Going Deeper with Convolutions` by Google.
""" """
def _augment(self, img, _): def _augment(self, img, _):
h, w = img.shape[:2] h, w = img.shape[:2]
area = h * w area = h * w
for _ in range(10): for _ in range(10):
targetArea = self.rng.uniform(0.08, 1.0) * area targetArea = self.rng.uniform(0.08, 1.0) * area
aspectR = self.rng.uniform(0.75,1.333) aspectR = self.rng.uniform(0.75, 1.333)
ww = int(np.sqrt(targetArea * aspectR)) ww = int(np.sqrt(targetArea * aspectR))
hh = int(np.sqrt(targetArea / aspectR)) hh = int(np.sqrt(targetArea / aspectR))
if self.rng.uniform() < 0.5: if self.rng.uniform() < 0.5:
...@@ -141,10 +145,10 @@ def get_data(train_or_test): ...@@ -141,10 +145,10 @@ def get_data(train_or_test):
if hh <= h and ww <= w: if hh <= h and ww <= w:
x1 = 0 if w == ww else self.rng.randint(0, w - ww) x1 = 0 if w == ww else self.rng.randint(0, w - ww)
y1 = 0 if h == hh else self.rng.randint(0, h - hh) y1 = 0 if h == hh else self.rng.randint(0, h - hh)
out = img[y1:y1+hh,x1:x1+ww] out = img[y1:y1 + hh, x1:x1 + ww]
out = cv2.resize(out, (224,224), interpolation=cv2.INTER_CUBIC) out = cv2.resize(out, (224, 224), interpolation=cv2.INTER_CUBIC)
return out return out
out = cv2.resize(img, (224,224), interpolation=cv2.INTER_CUBIC) out = cv2.resize(img, (224, 224), interpolation=cv2.INTER_CUBIC)
return out return out
augmentors = [ augmentors = [
...@@ -154,11 +158,11 @@ def get_data(train_or_test): ...@@ -154,11 +158,11 @@ def get_data(train_or_test):
imgaug.Contrast((0.8, 1.2), clip=False), imgaug.Contrast((0.8, 1.2), clip=False),
imgaug.Saturation(0.4), imgaug.Saturation(0.4),
imgaug.Lighting(0.1, imgaug.Lighting(0.1,
eigval=[0.2175, 0.0188, 0.0045], eigval=[0.2175, 0.0188, 0.0045],
eigvec=[[ -0.5675, 0.7192, 0.4009], eigvec=[[-0.5675, 0.7192, 0.4009],
[ -0.5808, -0.0045, -0.8140], [-0.5808, -0.0045, -0.8140],
[ -0.5836, -0.6948, 0.4203]] [-0.5836, -0.6948, 0.4203]]
)]), )]),
imgaug.Clip(), imgaug.Clip(),
imgaug.Flip(horiz=True), imgaug.Flip(horiz=True),
imgaug.MapImage(lambda x: (x * (1.0 / 255) - image_mean) / image_std), imgaug.MapImage(lambda x: (x * (1.0 / 255) - image_mean) / image_std),
...@@ -175,6 +179,7 @@ def get_data(train_or_test): ...@@ -175,6 +179,7 @@ def get_data(train_or_test):
ds = PrefetchDataZMQ(ds, min(12, multiprocessing.cpu_count())) ds = PrefetchDataZMQ(ds, min(12, multiprocessing.cpu_count()))
return ds return ds
def get_config(): def get_config():
# prepare dataset # prepare dataset
dataset_train = get_data('train') dataset_train = get_data('train')
...@@ -190,7 +195,7 @@ def get_config(): ...@@ -190,7 +195,7 @@ def get_config():
ClassificationError('wrong-top1', 'val-error-top1'), ClassificationError('wrong-top1', 'val-error-top1'),
ClassificationError('wrong-top5', 'val-error-top5')]), ClassificationError('wrong-top5', 'val-error-top5')]),
ScheduledHyperParamSetter('learning_rate', ScheduledHyperParamSetter('learning_rate',
[(30, 1e-2), (60, 1e-3), (85, 1e-4), (95, 1e-5)]), [(30, 1e-2), (60, 1e-3), (85, 1e-4), (95, 1e-5)]),
HumanHyperParamSetter('learning_rate'), HumanHyperParamSetter('learning_rate'),
]), ]),
model=Model(), model=Model(),
...@@ -198,6 +203,7 @@ def get_config(): ...@@ -198,6 +203,7 @@ def get_config():
max_epoch=110, max_epoch=110,
) )
def eval_on_ILSVRC12(model_file, data_dir): def eval_on_ILSVRC12(model_file, data_dir):
ds = get_data('val') ds = get_data('val')
pred_config = PredictConfig( pred_config = PredictConfig(
...@@ -221,7 +227,7 @@ if __name__ == '__main__': ...@@ -221,7 +227,7 @@ if __name__ == '__main__':
parser.add_argument('--data', help='ILSVRC dataset dir') parser.add_argument('--data', help='ILSVRC dataset dir')
parser.add_argument('--load', help='load model') parser.add_argument('--load', help='load model')
parser.add_argument('-d', '--depth', help='resnet depth', parser.add_argument('-d', '--depth', help='resnet depth',
type=int, default=18, choices=[18, 34, 50, 101]) type=int, default=18, choices=[18, 34, 50, 101])
parser.add_argument('--eval', action='store_true') parser.add_argument('--eval', action='store_true')
args = parser.parse_args() args = parser.parse_args()
......
...@@ -7,7 +7,8 @@ ...@@ -7,7 +7,8 @@
import cv2 import cv2
import tensorflow as tf import tensorflow as tf
import argparse import argparse
import os, re import os
import re
import numpy as np import numpy as np
import six import six
from six.moves import zip from six.moves import zip
...@@ -22,7 +23,9 @@ from tensorpack.dataflow.dataset import ILSVRCMeta ...@@ -22,7 +23,9 @@ from tensorpack.dataflow.dataset import ILSVRCMeta
MODEL_DEPTH = None MODEL_DEPTH = None
class Model(ModelDesc): class Model(ModelDesc):
def _get_input_vars(self): def _get_input_vars(self):
return [InputVar(tf.float32, [None, 224, 224, 3], 'input'), return [InputVar(tf.float32, [None, 224, 224, 3], 'input'),
InputVar(tf.int32, [None], 'label')] InputVar(tf.int32, [None], 'label')]
...@@ -57,48 +60,49 @@ class Model(ModelDesc): ...@@ -57,48 +60,49 @@ class Model(ModelDesc):
with tf.variable_scope(layername): with tf.variable_scope(layername):
with tf.variable_scope('block0'): with tf.variable_scope('block0'):
l = bottleneck(l, features, stride, l = bottleneck(l, features, stride,
'no_preact' if first else 'both_preact') 'no_preact' if first else 'both_preact')
for i in range(1, count): for i in range(1, count):
with tf.variable_scope('block{}'.format(i)): with tf.variable_scope('block{}'.format(i)):
l = bottleneck(l, features, 1, 'both_preact') l = bottleneck(l, features, 1, 'both_preact')
return l return l
cfg = { cfg = {
50: ([3,4,6,3]), 50: ([3, 4, 6, 3]),
101: ([3,4,23,3]), 101: ([3, 4, 23, 3]),
152: ([3,8,36,3]) 152: ([3, 8, 36, 3])
} }
defs = cfg[MODEL_DEPTH] defs = cfg[MODEL_DEPTH]
with argscope(Conv2D, nl=tf.identity, use_bias=False, with argscope(Conv2D, nl=tf.identity, use_bias=False,
W_init=variance_scaling_initializer(mode='FAN_OUT')): W_init=variance_scaling_initializer(mode='FAN_OUT')):
# tensorflow with padding=SAME will by default pad [2,3] here. # tensorflow with padding=SAME will by default pad [2,3] here.
# but caffe conv with stride will pad [3,3] # but caffe conv with stride will pad [3,3]
image = tf.pad(image, [[0,0],[3,3],[3,3],[0,0]]) image = tf.pad(image, [[0, 0], [3, 3], [3, 3], [0, 0]])
fc1000 = (LinearWrap(image) fc1000 = (LinearWrap(image)
.Conv2D('conv0', 64, 7, stride=2, nl=BNReLU, padding='VALID') .Conv2D('conv0', 64, 7, stride=2, nl=BNReLU, padding='VALID')
.MaxPooling('pool0', shape=3, stride=2, padding='SAME') .MaxPooling('pool0', shape=3, stride=2, padding='SAME')
.apply(layer, 'group0', 64, defs[0], 1, first=True) .apply(layer, 'group0', 64, defs[0], 1, first=True)
.apply(layer, 'group1', 128, defs[1], 2) .apply(layer, 'group1', 128, defs[1], 2)
.apply(layer, 'group2', 256, defs[2], 2) .apply(layer, 'group2', 256, defs[2], 2)
.apply(layer, 'group3', 512, defs[3], 2) .apply(layer, 'group3', 512, defs[3], 2)
.tf.nn.relu() .tf.nn.relu()
.GlobalAvgPooling('gap') .GlobalAvgPooling('gap')
.FullyConnected('fc1000', 1000, nl=tf.identity)()) .FullyConnected('fc1000', 1000, nl=tf.identity)())
prob = tf.nn.softmax(fc1000, name='prob') prob = tf.nn.softmax(fc1000, name='prob')
nr_wrong = prediction_incorrect(fc1000, label, name='wrong-top1') nr_wrong = prediction_incorrect(fc1000, label, name='wrong-top1')
nr_wrong = prediction_incorrect(fc1000, label, 5, name='wrong-top5') nr_wrong = prediction_incorrect(fc1000, label, 5, name='wrong-top5')
def get_inference_augmentor(): def get_inference_augmentor():
# load ResNet mean from Kaiming: # load ResNet mean from Kaiming:
#from tensorpack.utils.loadcaffe import get_caffe_pb #from tensorpack.utils.loadcaffe import get_caffe_pb
#obj = get_caffe_pb().BlobProto() #obj = get_caffe_pb().BlobProto()
#obj.ParseFromString(open('ResNet_mean.binaryproto').read()) # obj.ParseFromString(open('ResNet_mean.binaryproto').read())
#pp_mean_224 = np.array(obj.data).reshape(3, 224, 224).transpose(1,2,0) #pp_mean_224 = np.array(obj.data).reshape(3, 224, 224).transpose(1,2,0)
meta = ILSVRCMeta() meta = ILSVRCMeta()
pp_mean = meta.get_per_pixel_mean() pp_mean = meta.get_per_pixel_mean()
pp_mean_224 = pp_mean[16:-16,16:-16,:] pp_mean_224 = pp_mean[16:-16, 16:-16, :]
transformers = imgaug.AugmentorList([ transformers = imgaug.AugmentorList([
imgaug.ResizeShortestEdge(256), imgaug.ResizeShortestEdge(256),
...@@ -107,6 +111,7 @@ def get_inference_augmentor(): ...@@ -107,6 +111,7 @@ def get_inference_augmentor():
]) ])
return transformers return transformers
def run_test(params, input): def run_test(params, input):
pred_config = PredictConfig( pred_config = PredictConfig(
model=Model(), model=Model(),
...@@ -119,7 +124,7 @@ def run_test(params, input): ...@@ -119,7 +124,7 @@ def run_test(params, input):
prepro = get_inference_augmentor() prepro = get_inference_augmentor()
im = cv2.imread(input).astype('float32') im = cv2.imread(input).astype('float32')
im = prepro.augment(im) im = prepro.augment(im)
im = np.reshape( im, (1, 224, 224, 3)) im = np.reshape(im, (1, 224, 224, 3))
outputs = predict_func([im]) outputs = predict_func([im])
prob = outputs[0] prob = outputs[0]
...@@ -128,6 +133,7 @@ def run_test(params, input): ...@@ -128,6 +133,7 @@ def run_test(params, input):
meta = ILSVRCMeta().get_synset_words_1000() meta = ILSVRCMeta().get_synset_words_1000()
print([meta[k] for k in ret]) print([meta[k] for k in ret])
def eval_on_ILSVRC12(params, data_dir): def eval_on_ILSVRC12(params, data_dir):
ds = dataset.ILSVRC12(data_dir, 'val', shuffle=False, dir_structure='train') ds = dataset.ILSVRC12(data_dir, 'val', shuffle=False, dir_structure='train')
ds = AugmentImageComponent(ds, get_inference_augmentor()) ds = AugmentImageComponent(ds, get_inference_augmentor())
...@@ -147,16 +153,17 @@ def eval_on_ILSVRC12(params, data_dir): ...@@ -147,16 +153,17 @@ def eval_on_ILSVRC12(params, data_dir):
print("Top1 Error: {}".format(acc1.ratio)) print("Top1 Error: {}".format(acc1.ratio))
print("Top5 Error: {}".format(acc5.ratio)) print("Top5 Error: {}".format(acc5.ratio))
def name_conversion(caffe_layer_name): def name_conversion(caffe_layer_name):
""" Convert a caffe parameter name to a tensorflow parameter name as """ Convert a caffe parameter name to a tensorflow parameter name as
defined in the above model """ defined in the above model """
# beginning & end mapping # beginning & end mapping
NAME_MAP = {'bn_conv1/beta': 'conv0/bn/beta', NAME_MAP = {'bn_conv1/beta': 'conv0/bn/beta',
'bn_conv1/gamma': 'conv0/bn/gamma', 'bn_conv1/gamma': 'conv0/bn/gamma',
'bn_conv1/mean/EMA': 'conv0/bn/mean/EMA', 'bn_conv1/mean/EMA': 'conv0/bn/mean/EMA',
'bn_conv1/variance/EMA': 'conv0/bn/variance/EMA', 'bn_conv1/variance/EMA': 'conv0/bn/variance/EMA',
'conv1/W': 'conv0/W', 'conv1/b': 'conv0/b', 'conv1/W': 'conv0/W', 'conv1/b': 'conv0/b',
'fc1000/W': 'fc1000/W', 'fc1000/b': 'fc1000/b'} 'fc1000/W': 'fc1000/W', 'fc1000/b': 'fc1000/b'}
if caffe_layer_name in NAME_MAP: if caffe_layer_name in NAME_MAP:
return NAME_MAP[caffe_layer_name] return NAME_MAP[caffe_layer_name]
...@@ -178,13 +185,13 @@ def name_conversion(caffe_layer_name): ...@@ -178,13 +185,13 @@ def name_conversion(caffe_layer_name):
layer_id = re.search('_branch[0-9]([a-z])/', caffe_layer_name).group(1) layer_id = re.search('_branch[0-9]([a-z])/', caffe_layer_name).group(1)
layer_id = ord(layer_id) - ord('a') + 1 layer_id = ord(layer_id) - ord('a') + 1
TYPE_DICT = {'res':'conv', 'bn':'bn'} TYPE_DICT = {'res': 'conv', 'bn': 'bn'}
tf_name = caffe_layer_name[caffe_layer_name.index('/'):] tf_name = caffe_layer_name[caffe_layer_name.index('/'):]
layer_type = TYPE_DICT[layer_type] + \ layer_type = TYPE_DICT[layer_type] + \
(str(layer_id) if layer_branch == 2 else 'shortcut') (str(layer_id) if layer_branch == 2 else 'shortcut')
tf_name = 'group{}/block{}/{}'.format( tf_name = 'group{}/block{}/{}'.format(
int(layer_group) - 2, layer_block, layer_type) + tf_name int(layer_group) - 2, layer_block, layer_type) + tf_name
return tf_name return tf_name
if __name__ == '__main__': if __name__ == '__main__':
......
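For reference, the cfg table above lists how many bottleneck blocks each of the four groups gets; the named depth follows from three conv layers per bottleneck plus the stem conv and the final fc layer. A quick sanity check (mine, not part of the commit):

RESNET_CFG = {50: [3, 4, 6, 3], 101: [3, 4, 23, 3], 152: [3, 8, 36, 3]}

def total_depth(block_counts):
    # 3 convs per bottleneck block, plus conv0 and fc1000
    return 3 * sum(block_counts) + 2

for depth, counts in RESNET_CFG.items():
    assert total_depth(counts) == depth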
...@@ -20,11 +20,12 @@ You might need to adjust the learning rate schedule when running with 1 GPU. ...@@ -20,11 +20,12 @@ You might need to adjust the learning rate schedule when running with 1 GPU.
import imp import imp
cifar_example = imp.load_source('cifar_example', cifar_example = imp.load_source('cifar_example',
os.path.join(os.path.dirname(__file__), 'cifar10-resnet.py')) os.path.join(os.path.dirname(__file__), 'cifar10-resnet.py'))
Model = cifar_example.Model Model = cifar_example.Model
BATCH_SIZE = 128 BATCH_SIZE = 128
def get_data(train_or_test): def get_data(train_or_test):
isTrain = train_or_test == 'train' isTrain = train_or_test == 'train'
pp_mean = dataset.SVHNDigit.get_per_pixel_mean() pp_mean = dataset.SVHNDigit.get_per_pixel_mean()
...@@ -39,9 +40,9 @@ def get_data(train_or_test): ...@@ -39,9 +40,9 @@ def get_data(train_or_test):
augmentors = [ augmentors = [
imgaug.CenterPaste((40, 40)), imgaug.CenterPaste((40, 40)),
imgaug.Brightness(10), imgaug.Brightness(10),
imgaug.Contrast((0.8,1.2)), imgaug.Contrast((0.8, 1.2)),
imgaug.GaussianDeform( # this is slow. without it, can only reach 1.9% error imgaug.GaussianDeform( # this is slow. without it, can only reach 1.9% error
[(0.2, 0.2), (0.2, 0.8), (0.8,0.8), (0.8,0.2)], [(0.2, 0.2), (0.2, 0.8), (0.8, 0.8), (0.8, 0.2)],
(40, 40), 0.2, 3), (40, 40), 0.2, 3),
imgaug.RandomCrop((32, 32)), imgaug.RandomCrop((32, 32)),
imgaug.MapImage(lambda x: x - pp_mean), imgaug.MapImage(lambda x: x - pp_mean),
...@@ -56,6 +57,7 @@ def get_data(train_or_test): ...@@ -56,6 +57,7 @@ def get_data(train_or_test):
ds = PrefetchData(ds, 5, 5) ds = PrefetchData(ds, 5, 5)
return ds return ds
def get_config(): def get_config():
logger.auto_set_dir() logger.auto_set_dir()
...@@ -72,7 +74,7 @@ def get_config(): ...@@ -72,7 +74,7 @@ def get_config():
StatPrinter(), StatPrinter(),
ModelSaver(), ModelSaver(),
InferenceRunner(dataset_test, InferenceRunner(dataset_test,
[ScalarStats('cost'), ClassificationError() ]), [ScalarStats('cost'), ClassificationError()]),
ScheduledHyperParamSetter('learning_rate', ScheduledHyperParamSetter('learning_rate',
[(1, 0.1), (20, 0.01), (28, 0.001), (50, 0.0001)]) [(1, 0.1), (20, 0.01), (28, 0.001), (50, 0.0001)])
]), ]),
......
...@@ -5,7 +5,8 @@ ...@@ -5,7 +5,8 @@
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
import os, sys import os
import sys
import argparse import argparse
from tensorpack import * from tensorpack import *
...@@ -15,36 +16,38 @@ IMAGE_SIZE = 42 ...@@ -15,36 +16,38 @@ IMAGE_SIZE = 42
WARP_TARGET_SIZE = 28 WARP_TARGET_SIZE = 28
HALF_DIFF = (IMAGE_SIZE - WARP_TARGET_SIZE) // 2 HALF_DIFF = (IMAGE_SIZE - WARP_TARGET_SIZE) // 2
class Model(ModelDesc): class Model(ModelDesc):
def _get_input_vars(self): def _get_input_vars(self):
return [InputVar(tf.float32, (None, IMAGE_SIZE, IMAGE_SIZE, 2), 'input'), return [InputVar(tf.float32, (None, IMAGE_SIZE, IMAGE_SIZE, 2), 'input'),
InputVar(tf.int32, (None,), 'label') ] InputVar(tf.int32, (None,), 'label')]
def _build_graph(self, input_vars): def _build_graph(self, input_vars):
xys = np.array([(y,x,1) for y in range(WARP_TARGET_SIZE) xys = np.array([(y, x, 1) for y in range(WARP_TARGET_SIZE)
for x in range(WARP_TARGET_SIZE)], dtype='float32') for x in range(WARP_TARGET_SIZE)], dtype='float32')
xys = tf.constant(xys, dtype=tf.float32, name='xys') # p x 3 xys = tf.constant(xys, dtype=tf.float32, name='xys') # p x 3
image, label = input_vars image, label = input_vars
image = image / 255.0 - 0.5 # bhw2 image = image / 255.0 - 0.5 # bhw2
def get_stn(image): def get_stn(image):
stn = (LinearWrap(image) stn = (LinearWrap(image)
.AvgPooling('downsample', 2) .AvgPooling('downsample', 2)
.Conv2D('conv0', 20, 5, padding='VALID') .Conv2D('conv0', 20, 5, padding='VALID')
.MaxPooling('pool0', 2) .MaxPooling('pool0', 2)
.Conv2D('conv1', 20, 5, padding='VALID') .Conv2D('conv1', 20, 5, padding='VALID')
.FullyConnected('fc1', out_dim=32) .FullyConnected('fc1', out_dim=32)
.FullyConnected('fct', out_dim=6, nl=tf.identity, .FullyConnected('fct', out_dim=6, nl=tf.identity,
W_init=tf.constant_initializer(), W_init=tf.constant_initializer(),
b_init=tf.constant_initializer([1, 0, HALF_DIFF, 0, 1, HALF_DIFF]))()) b_init=tf.constant_initializer([1, 0, HALF_DIFF, 0, 1, HALF_DIFF]))())
# output 6 parameters for affine transformation # output 6 parameters for affine transformation
stn = tf.reshape(stn, [-1, 2, 3], name='affine') # bx2x3 stn = tf.reshape(stn, [-1, 2, 3], name='affine') # bx2x3
stn = tf.reshape(tf.transpose(stn, [2, 0, 1]), [3, -1]) # 3 x (bx2) stn = tf.reshape(tf.transpose(stn, [2, 0, 1]), [3, -1]) # 3 x (bx2)
coor = tf.reshape(tf.matmul(xys, stn), coor = tf.reshape(tf.matmul(xys, stn),
[WARP_TARGET_SIZE, WARP_TARGET_SIZE, -1, 2]) [WARP_TARGET_SIZE, WARP_TARGET_SIZE, -1, 2])
coor = tf.transpose(coor, [2, 0, 1, 3], 'sampled_coords') # b h w 2 coor = tf.transpose(coor, [2, 0, 1, 3], 'sampled_coords') # b h w 2
sampled = ImageSample('warp', [image, coor], borderMode='constant') sampled = ImageSample('warp', [image, coor], borderMode='constant')
return sampled return sampled
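To see what this grid computation does, consider a single image at initialization, where fct has zero weights and bias [1, 0, HALF_DIFF, 0, 1, HALF_DIFF]: the predicted 2x3 affine is then an identity map plus a translation of HALF_DIFF, so the sampler reads the central 28x28 crop of the 42x42 input. A numpy sketch of that single-image case (my own, not part of the commit; the real code batches the matmul over all images at once):

import numpy as np

IMAGE_SIZE, WARP_TARGET_SIZE = 42, 28
HALF_DIFF = (IMAGE_SIZE - WARP_TARGET_SIZE) // 2            # 7

# target-grid homogeneous coordinates, same layout as the 'xys' constant above
xys = np.array([(y, x, 1) for y in range(WARP_TARGET_SIZE)
                for x in range(WARP_TARGET_SIZE)], dtype='float32')   # p x 3

# affine predicted at initialization (zero W, the constant bias reshaped to 2x3)
affine = np.array([[1, 0, HALF_DIFF],
                   [0, 1, HALF_DIFF]], dtype='float32')

coor = (xys @ affine.T).reshape(WARP_TARGET_SIZE, WARP_TARGET_SIZE, 2)
assert coor[0, 0].tolist() == [7, 7]          # target (0, 0) samples source pixel (7, 7)
assert coor[-1, -1].tolist() == [34, 34]      # target (27, 27) samples source pixel (34, 34)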
...@@ -55,21 +58,21 @@ class Model(ModelDesc): ...@@ -55,21 +58,21 @@ class Model(ModelDesc):
sampled2 = get_stn(image) sampled2 = get_stn(image)
# For visualization in tensorboard # For visualization in tensorboard
padded1 = tf.pad(sampled1, [[0,0],[HALF_DIFF,HALF_DIFF],[HALF_DIFF,HALF_DIFF],[0,0]]) padded1 = tf.pad(sampled1, [[0, 0], [HALF_DIFF, HALF_DIFF], [HALF_DIFF, HALF_DIFF], [0, 0]])
padded2 = tf.pad(sampled2, [[0,0],[HALF_DIFF,HALF_DIFF],[HALF_DIFF,HALF_DIFF],[0,0]]) padded2 = tf.pad(sampled2, [[0, 0], [HALF_DIFF, HALF_DIFF], [HALF_DIFF, HALF_DIFF], [0, 0]])
img_orig = tf.concat(1, [image[:,:,:,0], image[:,:,:,1]]) #b x 2h x w img_orig = tf.concat(1, [image[:, :, :, 0], image[:, :, :, 1]]) # b x 2h x w
transform1 = tf.concat(1, [padded1[:,:,:,0], padded1[:,:,:,1]]) transform1 = tf.concat(1, [padded1[:, :, :, 0], padded1[:, :, :, 1]])
transform2 = tf.concat(1, [padded2[:,:,:,0], padded2[:,:,:,1]]) transform2 = tf.concat(1, [padded2[:, :, :, 0], padded2[:, :, :, 1]])
stacked = tf.concat(2, [img_orig, transform1, transform2], 'viz') stacked = tf.concat(2, [img_orig, transform1, transform2], 'viz')
tf.summary.image('visualize', tf.summary.image('visualize',
tf.expand_dims(stacked, -1), max_images=30) tf.expand_dims(stacked, -1), max_images=30)
sampled = tf.concat(3, [sampled1, sampled2], 'sampled_concat') sampled = tf.concat(3, [sampled1, sampled2], 'sampled_concat')
logits = (LinearWrap(sampled) logits = (LinearWrap(sampled)
.apply(symbf.batch_flatten) .apply(symbf.batch_flatten)
.FullyConnected('fc1', out_dim=256, nl=tf.nn.relu) .FullyConnected('fc1', out_dim=256, nl=tf.nn.relu)
.FullyConnected('fc2', out_dim=128, nl=tf.nn.relu) .FullyConnected('fc2', out_dim=128, nl=tf.nn.relu)
.FullyConnected('fct', out_dim=19, nl=tf.identity)()) .FullyConnected('fct', out_dim=19, nl=tf.identity)())
prob = tf.nn.softmax(logits, name='prob') prob = tf.nn.softmax(logits, name='prob')
cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, label) cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, label)
...@@ -87,6 +90,7 @@ class Model(ModelDesc): ...@@ -87,6 +90,7 @@ class Model(ModelDesc):
return [MapGradient(lambda grad: tf.clip_by_global_norm([grad], 5)[0][0]), return [MapGradient(lambda grad: tf.clip_by_global_norm([grad], 5)[0][0]),
ScaleGradient([('STN.*', 0.1)]), SummaryGradient()] ScaleGradient([('STN.*', 0.1)]), SummaryGradient()]
def get_data(isTrain): def get_data(isTrain):
ds = dataset.Mnist('train' if isTrain else 'test') ds = dataset.Mnist('train' if isTrain else 'test')
# create augmentation for both training and testing # create augmentation for both training and testing
...@@ -105,20 +109,21 @@ def get_data(isTrain): ...@@ -105,20 +109,21 @@ def get_data(isTrain):
ds = BatchData(ds, 128) ds = BatchData(ds, 128)
return ds return ds
def view_warp(modelpath): def view_warp(modelpath):
pred = OfflinePredictor(PredictConfig( pred = OfflinePredictor(PredictConfig(
session_init=get_model_loader(modelpath), session_init=get_model_loader(modelpath),
model=Model(), model=Model(),
input_names=['input'], input_names=['input'],
output_names=['viz', 'STN1/affine', 'STN2/affine'])) output_names=['viz', 'STN1/affine', 'STN2/affine']))
xys = np.array([[0, 0, 1], xys = np.array([[0, 0, 1],
[WARP_TARGET_SIZE, 0, 1], [WARP_TARGET_SIZE, 0, 1],
[WARP_TARGET_SIZE, WARP_TARGET_SIZE, 1], [WARP_TARGET_SIZE, WARP_TARGET_SIZE, 1],
[0, WARP_TARGET_SIZE, 1]], dtype='float32') [0, WARP_TARGET_SIZE, 1]], dtype='float32')
def draw_rect(img, affine, c, offset=[0,0]): def draw_rect(img, affine, c, offset=[0, 0]):
a = np.transpose(affine) #3x2 a = np.transpose(affine) # 3x2
a = (np.matmul(xys, a) + offset).astype('int32') a = (np.matmul(xys, a) + offset).astype('int32')
cv2.line(img, tuple(a[0][::-1]), tuple(a[1][::-1]), c) cv2.line(img, tuple(a[0][::-1]), tuple(a[1][::-1]), c)
cv2.line(img, tuple(a[1][::-1]), tuple(a[2][::-1]), c) cv2.line(img, tuple(a[1][::-1]), tuple(a[2][::-1]), c)
...@@ -133,11 +138,12 @@ def view_warp(modelpath): ...@@ -133,11 +138,12 @@ def view_warp(modelpath):
for idx, viz in enumerate(outputs): for idx, viz in enumerate(outputs):
viz = cv2.cvtColor(viz, cv2.COLOR_GRAY2BGR) viz = cv2.cvtColor(viz, cv2.COLOR_GRAY2BGR)
# Here we assume the second branch focuses on the first digit # Here we assume the second branch focuses on the first digit
draw_rect(viz, affine2[idx], (0,0,255)) draw_rect(viz, affine2[idx], (0, 0, 255))
draw_rect(viz, affine1[idx], (0,0,255), offset=[IMAGE_SIZE, 0]) draw_rect(viz, affine1[idx], (0, 0, 255), offset=[IMAGE_SIZE, 0])
cv2.imwrite('{:03d}.png'.format(idx), (viz + 0.5) * 255) cv2.imwrite('{:03d}.png'.format(idx), (viz + 0.5) * 255)
break break
def get_config(): def get_config():
logger.auto_set_dir() logger.auto_set_dir()
...@@ -152,7 +158,7 @@ def get_config(): ...@@ -152,7 +158,7 @@ def get_config():
callbacks=Callbacks([ callbacks=Callbacks([
StatPrinter(), ModelSaver(), StatPrinter(), ModelSaver(),
InferenceRunner(dataset_test, InferenceRunner(dataset_test,
[ScalarStats('cost'), ClassificationError() ]), [ScalarStats('cost'), ClassificationError()]),
ScheduledHyperParamSetter('learning_rate', [(200, 1e-4)]) ScheduledHyperParamSetter('learning_rate', [(200, 1e-4)])
]), ]),
session_config=get_default_sess_config(0.5), session_config=get_default_sess_config(0.5),
...@@ -176,4 +182,3 @@ if __name__ == '__main__': ...@@ -176,4 +182,3 @@ if __name__ == '__main__':
if args.load: if args.load:
config.session_init = SaverRestore(args.load) config.session_init = SaverRestore(args.load)
SimpleTrainer(config).train() SimpleTrainer(config).train()
...@@ -2,7 +2,8 @@ ...@@ -2,7 +2,8 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# File: create-lmdb.py # File: create-lmdb.py
# Author: Yuxin Wu <ppwwyyxxc@gmail.com> # Author: Yuxin Wu <ppwwyyxxc@gmail.com>
import sys, os import sys
import os
import scipy.io.wavfile as wavfile import scipy.io.wavfile as wavfile
import string import string
import numpy as np import numpy as np
...@@ -14,10 +15,12 @@ from tensorpack.utils.stats import OnlineMoments ...@@ -14,10 +15,12 @@ from tensorpack.utils.stats import OnlineMoments
import bob.ap import bob.ap
CHARSET = set(string.ascii_lowercase + ' ') CHARSET = set(string.ascii_lowercase + ' ')
PHONEME_LIST = "aa,ae,ah,ao,aw,ax,ax-h,axr,ay,b,bcl,ch,d,dcl,dh,dx,eh,el,em,en,eng,epi,er,ey,f,g,gcl,h#,hh,hv,ih,ix,iy,jh,k,kcl,l,m,n,ng,nx,ow,oy,p,pau,pcl,q,r,s,sh,t,tcl,th,uh,uw,ux,v,w,y,z,zh".split(',') PHONEME_LIST = "aa,ae,ah,ao,aw,ax,ax-h,axr,ay,b,bcl,ch,d,dcl,dh,dx,eh,el,em,en,eng,epi,er,ey,f,g,gcl,h#,hh,hv,ih,ix,iy,jh,k,kcl,l,m,n,ng,nx,ow,oy,p,pau,pcl,q,r,s,sh,t,tcl,th,uh,uw,ux,v,w,y,z,zh".split(
',')
PHONEME_DIC = {v: k for k, v in enumerate(PHONEME_LIST)} PHONEME_DIC = {v: k for k, v in enumerate(PHONEME_LIST)}
WORD_DIC = {v: k for k, v in enumerate(string.ascii_lowercase + ' ')} WORD_DIC = {v: k for k, v in enumerate(string.ascii_lowercase + ' ')}
def read_timit_txt(f): def read_timit_txt(f):
f = open(f) f = open(f)
line = f.readlines()[0].strip().split(' ') line = f.readlines()[0].strip().split(' ')
...@@ -30,6 +33,7 @@ def read_timit_txt(f): ...@@ -30,6 +33,7 @@ def read_timit_txt(f):
ret.append(WORD_DIC[c]) ret.append(WORD_DIC[c])
return np.asarray(ret) return np.asarray(ret)
def read_timit_phoneme(f): def read_timit_phoneme(f):
f = open(f) f = open(f)
pho = [] pho = []
...@@ -39,15 +43,17 @@ def read_timit_phoneme(f): ...@@ -39,15 +43,17 @@ def read_timit_phoneme(f):
f.close() f.close()
return np.asarray(pho) return np.asarray(pho)
@memoized @memoized
def get_bob_extractor(fs, win_length_ms=10, win_shift_ms=5, def get_bob_extractor(fs, win_length_ms=10, win_shift_ms=5,
n_filters=55, n_ceps=15, f_min=0., f_max=6000, n_filters=55, n_ceps=15, f_min=0., f_max=6000,
delta_win=2, pre_emphasis_coef=0.95, dct_norm=True, delta_win=2, pre_emphasis_coef=0.95, dct_norm=True,
mel_scale=True): mel_scale=True):
ret = bob.ap.Ceps(fs, win_length_ms, win_shift_ms, n_filters, n_ceps, f_min, ret = bob.ap.Ceps(fs, win_length_ms, win_shift_ms, n_filters, n_ceps, f_min,
f_max, delta_win, pre_emphasis_coef, mel_scale, dct_norm) f_max, delta_win, pre_emphasis_coef, mel_scale, dct_norm)
return ret return ret
def diff_feature(feat, nd=1): def diff_feature(feat, nd=1):
diff = feat[1:] - feat[:-1] diff = feat[1:] - feat[:-1]
feat = feat[1:] feat = feat[1:]
...@@ -57,6 +63,7 @@ def diff_feature(feat, nd=1): ...@@ -57,6 +63,7 @@ def diff_feature(feat, nd=1):
d2 = diff[1:] - diff[:-1] d2 = diff[1:] - diff[:-1]
return np.concatenate((feat[1:], diff[1:], d2), axis=1) return np.concatenate((feat[1:], diff[1:], d2), axis=1)
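diff_feature appends first-order (and, with nd=2 as used in get_feature below, second-order) frame differences to the cepstral features, tripling the feature dimension while dropping two frames. The nd=1 branch is outside this hunk; a self-contained sketch under the assumption that it simply concatenates (feat, diff):

import numpy as np

def diff_feature(feat, nd=1):
    diff = feat[1:] - feat[:-1]        # first-order differences
    feat = feat[1:]
    if nd == 1:                        # assumed behaviour of the elided branch
        return np.concatenate((feat, diff), axis=1)
    d2 = diff[1:] - diff[:-1]          # second-order differences
    return np.concatenate((feat[1:], diff[1:], d2), axis=1)

feat = np.random.rand(100, 15)         # 100 frames of 15 cepstral coefficients
assert diff_feature(feat, nd=2).shape == (98, 45)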
def get_feature(f): def get_feature(f):
fs, signal = wavfile.read(f) fs, signal = wavfile.read(f)
signal = signal.astype('float64') signal = signal.astype('float64')
...@@ -64,12 +71,14 @@ def get_feature(f): ...@@ -64,12 +71,14 @@ def get_feature(f):
feat = diff_feature(feat, nd=2) feat = diff_feature(feat, nd=2)
return feat return feat
class RawTIMIT(DataFlow): class RawTIMIT(DataFlow):
def __init__(self, dirname, label='phoneme'): def __init__(self, dirname, label='phoneme'):
self.dirname = dirname self.dirname = dirname
assert os.path.isdir(dirname), dirname assert os.path.isdir(dirname), dirname
self.filelists = [k for k in fs.recursive_walk(self.dirname) self.filelists = [k for k in fs.recursive_walk(self.dirname)
if k.endswith('.wav')] if k.endswith('.wav')]
logger.info("Found {} wav files ...".format(len(self.filelists))) logger.info("Found {} wav files ...".format(len(self.filelists)))
assert len(self.filelists), self.filelists assert len(self.filelists), self.filelists
assert label in ['phoneme', 'letter'], label assert label in ['phoneme', 'letter'], label
...@@ -87,12 +96,13 @@ class RawTIMIT(DataFlow): ...@@ -87,12 +96,13 @@ class RawTIMIT(DataFlow):
label = read_timit_txt(f[:-4] + '.TXT') label = read_timit_txt(f[:-4] + '.TXT')
yield [feat, label] yield [feat, label]
def compute_mean_std(db, fname): def compute_mean_std(db, fname):
ds = LMDBDataPoint(db, shuffle=False) ds = LMDBDataPoint(db, shuffle=False)
o = OnlineMoments() o = OnlineMoments()
with get_tqdm(total=ds.size()) as bar: with get_tqdm(total=ds.size()) as bar:
for dp in ds.get_data(): for dp in ds.get_data():
feat = dp[0] #len x dim feat = dp[0] # len x dim
for f in feat: for f in feat:
o.feed(f) o.feed(f)
bar.update() bar.update()
...@@ -105,13 +115,13 @@ if __name__ == '__main__': ...@@ -105,13 +115,13 @@ if __name__ == '__main__':
subparsers = parser.add_subparsers(title='command', dest='command') subparsers = parser.add_subparsers(title='command', dest='command')
parser_db = subparsers.add_parser('build', help='build a LMDB database') parser_db = subparsers.add_parser('build', help='build a LMDB database')
parser_db.add_argument('--dataset', parser_db.add_argument('--dataset',
help='path to TIMIT TRAIN or TEST directory', required=True) help='path to TIMIT TRAIN or TEST directory', required=True)
parser_db.add_argument('--db', help='output lmdb file', required=True) parser_db.add_argument('--db', help='output lmdb file', required=True)
parser_stat = subparsers.add_parser('stat', help='compute statistics (mean/std) of dataset') parser_stat = subparsers.add_parser('stat', help='compute statistics (mean/std) of dataset')
parser_stat.add_argument('--db', help='input lmdb file', required=True) parser_stat.add_argument('--db', help='input lmdb file', required=True)
parser_stat.add_argument('-o', '--output', parser_stat.add_argument('-o', '--output',
help='output statistics file', default='stats.data') help='output statistics file', default='stats.data')
args = parser.parse_args() args = parser.parse_args()
if args.command == 'build': if args.command == 'build':
...@@ -119,4 +129,3 @@ if __name__ == '__main__': ...@@ -119,4 +129,3 @@ if __name__ == '__main__':
dftools.dump_dataflow_to_lmdb(ds, args.db) dftools.dump_dataflow_to_lmdb(ds, args.db)
elif args.command == 'stat': elif args.command == 'stat':
compute_mean_std(args.db, args.output) compute_mean_std(args.db, args.output)
...@@ -9,15 +9,17 @@ from six.moves import range ...@@ -9,15 +9,17 @@ from six.moves import range
__all__ = ['TIMITBatch'] __all__ = ['TIMITBatch']
def batch_feature(feats): def batch_feature(feats):
# pad to the longest in the batch # pad to the longest in the batch
maxlen = max([k.shape[0] for k in feats]) maxlen = max([k.shape[0] for k in feats])
bsize = len(feats) bsize = len(feats)
ret = np.zeros((bsize, maxlen, feats[0].shape[1])) ret = np.zeros((bsize, maxlen, feats[0].shape[1]))
for idx, feat in enumerate(feats): for idx, feat in enumerate(feats):
ret[idx,:feat.shape[0],:] = feat ret[idx, :feat.shape[0], :] = feat
return ret return ret
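A quick usage sketch of the padding above (relying on the batch_feature defined in this hunk): variable-length utterances become one dense, zero-padded b x maxlen x dim array, while the true lengths travel separately as seqlen in TIMITBatch below:

import numpy as np

feats = [np.ones((3, 39)), np.ones((5, 39))]   # two utterances, 3 and 5 frames
batched = batch_feature(feats)
assert batched.shape == (2, 5, 39)             # padded to the longest utterance
assert (batched[0, 3:] == 0).all()             # the shorter one is zero-padded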
def sparse_label(labels): def sparse_label(labels):
maxlen = max([k.shape[0] for k in labels]) maxlen = max([k.shape[0] for k in labels])
shape = [len(labels), maxlen] # bxt shape = [len(labels), maxlen] # bxt
...@@ -31,7 +33,9 @@ def sparse_label(labels): ...@@ -31,7 +33,9 @@ def sparse_label(labels):
values = np.asarray(values) values = np.asarray(values)
return (indices, values, shape) return (indices, values, shape)
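sparse_label turns a list of variable-length label arrays into the (indices, values, shape) triplet that tf.SparseTensor and tf.nn.ctc_loss expect. The loop that fills indices and values is outside this hunk; a sketch under the assumption that it records one (batch, time) pair per label entry:

import numpy as np

def sparse_label(labels):
    indices, values = [], []
    for b, lab in enumerate(labels):           # assumed shape of the elided loop
        for t, v in enumerate(lab):
            indices.append((b, t))
            values.append(v)
    shape = [len(labels), max(k.shape[0] for k in labels)]   # b x maxlen
    return np.asarray(indices), np.asarray(values), shape

idx, val, shp = sparse_label([np.array([3, 1, 2]), np.array([5, 4])])
# idx -> [[0 0] [0 1] [0 2] [1 0] [1 1]], val -> [3 1 2 5 4], shp -> [2, 3]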
class TIMITBatch(ProxyDataFlow): class TIMITBatch(ProxyDataFlow):
def __init__(self, ds, batch): def __init__(self, ds, batch):
self.batch = batch self.batch = batch
self.ds = ds self.ds = ds
...@@ -52,4 +56,3 @@ class TIMITBatch(ProxyDataFlow): ...@@ -52,4 +56,3 @@ class TIMITBatch(ProxyDataFlow):
batchlab = sparse_label(labs) batchlab = sparse_label(labs)
seqlen = np.asarray([k.shape[0] for k in feats]) seqlen = np.asarray([k.shape[0] for k in feats])
yield [batchfeat, batchlab[0], batchlab[1], batchlab[2], seqlen] yield [batchfeat, batchlab[0], batchlab[1], batchlab[2], seqlen]
...@@ -5,7 +5,8 @@ ...@@ -5,7 +5,8 @@
import tensorflow as tf import tensorflow as tf
import numpy as np import numpy as np
import os, sys import os
import sys
import argparse import argparse
from collections import Counter from collections import Counter
import operator import operator
...@@ -13,7 +14,7 @@ import six ...@@ -13,7 +14,7 @@ import six
from six.moves import map, range from six.moves import map, range
from tensorpack import * from tensorpack import *
from tensorpack.tfutils.gradproc import * from tensorpack.tfutils.gradproc import *
from tensorpack.utils.globvars import globalns as param from tensorpack.utils.globvars import globalns as param
import tensorpack.tfutils.symbolic_functions as symbf import tensorpack.tfutils.symbolic_functions as symbf
from timitdata import TIMITBatch from timitdata import TIMITBatch
...@@ -21,13 +22,15 @@ from timitdata import TIMITBatch ...@@ -21,13 +22,15 @@ from timitdata import TIMITBatch
BATCH = 64 BATCH = 64
NLAYER = 2 NLAYER = 2
HIDDEN = 128 HIDDEN = 128
NR_CLASS = 61 + 1 NR_CLASS = 61 + 1
FEATUREDIM = 39 FEATUREDIM = 39
class Model(ModelDesc): class Model(ModelDesc):
def _get_input_vars(self): def _get_input_vars(self):
return [InputVar(tf.float32, [None, None, FEATUREDIM], 'feat'), # bxmaxseqx39 return [InputVar(tf.float32, [None, None, FEATUREDIM], 'feat'), # bxmaxseqx39
InputVar(tf.int64, None, 'labelidx'), #label is b x maxlen, sparse InputVar(tf.int64, None, 'labelidx'), # label is b x maxlen, sparse
InputVar(tf.int32, None, 'labelvalue'), InputVar(tf.int32, None, 'labelvalue'),
InputVar(tf.int64, None, 'labelshape'), InputVar(tf.int64, None, 'labelshape'),
InputVar(tf.int32, [None], 'seqlen'), # b InputVar(tf.int32, [None], 'seqlen'), # b
...@@ -43,36 +46,37 @@ class Model(ModelDesc): ...@@ -43,36 +46,37 @@ class Model(ModelDesc):
initial = cell.zero_state(tf.shape(feat)[0], tf.float32) initial = cell.zero_state(tf.shape(feat)[0], tf.float32)
outputs, last_state = tf.nn.dynamic_rnn(cell, feat, outputs, last_state = tf.nn.dynamic_rnn(cell, feat,
seqlen, initial, seqlen, initial,
dtype=tf.float32, scope='rnn') dtype=tf.float32, scope='rnn')
# o: b x t x HIDDEN # o: b x t x HIDDEN
output = tf.reshape(outputs, [-1, HIDDEN]) # (Bxt) x rnnsize output = tf.reshape(outputs, [-1, HIDDEN]) # (Bxt) x rnnsize
logits = FullyConnected('fc', output, NR_CLASS, nl=tf.identity, logits = FullyConnected('fc', output, NR_CLASS, nl=tf.identity,
W_init=tf.truncated_normal_initializer(stddev=0.01)) W_init=tf.truncated_normal_initializer(stddev=0.01))
logits = tf.reshape(logits, (BATCH, -1, NR_CLASS)) logits = tf.reshape(logits, (BATCH, -1, NR_CLASS))
loss = tf.nn.ctc_loss(logits, label, seqlen, time_major=False) loss = tf.nn.ctc_loss(logits, label, seqlen, time_major=False)
self.cost = tf.reduce_mean(loss, name='cost') self.cost = tf.reduce_mean(loss, name='cost')
logits = tf.transpose(logits, [1,0,2]) logits = tf.transpose(logits, [1, 0, 2])
isTrain = get_current_tower_context().is_training isTrain = get_current_tower_context().is_training
if isTrain: if isTrain:
# beam search is too slow to run in training # beam search is too slow to run in training
predictions = tf.to_int32( predictions = tf.to_int32(
tf.nn.ctc_greedy_decoder(logits, seqlen)[0][0]) tf.nn.ctc_greedy_decoder(logits, seqlen)[0][0])
else: else:
predictions = tf.to_int32( predictions = tf.to_int32(
tf.nn.ctc_beam_search_decoder(logits, seqlen)[0][0]) tf.nn.ctc_beam_search_decoder(logits, seqlen)[0][0])
err = tf.edit_distance(predictions, label, normalize=True) err = tf.edit_distance(predictions, label, normalize=True)
err.set_shape([None]) err.set_shape([None])
err = tf.reduce_mean(err, name='error') err = tf.reduce_mean(err, name='error')
summary.add_moving_summary(err) summary.add_moving_summary(err)
def get_gradient_processor(self): def get_gradient_processor(self):
return [GlobalNormClip(5), SummaryGradient() ] return [GlobalNormClip(5), SummaryGradient()]
def get_data(path, isTrain, stat_file): def get_data(path, isTrain, stat_file):
ds = LMDBDataPoint(path, shuffle=isTrain) ds = LMDBDataPoint(path, shuffle=isTrain)
...@@ -83,6 +87,7 @@ def get_data(path, isTrain, stat_file): ...@@ -83,6 +87,7 @@ def get_data(path, isTrain, stat_file):
ds = PrefetchDataZMQ(ds, 1) ds = PrefetchDataZMQ(ds, 1)
return ds return ds
def get_config(ds_train, ds_test): def get_config(ds_train, ds_test):
step_per_epoch = ds_train.size() step_per_epoch = ds_train.size()
...@@ -94,7 +99,7 @@ def get_config(ds_train, ds_test): ...@@ -94,7 +99,7 @@ def get_config(ds_train, ds_test):
callbacks=Callbacks([ callbacks=Callbacks([
StatPrinter(), ModelSaver(), StatPrinter(), ModelSaver(),
StatMonitorParamSetter('learning_rate', 'error', StatMonitorParamSetter('learning_rate', 'error',
lambda x: x * 0.2, 0, 5), lambda x: x * 0.2, 0, 5),
HumanHyperParamSetter('learning_rate'), HumanHyperParamSetter('learning_rate'),
PeriodicCallback( PeriodicCallback(
InferenceRunner(ds_test, [ScalarStats('error')]), 2), InferenceRunner(ds_test, [ScalarStats('error')]), 2),
...@@ -124,4 +129,3 @@ if __name__ == '__main__': ...@@ -124,4 +129,3 @@ if __name__ == '__main__':
if args.load: if args.load:
config.session_init = SaverRestore(args.load) config.session_init = SaverRestore(args.load)
QueueInputTrainer(config).train() QueueInputTrainer(config).train()
...@@ -5,7 +5,8 @@ ...@@ -5,7 +5,8 @@
import tensorflow as tf import tensorflow as tf
import numpy as np import numpy as np
import os, sys import os
import sys
import argparse import argparse
from collections import Counter from collections import Counter
import operator import operator
...@@ -13,7 +14,7 @@ import six ...@@ -13,7 +14,7 @@ import six
from six.moves import map, range from six.moves import map, range
from tensorpack import * from tensorpack import *
from tensorpack.tfutils.gradproc import * from tensorpack.tfutils.gradproc import *
from tensorpack.utils.lut import LookUpTable from tensorpack.utils.lut import LookUpTable
from tensorpack.utils.globvars import globalns as param from tensorpack.utils.globvars import globalns as param
...@@ -27,7 +28,9 @@ param.vocab_size = None ...@@ -27,7 +28,9 @@ param.vocab_size = None
param.softmax_temprature = 1 param.softmax_temprature = 1
param.corpus = 'input.txt' param.corpus = 'input.txt'
class CharRNNData(RNGDataFlow): class CharRNNData(RNGDataFlow):
def __init__(self, input_file, size): def __init__(self, input_file, size):
self.seq_length = param.seq_len self.seq_length = param.seq_len
self._size = size self._size = size
...@@ -51,16 +54,17 @@ class CharRNNData(RNGDataFlow): ...@@ -51,16 +54,17 @@ class CharRNNData(RNGDataFlow):
def get_data(self): def get_data(self):
random_starts = self.rng.randint(0, random_starts = self.rng.randint(0,
self.whole_seq.shape[0] - self.seq_length - 1, (self._size,)) self.whole_seq.shape[0] - self.seq_length - 1, (self._size,))
for st in random_starts: for st in random_starts:
seq = self.whole_seq[st:st + self.seq_length + 1] seq = self.whole_seq[st:st + self.seq_length + 1]
yield [seq[:-1], seq[1:]] yield [seq[:-1], seq[1:]]
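Each datapoint is a random window of seq_len + 1 characters from the encoded corpus, split so that the target is the input shifted by one step. A toy illustration of that slicing (the seq_len and start index here are made up):

import numpy as np

whole_seq = np.arange(20)            # stands in for the character-id corpus
seq_len, st = 5, 7
seq = whole_seq[st:st + seq_len + 1]
x, y = seq[:-1], seq[1:]             # y[t] is the character that follows x[t]
assert (y[:-1] == x[1:]).all()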
class Model(ModelDesc): class Model(ModelDesc):
def _get_input_vars(self): def _get_input_vars(self):
return [InputVar(tf.int32, (None, param.seq_len), 'input'), return [InputVar(tf.int32, (None, param.seq_len), 'input'),
InputVar(tf.int32, (None, param.seq_len), 'nextinput') ] InputVar(tf.int32, (None, param.seq_len), 'nextinput')]
def _build_graph(self, input_vars): def _build_graph(self, input_vars):
input, nextinput = input_vars input, nextinput = input_vars
...@@ -71,9 +75,9 @@ class Model(ModelDesc): ...@@ -71,9 +75,9 @@ class Model(ModelDesc):
self.initial = initial = cell.zero_state(tf.shape(input)[0], tf.float32) self.initial = initial = cell.zero_state(tf.shape(input)[0], tf.float32)
embeddingW = tf.get_variable('embedding', [param.vocab_size, param.rnn_size]) embeddingW = tf.get_variable('embedding', [param.vocab_size, param.rnn_size])
input_feature = tf.nn.embedding_lookup(embeddingW, input) # B x seqlen x rnnsize input_feature = tf.nn.embedding_lookup(embeddingW, input) # B x seqlen x rnnsize
input_list = tf.unstack(input_feature, axis=1) #seqlen x (Bxrnnsize) input_list = tf.unstack(input_feature, axis=1) # seqlen x (Bxrnnsize)
# seqlen is 1 in inference. don't need loop_function # seqlen is 1 in inference. don't need loop_function
outputs, last_state = tf.nn.rnn(cell, input_list, initial, scope='rnnlm') outputs, last_state = tf.nn.rnn(cell, input_list, initial, scope='rnnlm')
...@@ -85,13 +89,14 @@ class Model(ModelDesc): ...@@ -85,13 +89,14 @@ class Model(ModelDesc):
self.prob = tf.nn.softmax(logits / param.softmax_temprature) self.prob = tf.nn.softmax(logits / param.softmax_temprature)
xent_loss = tf.nn.sparse_softmax_cross_entropy_with_logits( xent_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
logits, symbolic_functions.flatten(nextinput)) logits, symbolic_functions.flatten(nextinput))
self.cost = tf.reduce_mean(xent_loss, name='cost') self.cost = tf.reduce_mean(xent_loss, name='cost')
summary.add_param_summary([('.*/W', ['histogram'])]) # monitor histogram of all W summary.add_param_summary([('.*/W', ['histogram'])]) # monitor histogram of all W
def get_gradient_processor(self): def get_gradient_processor(self):
return [GlobalNormClip(5)] return [GlobalNormClip(5)]
def get_config(): def get_config():
logger.auto_set_dir() logger.auto_set_dir()
...@@ -114,6 +119,8 @@ def get_config(): ...@@ -114,6 +119,8 @@ def get_config():
) )
# TODO rewrite using Predictor interface # TODO rewrite using Predictor interface
def sample(path, start, length): def sample(path, start, length):
""" """
:param path: path to the model :param path: path to the model
...@@ -130,7 +137,7 @@ def sample(path, start, length): ...@@ -130,7 +137,7 @@ def sample(path, start, length):
sess = tf.Session() sess = tf.Session()
tfutils.SaverRestore(path).init(sess) tfutils.SaverRestore(path).init(sess)
dummy_input = np.zeros((1,1), dtype='int32') dummy_input = np.zeros((1, 1), dtype='int32')
with sess.as_default(): with sess.as_default():
# feed the starting sentence # feed the starting sentence
state = model.initial.eval({input_vars[0]: dummy_input}) state = model.initial.eval({input_vars[0]: dummy_input})
...@@ -149,7 +156,7 @@ def sample(path, start, length): ...@@ -149,7 +156,7 @@ def sample(path, start, length):
for k in range(length): for k in range(length):
x = np.array([[ds.lut.get_idx(c)]], dtype='int32') x = np.array([[ds.lut.get_idx(c)]], dtype='int32')
[prob, state] = sess.run([model.prob, model.last_state], [prob, state] = sess.run([model.prob, model.last_state],
{input_vars[0]: x, model.initial: state}) {input_vars[0]: x, model.initial: state})
c = ds.lut.get_obj(pick(prob[0])) c = ds.lut.get_obj(pick(prob[0]))
ret += c ret += c
print(ret) print(ret)
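The generation loop feeds one character at a time and draws the next one from the temperature-scaled softmax; pick itself is not in this hunk, but the usual choice is to sample an index from the probability vector. A sketch under that assumption, also showing how softmax_temprature reshapes the distribution:

import numpy as np

def pick(prob):
    # assumed behaviour: sample an index according to the probabilities
    prob = prob / prob.sum()
    return np.random.choice(len(prob), p=prob)

logits = np.array([2.0, 1.0, 0.1])
for temperature in (0.5, 1.0, 2.0):
    p = np.exp(logits / temperature)
    p /= p.sum()
    print(temperature, p.round(3), pick(p))   # higher temperature -> flatter distribution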
...@@ -161,11 +168,11 @@ if __name__ == '__main__': ...@@ -161,11 +168,11 @@ if __name__ == '__main__':
subparsers = parser.add_subparsers(title='command', dest='command') subparsers = parser.add_subparsers(title='command', dest='command')
parser_sample = subparsers.add_parser('sample', help='sample a trained model') parser_sample = subparsers.add_parser('sample', help='sample a trained model')
parser_sample.add_argument('-n', '--num', type=int, parser_sample.add_argument('-n', '--num', type=int,
default=300, help='length of text to generate') default=300, help='length of text to generate')
parser_sample.add_argument('-s', '--start', parser_sample.add_argument('-s', '--start',
default='The ', help='initial text sequence') default='The ', help='initial text sequence')
parser_sample.add_argument('-t', '--temperature', type=float, parser_sample.add_argument('-t', '--temperature', type=float,
default=1, help='softmax temperature') default=1, help='softmax temperature')
parser_train = subparsers.add_parser('train', help='train') parser_train = subparsers.add_parser('train', help='train')
args = parser.parse_args() args = parser.parse_args()
if args.gpu: if args.gpu:
...@@ -181,4 +188,3 @@ if __name__ == '__main__': ...@@ -181,4 +188,3 @@ if __name__ == '__main__':
if args.load: if args.load:
config.session_init = SaverRestore(args.load) config.session_init = SaverRestore(args.load)
QueueInputTrainer(config).train() QueueInputTrainer(config).train()
...@@ -22,7 +22,9 @@ Cifar10: ...@@ -22,7 +22,9 @@ Cifar10:
Not a good model for Cifar100, just for demonstration. Not a good model for Cifar100, just for demonstration.
""" """
class Model(ModelDesc): class Model(ModelDesc):
def __init__(self, cifar_classnum): def __init__(self, cifar_classnum):
super(Model, self).__init__() super(Model, self).__init__()
self.cifar_classnum = cifar_classnum self.cifar_classnum = cifar_classnum
...@@ -30,7 +32,7 @@ class Model(ModelDesc): ...@@ -30,7 +32,7 @@ class Model(ModelDesc):
def _get_input_vars(self): def _get_input_vars(self):
return [InputVar(tf.float32, [None, 30, 30, 3], 'input'), return [InputVar(tf.float32, [None, 30, 30, 3], 'input'),
InputVar(tf.int32, [None], 'label') InputVar(tf.int32, [None], 'label')
] ]
def _build_graph(self, input_vars): def _build_graph(self, input_vars):
image, label = input_vars image, label = input_vars
...@@ -43,18 +45,18 @@ class Model(ModelDesc): ...@@ -43,18 +45,18 @@ class Model(ModelDesc):
image = image / 4.0 # just to make range smaller image = image / 4.0 # just to make range smaller
with argscope(Conv2D, nl=BNReLU, use_bias=False, kernel_shape=3): with argscope(Conv2D, nl=BNReLU, use_bias=False, kernel_shape=3):
logits = LinearWrap(image) \ logits = LinearWrap(image) \
.Conv2D('conv1.1', out_channel=64) \ .Conv2D('conv1.1', out_channel=64) \
.Conv2D('conv1.2', out_channel=64) \ .Conv2D('conv1.2', out_channel=64) \
.MaxPooling('pool1', 3, stride=2, padding='SAME') \ .MaxPooling('pool1', 3, stride=2, padding='SAME') \
.Conv2D('conv2.1', out_channel=128) \ .Conv2D('conv2.1', out_channel=128) \
.Conv2D('conv2.2', out_channel=128) \ .Conv2D('conv2.2', out_channel=128) \
.MaxPooling('pool2', 3, stride=2, padding='SAME') \ .MaxPooling('pool2', 3, stride=2, padding='SAME') \
.Conv2D('conv3.1', out_channel=128, padding='VALID') \ .Conv2D('conv3.1', out_channel=128, padding='VALID') \
.Conv2D('conv3.2', out_channel=128, padding='VALID') \ .Conv2D('conv3.2', out_channel=128, padding='VALID') \
.FullyConnected('fc0', 1024 + 512, nl=tf.nn.relu) \ .FullyConnected('fc0', 1024 + 512, nl=tf.nn.relu) \
.tf.nn.dropout(keep_prob) \ .tf.nn.dropout(keep_prob) \
.FullyConnected('fc1', 512, nl=tf.nn.relu) \ .FullyConnected('fc1', 512, nl=tf.nn.relu) \
.FullyConnected('linear', out_dim=self.cifar_classnum, nl=tf.identity)() .FullyConnected('linear', out_dim=self.cifar_classnum, nl=tf.identity)()
cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, label) cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, label)
cost = tf.reduce_mean(cost, name='cross_entropy_loss') cost = tf.reduce_mean(cost, name='cross_entropy_loss')
...@@ -72,6 +74,7 @@ class Model(ModelDesc): ...@@ -72,6 +74,7 @@ class Model(ModelDesc):
add_param_summary([('.*/W', ['histogram'])]) # monitor W add_param_summary([('.*/W', ['histogram'])]) # monitor W
self.cost = tf.add_n([cost, wd_cost], name='cost') self.cost = tf.add_n([cost, wd_cost], name='cost')
def get_data(train_or_test, cifar_classnum): def get_data(train_or_test, cifar_classnum):
isTrain = train_or_test == 'train' isTrain = train_or_test == 'train'
if cifar_classnum == 10: if cifar_classnum == 10:
...@@ -83,10 +86,10 @@ def get_data(train_or_test, cifar_classnum): ...@@ -83,10 +86,10 @@ def get_data(train_or_test, cifar_classnum):
imgaug.RandomCrop((30, 30)), imgaug.RandomCrop((30, 30)),
imgaug.Flip(horiz=True), imgaug.Flip(horiz=True),
imgaug.Brightness(63), imgaug.Brightness(63),
imgaug.Contrast((0.2,1.8)), imgaug.Contrast((0.2, 1.8)),
imgaug.GaussianDeform( imgaug.GaussianDeform(
[(0.2, 0.2), (0.2, 0.8), (0.8,0.8), (0.8,0.2)], [(0.2, 0.2), (0.2, 0.8), (0.8, 0.8), (0.8, 0.2)],
(30,30), 0.2, 3), (30, 30), 0.2, 3),
imgaug.MeanVarianceNormalize(all_channel=True) imgaug.MeanVarianceNormalize(all_channel=True)
] ]
else: else:
...@@ -100,6 +103,7 @@ def get_data(train_or_test, cifar_classnum): ...@@ -100,6 +103,7 @@ def get_data(train_or_test, cifar_classnum):
ds = PrefetchData(ds, 3, 2) ds = PrefetchData(ds, 3, 2)
return ds return ds
def get_config(cifar_classnum): def get_config(cifar_classnum):
logger.auto_set_dir() logger.auto_set_dir()
...@@ -111,6 +115,7 @@ def get_config(cifar_classnum): ...@@ -111,6 +115,7 @@ def get_config(cifar_classnum):
sess_config = get_default_sess_config(0.5) sess_config = get_default_sess_config(0.5)
lr = symbf.get_scalar_var('learning_rate', 1e-2, summary=True) lr = symbf.get_scalar_var('learning_rate', 1e-2, summary=True)
def lr_func(lr): def lr_func(lr):
if lr < 3e-5: if lr < 3e-5:
raise StopTraining() raise StopTraining()
...@@ -123,7 +128,7 @@ def get_config(cifar_classnum): ...@@ -123,7 +128,7 @@ def get_config(cifar_classnum):
StatPrinter(), ModelSaver(), StatPrinter(), ModelSaver(),
InferenceRunner(dataset_test, ClassificationError()), InferenceRunner(dataset_test, ClassificationError()),
StatMonitorParamSetter('learning_rate', 'val_error', lr_func, StatMonitorParamSetter('learning_rate', 'val_error', lr_func,
threshold=0.001, last_k=10), threshold=0.001, last_k=10),
]), ]),
session_config=sess_config, session_config=sess_config,
model=Model(cifar_classnum), model=Model(cifar_classnum),
......
...@@ -6,7 +6,9 @@ ...@@ -6,7 +6,9 @@
from __future__ import print_function from __future__ import print_function
import tensorflow as tf import tensorflow as tf
import numpy as np import numpy as np
import os, cv2, argparse import os
import cv2
import argparse
from tensorpack import * from tensorpack import *
from tensorpack.tfutils.symbolic_functions import * from tensorpack.tfutils.symbolic_functions import *
...@@ -19,9 +21,11 @@ Usage: ...@@ -19,9 +21,11 @@ Usage:
./load-alexnet.py --load alexnet.npy --input cat.png ./load-alexnet.py --load alexnet.npy --input cat.png
""" """
class Model(ModelDesc): class Model(ModelDesc):
def _get_input_vars(self): def _get_input_vars(self):
return [InputVar(tf.float32, (None, 227, 227, 3), 'input') ] return [InputVar(tf.float32, (None, 227, 227, 3), 'input')]
def _build_graph(self, inputs): def _build_graph(self, inputs):
# img: 227x227x3 # img: 227x227x3
...@@ -48,6 +52,7 @@ class Model(ModelDesc): ...@@ -48,6 +52,7 @@ class Model(ModelDesc):
logits = FullyConnected('fc8', l, out_dim=1000, nl=tf.identity) logits = FullyConnected('fc8', l, out_dim=1000, nl=tf.identity)
prob = tf.nn.softmax(logits, name='prob') prob = tf.nn.softmax(logits, name='prob')
def run_test(path, input): def run_test(path, input):
param_dict = np.load(path, encoding='latin1').item() param_dict = np.load(path, encoding='latin1').item()
predict_func = OfflinePredictor(PredictConfig( predict_func = OfflinePredictor(PredictConfig(
...@@ -59,8 +64,8 @@ def run_test(path, input): ...@@ -59,8 +64,8 @@ def run_test(path, input):
im = cv2.imread(input) im = cv2.imread(input)
assert im is not None, input assert im is not None, input
im = cv2.resize(im, (227, 227))[:,:,::-1].reshape( im = cv2.resize(im, (227, 227))[:, :, ::-1].reshape(
(1,227,227,3)).astype('float32') - 110 (1, 227, 227, 3)).astype('float32') - 110
outputs = predict_func([im])[0] outputs = predict_func([im])[0]
prob = outputs[0] prob = outputs[0]
ret = prob.argsort()[-10:][::-1] ret = prob.argsort()[-10:][::-1]
......
...@@ -7,7 +7,8 @@ from __future__ import print_function ...@@ -7,7 +7,8 @@ from __future__ import print_function
import cv2 import cv2
import tensorflow as tf import tensorflow as tf
import numpy as np import numpy as np
import os, argparse import os
import argparse
from tensorpack import * from tensorpack import *
from tensorpack.tfutils.symbolic_functions import * from tensorpack.tfutils.symbolic_functions import *
...@@ -20,44 +21,47 @@ Usage: ...@@ -20,44 +21,47 @@ Usage:
./load-vgg16.py --load vgg16.npy --input cat.png ./load-vgg16.py --load vgg16.npy --input cat.png
""" """
class Model(ModelDesc): class Model(ModelDesc):
def _get_input_vars(self): def _get_input_vars(self):
return [InputVar(tf.float32, (None, 224, 224, 3), 'input') ] return [InputVar(tf.float32, (None, 224, 224, 3), 'input')]
def _build_graph(self, inputs): def _build_graph(self, inputs):
image = inputs[0] image = inputs[0]
with argscope(Conv2D, kernel_shape=3, nl=tf.nn.relu): with argscope(Conv2D, kernel_shape=3, nl=tf.nn.relu):
logits = (LinearWrap(image) logits = (LinearWrap(image)
.Conv2D('conv1_1', 64) .Conv2D('conv1_1', 64)
.Conv2D('conv1_2', 64) .Conv2D('conv1_2', 64)
.MaxPooling('pool1', 2) .MaxPooling('pool1', 2)
# 112 # 112
.Conv2D('conv2_1', 128) .Conv2D('conv2_1', 128)
.Conv2D('conv2_2', 128) .Conv2D('conv2_2', 128)
.MaxPooling('pool2', 2) .MaxPooling('pool2', 2)
# 56 # 56
.Conv2D('conv3_1', 256) .Conv2D('conv3_1', 256)
.Conv2D('conv3_2', 256) .Conv2D('conv3_2', 256)
.Conv2D('conv3_3', 256) .Conv2D('conv3_3', 256)
.MaxPooling('pool3', 2) .MaxPooling('pool3', 2)
# 28 # 28
.Conv2D('conv4_1', 512) .Conv2D('conv4_1', 512)
.Conv2D('conv4_2', 512) .Conv2D('conv4_2', 512)
.Conv2D('conv4_3', 512) .Conv2D('conv4_3', 512)
.MaxPooling('pool4', 2) .MaxPooling('pool4', 2)
# 14 # 14
.Conv2D('conv5_1', 512) .Conv2D('conv5_1', 512)
.Conv2D('conv5_2', 512) .Conv2D('conv5_2', 512)
.Conv2D('conv5_3', 512) .Conv2D('conv5_3', 512)
.MaxPooling('pool5', 2) .MaxPooling('pool5', 2)
# 7 # 7
.FullyConnected('fc6', 4096, nl=tf.nn.relu) .FullyConnected('fc6', 4096, nl=tf.nn.relu)
.Dropout('drop0', 0.5) .Dropout('drop0', 0.5)
.FullyConnected('fc7', 4096, nl=tf.nn.relu) .FullyConnected('fc7', 4096, nl=tf.nn.relu)
.Dropout('drop1', 0.5) .Dropout('drop1', 0.5)
.FullyConnected('fc8', out_dim=1000, nl=tf.identity)()) .FullyConnected('fc8', out_dim=1000, nl=tf.identity)())
prob = tf.nn.softmax(logits, name='prob') prob = tf.nn.softmax(logits, name='prob')
def run_test(path, input): def run_test(path, input):
param_dict = np.load(path, encoding='latin1').item() param_dict = np.load(path, encoding='latin1').item()
predict_func = OfflinePredictor(PredictConfig( predict_func = OfflinePredictor(PredictConfig(
...@@ -70,7 +74,7 @@ def run_test(path, input): ...@@ -70,7 +74,7 @@ def run_test(path, input):
im = cv2.imread(input) im = cv2.imread(input)
assert im is not None, input assert im is not None, input
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
im = cv2.resize(im, (224, 224)).reshape((1,224,224,3)).astype('float32') im = cv2.resize(im, (224, 224)).reshape((1, 224, 224, 3)).astype('float32')
im = im - 110 im = im - 110
outputs = predict_func([im])[0] outputs = predict_func([im])[0]
prob = outputs[0] prob = outputs[0]
......
...@@ -5,7 +5,8 @@ ...@@ -5,7 +5,8 @@
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
import os, sys import os
import sys
import argparse import argparse
""" """
...@@ -18,12 +19,14 @@ from tensorpack import * ...@@ -18,12 +19,14 @@ from tensorpack import *
IMAGE_SIZE = 28 IMAGE_SIZE = 28
class Model(ModelDesc): class Model(ModelDesc):
def _get_input_vars(self): def _get_input_vars(self):
"""Define all the input variables (with type, shape, name) that'll be """Define all the input variables (with type, shape, name) that'll be
fed into the graph to produce a cost. """ fed into the graph to produce a cost. """
return [InputVar(tf.float32, (None, IMAGE_SIZE, IMAGE_SIZE), 'input'), return [InputVar(tf.float32, (None, IMAGE_SIZE, IMAGE_SIZE), 'input'),
InputVar(tf.int32, (None,), 'label') ] InputVar(tf.int32, (None,), 'label')]
def _build_graph(self, input_vars): def _build_graph(self, input_vars):
"""This function should build the model which takes the input variables """This function should build the model which takes the input variables
...@@ -47,19 +50,20 @@ class Model(ModelDesc): ...@@ -47,19 +50,20 @@ class Model(ModelDesc):
l = MaxPooling('pool0', image, 2) l = MaxPooling('pool0', image, 2)
... """ ... """
logits = (LinearWrap(image) # the starting brace is only for line-breaking logits = (LinearWrap(image) # the starting brace is only for line-breaking
.Conv2D('conv0') .Conv2D('conv0')
.MaxPooling('pool0', 2) .MaxPooling('pool0', 2)
.Conv2D('conv1', padding='SAME') .Conv2D('conv1', padding='SAME')
.Conv2D('conv2') .Conv2D('conv2')
.MaxPooling('pool1', 2) .MaxPooling('pool1', 2)
.Conv2D('conv3') .Conv2D('conv3')
.FullyConnected('fc0', 512, nl=tf.nn.relu) .FullyConnected('fc0', 512, nl=tf.nn.relu)
.Dropout('dropout', 0.5) .Dropout('dropout', 0.5)
.FullyConnected('fc1', out_dim=10, nl=tf.identity)()) .FullyConnected('fc1', out_dim=10, nl=tf.identity)())
prob = tf.nn.softmax(logits, name='prob') # a Bx10 with probabilities prob = tf.nn.softmax(logits, name='prob') # a Bx10 with probabilities
cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, label) # a vector of length B with loss of each sample cost = tf.nn.sparse_softmax_cross_entropy_with_logits(
logits, label) # a vector of length B with loss of each sample
cost = tf.reduce_mean(cost, name='cross_entropy_loss') # the average cross-entropy loss cost = tf.reduce_mean(cost, name='cross_entropy_loss') # the average cross-entropy loss
# compute the "incorrect vector", for the callback ClassificationError to use at validation time # compute the "incorrect vector", for the callback ClassificationError to use at validation time
...@@ -83,11 +87,13 @@ class Model(ModelDesc): ...@@ -83,11 +87,13 @@ class Model(ModelDesc):
summary.add_param_summary([('.*/W', ['histogram'])]) summary.add_param_summary([('.*/W', ['histogram'])])
self.cost = tf.add_n([wd_cost, cost], name='cost') self.cost = tf.add_n([wd_cost, cost], name='cost')
def get_data(): def get_data():
train = BatchData(dataset.Mnist('train'), 128) train = BatchData(dataset.Mnist('train'), 128)
test = BatchData(dataset.Mnist('test'), 256, remainder=True) test = BatchData(dataset.Mnist('test'), 256, remainder=True)
return train, test return train, test
def get_config(): def get_config():
# automatically setup the directory train_log/mnist-convnet for logging # automatically setup the directory train_log/mnist-convnet for logging
logger.auto_set_dir() logger.auto_set_dir()
...@@ -135,4 +141,3 @@ if __name__ == '__main__': ...@@ -135,4 +141,3 @@ if __name__ == '__main__':
if args.load: if args.load:
config.session_init = SaverRestore(args.load) config.session_init = SaverRestore(args.load)
SimpleTrainer(config).train() SimpleTrainer(config).train()
...@@ -20,10 +20,12 @@ Each epoch iterates over the whole training set (4721 iterations). ...@@ -20,10 +20,12 @@ Each epoch iterates over the whole training set (4721 iterations).
Speed is about 43 it/s on TitanX. Speed is about 43 it/s on TitanX.
""" """
class Model(ModelDesc): class Model(ModelDesc):
def _get_input_vars(self): def _get_input_vars(self):
return [InputVar(tf.float32, [None, 40, 40, 3], 'input'), return [InputVar(tf.float32, [None, 40, 40, 3], 'input'),
InputVar(tf.int32, [None], 'label') ] InputVar(tf.int32, [None], 'label')]
def _build_graph(self, input_vars): def _build_graph(self, input_vars):
image, label = input_vars image, label = input_vars
...@@ -32,16 +34,16 @@ class Model(ModelDesc): ...@@ -32,16 +34,16 @@ class Model(ModelDesc):
with argscope(Conv2D, nl=BNReLU, use_bias=False): with argscope(Conv2D, nl=BNReLU, use_bias=False):
logits = (LinearWrap(image) logits = (LinearWrap(image)
.Conv2D('conv1', 24, 5, padding='VALID') .Conv2D('conv1', 24, 5, padding='VALID')
.MaxPooling('pool1', 2, padding='SAME') .MaxPooling('pool1', 2, padding='SAME')
.Conv2D('conv2', 32, 3, padding='VALID') .Conv2D('conv2', 32, 3, padding='VALID')
.Conv2D('conv3', 32, 3, padding='VALID') .Conv2D('conv3', 32, 3, padding='VALID')
.MaxPooling('pool2', 2, padding='SAME') .MaxPooling('pool2', 2, padding='SAME')
.Conv2D('conv4', 64, 3, padding='VALID') .Conv2D('conv4', 64, 3, padding='VALID')
.Dropout('drop', 0.5) .Dropout('drop', 0.5)
.FullyConnected('fc0', 512, .FullyConnected('fc0', 512,
b_init=tf.constant_initializer(0.1), nl=tf.nn.relu) b_init=tf.constant_initializer(0.1), nl=tf.nn.relu)
.FullyConnected('linear', out_dim=10, nl=tf.identity)()) .FullyConnected('linear', out_dim=10, nl=tf.identity)())
prob = tf.nn.softmax(logits, name='output') prob = tf.nn.softmax(logits, name='output')
# compute the number of failed samples, for ClassificationError to use at test time # compute the number of failed samples, for ClassificationError to use at test time
...@@ -58,6 +60,7 @@ class Model(ModelDesc): ...@@ -58,6 +60,7 @@ class Model(ModelDesc):
add_param_summary([('.*/W', ['histogram', 'rms'])]) # monitor W add_param_summary([('.*/W', ['histogram', 'rms'])]) # monitor W
self.cost = tf.add_n([cost, wd_cost], name='cost') self.cost = tf.add_n([cost, wd_cost], name='cost')
def get_data(): def get_data():
d1 = dataset.SVHNDigit('train') d1 = dataset.SVHNDigit('train')
d2 = dataset.SVHNDigit('extra') d2 = dataset.SVHNDigit('extra')
...@@ -67,20 +70,21 @@ def get_data(): ...@@ -67,20 +70,21 @@ def get_data():
augmentors = [ augmentors = [
imgaug.Resize((40, 40)), imgaug.Resize((40, 40)),
imgaug.Brightness(30), imgaug.Brightness(30),
imgaug.Contrast((0.5,1.5)), imgaug.Contrast((0.5, 1.5)),
imgaug.GaussianDeform( # this is slow. only use it when you have lots of cpus imgaug.GaussianDeform( # this is slow. only use it when you have lots of cpus
[(0.2, 0.2), (0.2, 0.8), (0.8,0.8), (0.8,0.2)], [(0.2, 0.2), (0.2, 0.8), (0.8, 0.8), (0.8, 0.2)],
(40,40), 0.2, 3), (40, 40), 0.2, 3),
] ]
data_train = AugmentImageComponent(data_train, augmentors) data_train = AugmentImageComponent(data_train, augmentors)
data_train = BatchData(data_train, 128) data_train = BatchData(data_train, 128)
data_train = PrefetchData(data_train, 5, 5) data_train = PrefetchData(data_train, 5, 5)
augmentors = [ imgaug.Resize((40, 40)) ] augmentors = [imgaug.Resize((40, 40))]
data_test = AugmentImageComponent(data_test, augmentors) data_test = AugmentImageComponent(data_test, augmentors)
data_test = BatchData(data_test, 128, remainder=True) data_test = BatchData(data_test, 128, remainder=True)
return data_train, data_test return data_train, data_test
def get_config(): def get_config():
logger.auto_set_dir() logger.auto_set_dir()
...@@ -100,7 +104,7 @@ def get_config(): ...@@ -100,7 +104,7 @@ def get_config():
callbacks=Callbacks([ callbacks=Callbacks([
StatPrinter(), ModelSaver(), StatPrinter(), ModelSaver(),
InferenceRunner(data_test, InferenceRunner(data_test,
[ScalarStats('cost'), ClassificationError()]) [ScalarStats('cost'), ClassificationError()])
]), ]),
model=Model(), model=Model(),
step_per_epoch=step_per_epoch, step_per_epoch=step_per_epoch,
......