Commit 233b3b90 authored by Yuxin Wu

run autopep8 over examples

parent fb2a051c
......@@ -6,11 +6,15 @@
import numpy as np
import tensorflow as tf
import os, sys, re, time
import os
import sys
import re
import time
import random
import argparse
import subprocess
import multiprocessing, threading
import multiprocessing
import threading
from collections import deque
from tensorpack import *
......@@ -47,6 +51,7 @@ NUM_ACTIONS = None
ROM_FILE = None
METHOD = None
def get_player(viz=False, train=False):
pl = AtariPlayer(ROM_FILE, frame_skip=ACTION_REPEAT,
image_shape=IMAGE_SIZE[::-1], viz=viz, live_lost_as_eoe=train)
......@@ -59,15 +64,18 @@ def get_player(viz=False, train=False):
return pl
common.get_player = get_player # so that eval functions in common can use the player
class Model(ModelDesc):
def _get_input_vars(self):
if NUM_ACTIONS is None:
p = get_player(); del p
p = get_player()
del p
return [InputVar(tf.float32, (None,) + IMAGE_SHAPE3, 'state'),
InputVar(tf.int64, (None,), 'action'),
InputVar(tf.float32, (None,), 'reward'),
InputVar(tf.float32, (None,) + IMAGE_SHAPE3, 'next_state'),
InputVar(tf.bool, (None,), 'isOver') ]
InputVar(tf.bool, (None,), 'isOver')]
def _get_DQN_prediction(self, image):
""" image: [0,255]"""
......@@ -101,7 +109,7 @@ class Model(ModelDesc):
state, action, reward, next_state, isOver = inputs
self.predict_value = self._get_DQN_prediction(state)
action_onehot = tf.one_hot(action, NUM_ACTIONS, 1.0, 0.0)
pred_action_value = tf.reduce_sum(self.predict_value * action_onehot, 1) #N,
pred_action_value = tf.reduce_sum(self.predict_value * action_onehot, 1) # N,
max_pred_reward = tf.reduce_mean(tf.reduce_max(
self.predict_value, 1), name='predict_reward')
add_moving_summary(max_pred_reward)
......@@ -125,7 +133,7 @@ class Model(ModelDesc):
self.cost = tf.truediv(symbf.huber_loss(target - pred_action_value),
tf.cast(BATCH_SIZE, tf.float32), name='cost')
summary.add_param_summary([('conv.*/W', ['histogram', 'rms']),
('fc.*/W', ['histogram', 'rms']) ]) # monitor all W
('fc.*/W', ['histogram', 'rms'])]) # monitor all W
def update_target_param(self):
vars = tf.trainable_variables()
......@@ -142,6 +150,7 @@ class Model(ModelDesc):
return [MapGradient(lambda grad: tf.clip_by_global_norm([grad], 5)[0][0]),
SummaryGradient()]
def get_config():
logger.auto_set_dir()
......@@ -213,4 +222,3 @@ if __name__ == '__main__':
if args.load:
config.session_init = SaverRestore(args.load)
QueueInputTrainer(config).train()
......@@ -4,7 +4,8 @@
# Author: Yuxin Wu <ppwwyyxxc@gmail.com>
import numpy as np
import time, os
import time
import os
import cv2
from collections import deque
import threading
......@@ -22,13 +23,15 @@ __all__ = ['AtariPlayer']
ROM_URL = "https://github.com/openai/atari-py/tree/master/atari_py/atari_roms"
_ALE_LOCK = threading.Lock()
class AtariPlayer(RLEnvironment):
"""
A wrapper for atari emulator.
Will automatically restart when a real episode ends (isOver might be just
lost of lives but not game over).
"""
def __init__(self, rom_file, viz=0, height_range=(None,None),
def __init__(self, rom_file, viz=0, height_range=(None, None),
frame_skip=4, image_shape=(84, 84), nullop_start=30,
live_lost_as_eoe=True):
"""
......@@ -84,7 +87,6 @@ class AtariPlayer(RLEnvironment):
self.width, self.height = self.ale.getScreenDims()
self.actions = self.ale.getMinimalActionSet()
self.live_lost_as_eoe = live_lost_as_eoe
self.frame_skip = frame_skip
self.nullop_start = nullop_start
......@@ -112,7 +114,7 @@ class AtariPlayer(RLEnvironment):
if isinstance(self.viz, float):
cv2.imshow(self.windowname, ret)
time.sleep(self.viz)
ret = ret[self.height_range[0]:self.height_range[1],:].astype('float32')
ret = ret[self.height_range[0]:self.height_range[1], :].astype('float32')
# 0.299,0.587.0.114. same as rgb2y in torch/image
ret = cv2.cvtColor(ret, cv2.COLOR_RGB2GRAY)
ret = cv2.resize(ret, self.image_shape)
......@@ -169,7 +171,7 @@ if __name__ == '__main__':
import time
def benchmark():
a = AtariPlayer(sys.argv[1], viz=False, height_range=(28,-8))
a = AtariPlayer(sys.argv[1], viz=False, height_range=(28, -8))
num = a.get_action_space().num_actions()
rng = get_rng(num)
start = time.time()
......@@ -184,7 +186,8 @@ if __name__ == '__main__':
print(time.time() - start)
if len(sys.argv) == 3 and sys.argv[2] == 'benchmark':
import threading, multiprocessing
import threading
import multiprocessing
for k in range(3):
#th = multiprocessing.Process(target=benchmark)
th = threading.Thread(target=benchmark)
......@@ -193,7 +196,7 @@ if __name__ == '__main__':
benchmark()
else:
a = AtariPlayer(sys.argv[1],
viz=0.03, height_range=(28,-8))
viz=0.03, height_range=(28, -8))
num = a.get_action_space().num_actions()
rng = get_rng(num)
import time
......@@ -204,6 +207,5 @@ if __name__ == '__main__':
print(act)
r, o = a.action(act)
a.current_state()
#time.sleep(0.1)
# time.sleep(0.1)
print(r, o)
......@@ -2,8 +2,10 @@
# -*- coding: utf-8 -*-
# File: common.py
# Author: Yuxin Wu <ppwwyyxxc@gmail.com>
import random, time
import threading, multiprocessing
import random
import time
import threading
import multiprocessing
import numpy as np
from tqdm import tqdm
from six.moves import queue
......@@ -16,6 +18,7 @@ from tensorpack.utils.stats import *
global get_player
get_player = None
def play_one_episode(player, func, verbose=False):
def f(s):
spc = player.get_action_space()
......@@ -27,6 +30,7 @@ def play_one_episode(player, func, verbose=False):
return act
return np.mean(player.play_one_episode(f))
def play_model(cfg):
player = get_player(viz=0.01)
predfunc = get_predict_func(cfg)
......@@ -34,8 +38,10 @@ def play_model(cfg):
score = play_one_episode(player, predfunc)
print("Total:", score)
def eval_with_funcs(predict_funcs, nr_eval):
class Worker(StoppableThread):
def __init__(self, func, queue):
super(Worker, self).__init__()
self._func = func
......@@ -51,7 +57,7 @@ def eval_with_funcs(predict_funcs, nr_eval):
while not self.stopped():
try:
score = play_one_episode(player, self.func)
#print "Score, ", score
# print "Score, ", score
except RuntimeError:
return
self.queue_put_stoppable(self.q, score)
......@@ -68,8 +74,10 @@ def eval_with_funcs(predict_funcs, nr_eval):
r = q.get()
stat.feed(r)
logger.info("Waiting for all the workers to finish the last run...")
for k in threads: k.stop()
for k in threads: k.join()
for k in threads:
k.stop()
for k in threads:
k.join()
while q.qsize():
r = q.get()
stat.feed(r)
......@@ -80,13 +88,16 @@ def eval_with_funcs(predict_funcs, nr_eval):
return (stat.average, stat.max)
return (0, 0)
def eval_model_multithread(cfg, nr_eval):
    """Evaluate a model through ``eval_with_funcs`` and log the scores.

    One predict function is built from ``cfg`` with ``get_predict_func`` and
    the same function object is repeated once per worker slot.

    :param cfg: config handed to ``get_predict_func``.
    :param nr_eval: forwarded unchanged to ``eval_with_funcs``.
    """
    func = get_predict_func(cfg)
    # Half of the available cores, capped at 8 -- but never 0: on a
    # single-core machine ``cpu_count() // 2`` is 0, which would hand
    # ``eval_with_funcs`` an empty list of predict functions.
    nr_proc = max(1, min(multiprocessing.cpu_count() // 2, 8))
    # Local names avoid shadowing the builtin ``max`` used just above.
    mean_score, max_score = eval_with_funcs([func] * nr_proc, nr_eval)
    logger.info("Average Score: {}; Max Score: {}".format(mean_score, max_score))
class Evaluator(Callback):
def __init__(self, nr_eval, input_names, output_names):
self.eval_episode = nr_eval
self.input_names = input_names
......
......@@ -13,26 +13,31 @@ from tensorpack.utils.argtools import memoized
import matplotlib.pyplot as plt
_CM = plt.get_cmap('jet')
def colorize(img, heatmap):
    """Overlay a colour-mapped heatmap on an image with equal weights.

    img: bgr, [0,255]
    heatmap: [0,1]
    """
    mapped = _CM(heatmap)
    # Pick channels 2, 1, 0 (reversing the first three) so the colour order
    # matches the BGR image, then scale to the image's value range.
    overlay = mapped[:, :, [2, 1, 0]] * 255.0
    return img * 0.5 + overlay * 0.5
@memoized
def get_gaussian_map():
    """Build a 368x368 Gaussian bump centred on pixel (184, 184).

    The value at row ``y``, column ``x`` is
    ``exp(-((x - 184)**2 + (y - 184)**2) / (2 * sigma**2))`` with ``sigma = 21``.

    :return: float32 array of shape (1, 368, 368, 1).
    """
    size = 368
    sigma = 21
    # 368 / 2 is exactly 184 whether "/" is integer or true division.
    offsets = np.arange(size, dtype='float64') - size / 2
    squared = offsets * offsets
    # Broadcast a row vector against a column vector to get every
    # (y, x) squared distance at once instead of looping over 368 * 368 pixels.
    dist_sq = squared[np.newaxis, :] + squared[:, np.newaxis]
    exponent = dist_sq / 2.0 / (sigma ** 2)
    gaussian_map = np.exp(-exponent).astype('float32')
    return gaussian_map.reshape((1, size, size, 1))
class Model(ModelDesc):
def _get_input_vars(self):
return [InputVar(tf.float32, (None, 368, 368, 3), 'input'),
InputVar(tf.float32, (None, 368, 368, 15), 'label'),
......@@ -43,7 +48,7 @@ class Model(ModelDesc):
image = image / 256.0 - 0.5
gmap = tf.constant(get_gaussian_map())
gmap = tf.pad(gmap, [[0,0],[0,1],[0,1],[0,0]])
gmap = tf.pad(gmap, [[0, 0], [0, 1], [0, 1], [0, 0]])
pool_center = AvgPooling('mappool', gmap, 9, stride=8, padding='VALID')
with argscope(Conv2D, kernel_shape=3, nl=tf.nn.relu,
W_init=tf.random_normal_initializer(stddev=0.01)):
......@@ -89,7 +94,8 @@ class Model(ModelDesc):
out5 = add_stage(5, out4)
out6 = add_stage(6, out4)
resized_map = tf.image.resize_bilinear(out6,
[368,368], name='resized_map')
[368, 368], name='resized_map')
def run_test(model_path, img_file):
param_dict = np.load(model_path, encoding='latin1').item()
......@@ -101,9 +107,9 @@ def run_test(model_path, img_file):
))
im = cv2.imread(img_file, cv2.IMREAD_COLOR).astype('float32')
im = cv2.resize(im, (368,368))
im = cv2.resize(im, (368, 368))
out = predict_func([[im]])[0][0]
hm = out[:,:,:14].sum(axis=2)
hm = out[:, :, :14].sum(axis=2)
viz = colorize(im, hm)
cv2.imwrite("output.jpg", viz)
......
......@@ -5,7 +5,9 @@
from tensorpack import ProxyDataFlow, get_rng
class DisturbLabel(ProxyDataFlow):
def __init__(self, ds, prob):
super(DisturbLabel, self).__init__(ds)
self.prob = prob
......@@ -19,4 +21,3 @@ class DisturbLabel(ProxyDataFlow):
if self.rng.rand() < self.prob:
l = self.rng.choice(10)
yield [img, l]
......@@ -5,7 +5,8 @@
import numpy as np
import tensorflow as tf
import os, sys
import os
import sys
import argparse
from tensorpack import *
......@@ -16,6 +17,7 @@ mnist_example = imp.load_source('mnist_example',
os.path.join(os.path.dirname(__file__), '..', 'mnist-convnet.py'))
get_config = mnist_example.get_config
def get_data():
dataset_train = BatchData(DisturbLabel(dataset.Mnist('train'), args.prob), 128)
dataset_test = BatchData(dataset.Mnist('test'), 256, remainder=True)
......@@ -24,7 +26,9 @@ mnist_example.get_data = get_data
IMAGE_SIZE = 28
class Model(mnist_example.Model):
def _build_graph(self, input_vars):
image, label = input_vars
image = tf.expand_dims(image, 3)
......@@ -63,4 +67,3 @@ if __name__ == '__main__':
if args.load:
config.session_init = SaverRestore(args.load)
QueueInputTrainer(config).train()
......@@ -16,20 +16,20 @@ import imp
# Load the sibling SVHN example by file path: its file name contains dashes,
# so it cannot be brought in with a plain ``import`` statement.
svhn_example = imp.load_source('svhn_example',
                               os.path.join(os.path.dirname(__file__), '..', 'svhn-digit-convnet.py'))
# Re-export the pieces of that example which this script reuses.
Model = svhn_example.Model
get_config = svhn_example.get_config
def get_data():
d1 = dataset.SVHNDigit('train')
d2 = dataset.SVHNDigit('extra')
data_train = RandomMixData([d1, d2])
data_train = DisturbLabel(data_train, args.prob)
data_test = dataset.SVHNDigit('test')
d1=dataset.SVHNDigit('train')
d2=dataset.SVHNDigit('extra')
data_train=RandomMixData([d1, d2])
data_train=DisturbLabel(data_train, args.prob)
data_test=dataset.SVHNDigit('test')
augmentors = [
augmentors=[
imgaug.Resize((40, 40)),
imgaug.Brightness(30),
imgaug.Contrast((0.5,1.5)),
imgaug.Contrast((0.5, 1.5)),
]
data_train = AugmentImageComponent(data_train, augmentors)
data_train = BatchData(data_train, 128)
......
......@@ -9,7 +9,8 @@ import argparse
import numpy as np
import multiprocessing
import msgpack
import os, sys
import os
import sys
from tensorpack import *
from tensorpack.tfutils.symbolic_functions import *
......@@ -69,10 +70,12 @@ BITG = 6
TOTAL_BATCH_SIZE = 128
BATCH_SIZE = 64
class Model(ModelDesc):
def _get_input_vars(self):
return [InputVar(tf.float32, [None, 224, 224, 3], 'input'),
InputVar(tf.int32, [None], 'label') ]
InputVar(tf.int32, [None], 'label')]
def _build_graph(self, input_vars):
image, label = input_vars
......@@ -81,6 +84,7 @@ class Model(ModelDesc):
fw, fa, fg = get_dorefa(BITW, BITA, BITG)
# monkey-patch tf.get_variable to apply fw
old_get_variable = tf.get_variable
def new_get_variable(name, shape=None, **kwargs):
v = old_get_variable(name, shape, **kwargs)
# don't binarize first and last layer
......@@ -156,25 +160,28 @@ class Model(ModelDesc):
add_param_summary([('.*/W', ['histogram', 'rms'])])
self.cost = tf.add_n([cost, wd_cost], name='cost')
def get_data(dataset_name):
isTrain = dataset_name == 'train'
ds = dataset.ILSVRC12(args.data, dataset_name, shuffle=isTrain)
meta = dataset.ILSVRCMeta()
pp_mean = meta.get_per_pixel_mean()
pp_mean_224 = pp_mean[16:-16,16:-16,:]
pp_mean_224 = pp_mean[16:-16, 16:-16, :]
if isTrain:
class Resize(imgaug.ImageAugmentor):
def __init__(self):
self._init(locals())
def _augment(self, img, _):
h, w = img.shape[:2]
size = 224
scale = self.rng.randint(size, 308) * 1.0 / min(h, w)
scaleX = scale * self.rng.uniform(0.85, 1.15)
scaleY = scale * self.rng.uniform(0.85, 1.15)
desSize = map(int, (max(size, min(w, scaleX * w)),\
desSize = map(int, (max(size, min(w, scaleX * w)),
max(size, min(h, scaleY * h))))
dst = cv2.resize(img, tuple(desSize),
interpolation=cv2.INTER_CUBIC)
......@@ -186,11 +193,11 @@ def get_data(dataset_name):
imgaug.RandomApplyAug(imgaug.GaussianBlur(3), 0.5),
imgaug.Brightness(30, True),
imgaug.Gamma(),
imgaug.Contrast((0.8,1.2), True),
imgaug.Contrast((0.8, 1.2), True),
imgaug.RandomCrop((224, 224)),
imgaug.RandomApplyAug(imgaug.JpegNoise(), 0.8),
imgaug.RandomApplyAug(imgaug.GaussianDeform(
[(0.2, 0.2), (0.2, 0.8), (0.8,0.8), (0.8,0.2)],
[(0.2, 0.2), (0.2, 0.8), (0.8, 0.8), (0.8, 0.2)],
(224, 224), 0.2, 3), 0.1),
imgaug.Flip(horiz=True),
imgaug.MapImage(lambda x: x - pp_mean_224),
......@@ -199,7 +206,7 @@ def get_data(dataset_name):
def resize_func(im):
h, w = im.shape[:2]
scale = 256.0 / min(h, w)
desSize = map(int, (max(224, min(w, scale * w)),\
desSize = map(int, (max(224, min(w, scale * w)),
max(224, min(h, scale * h))))
im = cv2.resize(im, tuple(desSize), interpolation=cv2.INTER_CUBIC)
return im
......@@ -214,6 +221,7 @@ def get_data(dataset_name):
ds = PrefetchDataZMQ(ds, min(12, multiprocessing.cpu_count()))
return ds
def get_config():
logger.auto_set_dir()
......@@ -228,7 +236,7 @@ def get_config():
optimizer=tf.train.AdamOptimizer(lr, epsilon=1e-5),
callbacks=Callbacks([
StatPrinter(), ModelSaver(),
#HumanHyperParamSetter('learning_rate'),
# HumanHyperParamSetter('learning_rate'),
ScheduledHyperParamSetter(
'learning_rate', [(56, 2e-5), (64, 4e-6)]),
InferenceRunner(data_test,
......@@ -241,6 +249,7 @@ def get_config():
max_epoch=100,
)
def run_image(model, sess_init, inputs):
pred_config = PredictConfig(
model=model,
......@@ -252,13 +261,13 @@ def run_image(model, sess_init, inputs):
predict_func = get_predict_func(pred_config)
meta = dataset.ILSVRCMeta()
pp_mean = meta.get_per_pixel_mean()
pp_mean_224 = pp_mean[16:-16,16:-16,:]
pp_mean_224 = pp_mean[16:-16, 16:-16, :]
words = meta.get_synset_words_1000()
def resize_func(im):
h, w = im.shape[:2]
scale = 256.0 / min(h, w)
desSize = map(int, (max(224, min(w, scale * w)),\
desSize = map(int, (max(224, min(w, scale * w)),
max(224, min(h, scale * h))))
im = cv2.resize(im, tuple(desSize), interpolation=cv2.INTER_CUBIC)
return im
......@@ -272,7 +281,7 @@ def run_image(model, sess_init, inputs):
img = cv2.imread(f).astype('float32')
assert img is not None
img = transformers.augment(img)[np.newaxis, :,:,:]
img = transformers.augment(img)[np.newaxis, :, :, :]
outputs = predict_func([img])[0]
prob = outputs[0]
ret = prob.argsort()[-10:][::-1]
......
......@@ -6,6 +6,7 @@
import tensorflow as tf
from tensorpack.utils.argtools import memoized
@memoized
def get_dorefa(bitW, bitA, bitG):
"""
......@@ -15,7 +16,7 @@ def get_dorefa(bitW, bitA, bitG):
G = tf.get_default_graph()
def quantize(x, k):
n = float(2**k-1)
n = float(2**k - 1)
with G.gradient_override_map({"Floor": "Identity"}):
return tf.floor(x * n + 0.5) / n
......@@ -39,11 +40,11 @@ def get_dorefa(bitW, bitA, bitG):
def grad_fg(op, x):
rank = x.get_shape().ndims
assert rank is not None
maxx = tf.reduce_max(tf.abs(x), list(range(1,rank)), keep_dims=True)
maxx = tf.reduce_max(tf.abs(x), list(range(1, rank)), keep_dims=True)
x = x / maxx
n = float(2**bitG-1)
n = float(2**bitG - 1)
x = x * 0.5 + 0.5 + tf.random_uniform(
tf.shape(x), minval=-0.5/n, maxval=0.5/n)
tf.shape(x), minval=-0.5 / n, maxval=0.5 / n)
x = tf.clip_by_value(x, 0.0, 1.0)
x = quantize(x, bitG) - 0.5
return x * maxx * 2
......@@ -54,4 +55,3 @@ def get_dorefa(bitW, bitA, bitG):
with G.gradient_override_map({"Identity": "FGGrad"}):
return tf.identity(x)
return fw, fa, fg
......@@ -40,10 +40,12 @@ BITW = 1
BITA = 2
BITG = 4
class Model(ModelDesc):
def _get_input_vars(self):
return [InputVar(tf.float32, [None, 40, 40, 3], 'input'),
InputVar(tf.int32, [None], 'label') ]
InputVar(tf.int32, [None], 'label')]
def _build_graph(self, input_vars):
image, label = input_vars
......@@ -52,6 +54,7 @@ class Model(ModelDesc):
fw, fa, fg = get_dorefa(BITW, BITA, BITG)
# monkey-patch tf.get_variable to apply fw
old_get_variable = tf.get_variable
def new_get_variable(name, shape=None, **kwargs):
v = old_get_variable(name, shape, **kwargs)
# don't binarize first and last layer
......@@ -62,9 +65,9 @@ class Model(ModelDesc):
return fw(v)
tf.get_variable = new_get_variable
def cabs(x):
return tf.minimum(1.0, tf.abs(x), name='cabs')
def activate(x):
return fa(cabs(x))
......@@ -122,6 +125,7 @@ class Model(ModelDesc):
add_param_summary([('.*/W', ['histogram', 'rms'])])
self.cost = tf.add_n([cost, wd_cost], name='cost')
def get_config():
logger.auto_set_dir()
......@@ -134,8 +138,8 @@ def get_config():
augmentors = [
imgaug.Resize((40, 40)),
imgaug.Brightness(30),
imgaug.Contrast((0.5,1.5)),
#imgaug.GaussianDeform( # this is slow but helpful. only use it when you have lots of cpus
imgaug.Contrast((0.5, 1.5)),
# imgaug.GaussianDeform( # this is slow but helpful. only use it when you have lots of cpus
#[(0.2, 0.2), (0.2, 0.8), (0.8,0.8), (0.8,0.2)],
#(40,40), 0.2, 3),
]
......@@ -144,7 +148,7 @@ def get_config():
data_train = PrefetchDataZMQ(data_train, 5)
step_per_epoch = data_train.size()
augmentors = [ imgaug.Resize((40, 40)) ]
augmentors = [imgaug.Resize((40, 40))]
data_test = AugmentImageComponent(data_test, augmentors)
data_test = BatchData(data_test, 128, remainder=True)
......
......@@ -5,8 +5,10 @@
import numpy as np
import tensorflow as tf
import glob, pickle
import os, sys
import glob
import pickle
import os
import sys
import argparse
import cv2
......@@ -32,15 +34,17 @@ CFG.SHAPE = 64
CFG.BATCH = 128
CFG.Z_DIM = 100
class Model(ModelDesc):
def _get_input_vars(self):
return [InputVar(tf.float32, (None, CFG.SHAPE, CFG.SHAPE, 3), 'input') ]
return [InputVar(tf.float32, (None, CFG.SHAPE, CFG.SHAPE, 3), 'input')]
def generator(self, z):
""" return a image generated from z"""
nf = 64
l = FullyConnected('fc0', z, nf * 8 * 4 * 4, nl=tf.identity)
l = tf.reshape(l, [-1, 4, 4, nf*8])
l = tf.reshape(l, [-1, 4, 4, nf * 8])
l = BNReLU(l)
with argscope(Deconv2D, nl=BNReLU, kernel_shape=4, stride=2):
l = Deconv2D('deconv1', l, [8, 8, nf * 4])
......@@ -57,11 +61,11 @@ class Model(ModelDesc):
argscope(LeakyReLU, alpha=0.2):
l = (LinearWrap(imgs)
.Conv2D('conv0', nf, nl=LeakyReLU)
.Conv2D('conv1', nf*2)
.Conv2D('conv1', nf * 2)
.BatchNorm('bn1').LeakyReLU()
.Conv2D('conv2', nf*4)
.Conv2D('conv2', nf * 4)
.BatchNorm('bn2').LeakyReLU()
.Conv2D('conv3', nf*8)
.Conv2D('conv3', nf * 8)
.BatchNorm('bn3').LeakyReLU()
.FullyConnected('fct', 1, nl=tf.identity)())
return l
......@@ -88,16 +92,18 @@ class Model(ModelDesc):
self.g_vars = [v for v in all_vars if v.name.startswith('gen/')]
self.d_vars = [v for v in all_vars if v.name.startswith('discrim/')]
def get_data():
    """Build the training dataflow from the ``*.jpg`` files under ``CFG.data``.

    Images are read as 3-channel in shuffled order, passed through
    ``CenterCrop(140)`` and ``Resize(64)``, batched by ``CFG.BATCH`` and
    wrapped in ``PrefetchDataZMQ`` with 1 as its second argument.
    """
    pattern = CFG.data + '/*.jpg'
    flow = ImageFromFile(glob.glob(pattern), channel=3, shuffle=True)
    flow = AugmentImageComponent(
        flow, [imgaug.CenterCrop(140), imgaug.Resize(64)])
    flow = BatchData(flow, CFG.BATCH)
    return PrefetchDataZMQ(flow, 1)
def get_config():
logger.auto_set_dir()
dataset = get_data()
......@@ -114,6 +120,7 @@ def get_config():
max_epoch=200,
)
def sample(model_path):
pred = PredictConfig(
session_init=get_model_loader(model_path),
......@@ -124,7 +131,7 @@ def sample(model_path):
for o in pred.get_result():
o, zs = o[0] + 1, o[1]
o = o * 128.0
o = o[:,:,:,::-1]
o = o[:, :, :, ::-1]
viz = next(build_patch_list(o, nr_row=10, nr_col=10, viz=True))
if __name__ == '__main__':
......
......@@ -11,7 +11,9 @@ from tensorpack import (FeedfreeTrainer, TowerContext,
from tensorpack.tfutils.summary import summary_moving_average, add_moving_summary
from tensorpack.dataflow import DataFlow
class GANTrainer(FeedfreeTrainer):
def __init__(self, config):
self._input_method = QueueInput(config.dataset)
super(GANTrainer, self).__init__(config)
......@@ -33,14 +35,18 @@ class GANTrainer(FeedfreeTrainer):
def run_step(self):
self.sess.run(self.train_op)
class RandomZData(DataFlow):
    """Endless DataFlow of uniform random arrays drawn between -1 and 1."""

    def __init__(self, shape):
        """:param shape: passed as ``size`` to ``np.random.uniform``."""
        super(RandomZData, self).__init__()
        self.shape = shape

    def get_data(self):
        """Yield single-component datapoints forever; the loop never ends."""
        while True:
            noise = np.random.uniform(-1, 1, size=self.shape)
            yield [noise]
def build_GAN_losses(vecpos, vecneg):
"""
:param vecpos, vecneg: output of the discriminator (logits) for real
......
......@@ -5,8 +5,10 @@
import numpy as np
import tensorflow as tf
import glob, pickle
import os, sys
import glob
import pickle
import os
import sys
import argparse
import cv2
......@@ -38,10 +40,12 @@ OUT_CH = 3
LAMBDA = 100
NF = 64 # number of filter
class Model(ModelDesc):
def _get_input_vars(self):
return [InputVar(tf.float32, (None, SHAPE, SHAPE, IN_CH), 'input') ,
InputVar(tf.float32, (None, SHAPE, SHAPE, OUT_CH), 'output') ]
return [InputVar(tf.float32, (None, SHAPE, SHAPE, IN_CH), 'input'),
InputVar(tf.float32, (None, SHAPE, SHAPE, OUT_CH), 'output')]
def generator(self, imgs):
# imgs: input: 256x256xch
......@@ -52,31 +56,31 @@ class Model(ModelDesc):
with argscope(Conv2D, kernel_shape=4, stride=2,
nl=lambda x, name: LeakyReLU(BatchNorm('bn', x), name=name)):
e1 = Conv2D('conv1', imgs, NF, nl=LeakyReLU)
e2 = Conv2D('conv2', e1, NF*2)
e3 = Conv2D('conv3', e2, NF*4)
e4 = Conv2D('conv4', e3, NF*8)
e5 = Conv2D('conv5', e4, NF*8)
e6 = Conv2D('conv6', e5, NF*8)
e7 = Conv2D('conv7', e6, NF*8)
e8 = Conv2D('conv8', e7, NF*8, nl=BNReLU) # 1x1
e2 = Conv2D('conv2', e1, NF * 2)
e3 = Conv2D('conv3', e2, NF * 4)
e4 = Conv2D('conv4', e3, NF * 8)
e5 = Conv2D('conv5', e4, NF * 8)
e6 = Conv2D('conv6', e5, NF * 8)
e7 = Conv2D('conv7', e6, NF * 8)
e8 = Conv2D('conv8', e7, NF * 8, nl=BNReLU) # 1x1
with argscope(Deconv2D, nl=BNReLU, kernel_shape=4, stride=2):
return (LinearWrap(e8)
.Deconv2D('deconv1', NF*8)
.Deconv2D('deconv1', NF * 8)
.Dropout()
.ConcatWith(3, e7)
.Deconv2D('deconv2', NF*8)
.Deconv2D('deconv2', NF * 8)
.Dropout()
.ConcatWith(3, e6)
.Deconv2D('deconv3', NF*8)
.Deconv2D('deconv3', NF * 8)
.Dropout()
.ConcatWith(3, e5)
.Deconv2D('deconv4', NF*8)
.Deconv2D('deconv4', NF * 8)
.ConcatWith(3, e4)
.Deconv2D('deconv5', NF*4)
.Deconv2D('deconv5', NF * 4)
.ConcatWith(3, e3)
.Deconv2D('deconv6', NF*2)
.Deconv2D('deconv6', NF * 2)
.ConcatWith(3, e2)
.Deconv2D('deconv7', NF*1)
.Deconv2D('deconv7', NF * 1)
.ConcatWith(3, e1)
.Deconv2D('deconv8', OUT_CH, nl=tf.tanh)())
......@@ -86,11 +90,11 @@ class Model(ModelDesc):
with argscope(Conv2D, nl=tf.identity, kernel_shape=4, stride=2):
l = (LinearWrap(l)
.Conv2D('conv0', NF, nl=LeakyReLU)
.Conv2D('conv1', NF*2)
.Conv2D('conv1', NF * 2)
.BatchNorm('bn1').LeakyReLU()
.Conv2D('conv2', NF*4)
.Conv2D('conv2', NF * 4)
.BatchNorm('bn2').LeakyReLU()
.Conv2D('conv3', NF*8, stride=1, padding='VALID')
.Conv2D('conv3', NF * 8, stride=1, padding='VALID')
.BatchNorm('bn3').LeakyReLU()
.Conv2D('convlast', 1, stride=1, padding='VALID')())
return l
......@@ -128,33 +132,36 @@ class Model(ModelDesc):
self.g_vars = [v for v in all_vars if v.name.startswith('gen/')]
self.d_vars = [v for v in all_vars if v.name.startswith('discrim/')]
def split_input(img):
    """
    Cut a side-by-side image pair into its two halves.

    img: an image with shape (s, 2s, 3)
    :return: [input, output]; the halves are swapped when ``args.mode`` is
        'BtoA', and a half is converted to one channel when ``IN_CH`` /
        ``OUT_CH`` is 1.
    """
    side = img.shape[0]
    left_half = img[:, :side, :]
    right_half = img[:, side:, :]
    if args.mode == 'BtoA':
        source, target = right_half, left_half
    else:
        source, target = left_half, right_half
    if IN_CH == 1:
        # cvtColor drops the channel axis; add it back as a trailing axis.
        source = cv2.cvtColor(source, cv2.COLOR_RGB2GRAY)[:, :, np.newaxis]
    if OUT_CH == 1:
        target = cv2.cvtColor(target, cv2.COLOR_RGB2GRAY)[:, :, np.newaxis]
    return [source, target]
def get_data():
    """Build the paired-image training dataflow from ``args.data``.

    Every ``*.jpg`` in the directory is read as a 3-channel image in
    shuffled order and split by ``split_input``. Components 0 and 1 then go
    through ``Resize(286)`` and ``RandomCrop(256)`` before the result is
    batched by ``BATCH`` and wrapped in ``PrefetchDataZMQ``.
    """
    # assume each image is 512x256 split to left and right
    pattern = os.path.join(args.data, '*.jpg')
    flow = ImageFromFile(glob.glob(pattern), channel=3, shuffle=True)
    flow = MapData(flow, lambda dp: split_input(dp[0]))
    crop_augs = [imgaug.Resize(286), imgaug.RandomCrop(256)]
    flow = AugmentImageComponents(flow, crop_augs, (0, 1))
    flow = BatchData(flow, BATCH)
    return PrefetchDataZMQ(flow, 1)
def get_config():
logger.auto_set_dir()
dataset = get_data()
......@@ -171,6 +178,7 @@ def get_config():
max_epoch=300,
)
def sample(datadir, model_path):
pred = PredictConfig(
session_init=get_model_loader(model_path),
......@@ -184,7 +192,7 @@ def sample(datadir, model_path):
pred = SimpleDatasetPredictor(pred, ds)
for o in pred.get_result():
o = o[0][:,:,:,::-1]
o = o[0][:, :, :, ::-1]
viz = next(build_patch_list(o, nr_row=3, nr_col=2, viz=True))
if __name__ == '__main__':
......
......@@ -5,7 +5,8 @@
import numpy as np
import tensorflow as tf
import os, sys
import os
import sys
import cv2
import argparse
......@@ -16,9 +17,11 @@ from GAN import GANTrainer, build_GAN_losses
BATCH = 128
class Model(ModelDesc):
def _get_input_vars(self):
return [InputVar(tf.float32, (None, 28, 28), 'input') ]
return [InputVar(tf.float32, (None, 28, 28), 'input')]
def generator(self, z):
l = FullyConnected('fc0', z, 1024, nl=BNReLU)
......@@ -54,7 +57,7 @@ class Model(ModelDesc):
prior_prob = tf.constant([0.1] * 10, name='prior_prob')
# assume first 10 is categorical
ids = tf.multinomial(tf.zeros([BATCH, 10]), num_samples=1)[:,0]
ids = tf.multinomial(tf.zeros([BATCH, 10]), num_samples=1)[:, 0]
zc = tf.one_hot(ids, 10, name='zc_train')
zc = tf.placeholder_with_default(zc, [None, 10], name='zc')
......@@ -89,11 +92,13 @@ class Model(ModelDesc):
self.g_vars = [v for v in all_vars if v.name.startswith('gen/')]
self.d_vars = [v for v in all_vars if v.name.startswith('discrim/')]
def get_data():
    """Return the MNIST 'train' and 'test' sets concatenated, in batches of ``BATCH``."""
    splits = [dataset.Mnist(name) for name in ('train', 'test')]
    return BatchData(ConcatData(splits), BATCH)
def get_config():
logger.auto_set_dir()
dataset = get_data()
......@@ -110,6 +115,7 @@ def get_config():
max_epoch=100,
)
def sample(model_path):
pred = OfflinePredictor(PredictConfig(
session_init=get_model_loader(model_path),
......@@ -143,4 +149,3 @@ if __name__ == '__main__':
if args.load:
config.session_init = SaverRestore(args.load)
GANTrainer(config).train()
......@@ -8,16 +8,19 @@ import tensorflow as tf
import argparse
import numpy as np
from six.moves import zip
import os, sys
import os
import sys
from tensorpack import *
from tensorpack.tfutils.symbolic_functions import *
from tensorpack.tfutils.summary import *
class Model(ModelDesc):
def _get_input_vars(self):
return [InputVar(tf.float32, [None, None, None, 3], 'image'),
InputVar(tf.int32, [None, None, None], 'edgemap') ]
InputVar(tf.int32, [None, None, None], 'edgemap')]
def _build_graph(self, input_vars):
image, edgemap = input_vars
......@@ -69,10 +72,10 @@ class Model(ModelDesc):
use_bias=False, nl=tf.identity)
costs = []
for idx, b in enumerate([b1, b2, b3, b4, b5, final_map]):
output = tf.nn.sigmoid(b, name='output{}'.format(idx+1))
output = tf.nn.sigmoid(b, name='output{}'.format(idx + 1))
xentropy = class_balanced_sigmoid_cross_entropy(
b, edgemap,
name='xentropy{}'.format(idx+1))
name='xentropy{}'.format(idx + 1))
costs.append(xentropy)
# some magic threshold
......@@ -91,13 +94,15 @@ class Model(ModelDesc):
self.cost = tf.add_n(costs, name='cost')
def get_gradient_processor(self):
return [ScaleGradient([('convfcweight.*', 0.1), ('conv5_.*', 5)]) ]
return [ScaleGradient([('convfcweight.*', 0.1), ('conv5_.*', 5)])]
def get_data(name):
isTrain = name == 'train'
ds = dataset.BSDS500(name, shuffle=True)
class CropMultiple16(imgaug.ImageAugmentor):
def _get_augment_params(self, img):
newh = img.shape[0] // 16 * 16
neww = img.shape[1] // 16 * 16
......@@ -110,11 +115,11 @@ def get_data(name):
def _augment(self, img, param):
h0, w0, newh, neww = param
return img[h0:h0+newh,w0:w0+neww]
return img[h0:h0 + newh, w0:w0 + neww]
if isTrain:
shape_aug = [
imgaug.RandomResize(xrange=(0.7,1.5), yrange=(0.7,1.5),
imgaug.RandomResize(xrange=(0.7, 1.5), yrange=(0.7, 1.5),
aspect_ratio_thres=0.15),
imgaug.RotationAndCropValid(90),
CropMultiple16(),
......@@ -128,15 +133,15 @@ def get_data(name):
ds = AugmentImageComponents(ds, shape_aug, (0, 1))
def f(m):
m[m>=0.50] = 1
m[m<0.50] = 0
m[m >= 0.50] = 1
m[m < 0.50] = 0
return m
ds = MapDataComponent(ds, f, 1)
if isTrain:
augmentors = [
imgaug.Brightness(63, clip=False),
imgaug.Contrast((0.4,1.5)),
imgaug.Contrast((0.4, 1.5)),
]
ds = AugmentImageComponent(ds, augmentors)
ds = BatchDataByShape(ds, 8, idx=0)
......@@ -145,6 +150,7 @@ def get_data(name):
ds = BatchData(ds, 1)
return ds
def view_data():
ds = RepeatedData(get_data('train'), -1)
ds.reset_state()
......@@ -156,6 +162,7 @@ def view_data():
cv2.imshow("edge", edgemap)
cv2.waitKey(1000)
def get_config():
logger.auto_set_dir()
dataset_train = get_data('train')
......@@ -178,6 +185,7 @@ def get_config():
max_epoch=100,
)
def run(model_path, image_path, output):
pred_config = PredictConfig(
model=Model(),
......@@ -193,7 +201,7 @@ def run(model_path, image_path, output):
for k in range(6):
pred = outputs[k][0]
cv2.imwrite("out{}.png".format(
'-fused' if k == 5 else str(k+1)), pred * 255)
'-fused' if k == 5 else str(k + 1)), pred * 255)
else:
pred = outputs[5][0]
cv2.imwrite(output, pred * 255)
......
......@@ -27,10 +27,12 @@ This config reaches 71% single-crop validation accuracy after 150k steps with 6
Learning rate may need a different schedule for different number of GPUs (because batch size will be different).
"""
class Model(ModelDesc):
def _get_input_vars(self):
return [InputVar(tf.float32, [None, INPUT_SHAPE, INPUT_SHAPE, 3], 'input'),
InputVar(tf.int32, [None], 'label') ]
InputVar(tf.int32, [None], 'label')]
def _build_graph(self, input_vars):
image, label = input_vars
......@@ -117,6 +119,7 @@ class Model(ModelDesc):
add_param_summary([('.*/W', ['histogram'])]) # monitor W
self.cost = tf.add_n([cost, wd_cost], name='cost')
def get_data(train_or_test):
isTrain = train_or_test == 'train'
ds = dataset.ILSVRC12(args.data, train_or_test, shuffle=True if isTrain else False)
......@@ -128,7 +131,7 @@ def get_data(train_or_test):
augmentors = [
imgaug.Resize((256, 256)),
imgaug.Brightness(30, False),
imgaug.Contrast((0.8,1.2), True),
imgaug.Contrast((0.8, 1.2), True),
imgaug.MapImage(lambda x: x - pp_mean),
imgaug.RandomCrop((224, 224)),
imgaug.Flip(horiz=True),
......@@ -166,7 +169,7 @@ def get_config():
ScheduledHyperParamSetter('learning_rate',
[(8, 0.03), (14, 0.02), (17, 5e-3),
(19, 3e-3), (24, 1e-3), (26, 2e-4),
(30, 5e-5) ])
(30, 5e-5)])
]),
session_config=get_default_sess_config(0.99),
model=Model(),
......
......@@ -32,10 +32,12 @@ NR_GPU = 8
BATCH_SIZE = TOTAL_BATCH_SIZE // NR_GPU
INPUT_SHAPE = 299
class Model(ModelDesc):
def _get_input_vars(self):
return [InputVar(tf.float32, [None, INPUT_SHAPE, INPUT_SHAPE, 3], 'input'),
InputVar(tf.int32, [None], 'label') ]
InputVar(tf.int32, [None], 'label')]
def _build_graph(self, input_vars):
image, label = input_vars
......@@ -62,22 +64,22 @@ class Model(ModelDesc):
def proj_77(l, ch_r, ch):
return (LinearWrap(l)
.Conv2D('conv77r', ch_r, 1)
.Conv2D('conv77a', ch_r, [1,7])
.Conv2D('conv77b', ch, [7,1])())
.Conv2D('conv77a', ch_r, [1, 7])
.Conv2D('conv77b', ch, [7, 1])())
def proj_277(l, ch_r, ch):
return (LinearWrap(l)
.Conv2D('conv277r', ch_r, 1)
.Conv2D('conv277aa', ch_r, [7,1])
.Conv2D('conv277ab', ch_r, [1,7])
.Conv2D('conv277ba', ch_r, [7,1])
.Conv2D('conv277bb', ch, [1,7])())
.Conv2D('conv277aa', ch_r, [7, 1])
.Conv2D('conv277ab', ch_r, [1, 7])
.Conv2D('conv277ba', ch_r, [7, 1])
.Conv2D('conv277bb', ch, [1, 7])())
with argscope(Conv2D, nl=BNReLU, use_bias=False),\
argscope(BatchNorm, decay=0.9997, epsilon=1e-3):
l = (LinearWrap(image)
.Conv2D('conv0', 32, 3, stride=2, padding='VALID') #299
.Conv2D('conv1', 32, 3, padding='VALID') #149
.Conv2D('conv0', 32, 3, stride=2, padding='VALID') # 299
.Conv2D('conv1', 32, 3, padding='VALID') # 149
.Conv2D('conv2', 64, 3, padding='SAME') # 147
.MaxPooling('pool2', 3, 2)
.Conv2D('conv3', 80, 1, padding='SAME') # 73
......@@ -194,6 +196,7 @@ class Model(ModelDesc):
self.cost = tf.add_n([0.4 * loss1, loss2, wd_cost], name='cost')
def get_data(train_or_test):
isTrain = train_or_test == 'train'
......@@ -205,15 +208,17 @@ def get_data(train_or_test):
if isTrain:
class Resize(imgaug.ImageAugmentor):
    """Randomly rescale an image with independent jitter on each axis.

    A base scale is derived from ``self.rng.randint(299, 340)`` divided by the
    image's shortest edge; each axis then gets its own extra factor from
    ``self.rng.uniform(0.85, 1.15)``. Every target dimension is capped at the
    source dimension and afterwards raised to at least 299.
    """

    def __init__(self):
        self._init(locals())

    def _augment(self, img, _):
        min_edge = 299
        height, width = img.shape[:2]
        # Keep the draw order (base scale, X factor, Y factor) so the random
        # stream is consumed exactly as before.
        base = self.rng.randint(min_edge, 340) * 1.0 / min(height, width)
        factor_x = base * self.rng.uniform(0.85, 1.15)
        factor_y = base * self.rng.uniform(0.85, 1.15)
        out_w = int(max(min_edge, min(width, factor_x * width)))
        out_h = int(max(min_edge, min(height, factor_y * height)))
        # cv2.resize takes the target size as (width, height).
        return cv2.resize(img, (out_w, out_h), interpolation=cv2.INTER_CUBIC)
......@@ -224,11 +229,11 @@ def get_data(train_or_test):
imgaug.RandomApplyAug(imgaug.GaussianBlur(3), 0.5),
imgaug.Brightness(30, True),
imgaug.Gamma(),
imgaug.Contrast((0.8,1.2), True),
imgaug.Contrast((0.8, 1.2), True),
imgaug.RandomCrop((299, 299)),
imgaug.RandomApplyAug(imgaug.JpegNoise(), 0.8),
imgaug.RandomApplyAug(imgaug.GaussianDeform(
[(0.2, 0.2), (0.2, 0.8), (0.8,0.8), (0.8,0.2)],
[(0.2, 0.2), (0.2, 0.8), (0.8, 0.8), (0.8, 0.2)],
(299, 299), 0.2, 3), 0.1),
imgaug.Flip(horiz=True),
imgaug.MapImage(lambda x: x - pp_mean_299),
......@@ -237,7 +242,7 @@ def get_data(train_or_test):
def resize_func(im):
    """Resize `im` with bicubic interpolation.

    The scale factor maps the shortest edge to 340; each target dimension is
    then capped at the source dimension and raised to at least 299.
    """
    height, width = im.shape[:2]
    ratio = 340.0 / min(height, width)
    new_w = int(max(299, min(width, ratio * width)))
    new_h = int(max(299, min(height, ratio * height)))
    # cv2.resize takes the target size as (width, height).
    return cv2.resize(im, (new_w, new_h), interpolation=cv2.INTER_CUBIC)
......
......@@ -5,7 +5,10 @@
import numpy as np
import tensorflow as tf
import os, sys, re, time
import os
import sys
import re
import time
import random
import argparse
import six
......@@ -23,6 +26,7 @@ ENV_NAME = None
from common import play_one_episode
def get_player(dumpdir=None):
pl = GymEnv(ENV_NAME, dumpdir=dumpdir, auto_restart=False)
pl = MapPlayerState(pl, lambda img: cv2.resize(img, IMAGE_SIZE[::-1]))
......@@ -33,12 +37,14 @@ def get_player(dumpdir=None):
pl = HistoryFramePlayer(pl, FRAME_HISTORY)
return pl
class Model(ModelDesc):
def _get_input_vars(self):
assert NUM_ACTIONS is not None
return [InputVar(tf.float32, (None,) + IMAGE_SHAPE3, 'state'),
InputVar(tf.int32, (None,), 'action'),
InputVar(tf.float32, (None,), 'futurereward') ]
InputVar(tf.float32, (None,), 'futurereward')]
def _get_NN_prediction(self, image):
image = image / 255.0
......@@ -61,6 +67,7 @@ class Model(ModelDesc):
policy = self._get_NN_prediction(state)
self.logits = tf.nn.softmax(policy, name='logits')
def run_submission(cfg, output, nr):
player = get_player(dumpdir=output)
predfunc = get_predict_func(cfg)
......@@ -71,6 +78,7 @@ def run_submission(cfg, output, nr):
score = play_one_episode(player, predfunc)
print("Score:", score)
def do_submit(output):
gym.upload(output, api_key='xxx')
......@@ -87,7 +95,8 @@ if __name__ == '__main__':
ENV_NAME = args.env
assert ENV_NAME
logger.info("Environment Name: {}".format(ENV_NAME))
p = get_player(); del p # set NUM_ACTIONS
p = get_player()
del p # set NUM_ACTIONS
if args.gpu:
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
......
......@@ -5,11 +5,15 @@
import numpy as np
import tensorflow as tf
import os, sys, re, time
import os
import sys
import re
import time
import random
import uuid
import argparse
import multiprocessing, threading
import multiprocessing
import threading
from collections import deque
import six
from six.moves import queue
......@@ -42,8 +46,10 @@ EVALUATE_PROC = min(multiprocessing.cpu_count() // 2, 20)
NUM_ACTIONS = None
ENV_NAME = None
def get_player(viz=False, train=False, dumpdir=None):
pl = GymEnv(ENV_NAME, dumpdir=dumpdir)
def func(img):
return cv2.resize(img, IMAGE_SIZE[::-1])
pl = MapPlayerState(pl, func)
......@@ -58,16 +64,20 @@ def get_player(viz=False, train=False, dumpdir=None):
return pl
common.get_player = get_player
class MySimulatorWorker(SimulatorProcess):
    """Simulator process whose player comes from `get_player(train=True)`."""

    def _build_player(self):
        player = get_player(train=True)
        return player
class Model(ModelDesc):
def _get_input_vars(self):
assert NUM_ACTIONS is not None
return [InputVar(tf.float32, (None,) + IMAGE_SHAPE3, 'state'),
InputVar(tf.int64, (None,), 'action'),
InputVar(tf.float32, (None,), 'futurereward') ]
InputVar(tf.float32, (None,), 'futurereward')]
def _get_NN_prediction(self, image):
image = image / 255.0
......@@ -122,11 +132,13 @@ class Model(ModelDesc):
return [MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.1)),
SummaryGradient()]
class MySimulatorMaster(SimulatorMaster, Callback):
def __init__(self, pipe_c2s, pipe_s2c, model):
super(MySimulatorMaster, self).__init__(pipe_c2s, pipe_s2c)
self.M = model
self.queue = queue.Queue(maxsize=BATCH_SIZE*8*2)
self.queue = queue.Queue(maxsize=BATCH_SIZE * 8 * 2)
def _setup_graph(self):
self.sess = self.trainer.sess
......@@ -172,6 +184,7 @@ class MySimulatorMaster(SimulatorMaster, Callback):
else:
client.memory = []
def get_config():
logger.auto_set_dir()
M = Model()
......@@ -218,7 +231,8 @@ if __name__ == '__main__':
ENV_NAME = args.env
assert ENV_NAME
p = get_player(); del p # set NUM_ACTIONS
p = get_player()
del p # set NUM_ACTIONS
if args.gpu:
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
......@@ -239,11 +253,11 @@ if __name__ == '__main__':
if args.gpu:
nr_gpu = get_nr_gpu()
if nr_gpu > 1:
predict_tower = range(nr_gpu)[-nr_gpu//2:]
predict_tower = range(nr_gpu)[-nr_gpu // 2:]
else:
predict_tower = [0]
PREDICTOR_THREAD = len(predict_tower) * PREDICTOR_THREAD_PER_GPU
train_tower = range(nr_gpu)[:-nr_gpu//2] or [0]
train_tower = range(nr_gpu)[:-nr_gpu // 2] or [0]
logger.info("[BA3C] Train on gpu {} and infer on gpu {}".format(
','.join(map(str, train_tower)), ','.join(map(str, predict_tower))))
trainer = AsyncMultiGPUTrainer
......
......@@ -30,14 +30,16 @@ This model uses the whole training set instead of a train-val split.
BATCH_SIZE = 128
NUM_UNITS = None
class Model(ModelDesc):
def __init__(self, n):
super(Model, self).__init__()
self.n = n
def _get_input_vars(self):
return [InputVar(tf.float32, [None, 32, 32, 3], 'input'),
InputVar(tf.int32, [None], 'label') ]
InputVar(tf.int32, [None], 'label')]
def _build_graph(self, input_vars):
image, label = input_vars
......@@ -60,7 +62,7 @@ class Model(ModelDesc):
c2 = Conv2D('conv2', c1, out_channel)
if increase_dim:
l = AvgPooling('pool', l, 2)
l = tf.pad(l, [[0,0], [0,0], [0,0], [in_channel//2, in_channel//2]])
l = tf.pad(l, [[0, 0], [0, 0], [0, 0], [in_channel // 2, in_channel // 2]])
l = c2 + l
return l
......@@ -104,6 +106,7 @@ class Model(ModelDesc):
add_param_summary([('.*/W', ['histogram'])]) # monitor W
self.cost = tf.add_n([cost, wd_cost], name='cost')
def get_data(train_or_test):
isTrain = train_or_test == 'train'
ds = dataset.Cifar10(train_or_test)
......@@ -125,6 +128,7 @@ def get_data(train_or_test):
ds = PrefetchData(ds, 3, 2)
return ds
def get_config():
logger.auto_set_dir()
......
......@@ -26,10 +26,12 @@ TOTAL_BATCH_SIZE = 256
INPUT_SHAPE = 224
DEPTH = None
class Model(ModelDesc):
def _get_input_vars(self):
return [InputVar(tf.float32, [None, INPUT_SHAPE, INPUT_SHAPE, 3], 'input'),
InputVar(tf.int32, [None], 'label') ]
InputVar(tf.int32, [None], 'label')]
def _build_graph(self, input_vars):
image, label = input_vars
......@@ -80,10 +82,10 @@ class Model(ModelDesc):
return l
cfg = {
18: ([2,2,2,2], basicblock),
34: ([3,4,6,3], basicblock),
50: ([3,4,6,3], bottleneck),
101: ([3,4,23,3], bottleneck)
18: ([2, 2, 2, 2], basicblock),
34: ([3, 4, 6, 3], basicblock),
50: ([3, 4, 6, 3], bottleneck),
101: ([3, 4, 23, 3], bottleneck)
}
defs, block_func = cfg[DEPTH]
......@@ -113,6 +115,7 @@ class Model(ModelDesc):
add_moving_summary(loss, wd_cost)
self.cost = tf.add_n([loss, wd_cost], name='cost')
def get_data(train_or_test):
isTrain = train_or_test == 'train'
......@@ -128,12 +131,13 @@ def get_data(train_or_test):
crop 8%~100% of the original image
See `Going Deeper with Convolutions` by Google.
"""
def _augment(self, img, _):
h, w = img.shape[:2]
area = h * w
for _ in range(10):
targetArea = self.rng.uniform(0.08, 1.0) * area
aspectR = self.rng.uniform(0.75,1.333)
aspectR = self.rng.uniform(0.75, 1.333)
ww = int(np.sqrt(targetArea * aspectR))
hh = int(np.sqrt(targetArea / aspectR))
if self.rng.uniform() < 0.5:
......@@ -141,10 +145,10 @@ def get_data(train_or_test):
if hh <= h and ww <= w:
x1 = 0 if w == ww else self.rng.randint(0, w - ww)
y1 = 0 if h == hh else self.rng.randint(0, h - hh)
out = img[y1:y1+hh,x1:x1+ww]
out = cv2.resize(out, (224,224), interpolation=cv2.INTER_CUBIC)
out = img[y1:y1 + hh, x1:x1 + ww]
out = cv2.resize(out, (224, 224), interpolation=cv2.INTER_CUBIC)
return out
out = cv2.resize(img, (224,224), interpolation=cv2.INTER_CUBIC)
out = cv2.resize(img, (224, 224), interpolation=cv2.INTER_CUBIC)
return out
augmentors = [
......@@ -155,9 +159,9 @@ def get_data(train_or_test):
imgaug.Saturation(0.4),
imgaug.Lighting(0.1,
eigval=[0.2175, 0.0188, 0.0045],
eigvec=[[ -0.5675, 0.7192, 0.4009],
[ -0.5808, -0.0045, -0.8140],
[ -0.5836, -0.6948, 0.4203]]
eigvec=[[-0.5675, 0.7192, 0.4009],
[-0.5808, -0.0045, -0.8140],
[-0.5836, -0.6948, 0.4203]]
)]),
imgaug.Clip(),
imgaug.Flip(horiz=True),
......@@ -175,6 +179,7 @@ def get_data(train_or_test):
ds = PrefetchDataZMQ(ds, min(12, multiprocessing.cpu_count()))
return ds
def get_config():
# prepare dataset
dataset_train = get_data('train')
......@@ -198,6 +203,7 @@ def get_config():
max_epoch=110,
)
def eval_on_ILSVRC12(model_file, data_dir):
ds = get_data('val')
pred_config = PredictConfig(
......
......@@ -7,7 +7,8 @@
import cv2
import tensorflow as tf
import argparse
import os, re
import os
import re
import numpy as np
import six
from six.moves import zip
......@@ -22,7 +23,9 @@ from tensorpack.dataflow.dataset import ILSVRCMeta
MODEL_DEPTH = None
class Model(ModelDesc):
def _get_input_vars(self):
return [InputVar(tf.float32, [None, 224, 224, 3], 'input'),
InputVar(tf.int32, [None], 'label')]
......@@ -64,9 +67,9 @@ class Model(ModelDesc):
return l
cfg = {
50: ([3,4,6,3]),
101: ([3,4,23,3]),
152: ([3,8,36,3])
50: ([3, 4, 6, 3]),
101: ([3, 4, 23, 3]),
152: ([3, 8, 36, 3])
}
defs = cfg[MODEL_DEPTH]
......@@ -74,7 +77,7 @@ class Model(ModelDesc):
W_init=variance_scaling_initializer(mode='FAN_OUT')):
# tensorflow with padding=SAME will by default pad [2,3] here.
# but caffe conv with stride will pad [3,3]
image = tf.pad(image, [[0,0],[3,3],[3,3],[0,0]])
image = tf.pad(image, [[0, 0], [3, 3], [3, 3], [0, 0]])
fc1000 = (LinearWrap(image)
.Conv2D('conv0', 64, 7, stride=2, nl=BNReLU, padding='VALID')
.MaxPooling('pool0', shape=3, stride=2, padding='SAME')
......@@ -89,16 +92,17 @@ class Model(ModelDesc):
nr_wrong = prediction_incorrect(fc1000, label, name='wrong-top1')
nr_wrong = prediction_incorrect(fc1000, label, 5, name='wrong-top5')
def get_inference_augmentor():
# load ResNet mean from Kaiming:
#from tensorpack.utils.loadcaffe import get_caffe_pb
#obj = get_caffe_pb().BlobProto()
#obj.ParseFromString(open('ResNet_mean.binaryproto').read())
# obj.ParseFromString(open('ResNet_mean.binaryproto').read())
#pp_mean_224 = np.array(obj.data).reshape(3, 224, 224).transpose(1,2,0)
meta = ILSVRCMeta()
pp_mean = meta.get_per_pixel_mean()
pp_mean_224 = pp_mean[16:-16,16:-16,:]
pp_mean_224 = pp_mean[16:-16, 16:-16, :]
transformers = imgaug.AugmentorList([
imgaug.ResizeShortestEdge(256),
......@@ -107,6 +111,7 @@ def get_inference_augmentor():
])
return transformers
def run_test(params, input):
pred_config = PredictConfig(
model=Model(),
......@@ -119,7 +124,7 @@ def run_test(params, input):
prepro = get_inference_augmentor()
im = cv2.imread(input).astype('float32')
im = prepro.augment(im)
im = np.reshape( im, (1, 224, 224, 3))
im = np.reshape(im, (1, 224, 224, 3))
outputs = predict_func([im])
prob = outputs[0]
......@@ -128,6 +133,7 @@ def run_test(params, input):
meta = ILSVRCMeta().get_synset_words_1000()
print([meta[k] for k in ret])
def eval_on_ILSVRC12(params, data_dir):
ds = dataset.ILSVRC12(data_dir, 'val', shuffle=False, dir_structure='train')
ds = AugmentImageComponent(ds, get_inference_augmentor())
......@@ -147,6 +153,7 @@ def eval_on_ILSVRC12(params, data_dir):
print("Top1 Error: {}".format(acc1.ratio))
print("Top5 Error: {}".format(acc5.ratio))
def name_conversion(caffe_layer_name):
""" Convert a caffe parameter name to a tensorflow parameter name as
defined in the above model """
......@@ -178,7 +185,7 @@ def name_conversion(caffe_layer_name):
layer_id = re.search('_branch[0-9]([a-z])/', caffe_layer_name).group(1)
layer_id = ord(layer_id) - ord('a') + 1
TYPE_DICT = {'res':'conv', 'bn':'bn'}
TYPE_DICT = {'res': 'conv', 'bn': 'bn'}
tf_name = caffe_layer_name[caffe_layer_name.index('/'):]
layer_type = TYPE_DICT[layer_type] + \
......
......@@ -25,6 +25,7 @@ Model = cifar_example.Model
BATCH_SIZE = 128
def get_data(train_or_test):
isTrain = train_or_test == 'train'
pp_mean = dataset.SVHNDigit.get_per_pixel_mean()
......@@ -39,9 +40,9 @@ def get_data(train_or_test):
augmentors = [
imgaug.CenterPaste((40, 40)),
imgaug.Brightness(10),
imgaug.Contrast((0.8,1.2)),
imgaug.Contrast((0.8, 1.2)),
imgaug.GaussianDeform( # this is slow. without it, can only reach 1.9% error
[(0.2, 0.2), (0.2, 0.8), (0.8,0.8), (0.8,0.2)],
[(0.2, 0.2), (0.2, 0.8), (0.8, 0.8), (0.8, 0.2)],
(40, 40), 0.2, 3),
imgaug.RandomCrop((32, 32)),
imgaug.MapImage(lambda x: x - pp_mean),
......@@ -56,6 +57,7 @@ def get_data(train_or_test):
ds = PrefetchData(ds, 5, 5)
return ds
def get_config():
logger.auto_set_dir()
......@@ -72,7 +74,7 @@ def get_config():
StatPrinter(),
ModelSaver(),
InferenceRunner(dataset_test,
[ScalarStats('cost'), ClassificationError() ]),
[ScalarStats('cost'), ClassificationError()]),
ScheduledHyperParamSetter('learning_rate',
[(1, 0.1), (20, 0.01), (28, 0.001), (50, 0.0001)])
]),
......
......@@ -5,7 +5,8 @@
import numpy as np
import tensorflow as tf
import os, sys
import os
import sys
import argparse
from tensorpack import *
......@@ -15,13 +16,15 @@ IMAGE_SIZE = 42
WARP_TARGET_SIZE = 28
HALF_DIFF = (IMAGE_SIZE - WARP_TARGET_SIZE) // 2
class Model(ModelDesc):
def _get_input_vars(self):
return [InputVar(tf.float32, (None, IMAGE_SIZE, IMAGE_SIZE, 2), 'input'),
InputVar(tf.int32, (None,), 'label') ]
InputVar(tf.int32, (None,), 'label')]
def _build_graph(self, input_vars):
xys = np.array([(y,x,1) for y in range(WARP_TARGET_SIZE)
xys = np.array([(y, x, 1) for y in range(WARP_TARGET_SIZE)
for x in range(WARP_TARGET_SIZE)], dtype='float32')
xys = tf.constant(xys, dtype=tf.float32, name='xys') # p x 3
......@@ -55,11 +58,11 @@ class Model(ModelDesc):
sampled2 = get_stn(image)
# For visualization in tensorboard
padded1 = tf.pad(sampled1, [[0,0],[HALF_DIFF,HALF_DIFF],[HALF_DIFF,HALF_DIFF],[0,0]])
padded2 = tf.pad(sampled2, [[0,0],[HALF_DIFF,HALF_DIFF],[HALF_DIFF,HALF_DIFF],[0,0]])
img_orig = tf.concat(1, [image[:,:,:,0], image[:,:,:,1]]) #b x 2h x w
transform1 = tf.concat(1, [padded1[:,:,:,0], padded1[:,:,:,1]])
transform2 = tf.concat(1, [padded2[:,:,:,0], padded2[:,:,:,1]])
padded1 = tf.pad(sampled1, [[0, 0], [HALF_DIFF, HALF_DIFF], [HALF_DIFF, HALF_DIFF], [0, 0]])
padded2 = tf.pad(sampled2, [[0, 0], [HALF_DIFF, HALF_DIFF], [HALF_DIFF, HALF_DIFF], [0, 0]])
img_orig = tf.concat(1, [image[:, :, :, 0], image[:, :, :, 1]]) # b x 2h x w
transform1 = tf.concat(1, [padded1[:, :, :, 0], padded1[:, :, :, 1]])
transform2 = tf.concat(1, [padded2[:, :, :, 0], padded2[:, :, :, 1]])
stacked = tf.concat(2, [img_orig, transform1, transform2], 'viz')
tf.summary.image('visualize',
tf.expand_dims(stacked, -1), max_images=30)
......@@ -87,6 +90,7 @@ class Model(ModelDesc):
return [MapGradient(lambda grad: tf.clip_by_global_norm([grad], 5)[0][0]),
ScaleGradient([('STN.*', 0.1)]), SummaryGradient()]
def get_data(isTrain):
ds = dataset.Mnist('train' if isTrain else 'test')
# create augmentation for both training and testing
......@@ -105,6 +109,7 @@ def get_data(isTrain):
ds = BatchData(ds, 128)
return ds
def view_warp(modelpath):
pred = OfflinePredictor(PredictConfig(
session_init=get_model_loader(modelpath),
......@@ -117,8 +122,8 @@ def view_warp(modelpath):
[WARP_TARGET_SIZE, WARP_TARGET_SIZE, 1],
[0, WARP_TARGET_SIZE, 1]], dtype='float32')
def draw_rect(img, affine, c, offset=[0,0]):
a = np.transpose(affine) #3x2
def draw_rect(img, affine, c, offset=[0, 0]):
a = np.transpose(affine) # 3x2
a = (np.matmul(xys, a) + offset).astype('int32')
cv2.line(img, tuple(a[0][::-1]), tuple(a[1][::-1]), c)
cv2.line(img, tuple(a[1][::-1]), tuple(a[2][::-1]), c)
......@@ -133,11 +138,12 @@ def view_warp(modelpath):
for idx, viz in enumerate(outputs):
viz = cv2.cvtColor(viz, cv2.COLOR_GRAY2BGR)
# Here we assume the second branch focuses on the first digit
draw_rect(viz, affine2[idx], (0,0,255))
draw_rect(viz, affine1[idx], (0,0,255), offset=[IMAGE_SIZE, 0])
draw_rect(viz, affine2[idx], (0, 0, 255))
draw_rect(viz, affine1[idx], (0, 0, 255), offset=[IMAGE_SIZE, 0])
cv2.imwrite('{:03d}.png'.format(idx), (viz + 0.5) * 255)
break
def get_config():
logger.auto_set_dir()
......@@ -152,7 +158,7 @@ def get_config():
callbacks=Callbacks([
StatPrinter(), ModelSaver(),
InferenceRunner(dataset_test,
[ScalarStats('cost'), ClassificationError() ]),
[ScalarStats('cost'), ClassificationError()]),
ScheduledHyperParamSetter('learning_rate', [(200, 1e-4)])
]),
session_config=get_default_sess_config(0.5),
......@@ -176,4 +182,3 @@ if __name__ == '__main__':
if args.load:
config.session_init = SaverRestore(args.load)
SimpleTrainer(config).train()
......@@ -2,7 +2,8 @@
# -*- coding: utf-8 -*-
# File: create-lmdb.py
# Author: Yuxin Wu <ppwwyyxxc@gmail.com>
import sys, os
import sys
import os
import scipy.io.wavfile as wavfile
import string
import numpy as np
......@@ -14,10 +15,12 @@ from tensorpack.utils.stats import OnlineMoments
import bob.ap
CHARSET = set(string.ascii_lowercase + ' ')
PHONEME_LIST = "aa,ae,ah,ao,aw,ax,ax-h,axr,ay,b,bcl,ch,d,dcl,dh,dx,eh,el,em,en,eng,epi,er,ey,f,g,gcl,h#,hh,hv,ih,ix,iy,jh,k,kcl,l,m,n,ng,nx,ow,oy,p,pau,pcl,q,r,s,sh,t,tcl,th,uh,uw,ux,v,w,y,z,zh".split(',')
PHONEME_LIST = "aa,ae,ah,ao,aw,ax,ax-h,axr,ay,b,bcl,ch,d,dcl,dh,dx,eh,el,em,en,eng,epi,er,ey,f,g,gcl,h#,hh,hv,ih,ix,iy,jh,k,kcl,l,m,n,ng,nx,ow,oy,p,pau,pcl,q,r,s,sh,t,tcl,th,uh,uw,ux,v,w,y,z,zh".split(
',')
PHONEME_DIC = {v: k for k, v in enumerate(PHONEME_LIST)}
WORD_DIC = {v: k for k, v in enumerate(string.ascii_lowercase + ' ')}
def read_timit_txt(f):
f = open(f)
line = f.readlines()[0].strip().split(' ')
......@@ -30,6 +33,7 @@ def read_timit_txt(f):
ret.append(WORD_DIC[c])
return np.asarray(ret)
def read_timit_phoneme(f):
f = open(f)
pho = []
......@@ -39,6 +43,7 @@ def read_timit_phoneme(f):
f.close()
return np.asarray(pho)
@memoized
def get_bob_extractor(fs, win_length_ms=10, win_shift_ms=5,
n_filters=55, n_ceps=15, f_min=0., f_max=6000,
......@@ -48,6 +53,7 @@ def get_bob_extractor(fs, win_length_ms=10, win_shift_ms=5,
f_max, delta_win, pre_emphasis_coef, mel_scale, dct_norm)
return ret
def diff_feature(feat, nd=1):
diff = feat[1:] - feat[:-1]
feat = feat[1:]
......@@ -57,6 +63,7 @@ def diff_feature(feat, nd=1):
d2 = diff[1:] - diff[:-1]
return np.concatenate((feat[1:], diff[1:], d2), axis=1)
def get_feature(f):
fs, signal = wavfile.read(f)
signal = signal.astype('float64')
......@@ -64,7 +71,9 @@ def get_feature(f):
feat = diff_feature(feat, nd=2)
return feat
class RawTIMIT(DataFlow):
def __init__(self, dirname, label='phoneme'):
self.dirname = dirname
assert os.path.isdir(dirname), dirname
......@@ -87,12 +96,13 @@ class RawTIMIT(DataFlow):
label = read_timit_txt(f[:-4] + '.TXT')
yield [feat, label]
def compute_mean_std(db, fname):
ds = LMDBDataPoint(db, shuffle=False)
o = OnlineMoments()
with get_tqdm(total=ds.size()) as bar:
for dp in ds.get_data():
feat = dp[0] #len x dim
feat = dp[0] # len x dim
for f in feat:
o.feed(f)
bar.update()
......@@ -119,4 +129,3 @@ if __name__ == '__main__':
dftools.dump_dataflow_to_lmdb(ds, args.db)
elif args.command == 'stat':
compute_mean_std(args.db, args.output)
......@@ -9,15 +9,17 @@ from six.moves import range
__all__ = ['TIMITBatch']
def batch_feature(feats):
    """Stack variable-length feature arrays into one zero-padded batch.

    :param feats: non-empty sequence of 2-D arrays, each shaped
        (length_i, dim); `dim` is read from the first array.
    :returns: array of shape (len(feats), longest length, dim) allocated with
        `np.zeros`, where entry `i` holds `feats[i]` followed by zero padding.
    """
    # pad to the longest in the batch
    maxlen = max(k.shape[0] for k in feats)
    ret = np.zeros((len(feats), maxlen, feats[0].shape[1]))
    for idx, feat in enumerate(feats):
        ret[idx, :feat.shape[0], :] = feat
    return ret
def sparse_label(labels):
maxlen = max([k.shape[0] for k in labels])
shape = [len(labels), maxlen] # bxt
......@@ -31,7 +33,9 @@ def sparse_label(labels):
values = np.asarray(values)
return (indices, values, shape)
class TIMITBatch(ProxyDataFlow):
def __init__(self, ds, batch):
self.batch = batch
self.ds = ds
......@@ -52,4 +56,3 @@ class TIMITBatch(ProxyDataFlow):
batchlab = sparse_label(labs)
seqlen = np.asarray([k.shape[0] for k in feats])
yield [batchfeat, batchlab[0], batchlab[1], batchlab[2], seqlen]
......@@ -5,7 +5,8 @@
import tensorflow as tf
import numpy as np
import os, sys
import os
import sys
import argparse
from collections import Counter
import operator
......@@ -24,10 +25,12 @@ HIDDEN = 128
NR_CLASS = 61 + 1
FEATUREDIM = 39
class Model(ModelDesc):
def _get_input_vars(self):
return [InputVar(tf.float32, [None, None, FEATUREDIM], 'feat'), # bxmaxseqx39
InputVar(tf.int64, None, 'labelidx'), #label is b x maxlen, sparse
InputVar(tf.int64, None, 'labelidx'), # label is b x maxlen, sparse
InputVar(tf.int32, None, 'labelvalue'),
InputVar(tf.int64, None, 'labelshape'),
InputVar(tf.int32, [None], 'seqlen'), # b
......@@ -56,7 +59,7 @@ class Model(ModelDesc):
self.cost = tf.reduce_mean(loss, name='cost')
logits = tf.transpose(logits, [1,0,2])
logits = tf.transpose(logits, [1, 0, 2])
isTrain = get_current_tower_context().is_training
if isTrain:
......@@ -72,7 +75,8 @@ class Model(ModelDesc):
summary.add_moving_summary(err)
def get_gradient_processor(self):
return [GlobalNormClip(5), SummaryGradient() ]
return [GlobalNormClip(5), SummaryGradient()]
def get_data(path, isTrain, stat_file):
ds = LMDBDataPoint(path, shuffle=isTrain)
......@@ -83,6 +87,7 @@ def get_data(path, isTrain, stat_file):
ds = PrefetchDataZMQ(ds, 1)
return ds
def get_config(ds_train, ds_test):
step_per_epoch = ds_train.size()
......@@ -124,4 +129,3 @@ if __name__ == '__main__':
if args.load:
config.session_init = SaverRestore(args.load)
QueueInputTrainer(config).train()
......@@ -5,7 +5,8 @@
import tensorflow as tf
import numpy as np
import os, sys
import os
import sys
import argparse
from collections import Counter
import operator
......@@ -27,7 +28,9 @@ param.vocab_size = None
param.softmax_temprature = 1
param.corpus = 'input.txt'
class CharRNNData(RNGDataFlow):
def __init__(self, input_file, size):
self.seq_length = param.seq_len
self._size = size
......@@ -58,9 +61,10 @@ class CharRNNData(RNGDataFlow):
class Model(ModelDesc):
def _get_input_vars(self):
return [InputVar(tf.int32, (None, param.seq_len), 'input'),
InputVar(tf.int32, (None, param.seq_len), 'nextinput') ]
InputVar(tf.int32, (None, param.seq_len), 'nextinput')]
def _build_graph(self, input_vars):
input, nextinput = input_vars
......@@ -73,7 +77,7 @@ class Model(ModelDesc):
embeddingW = tf.get_variable('embedding', [param.vocab_size, param.rnn_size])
input_feature = tf.nn.embedding_lookup(embeddingW, input) # B x seqlen x rnnsize
input_list = tf.unstack(input_feature, axis=1) #seqlen x (Bxrnnsize)
input_list = tf.unstack(input_feature, axis=1) # seqlen x (Bxrnnsize)
# seqlen is 1 in inference. don't need loop_function
outputs, last_state = tf.nn.rnn(cell, input_list, initial, scope='rnnlm')
......@@ -92,6 +96,7 @@ class Model(ModelDesc):
def get_gradient_processor(self):
return [GlobalNormClip(5)]
def get_config():
logger.auto_set_dir()
......@@ -114,6 +119,8 @@ def get_config():
)
# TODO rewrite using Predictor interface
def sample(path, start, length):
"""
:param path: path to the model
......@@ -130,7 +137,7 @@ def sample(path, start, length):
sess = tf.Session()
tfutils.SaverRestore(path).init(sess)
dummy_input = np.zeros((1,1), dtype='int32')
dummy_input = np.zeros((1, 1), dtype='int32')
with sess.as_default():
# feed the starting sentence
state = model.initial.eval({input_vars[0]: dummy_input})
......@@ -181,4 +188,3 @@ if __name__ == '__main__':
if args.load:
config.session_init = SaverRestore(args.load)
QueueInputTrainer(config).train()
......@@ -22,7 +22,9 @@ Cifar10:
Not a good model for Cifar100, just for demonstration.
"""
class Model(ModelDesc):
def __init__(self, cifar_classnum):
super(Model, self).__init__()
self.cifar_classnum = cifar_classnum
......@@ -72,6 +74,7 @@ class Model(ModelDesc):
add_param_summary([('.*/W', ['histogram'])]) # monitor W
self.cost = tf.add_n([cost, wd_cost], name='cost')
def get_data(train_or_test, cifar_classnum):
isTrain = train_or_test == 'train'
if cifar_classnum == 10:
......@@ -83,10 +86,10 @@ def get_data(train_or_test, cifar_classnum):
imgaug.RandomCrop((30, 30)),
imgaug.Flip(horiz=True),
imgaug.Brightness(63),
imgaug.Contrast((0.2,1.8)),
imgaug.Contrast((0.2, 1.8)),
imgaug.GaussianDeform(
[(0.2, 0.2), (0.2, 0.8), (0.8,0.8), (0.8,0.2)],
(30,30), 0.2, 3),
[(0.2, 0.2), (0.2, 0.8), (0.8, 0.8), (0.8, 0.2)],
(30, 30), 0.2, 3),
imgaug.MeanVarianceNormalize(all_channel=True)
]
else:
......@@ -100,6 +103,7 @@ def get_data(train_or_test, cifar_classnum):
ds = PrefetchData(ds, 3, 2)
return ds
def get_config(cifar_classnum):
logger.auto_set_dir()
......@@ -111,6 +115,7 @@ def get_config(cifar_classnum):
sess_config = get_default_sess_config(0.5)
lr = symbf.get_scalar_var('learning_rate', 1e-2, summary=True)
def lr_func(lr):
if lr < 3e-5:
raise StopTraining()
......
......@@ -6,7 +6,9 @@
from __future__ import print_function
import tensorflow as tf
import numpy as np
import os, cv2, argparse
import os
import cv2
import argparse
from tensorpack import *
from tensorpack.tfutils.symbolic_functions import *
......@@ -19,9 +21,11 @@ Usage:
./load-alexnet.py --load alexnet.npy --input cat.png
"""
class Model(ModelDesc):
def _get_input_vars(self):
return [InputVar(tf.float32, (None, 227, 227, 3), 'input') ]
return [InputVar(tf.float32, (None, 227, 227, 3), 'input')]
def _build_graph(self, inputs):
# img: 227x227x3
......@@ -48,6 +52,7 @@ class Model(ModelDesc):
logits = FullyConnected('fc8', l, out_dim=1000, nl=tf.identity)
prob = tf.nn.softmax(logits, name='prob')
def run_test(path, input):
param_dict = np.load(path, encoding='latin1').item()
predict_func = OfflinePredictor(PredictConfig(
......@@ -59,8 +64,8 @@ def run_test(path, input):
im = cv2.imread(input)
assert im is not None, input
im = cv2.resize(im, (227, 227))[:,:,::-1].reshape(
(1,227,227,3)).astype('float32') - 110
im = cv2.resize(im, (227, 227))[:, :, ::-1].reshape(
(1, 227, 227, 3)).astype('float32') - 110
outputs = predict_func([im])[0]
prob = outputs[0]
ret = prob.argsort()[-10:][::-1]
......
......@@ -7,7 +7,8 @@ from __future__ import print_function
import cv2
import tensorflow as tf
import numpy as np
import os, argparse
import os
import argparse
from tensorpack import *
from tensorpack.tfutils.symbolic_functions import *
......@@ -20,9 +21,11 @@ Usage:
./load-vgg16.py --load vgg16.npy --input cat.png
"""
class Model(ModelDesc):
def _get_input_vars(self):
return [InputVar(tf.float32, (None, 224, 224, 3), 'input') ]
return [InputVar(tf.float32, (None, 224, 224, 3), 'input')]
def _build_graph(self, inputs):
image = inputs[0]
......@@ -58,6 +61,7 @@ class Model(ModelDesc):
.FullyConnected('fc8', out_dim=1000, nl=tf.identity)())
prob = tf.nn.softmax(logits, name='prob')
def run_test(path, input):
param_dict = np.load(path, encoding='latin1').item()
predict_func = OfflinePredictor(PredictConfig(
......@@ -70,7 +74,7 @@ def run_test(path, input):
im = cv2.imread(input)
assert im is not None, input
im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
im = cv2.resize(im, (224, 224)).reshape((1,224,224,3)).astype('float32')
im = cv2.resize(im, (224, 224)).reshape((1, 224, 224, 3)).astype('float32')
im = im - 110
outputs = predict_func([im])[0]
prob = outputs[0]
......
......@@ -5,7 +5,8 @@
import numpy as np
import tensorflow as tf
import os, sys
import os
import sys
import argparse
"""
......@@ -18,12 +19,14 @@ from tensorpack import *
IMAGE_SIZE = 28
class Model(ModelDesc):
def _get_input_vars(self):
"""Define all the input variables (with type, shape, name) that'll be
fed into the graph to produce a cost. """
return [InputVar(tf.float32, (None, IMAGE_SIZE, IMAGE_SIZE), 'input'),
InputVar(tf.int32, (None,), 'label') ]
InputVar(tf.int32, (None,), 'label')]
def _build_graph(self, input_vars):
"""This function should build the model which takes the input variables
......@@ -59,7 +62,8 @@ class Model(ModelDesc):
.FullyConnected('fc1', out_dim=10, nl=tf.identity)())
prob = tf.nn.softmax(logits, name='prob') # a Bx10 with probabilities
cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, label) # a vector of length B with loss of each sample
cost = tf.nn.sparse_softmax_cross_entropy_with_logits(
logits, label) # a vector of length B with loss of each sample
cost = tf.reduce_mean(cost, name='cross_entropy_loss') # the average cross-entropy loss
# compute the "incorrect vector", for the callback ClassificationError to use at validation time
......@@ -83,11 +87,13 @@ class Model(ModelDesc):
summary.add_param_summary([('.*/W', ['histogram'])])
self.cost = tf.add_n([wd_cost, cost], name='cost')
def get_data():
    """Build the batched MNIST dataflows.

    :returns: a ``(train, test)`` pair; the 'train' split is batched by 128
        and the 'test' split by 256 with ``remainder=True``.
    """
    train_flow = BatchData(dataset.Mnist('train'), 128)
    test_flow = BatchData(dataset.Mnist('test'), 256, remainder=True)
    return train_flow, test_flow
def get_config():
# automatically setup the directory train_log/mnist-convnet for logging
logger.auto_set_dir()
......@@ -135,4 +141,3 @@ if __name__ == '__main__':
if args.load:
config.session_init = SaverRestore(args.load)
SimpleTrainer(config).train()
......@@ -20,10 +20,12 @@ Each epoch iterates over the whole training set (4721 iterations).
Speed is about 43 it/s on TitanX.
"""
class Model(ModelDesc):
def _get_input_vars(self):
return [InputVar(tf.float32, [None, 40, 40, 3], 'input'),
InputVar(tf.int32, [None], 'label') ]
InputVar(tf.int32, [None], 'label')]
def _build_graph(self, input_vars):
image, label = input_vars
......@@ -58,6 +60,7 @@ class Model(ModelDesc):
add_param_summary([('.*/W', ['histogram', 'rms'])]) # monitor W
self.cost = tf.add_n([cost, wd_cost], name='cost')
def get_data():
d1 = dataset.SVHNDigit('train')
d2 = dataset.SVHNDigit('extra')
......@@ -67,20 +70,21 @@ def get_data():
augmentors = [
imgaug.Resize((40, 40)),
imgaug.Brightness(30),
imgaug.Contrast((0.5,1.5)),
imgaug.Contrast((0.5, 1.5)),
imgaug.GaussianDeform( # this is slow. only use it when you have lots of cpus
[(0.2, 0.2), (0.2, 0.8), (0.8,0.8), (0.8,0.2)],
(40,40), 0.2, 3),
[(0.2, 0.2), (0.2, 0.8), (0.8, 0.8), (0.8, 0.2)],
(40, 40), 0.2, 3),
]
data_train = AugmentImageComponent(data_train, augmentors)
data_train = BatchData(data_train, 128)
data_train = PrefetchData(data_train, 5, 5)
augmentors = [ imgaug.Resize((40, 40)) ]
augmentors = [imgaug.Resize((40, 40))]
data_test = AugmentImageComponent(data_test, augmentors)
data_test = BatchData(data_test, 128, remainder=True)
return data_train, data_test
def get_config():
logger.auto_set_dir()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment