Commit e69034b5 authored by Yuxin Wu

change get_cost to build_graph

parent c0bf0772
@@ -26,14 +26,11 @@ Learning rate may need a different schedule for different number of GPUs (becaus
 """
 class Model(ModelDesc):
-    def __init__(self):
-        super(Model, self).__init__()
-
     def _get_input_vars(self):
         return [InputVar(tf.float32, [None, INPUT_SHAPE, INPUT_SHAPE, 3], 'input'),
                 InputVar(tf.int32, [None], 'label') ]
-    def _get_cost(self, input_vars, is_training):
+    def _build_graph(self, input_vars, is_training):
         image, label = input_vars
         image = image / 128.0
@@ -121,7 +118,7 @@ class Model(ModelDesc):
         tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, wd_cost)
         add_param_summary([('.*/W', ['histogram'])])   # monitor W
-        return tf.add_n([cost, wd_cost], name='cost')
+        self.cost = tf.add_n([cost, wd_cost], name='cost')
 def get_data(train_or_test):
     isTrain = train_or_test == 'train'
...
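The same mechanical change repeats through every example below: `_get_cost` is renamed to `_build_graph`, and the scalar loss is assigned to `self.cost` instead of being returned. For context, a hedged, self-contained sketch of a migrated model, assuming the `from tensorpack import *` style the examples use; `MyModel` and the layer sizes are illustrative, not part of this commit:

from tensorpack import *
import tensorflow as tf

class MyModel(ModelDesc):
    def _get_input_vars(self):
        return [InputVar(tf.float32, [None, 784], 'input'),
                InputVar(tf.int32, [None], 'label')]

    def _build_graph(self, inputs, is_training):
        # new style: build the graph and store the loss on the model
        image, label = inputs
        logits = FullyConnected('fc', image, out_dim=10, nl=tf.identity)
        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, label)
        self.cost = tf.reduce_mean(cost, name='cost')   # was: return the cost

# callers now split graph construction from cost retrieval:
model = MyModel()
model.build_graph(model.get_input_vars(), True)   # builds the graph
cost_var = model.get_cost()                       # returns model.cost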
@@ -42,7 +42,7 @@ class Model(ModelDesc):
                 InputVar(tf.int32, [None], 'label')
                ]
-    def _get_cost(self, input_vars, is_training):
+    def _build_graph(self, input_vars, is_training):
         image, label = input_vars
         image = image / 128.0 - 1
@@ -120,7 +120,7 @@ class Model(ModelDesc):
         tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, wd_cost)
         add_param_summary([('.*/W', ['histogram'])])   # monitor W
-        return tf.add_n([cost, wd_cost], name='cost')
+        self.cost = tf.add_n([cost, wd_cost], name='cost')
 def get_data(train_or_test):
     isTrain = train_or_test == 'train'
...
 #!/usr/bin/env python
 # -*- coding: UTF-8 -*-
-# File: svhn_resnet.py
+# File: svhn-resnet.py
 # Author: Yuxin Wu <ppwwyyxx@gmail.com>
 import tensorflow as tf
@@ -37,7 +37,7 @@ class Model(ModelDesc):
                 InputVar(tf.int32, [None], 'label')
                ]
-    def _get_cost(self, input_vars, is_training):
+    def _build_graph(self, input_vars, is_training):
         image, label = input_vars
         image = image / 128.0 - 1
@@ -117,7 +117,7 @@ class Model(ModelDesc):
         tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, wd_cost)
         add_param_summary([('.*/W', ['histogram'])])   # monitor W
-        return tf.add_n([cost, wd_cost], name='cost')
+        self.cost = tf.add_n([cost, wd_cost], name='cost')
 def get_data(train_or_test):
     isTrain = train_or_test == 'train'
...
@@ -77,7 +77,7 @@ class Model(ModelDesc):
         return [InputVar(tf.int32, (None, param.seq_len), 'input'),
                 InputVar(tf.int32, (None, param.seq_len), 'nextinput') ]
-    def _get_cost(self, input_vars, is_training):
+    def _build_graph(self, input_vars, is_training):
         input, nextinput = input_vars
         cell = rnn_cell.BasicLSTMCell(num_units=param.rnn_size)
@@ -101,9 +101,8 @@ class Model(ModelDesc):
         xent_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
             logits, symbolic_functions.flatten(nextinput))
-        xent_loss = tf.reduce_mean(xent_loss, name='xent_loss')
+        self.cost = tf.reduce_mean(xent_loss, name='cost')
         summary.add_param_summary([('.*/W', ['histogram'])])   # monitor histogram of all W
-        return tf.add_n([xent_loss], name='cost')
     def get_gradient_processor(self):
         return [MapGradient(lambda grad: tf.clip_by_global_norm(
@@ -147,7 +146,7 @@ def sample(path, start, length):
     model = Model()
     input_vars = model.get_input_vars()
-    model.get_cost(input_vars, False)
+    model.build_graph(input_vars, False)
     sess = tf.Session()
     tfutils.SaverRestore(path).init(sess)
...
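The sample() change in char-rnn shows the inference side of the split: build_graph(..., False) constructs the graph and the checkpoint is restored, but no cost is ever requested. A hedged sketch of what typically follows, fetching outputs by tensor name; the output name 'prob:0' and the variable some_input are illustrative, not from the commit:

# after model.build_graph(input_vars, False) and SaverRestore(path).init(sess):
graph = tf.get_default_graph()
input_var = graph.get_tensor_by_name('input:0')   # named by the InputVar above
prob = graph.get_tensor_by_name('prob:0')         # illustrative output name
outputs = sess.run(prob, feed_dict={input_var: some_input})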
@@ -28,7 +28,7 @@ class Model(ModelDesc):
                 InputVar(tf.int32, [None], 'label')
                ]
-    def _get_cost(self, input_vars, is_training):
+    def _build_graph(self, input_vars, is_training):
         image, label = input_vars
         keep_prob = tf.constant(0.5 if is_training else 1.0)
@@ -73,7 +73,7 @@ class Model(ModelDesc):
         tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, wd_cost)
         add_param_summary([('.*/W', ['histogram'])])   # monitor W
-        return tf.add_n([cost, wd_cost], name='cost')
+        self.cost = tf.add_n([cost, wd_cost], name='cost')
 def get_data(train_or_test):
     isTrain = train_or_test == 'train'
...
 #!/usr/bin/env python2
 # -*- coding: UTF-8 -*-
-# File: load_alexnet.py
+# File: load-alexnet.py
 # Author: Yuxin Wu <ppwwyyxx@gmail.com>
+import cv2  # tf bug
 import tensorflow as tf
 import numpy as np
 import os
@@ -29,7 +30,7 @@ class Model(ModelDesc):
         return [InputVar(tf.float32, (None, 227, 227, 3), 'input'),
                 InputVar(tf.int32, (None,), 'label') ]
-    def _get_cost(self, inputs, is_training):
+    def _build_graph(self, inputs, is_training):
         # img: 227x227x3
         is_training = bool(is_training)
         keep_prob = tf.constant(0.5 if is_training else 1.0)
@@ -59,63 +60,6 @@ class Model(ModelDesc):
         logits = FullyConnected('fc8', l, out_dim=1000, nl=tf.identity)
         prob = tf.nn.softmax(logits, name='output')
-        y = one_hot(label, 1000)
-        cost = tf.nn.softmax_cross_entropy_with_logits(logits, y)
-        cost = tf.reduce_mean(cost, name='cross_entropy_loss')
-        tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, cost)
-        # compute the number of failed samples, for ValidationError to use at test time
-        wrong = tf.not_equal(
-            tf.cast(tf.argmax(prob, 1), tf.int32), label)
-        wrong = tf.cast(wrong, tf.float32)
-        nr_wrong = tf.reduce_sum(wrong, name='wrong')
-        # monitor training error
-        tf.add_to_collection(
-            MOVING_SUMMARY_VARS_KEY, tf.reduce_mean(wrong, name='train_error'))
-        # weight decay on all W of fc layers
-        wd_cost = tf.mul(1e-4,
-                         regularize_cost('fc.*/W', tf.nn.l2_loss),
-                         name='regularize_loss')
-        tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, wd_cost)
-        return tf.add_n([wd_cost, cost], name='cost')
-
-def get_config():
-    basename = os.path.basename(__file__)
-    logger.set_logger_dir(
-        os.path.join('train_log', basename[:basename.rfind('.')]))
-    dataset_train = FakeData([(227,227,3), tuple()], 10)
-    dataset_train = BatchData(dataset_train, 10)
-    step_per_epoch = 1
-    sess_config = get_default_sess_config()
-    sess_config.gpu_options.per_process_gpu_memory_fraction = 0.5
-    lr = tf.train.exponential_decay(
-        learning_rate=1e-8,
-        global_step=get_global_step_var(),
-        decay_steps=dataset_train.size() * 50,
-        decay_rate=0.1, staircase=True, name='learning_rate')
-    tf.scalar_summary('learning_rate', lr)
-    param_dict = np.load('alexnet.npy').item()
-    return TrainConfig(
-        dataset=dataset_train,
-        optimizer=tf.train.AdamOptimizer(lr),
-        callbacks=Callbacks([
-            StatPrinter(),
-            ModelSaver(),
-            #ValidationError(dataset_test, prefix='test'),
-        ]),
-        session_config=sess_config,
-        model=Model(),
-        step_per_epoch=step_per_epoch,
-        session_init=ParamRestore(param_dict),
-        max_epoch=100,
-    )
 def run_test(path, input):
     param_dict = np.load(path).item()
@@ -152,7 +96,5 @@ if __name__ == '__main__':
     args = parser.parse_args()
     if args.gpu:
         os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
-    #start_train(get_config())
     # run alexnet with given model (in npy format)
     run_test(args.load, args.input)
...
@@ -3,6 +3,7 @@
 # File: load-vgg16.py
 # Author: Yuxin Wu <ppwwyyxx@gmail.com>
+import cv2
 import tensorflow as tf
 import numpy as np
 import os
@@ -31,7 +32,7 @@ class Model(ModelDesc):
         return [InputVar(tf.float32, (None, 224, 224, 3), 'input'),
                 InputVar(tf.int32, (None,), 'label') ]
-    def _get_cost(self, inputs, is_training):
+    def _build_graph(self, inputs, is_training):
         is_training = bool(is_training)
         keep_prob = tf.constant(0.5 if is_training else 1.0)
@@ -74,10 +75,6 @@ class Model(ModelDesc):
         logits = FullyConnected('fc8', l, out_dim=1000, nl=tf.identity)
         prob = tf.nn.softmax(logits, name='output')
-        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, label)
-        cost = tf.reduce_mean(cost, name='cost')
-        return cost
 def run_test(path, input):
     param_dict = np.load(path).item()
...
@@ -27,7 +27,7 @@ class Model(ModelDesc):
                 InputVar(tf.int32, (None,), 'label')
                ]
-    def _get_cost(self, input_vars, is_training):
+    def _build_graph(self, input_vars, is_training):
         is_training = bool(is_training)
         keep_prob = tf.constant(0.5 if is_training else 1.0)
@@ -69,7 +69,7 @@ class Model(ModelDesc):
         tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, wd_cost)
         summary.add_param_summary([('.*/W', ['histogram'])])   # monitor histogram of all W
-        return tf.add_n([wd_cost, cost], name='cost')
+        self.cost = tf.add_n([wd_cost, cost], name='cost')
 def get_config():
     basename = os.path.basename(__file__)
...
@@ -28,7 +28,7 @@ class Model(ModelDesc):
         return [InputVar(tf.float32, [None, 40, 40, 3], 'input'),
                 InputVar(tf.int32, [None], 'label') ]
-    def _get_cost(self, input_vars, is_training):
+    def _build_graph(self, input_vars, is_training):
         image, label = input_vars
         keep_prob = tf.constant(0.5 if is_training else 1.0)
@@ -64,7 +64,7 @@ class Model(ModelDesc):
         tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, wd_cost)
         add_param_summary([('.*/W', ['histogram', 'sparsity'])])   # monitor W
-        return tf.add_n([cost, wd_cost], name='cost')
+        self.cost = tf.add_n([cost, wd_cost], name='cost')
 def get_config():
     #anchors = np.mgrid[0:4,0:4][:,1:,1:].transpose(1,2,0).reshape((-1,2)) / 4.0
...
@@ -20,7 +20,8 @@ def create_test_graph(trainer):
         global_step_var = tf.Variable(
             0, trainable=False, name=GLOBAL_STEP_OP_NAME)
         input_vars = model.get_input_vars()
-        cost = model.get_cost(input_vars, is_training=False)
+        model.build_graph(input_vars, False)
+        cost = model.get_cost()
         yield Gtest
 @contextmanager
...
@@ -39,7 +39,7 @@ class StatHolder(object):
         :param k: name
         :param v: value
         """
-        self.stat_now[k] = v
+        self.stat_now[k] = float(v)
     def set_print_tag(self, print_tag):
         """
@@ -70,6 +70,8 @@ class StatHolder(object):
     def _write_stat(self):
         tmp_filename = self.filename + '.tmp'
         with open(tmp_filename, 'w') as f:
+            import IPython;
+            IPython.embed(config=IPython.terminal.ipapp.load_default_config())
             json.dump(self.stat_history, f)
         os.rename(tmp_filename, self.filename)
...
@@ -7,6 +7,7 @@ from abc import ABCMeta, abstractmethod
 import tensorflow as tf
 from collections import namedtuple
+from ..utils import logger
 from ..tfutils import *
 __all__ = ['ModelDesc', 'InputVar']
@@ -43,9 +44,10 @@ class ModelDesc(object):
     def _get_input_vars(self):
         """:returns: a list of InputVar """
-    def get_cost(self, input_vars, is_training):
+    def build_graph(self, model_inputs, is_training):
         """
-        :param input_vars: a list of input variable in the graph
+        setup the whole graph.
+        :param model_inputs: a list of input variable in the graph
         e.g.: [image_var, label_var] with:
         * image_var: bx28x28
@@ -53,12 +55,33 @@ class ModelDesc(object):
         :param is_training: a boolean
         :returns: the cost to minimize. a scalar variable
         """
-        assert type(is_training) == bool
-        return self._get_cost(input_vars, is_training)
+        self._build_graph(model_inputs, is_training)
-    @abstractmethod
-    def _get_cost(self, input_vars, is_training):
-        pass
+    #@abstractmethod
+    def _build_graph(self, inputs, is_training):
+        if self._old_version():
+            self.model_inputs = inputs
+            self.is_training = is_training
+        else:
+            raise NotImplementedError()
+
+    def _old_version(self):
+        # for backward-compat only.
+        import inspect
+        args = inspect.getargspec(self._get_cost)
+        return len(args.args) == 3
+
+    def get_cost(self):
+        if self._old_version():
+            assert type(self.is_training) == bool
+            logger.warn("!!!using _get_cost to setup the graph is deprecated in favor of _build_graph")
+            logger.warn("See examples for details.")
+            return self._get_cost(self.model_inputs, self.is_training)
+        else:
+            return self._get_cost()
+
+    def _get_cost(self, *args):
+        return self.cost
     def get_gradient_processor(self):
         """ Return a list of GradientProcessor. They will be executed in order"""
...
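The shim above keeps old models working: build_graph() falls back to recording the inputs when it detects a three-argument _get_cost, and get_cost() then invokes it with a deprecation warning. A hedged sketch of an old-style model passing through that path; OldModel and its layer sizes are illustrative, not from the commit:

class OldModel(ModelDesc):
    def _get_input_vars(self):
        return [InputVar(tf.float32, [None, 784], 'input'),
                InputVar(tf.int32, [None], 'label')]

    # still the deprecated 3-argument signature
    def _get_cost(self, input_vars, is_training):
        image, label = input_vars
        logits = FullyConnected('fc', image, out_dim=10, nl=tf.identity)
        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, label)
        return tf.reduce_mean(cost, name='cost')

model = OldModel()
# _old_version() sees len(getargspec(_get_cost).args) == 3, so build_graph
# only records model_inputs / is_training ...
model.build_graph(model.get_input_vars(), True)
# ... and get_cost() prints the warning, then delegates to the old _get_cost.
cost_var = model.get_cost()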
@@ -76,7 +76,7 @@ def get_predict_func(config):
     # input/output variables
     input_vars = config.model.get_input_vars()
-    cost_var = config.model.get_cost(input_vars, is_training=False)
+    config.model._build_graph(input_vars, False)
     if config.input_data_mapping is None:
         input_map = input_vars
     else:
...
@@ -29,7 +29,8 @@ class SimpleTrainer(Trainer):
         model = self.model
         input_vars = model.get_input_vars()
         self.input_vars = input_vars
-        cost_var = model.get_cost(input_vars, is_training=True)
+        model.build_graph(input_vars, True)
+        cost_var = model.get_cost()
         avg_maintain_op = summary_moving_average()
         grads = self.config.optimizer.compute_gradients(cost_var)
@@ -133,7 +134,8 @@ class QueueInputTrainer(Trainer):
     def _single_tower_grad(self):
         """ Get grad and cost for single-tower case"""
         model_inputs = self._get_model_inputs()
-        cost_var = self.model.get_cost(model_inputs, is_training=True)
+        self.model.build_graph(model_inputs, True)
+        cost_var = self.model.get_cost()
         grads = self.config.optimizer.compute_gradients(cost_var)
         tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, cost_var)
         return grads
@@ -151,7 +153,8 @@ class QueueInputTrainer(Trainer):
                  tf.name_scope('tower{}'.format(i)) as scope:
                 logger.info("Building graph for tower {}...".format(i))
                 model_inputs = self._get_model_inputs()    # each tower dequeue from input queue
-                cost_var = self.model.get_cost(model_inputs, is_training=True)   # build tower
+                self.model.build_graph(model_inputs, True)
+                cost_var = self.model.get_cost()   # build tower
                 # gate_gradienst=0 seems to be faster?
                 grad_list.append(
...
@@ -25,7 +25,7 @@ class AtariDriver(object):
         self.ale = ALEInterface()
         self.rng = get_rng(self)
-        self.ale.setInt("random_seed", self.rng.randint(214))
+        self.ale.setInt("random_seed", self.rng.randint(999))
         self.ale.setInt("frame_skip", frame_skip)
         self.ale.loadROM(rom_file)
         self.width, self.height = self.ale.getScreenDims()
...
@@ -15,6 +15,9 @@ class StatCounter(object):
     def reset(self):
         self.values = []
+
+    def count(self):
+        return len(self.values)
     @property
     def average(self):
         return np.mean(self.values)
@@ -23,6 +26,10 @@ class StatCounter(object):
     def sum(self):
         return np.sum(self.values)
+
+    @property
+    def max(self):
+        return max(self.values)
 class Accuracy(object):
     def __init__(self):
         self.reset()
...
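A quick usage sketch of the extended StatCounter; note that count is a plain method while max joins average and sum as properties. This assumes the counter's existing feed() accumulator, and the values are made up:

c = StatCounter()
for v in [1, 5, 3]:
    c.feed(v)              # assumed pre-existing accumulator method
print(c.count())           # 3   -- new method added in this commit
print(c.max)               # 5   -- new property added in this commit
print(c.average, c.sum)    # 3.0 9 -- pre-existing properties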