Commit d1041a77 authored by Yuxin Wu

change interface for optimizer. (#137)

parent e0b1a5ce
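In short, optimizer construction moves from TrainConfig(optimizer=...) into the model: each ModelDesc now implements _get_optimizer(), and gradient processors are wrapped into the optimizer with tensorpack.tfutils.optimizer.apply_grad_processors instead of being returned from get_gradient_processor(). A minimal sketch of the new pattern, assuming the tensorpack modules that appear in the diff below; the model class name, learning rate, and the particular gradient processors here are illustrative, not taken from any single file in this commit:

```python
import tensorflow as tf

from tensorpack import ModelDesc
from tensorpack.tfutils import optimizer, gradproc
from tensorpack.tfutils import symbolic_functions as symbf


class MyModel(ModelDesc):  # hypothetical model, for illustration only
    # _get_inputs() / _build_graph() stay exactly as before

    def _get_optimizer(self):
        # the learning-rate variable and the optimizer now live in the model
        lr = symbf.get_scalar_var('learning_rate', 1e-3, summary=True)
        opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)
        # gradient processors are folded into the optimizer here,
        # instead of being returned from get_gradient_processor()
        return optimizer.apply_grad_processors(
            opt, [gradproc.GlobalNormClip(5), gradproc.SummaryGradient()])
```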
@@ -24,6 +24,7 @@ from tensorpack.utils.concurrency import *
 from tensorpack.utils.serialize import *
 from tensorpack.utils.stats import *
 from tensorpack.tfutils import symbolic_functions as symbf
+from tensorpack.tfutils.gradproc import MapGradient, SummaryGradient
 from tensorpack.RL import *
 from simulator import *
@@ -132,9 +133,14 @@ class Model(ModelDesc):
         summary.add_moving_summary(policy_loss, xentropy_loss,
                                    value_loss, pred_reward, advantage, self.cost)
-    def get_gradient_processor(self):
-        return [gradproc.MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.1)),
-                gradproc.SummaryGradient()]
+    def _get_optimizer(self):
+        lr = symbf.get_scalar_var('learning_rate', 0.001, summary=True)
+        opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)
+        gradprocs = [MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.1)),
+                     SummaryGradient()]
+        opt = optimizer.apply_grad_processors(opt, gradprocs)
+        return opt
 class MySimulatorMaster(SimulatorMaster, Callback):
@@ -202,11 +208,8 @@ def get_config():
     master = MySimulatorMaster(namec2s, names2c, M)
     dataflow = BatchData(DataFromQueue(master.queue), BATCH_SIZE)
-    lr = symbf.get_scalar_var('learning_rate', 0.001, summary=True)
     return TrainConfig(
         dataflow=dataflow,
-        optimizer=tf.train.AdamOptimizer(lr, epsilon=1e-3),
         callbacks=[
             ModelSaver(),
             ScheduledHyperParamSetter('learning_rate', [(80, 0.0003), (120, 0.0001)]),
@@ -269,8 +272,7 @@ if __name__ == '__main__':
         logger.warn("Without GPU this model will never learn! CPU is only useful for debug.")
         nr_gpu = 0
         PREDICTOR_THREAD = 1
-        predict_tower = [0]
-        train_tower = [0]
+        predict_tower, train_tower = [0], [0]
         trainer = QueueInputTrainer
     config = get_config()
     if args.load:
......
@@ -14,7 +14,7 @@ import six
 from six.moves import map, range
 from tensorpack import *
-from tensorpack.tfutils.gradproc import *
+from tensorpack.tfutils.gradproc import SummaryGradient, GlobalNormClip
 from tensorpack.utils.globvars import globalns as param
 import tensorpack.tfutils.symbolic_functions as symbf
 from timitdata import TIMITBatch
@@ -73,8 +73,11 @@ class Model(ModelDesc):
         err = tf.reduce_mean(err, name='error')
         summary.add_moving_summary(err, self.cost)
-    def get_gradient_processor(self):
-        return [gradproc.GlobalNormClip(5), gradproc.SummaryGradient()]
+    def _get_optimizer(self):
+        lr = symbolic_functions.get_scalar_var('learning_rate', 5e-3, summary=True)
+        opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)
+        return optimizer.apply_grad_processors(
+            opt, [GlobalNormClip(5), SummaryGradient()])
 def get_data(path, isTrain, stat_file):
@@ -88,13 +91,8 @@ def get_data(path, isTrain, stat_file):
 def get_config(ds_train, ds_test):
-    steps_per_epoch = ds_train.size()
-    lr = symbolic_functions.get_scalar_var('learning_rate', 5e-3, summary=True)
     return TrainConfig(
         dataflow=ds_train,
-        optimizer=tf.train.AdamOptimizer(lr, epsilon=1e-3),
         callbacks=[
             ModelSaver(),
             StatMonitorParamSetter('learning_rate', 'error',
@@ -105,7 +103,6 @@ def get_config(ds_train, ds_test):
                                    every_k_epochs=2),
         ],
         model=Model(),
-        steps_per_epoch=steps_per_epoch,
         max_epoch=70,
     )
......
@@ -14,7 +14,7 @@ import six
 from six.moves import map, range
 from tensorpack import *
-from tensorpack.tfutils.gradproc import *
+from tensorpack.tfutils.gradproc import GlobalNormClip
 from tensorpack.utils.lut import LookUpTable
 from tensorpack.utils.globvars import globalns as param
 rnn = tf.contrib.rnn
@@ -42,7 +42,7 @@ class CharRNNData(RNGDataFlow):
             data = f.read()
         if six.PY2:
             data = bytearray(data)
-        data = [chr(c) for c in data if c < 128]  # TODO this is Py2 only
+        data = [chr(c) for c in data if c < 128]
         counter = Counter(data)
         char_cnt = sorted(counter.items(), key=operator.itemgetter(1), reverse=True)
         self.chars = [x[0] for x in char_cnt]
@@ -105,8 +105,10 @@ class Model(ModelDesc):
         summary.add_param_summary(('.*/W', ['histogram']))   # monitor histogram of all W
         summary.add_moving_summary(self.cost)
-    def get_gradient_processor(self):
-        return [gradproc.GlobalNormClip(5)]
+    def _get_optimizer(self):
+        lr = symbolic_functions.get_scalar_var('learning_rate', 2e-3, summary=True)
+        opt = tf.train.AdamOptimizer(lr)
+        return optimizer.apply_grad_processors(opt, [GlobalNormClip(5)])
 def get_config():
@@ -116,11 +118,8 @@ def get_config():
     ds = BatchData(ds, param.batch_size)
     steps_per_epoch = ds.size()
-    lr = symbolic_functions.get_scalar_var('learning_rate', 2e-3, summary=True)
     return TrainConfig(
         dataflow=ds,
-        optimizer=tf.train.AdamOptimizer(lr),
         callbacks=[
             ModelSaver(),
             ScheduledHyperParamSetter('learning_rate', [(25, 2e-4)])
......
@@ -149,9 +149,11 @@ class Model(ModelDesc):
             ops.append(v.assign(tf.get_default_graph().get_tensor_by_name(new_name + ':0')))
         return tf.group(*ops, name='update_target_network')
-    def get_gradient_processor(self):
-        return [gradproc.GlobalNormalClip(10),
-                gradproc.SummaryGradient()]
+    def _get_optimizer(self):
+        lr = symbf.get_scalar_var('learning_rate', 1e-3, summary=True)
+        opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)
+        return optimizer.apply_grad_processors(
+            opt, [gradproc.GlobalNormalClip(10), gradproc.SummaryGradient()])
 def get_config():
@@ -171,11 +173,8 @@ def get_config():
         reward_clip=(-1, 1),
         history_len=FRAME_HISTORY)
-    lr = symbf.get_scalar_var('learning_rate', 1e-3, summary=True)
     return TrainConfig(
         dataflow=dataset_train,
-        optimizer=tf.train.AdamOptimizer(lr, epsilon=1e-3),
         callbacks=[
             ModelSaver(),
             ScheduledHyperParamSetter('learning_rate',
@@ -186,7 +185,7 @@ def get_config():
             # HumanHyperParamSetter('learning_rate', 'hyper.txt'),
             # HumanHyperParamSetter(ObjAttrParam(dataset_train, 'exploration'), 'hyper.txt'),
         ],
-        # save memory for multiprocess evaluator
+        # save memory for multi-thread evaluator
         session_config=get_default_sess_config(0.6),
         model=M,
         steps_per_epoch=STEP_PER_EPOCH,
......
@@ -41,7 +41,6 @@ def play_model(cfg):
 def eval_with_funcs(predict_funcs, nr_eval):
     class Worker(StoppableThread):
         def __init__(self, func, queue):
             super(Worker, self).__init__()
             self._func = func
......
@@ -7,12 +7,12 @@ from tensorpack import ProxyDataFlow, get_rng
 class DisturbLabel(ProxyDataFlow):
     def __init__(self, ds, prob):
         super(DisturbLabel, self).__init__(ds)
         self.prob = prob
     def reset_state(self):
+        super(DisturbLabel, self).reset_state()
         self.rng = get_rng(self)
     def get_data(self):
......
@@ -161,6 +161,10 @@ class Model(ModelDesc):
         self.cost = tf.add_n([cost, wd_cost], name='cost')
         add_moving_summary(cost, wd_cost, self.cost)
+    def _get_optimizer(self):
+        lr = get_scalar_var('learning_rate', 1e-4, summary=True)
+        return tf.train.AdamOptimizer(lr, epsilon=1e-5)
 def get_data(dataset_name):
     isTrain = dataset_name == 'train'
@@ -225,16 +229,11 @@ def get_data(dataset_name):
 def get_config():
     logger.auto_set_dir()
-    # prepare dataset
     data_train = get_data('train')
     data_test = get_data('val')
-    lr = get_scalar_var('learning_rate', 1e-4, summary=True)
     return TrainConfig(
         dataflow=data_train,
-        optimizer=tf.train.AdamOptimizer(lr, epsilon=1e-5),
         callbacks=[
             ModelSaver(),
             # HumanHyperParamSetter('learning_rate'),
......
@@ -125,6 +125,15 @@ class Model(ModelDesc):
         self.cost = tf.add_n([cost, wd_cost], name='cost')
         add_moving_summary(cost, wd_cost, self.cost)
+    def _get_optimizer(self):
+        lr = tf.train.exponential_decay(
+            learning_rate=1e-3,
+            global_step=get_global_step_var(),
+            decay_steps=4721 * 100,
+            decay_rate=0.5, staircase=True, name='learning_rate')
+        tf.summary.scalar('lr', lr)
+        return tf.train.AdamOptimizer(lr, epsilon=1e-5)
 def get_config():
     logger.auto_set_dir()
@@ -146,29 +155,19 @@ def get_config():
     data_train = AugmentImageComponent(data_train, augmentors)
     data_train = BatchData(data_train, 128)
     data_train = PrefetchDataZMQ(data_train, 5)
-    steps_per_epoch = data_train.size()
     augmentors = [imgaug.Resize((40, 40))]
     data_test = AugmentImageComponent(data_test, augmentors)
     data_test = BatchData(data_test, 128, remainder=True)
-    lr = tf.train.exponential_decay(
-        learning_rate=1e-3,
-        global_step=get_global_step_var(),
-        decay_steps=data_train.size() * 100,
-        decay_rate=0.5, staircase=True, name='learning_rate')
-    tf.summary.scalar('lr', lr)
     return TrainConfig(
         dataflow=data_train,
-        optimizer=tf.train.AdamOptimizer(lr, epsilon=1e-5),
         callbacks=[
             ModelSaver(),
             InferenceRunner(data_test,
                             [ScalarStats('cost'), ClassificationError()])
         ],
         model=Model(),
-        steps_per_epoch=steps_per_epoch,
         max_epoch=200,
     )
......
@@ -90,20 +90,21 @@ class Model(GANModelDesc):
         self.build_losses(vecpos, vecneg)
         self.collect_variables()
+    def _get_optimizer(self):
+        lr = symbf.get_scalar_var('learning_rate', 2e-4, summary=True)
+        return tf.train.AdamOptimizer(lr, beta1=0.5, epsilon=1e-3)
 def get_data():
     ds = ConcatData([dataset.Mnist('train'), dataset.Mnist('test')])
-    ds = BatchData(ds, BATCH)
-    return ds
+    return BatchData(ds, BATCH)
 def get_config():
     logger.auto_set_dir()
     dataset = get_data()
-    lr = symbf.get_scalar_var('learning_rate', 2e-4, summary=True)
     return TrainConfig(
         dataflow=dataset,
-        optimizer=tf.train.AdamOptimizer(lr, beta1=0.5, epsilon=1e-3),
         callbacks=[ModelSaver()],
         session_config=get_default_sess_config(0.5),
         model=Model(),
......
@@ -88,6 +88,10 @@ class Model(GANModelDesc):
         self.build_losses(vecpos, vecneg)
         self.collect_variables()
+    def _get_optimizer(self):
+        lr = symbolic_functions.get_scalar_var('learning_rate', 2e-4, summary=True)
+        return tf.train.AdamOptimizer(lr, beta1=0.5, epsilon=1e-3)
 def get_data():
     global args
@@ -104,10 +108,8 @@ def get_data():
 def get_config():
     logger.auto_set_dir()
     dataset = get_data()
-    lr = symbolic_functions.get_scalar_var('learning_rate', 2e-4, summary=True)
     return TrainConfig(
         dataflow=dataset,
-        optimizer=tf.train.AdamOptimizer(lr, beta1=0.5, epsilon=1e-3),
         callbacks=[ModelSaver()],
         session_config=get_default_sess_config(0.5),
         model=Model(),
......
@@ -65,12 +65,6 @@ class GANModelDesc(ModelDesc):
         add_moving_summary(self.g_loss, self.d_loss, self.d_accuracy, self.g_accuracy)
-    def get_gradient_processor_g(self):
-        return [CheckGradient()]
-    def get_gradient_processor_d(self):
-        return [CheckGradient()]
 class GANTrainer(FeedfreeTrainerBase):
     def __init__(self, config):
@@ -86,16 +80,12 @@ class GANTrainer(FeedfreeTrainerBase):
             # optimize G
             grads = self.config.optimizer.compute_gradients(
                 self.model.g_loss, var_list=self.model.g_vars)
-            grads = apply_grad_processors(
-                grads, self.model.get_gradient_processor_g())
            self.g_min = self.config.optimizer.apply_gradients(grads, name='g_op')
             # optimize D
             with tf.control_dependencies([self.g_min]):
                 grads = self.config.optimizer.compute_gradients(
                     self.model.d_loss, var_list=self.model.d_vars)
-                grads = apply_grad_processors(
-                    grads, self.model.get_gradient_processor_d())
                 self.d_min = self.config.optimizer.apply_gradients(grads, name='d_op')
                 self.train_op = self.d_min
......
@@ -130,6 +130,10 @@ class Model(GANModelDesc):
         self.collect_variables()
+    def _get_optimizer(self):
+        lr = symbolic_functions.get_scalar_var('learning_rate', 2e-4, summary=True)
+        return tf.train.AdamOptimizer(lr, beta1=0.5, epsilon=1e-3)
 def split_input(img):
     """
@@ -167,10 +171,8 @@ def get_data():
 def get_config():
     logger.auto_set_dir()
     dataset = get_data()
-    lr = symbolic_functions.get_scalar_var('learning_rate', 2e-4, summary=True)
     return TrainConfig(
         dataflow=dataset,
-        optimizer=tf.train.AdamOptimizer(lr, beta1=0.5, epsilon=1e-3),
         callbacks=[
             PeriodicTrigger(ModelSaver(), every_k_epochs=3),
             ScheduledHyperParamSetter('learning_rate', [(200, 1e-4)])
......
@@ -146,9 +146,12 @@ class Model(GANModelDesc):
         # distinguish between variables of generator and discriminator updates
         self.collect_variables()
-    def get_gradient_processor_g(self):
+    def _get_optimizer(self):
+        lr = symbf.get_scalar_var('learning_rate', 2e-4, summary=True)
+        opt = tf.train.AdamOptimizer(lr, beta1=0.5, epsilon=1e-6)
         # generator learns 5 times faster
-        return [gradproc.ScaleGradient(('.*', 5), log=False)]
+        return optimizer.apply_grad_processors(
+            opt, [gradproc.ScaleGradient(('.*', 5), log=False)])
 def get_data():
@@ -159,11 +162,8 @@ def get_data():
 def get_config():
     logger.auto_set_dir()
-    dataset = get_data()
-    lr = symbf.get_scalar_var('learning_rate', 2e-4, summary=True)
     return TrainConfig(
-        dataflow=dataset,
-        optimizer=tf.train.AdamOptimizer(lr, beta1=0.5, epsilon=1e-6),
+        dataflow=get_data(),
         callbacks=[ModelSaver()],
         session_config=get_default_sess_config(0.5),
         model=Model(),
......
@@ -92,9 +92,12 @@ class Model(ModelDesc):
         self.cost = tf.add_n(costs, name='cost')
         add_moving_summary(costs + [wrong, self.cost])
-    def get_gradient_processor(self):
-        return [gradproc.ScaleGradient([
-            ('convfcweight.*', 0.1), ('conv5_.*', 5)])]
+    def _get_optimizer(self):
+        lr = get_scalar_var('learning_rate', 3e-5, summary=True)
+        opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)
+        return optimizer.apply_grad_processors(
+            opt, [gradproc.ScaleGradient(
+                [('convfcweight.*', 0.1), ('conv5_.*', 5)])])
 def get_data(name):
@@ -102,7 +105,6 @@ def get_data(name):
     ds = dataset.BSDS500(name, shuffle=True)
     class CropMultiple16(imgaug.ImageAugmentor):
         def _get_augment_params(self, img):
             newh = img.shape[0] // 16 * 16
             neww = img.shape[1] // 16 * 16
@@ -132,7 +134,7 @@ def get_data(name):
         shape_aug = [imgaug.CenterCrop(IMAGE_SHAPE)]
     ds = AugmentImageComponents(ds, shape_aug, (0, 1))
-    def f(m):
+    def f(m):  # thresholding
         m[m >= 0.50] = 1
         m[m < 0.50] = 0
         return m
@@ -169,10 +171,8 @@ def get_config():
     steps_per_epoch = dataset_train.size() * 40
     dataset_val = get_data('val')
-    lr = get_scalar_var('learning_rate', 3e-5, summary=True)
     return TrainConfig(
         dataflow=dataset_train,
-        optimizer=tf.train.AdamOptimizer(lr, epsilon=1e-3),
         callbacks=[
             ModelSaver(),
             ScheduledHyperParamSetter('learning_rate', [(30, 6e-6), (45, 1e-6), (60, 8e-7)]),
......
@@ -120,6 +120,10 @@ class Model(ModelDesc):
         self.cost = tf.add_n([cost, wd_cost], name='cost')
         add_moving_summary(wd_cost, self.cost)
+    def _get_optimizer(self):
+        lr = get_scalar_var('learning_rate', 0.045, summary=True)
+        return tf.train.MomentumOptimizer(lr, 0.9)
 def get_data(train_or_test):
     isTrain = train_or_test == 'train'
@@ -157,10 +161,8 @@ def get_config():
     steps_per_epoch = 5000
     dataset_val = get_data('val')
-    lr = get_scalar_var('learning_rate', 0.045, summary=True)
     return TrainConfig(
         dataflow=dataset_train,
-        optimizer=tf.train.MomentumOptimizer(lr, 0.9),
         callbacks=[
             ModelSaver(),
             InferenceRunner(dataset_val, [
......
@@ -8,13 +8,12 @@ import argparse
 import numpy as np
 import os
 import tensorflow as tf
-import multiprocessing
 from tensorpack import *
 from tensorpack.tfutils.symbolic_functions import *
 from tensorpack.tfutils.summary import *
 import multiprocessing
 """
 InceptionV3 on ILSVRC12.
 See "Rethinking the Inception Architecture for Computer Vision", arxiv:1512.00567
@@ -195,6 +194,10 @@ class Model(ModelDesc):
         self.cost = tf.add_n([0.4 * loss1, loss2, wd_cost], name='cost')
         add_moving_summary(loss1, loss2, wd_cost, self.cost)
+    def _get_optimizer(self):
+        lr = get_scalar_var('learning_rate', 0.045, summary=True)
+        return tf.train.AdamOptimizer(lr, epsilon=1e-3)
 def get_data(train_or_test):
     isTrain = train_or_test == 'train'
@@ -261,10 +264,8 @@ def get_config():
     dataset_train = get_data('train')
     dataset_val = get_data('val')
-    lr = get_scalar_var('learning_rate', 0.045, summary=True)
     return TrainConfig(
         dataflow=dataset_train,
-        optimizer=tf.train.AdamOptimizer(lr, epsilon=1e-3),
         callbacks=[
             ModelSaver(),
             InferenceRunner(dataset_val, [
......
@@ -100,8 +100,11 @@ class Model(ModelDesc):
             s[1].c.assign(z),
             s[1].h.assign(z))
-    def get_gradient_processor(self):
-        return [gradproc.GlobalNormClip(5)]
+    def _get_optimizer(self):
+        lr = symbolic_functions.get_scalar_var('learning_rate', 1, summary=True)
+        opt = tf.train.GradientDescentOptimizer(lr)
+        return optimizer.apply_grad_processors(
+            opt, [gradproc.GlobalNormClip(5)])
 def get_config():
@@ -120,12 +123,9 @@ def get_config():
         (data3[1].shape[0] // BATCH - 1) // SEQ_LEN)
     M = Model()
-    lr = symbolic_functions.get_scalar_var('learning_rate', 1, summary=True)
     return TrainConfig(
         data=train_data,
         model=M,
-        optimizer=tf.train.GradientDescentOptimizer(lr),
         callbacks=[
             ModelSaver(),
             HyperParamSetterWithFunc(
......
@@ -12,8 +12,9 @@ Models can be [downloaded here](https://goo.gl/6XjK9V).
 | ResNet 50 | 7.13% | 24.12% |
 | ResNet 101 | 6.54% | 22.89% |
+To train, just run:
 ```bash
-./imagenet-resnet.py --data /path/to/ILSVRC --gpu 0,1,2,3 -d 18
+./imagenet-resnet.py --data /path/to/original/ILSVRC --gpu 0,1,2,3 -d 18
 ```
 ![imagenet](imagenet-resnet.png)
......
@@ -109,6 +109,11 @@ class Model(ModelDesc):
         add_param_summary(('.*/W', ['histogram']))   # monitor W
         self.cost = tf.add_n([cost, wd_cost], name='cost')
+    def _get_optimizer(self):
+        lr = get_scalar_var('learning_rate', 0.01, summary=True)
+        opt = tf.train.MomentumOptimizer(lr, 0.9)
+        return opt
 def get_data(train_or_test):
     isTrain = train_or_test == 'train'
@@ -140,10 +145,8 @@ def get_config():
     steps_per_epoch = dataset_train.size()
     dataset_test = get_data('test')
-    lr = get_scalar_var('learning_rate', 0.01, summary=True)
     return TrainConfig(
         dataflow=dataset_train,
-        optimizer=tf.train.MomentumOptimizer(lr, 0.9),
         callbacks=[
             ModelSaver(),
             InferenceRunner(dataset_test,
......
@@ -114,6 +114,10 @@ class Model(ModelDesc):
         add_moving_summary(loss, wd_cost)
         self.cost = tf.add_n([loss, wd_cost], name='cost')
+    def _get_optimizer(self):
+        lr = get_scalar_var('learning_rate', 0.1, summary=True)
+        return tf.train.MomentumOptimizer(lr, 0.9, use_nesterov=True)
 def get_data(train_or_test):
     isTrain = train_or_test == 'train'
@@ -176,14 +180,11 @@ def get_data(train_or_test):
 def get_config():
-    # prepare dataset
     dataset_train = get_data('train')
     dataset_val = get_data('val')
-    lr = get_scalar_var('learning_rate', 0.1, summary=True)
     return TrainConfig(
         dataflow=dataset_train,
-        optimizer=tf.train.MomentumOptimizer(lr, 0.9, use_nesterov=True),
         callbacks=[
             ModelSaver(),
             InferenceRunner(dataset_val, [
......
@@ -66,10 +66,8 @@ def get_config():
     steps_per_epoch = dataset_train.size()
     dataset_test = get_data('test')
-    lr = get_scalar_var('learning_rate', 0.01, summary=True)
     return TrainConfig(
         dataflow=dataset_train,
-        optimizer=tf.train.MomentumOptimizer(lr, 0.9),
         callbacks=[
             ModelSaver(),
             InferenceRunner(dataset_test,
......
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 # File: embedding_data.py
-# Author: Yuxin Wu <ppwwyyxxc@gmail.com>
+# Author: tensorpack contributors
 import numpy as np
 from tensorpack.dataflow import dataset, BatchData
......
@@ -53,6 +53,10 @@ class EmbeddingModel(ModelDesc):
         return embeddings
+    def _get_optimizer(self):
+        lr = symbf.get_scalar_var('learning_rate', 1e-4, summary=True)
+        return tf.train.GradientDescentOptimizer(lr)
 class SiameseModel(EmbeddingModel):
     @staticmethod
@@ -136,8 +140,6 @@ def get_config(model, algorithm_name):
     dataset = model.get_data()
     steps_per_epoch = dataset.size()
-    lr = symbf.get_scalar_var('learning_rate', 1e-4, summary=True)
     extra_display = ["cost"]
     if not algorithm_name == "cosine":
         extra_display = extra_display + ["loss/pos-dist", "loss/neg-dist"]
@@ -145,7 +147,6 @@ def get_config(model, algorithm_name):
     return TrainConfig(
         dataflow=dataset,
         model=model(),
-        optimizer=tf.train.GradientDescentOptimizer(lr),
         callbacks=[
             ModelSaver(),
             ScheduledHyperParamSetter('learning_rate', [(10, 1e-5), (20, 1e-6)])
......
@@ -85,9 +85,13 @@ class Model(ModelDesc):
         summary.add_moving_summary(cost, wd_cost)
         self.cost = tf.add_n([wd_cost, cost], name='cost')
-    def get_gradient_processor(self):
-        return [gradproc.ScaleGradient(('STN.*', 0.1)),
-                gradproc.SummaryGradient()]
+    def _get_optimizer(self):
+        lr = symbf.get_scalar_var('learning_rate', 5e-4, summary=True)
+        opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)
+        return optimizer.apply_grad_processors(
+            opt, [
+                gradproc.ScaleGradient(('STN.*', 0.1)),
+                gradproc.SummaryGradient()])
 def get_data(isTrain):
@@ -149,11 +153,8 @@ def get_config():
     dataset_train, dataset_test = get_data(True), get_data(False)
     steps_per_epoch = dataset_train.size() * 5
-    lr = symbf.get_scalar_var('learning_rate', 5e-4, summary=True)
     return TrainConfig(
         dataflow=dataset_train,
-        optimizer=tf.train.AdamOptimizer(lr, epsilon=1e-3),
         callbacks=[
             ModelSaver(),
             InferenceRunner(dataset_test,
......
@@ -71,6 +71,10 @@ class Model(ModelDesc):
         add_param_summary(('.*/W', ['histogram']))   # monitor W
         self.cost = tf.add_n([cost, wd_cost], name='cost')
+    def _get_optimizer(self):
+        lr = symbf.get_scalar_var('learning_rate', 1e-2, summary=True)
+        return tf.train.AdamOptimizer(lr, epsilon=1e-3)
 def get_data(train_or_test, cifar_classnum):
     isTrain = train_or_test == 'train'
@@ -111,16 +115,12 @@ def get_config(cifar_classnum):
     sess_config = get_default_sess_config(0.5)
-    lr = symbf.get_scalar_var('learning_rate', 1e-2, summary=True)
     def lr_func(lr):
         if lr < 3e-5:
             raise StopTraining()
         return lr * 0.31
     return TrainConfig(
         dataflow=dataset_train,
-        optimizer=tf.train.AdamOptimizer(lr, epsilon=1e-3),
         callbacks=[
             ModelSaver(),
             InferenceRunner(dataset_test, ClassificationError()),
......
@@ -23,7 +23,6 @@ Usage:
 class Model(ModelDesc):
     def _get_inputs(self):
         return [InputDesc(tf.float32, (None, 227, 227, 3), 'input')]
......
@@ -112,6 +112,17 @@ class Model(ModelDesc):
             ('.*/weights', ['histogram', 'rms'])    # to also work with slim
         )
+    def _get_optimizer(self):
+        lr = tf.train.exponential_decay(
+            learning_rate=1e-3,
+            global_step=get_global_step_var(),
+            decay_steps=468 * 10,
+            decay_rate=0.3, staircase=True, name='learning_rate')
+        # This will also put the summary in tensorboard, stat.json and print in terminal
+        # but this time without moving average
+        tf.summary.scalar('lr', lr)
+        return tf.train.AdamOptimizer(lr)
 def get_data():
     train = BatchData(dataset.Mnist('train'), 128)
@@ -127,20 +138,9 @@ def get_config():
     # how many iterations you want in each epoch
     steps_per_epoch = dataset_train.size()
-    lr = tf.train.exponential_decay(
-        learning_rate=1e-3,
-        global_step=get_global_step_var(),
-        decay_steps=dataset_train.size() * 10,
-        decay_rate=0.3, staircase=True, name='learning_rate')
-    # This will also put the summary in tensorboard,stat.json and print in
-    # terminal, but without the moving average
-    tf.summary.scalar('lr', lr)
     # get the config which contains everything necessary in a training
     return TrainConfig(
         dataflow=dataset_train,  # the DataFlow instance for training
-        optimizer=tf.train.AdamOptimizer(lr),
         callbacks=[
             ModelSaver(),   # save the model after every epoch
             InferenceRunner(    # run inference(for validation) after every epoch
......
@@ -59,6 +59,15 @@ class Model(ModelDesc):
         add_param_summary(('.*/W', ['histogram', 'rms']))   # monitor W
         self.cost = tf.add_n([cost, wd_cost], name='cost')
+    def _get_optimizer(self):
+        lr = tf.train.exponential_decay(
+            learning_rate=1e-3,
+            global_step=get_global_step_var(),
+            decay_steps=4721 * 60,
+            decay_rate=0.2, staircase=True, name='learning_rate')
+        tf.summary.scalar('lr', lr)
+        return tf.train.AdamOptimizer(lr)
 def get_data():
     d1 = dataset.SVHNDigit('train')
@@ -86,20 +95,11 @@ def get_data():
 def get_config():
     logger.auto_set_dir()
     data_train, data_test = get_data()
     steps_per_epoch = data_train.size()
-    lr = tf.train.exponential_decay(
-        learning_rate=1e-3,
-        global_step=get_global_step_var(),
-        decay_steps=data_train.size() * 60,
-        decay_rate=0.2, staircase=True, name='learning_rate')
-    tf.summary.scalar('lr', lr)
     return TrainConfig(
         dataflow=data_train,
-        optimizer=tf.train.AdamOptimizer(lr),
         callbacks=[
             ModelSaver(),
             InferenceRunner(data_test,
......
@@ -12,8 +12,6 @@ from ..utils import logger, INPUTS_KEY, deprecated, log_deprecated
 from ..utils.argtools import memoized
 from ..tfutils.modelutils import apply_slim_collections
-from ..tfutils.gradproc import CheckGradient
 __all__ = ['InputDesc', 'InputVar', 'ModelDesc', 'ModelFromMetaGraph']
 # TODO "variable" is not the right name to use for input here.
@@ -156,12 +154,7 @@ class ModelDesc(object):
         raise NotImplementedError()
     def get_gradient_processor(self):
-        """ (Deprecated) Return a list of :class:`tensorpack.tfutils.GradientProcessor`.
-        They will be executed by the trainer in the given order.
-        """
-        return [  # SummaryGradient(),
-            CheckGradient()
-        ]
+        return []
 class ModelFromMetaGraph(ModelDesc):
......
@@ -90,7 +90,7 @@ class PredictorWorkerThread(StoppableThread):
             except tf.errors.CancelledError:
                 for f in futures:
                     f.cancel()
-                logger.warn("PredictorWorkerThread id={}, call was cancelled.".format(self.id))
+                logger.warn("In PredictorWorkerThread id={}, call was cancelled.".format(self.id))
                 return
         # print "Worker {} batched {} Queue {}".format(
         #     self.id, len(futures), self.queue.qsize())
......
@@ -120,6 +120,7 @@ class MapGradient(GradientProcessor):
 _summaried_gradient = set()
+# TODO let the maintain op depend on grad directly ?
 class SummaryGradient(MapGradient):
     """
     Summary histogram and RMS for each graident variable.
......
@@ -6,6 +6,8 @@
 import tensorflow as tf
 from .gradproc import apply_grad_processors as apply_gradproc
+__all__ = ['apply_grad_processors', 'ProxyOptimizer']
 class ProxyOptimizer(tf.train.Optimizer):
     def __init__(self, opt):
......
@@ -13,6 +13,7 @@ from ..models import ModelDesc
 from ..utils import logger, log_deprecated
 from ..tfutils import (JustCurrentSession,
                        get_default_sess_config, SessionInit)
+from ..tfutils.optimizer import apply_grad_processors
 from .input_data import InputData
 __all__ = ['TrainConfig']
@@ -130,6 +131,9 @@ class TrainConfig(object):
             self.predict_tower = [self.predict_tower]
         if 'optimizer' in kwargs:
+            log_deprecated("TrainConfig(optimizer=...)",
+                           "Use ModelDesc._get_optimizer() instead.",
+                           "2017-04-12")
             self._optimizer = kwargs.pop('optimizer')
             assert_type(self._optimizer, tf.train.Optimizer)
         else:
@@ -160,7 +164,14 @@ class TrainConfig(object):
     def optimizer(self):
         """ for back-compatibilty only. will remove in the future"""
         if self._optimizer:
-            return self._optimizer
-        opt = self.model.get_optimizer()
-        self._optimizer = opt
+            opt = self._optimizer
+        else:
+            opt = self.model.get_optimizer()
+        gradproc = self.model.get_gradient_processor()
+        if gradproc:
+            log_deprecated("ModelDesc.get_gradient_processor()",
+                           "Use gradient processor to build an optimizer instead.", "2017-04-12")
+            opt = apply_grad_processors(opt, gradproc)
+        if not self._optimizer:
+            self._optimizer = opt
         return opt
@@ -7,7 +7,6 @@ import tensorflow as tf
 from ..utils import log_deprecated
 from ..tfutils.tower import TowerContext
-from ..tfutils.gradproc import apply_grad_processors
 from .input_data import QueueInput, FeedfreeInput
 from .base import Trainer
@@ -98,8 +97,6 @@ class SimpleFeedfreeTrainer(
         super(SimpleFeedfreeTrainer, self)._setup()
         with TowerContext('', is_training=True):
             cost, grads = self._get_cost_and_grad()
-            grads = apply_grad_processors(grads, self.model.get_gradient_processor())
         self.train_op = self.config.optimizer.apply_gradients(grads, name='min_op')
         # skip training
         # self.train_op = tf.group(*self.dequed_inputs)
......
@@ -155,8 +155,6 @@ class SyncMultiGPUTrainer(MultiGPUTrainer,
             cost,
             gate_gradients=tf.train.Optimizer.GATE_NONE,
             colocate_gradients_with_ops=True)
-        grads = apply_grad_processors(grads, self.model.get_gradient_processor())
         self.train_op = self.config.optimizer.apply_gradients(grads, name='min_op')
@@ -198,12 +196,11 @@ class AsyncMultiGPUTrainer(MultiGPUTrainer,
         super(AsyncMultiGPUTrainer, self)._setup()
         grad_list = MultiGPUTrainer._multi_tower_grads(
             self.config.tower, lambda: self._get_cost_and_grad()[1])
-        gradprocs = self.model.get_gradient_processor()
         if self._scale_gradient and self.config.nr_tower > 1:
             # pretend to average the grads, in order to make async and
             # sync have consistent effective learning rate
-            gradprocs.insert(0, ScaleGradient(('.*', 1.0 / self.config.nr_tower), log=False))
-        grad_list = [apply_grad_processors(g, gradprocs) for g in grad_list]
+            gradproc = ScaleGradient(('.*', 1.0 / self.config.nr_tower), log=False)
+            grad_list = [apply_grad_processors(g, [gradproc]) for g in grad_list]
         # use grad from the first tower for iteration in main thread
         self.train_op = self.config.optimizer.apply_gradients(grad_list[0], name='min_op')
......
@@ -10,7 +10,6 @@ from ..utils import SUMMARY_BACKUP_KEYS, PREDICT_TOWER
 from ..tfutils import get_tensors_by_names, TowerContext
 from ..tfutils.collection import freeze_collection
 from ..predict import OnlinePredictor, build_prediction_graph
-from ..tfutils.gradproc import apply_grad_processors
 from .input_data import FeedInput
 __all__ = ['SimpleTrainer', 'MultiPredictorTowerTrainer']
@@ -86,9 +85,6 @@ class SimpleTrainer(Trainer):
             opt = self.config.optimizer
             grads = opt.compute_gradients(cost_var)
-            grads = apply_grad_processors(grads,
-                                          self.model.get_gradient_processor())
             self.train_op = opt.apply_gradients(grads, name='min_op')
     def _trigger_epoch(self):
......
@@ -135,7 +135,7 @@ def log_deprecated(name="", text="", eos=""):
         eos = "after " + datetime(*map(int, eos.split("-"))).strftime("%d %b")
     if name:
         if eos:
-            warn_msg = "%s will be deprecated on %s. %s" % (name, eos, text)
+            warn_msg = "%s will be deprecated %s. %s" % (name, eos, text)
         else:
             warn_msg = "%s was deprecated. %s" % (name, text)
     else:
......
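The get_config() changes above all follow the same shape: the learning-rate variable and the optimizer= keyword disappear from the config, since TrainConfig now obtains the optimizer from the model (and only emits a log_deprecated warning when the old keyword is still passed). A hedged sketch of the resulting call site; the function signature, dataflow, and schedule values here are illustrative placeholders rather than code from any single file in this commit:

```python
from tensorpack import TrainConfig, ModelSaver, ScheduledHyperParamSetter


def get_config(dataflow, model):
    # no `optimizer=` argument: TrainConfig now calls model.get_optimizer(),
    # which dispatches to the ModelDesc._get_optimizer() hook shown above
    return TrainConfig(
        dataflow=dataflow,
        model=model,
        callbacks=[
            ModelSaver(),
            # 'learning_rate' is still a scalar variable created inside the
            # model, so the hyper-parameter setters keep working unchanged
            ScheduledHyperParamSetter('learning_rate', [(80, 0.0003), (120, 0.0001)]),
        ],
    )
```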