Commit d1041a77 authored by Yuxin Wu

change interface for optimizer. (#137)

parent e0b1a5ce
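The change this commit applies across the examples below: the optimizer and its gradient processors move out of `TrainConfig(optimizer=...)` and `ModelDesc.get_gradient_processor()` into a single `ModelDesc._get_optimizer()`. A minimal before/after sketch using only calls visible in the hunks below; `MyModel` and its hyperparameters are illustrative, not part of the commit:

```python
import tensorflow as tf
from tensorpack import ModelDesc
from tensorpack.tfutils import optimizer, gradproc
from tensorpack.tfutils import symbolic_functions as symbf

# Old interface (deprecated by this commit):
#     class MyModel(ModelDesc):
#         def get_gradient_processor(self):
#             return [gradproc.GlobalNormClip(5), gradproc.SummaryGradient()]
#     ...
#     TrainConfig(optimizer=tf.train.AdamOptimizer(lr), ...)


class MyModel(ModelDesc):  # illustrative subclass; input/graph methods omitted
    def _get_optimizer(self):
        lr = symbf.get_scalar_var('learning_rate', 1e-3, summary=True)
        opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)
        # gradient processors are now attached to the optimizer itself
        return optimizer.apply_grad_processors(
            opt, [gradproc.GlobalNormClip(5), gradproc.SummaryGradient()])
```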
......@@ -24,6 +24,7 @@ from tensorpack.utils.concurrency import *
from tensorpack.utils.serialize import *
from tensorpack.utils.stats import *
from tensorpack.tfutils import symbolic_functions as symbf
from tensorpack.tfutils.gradproc import MapGradient, SummaryGradient
from tensorpack.RL import *
from simulator import *
......@@ -132,9 +133,14 @@ class Model(ModelDesc):
summary.add_moving_summary(policy_loss, xentropy_loss,
value_loss, pred_reward, advantage, self.cost)
def get_gradient_processor(self):
return [gradproc.MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.1)),
gradproc.SummaryGradient()]
def _get_optimizer(self):
lr = symbf.get_scalar_var('learning_rate', 0.001, summary=True)
opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)
gradprocs = [MapGradient(lambda grad: tf.clip_by_average_norm(grad, 0.1)),
SummaryGradient()]
opt = optimizer.apply_grad_processors(opt, gradprocs)
return opt
class MySimulatorMaster(SimulatorMaster, Callback):
......@@ -202,11 +208,8 @@ def get_config():
master = MySimulatorMaster(namec2s, names2c, M)
dataflow = BatchData(DataFromQueue(master.queue), BATCH_SIZE)
lr = symbf.get_scalar_var('learning_rate', 0.001, summary=True)
return TrainConfig(
dataflow=dataflow,
optimizer=tf.train.AdamOptimizer(lr, epsilon=1e-3),
callbacks=[
ModelSaver(),
ScheduledHyperParamSetter('learning_rate', [(80, 0.0003), (120, 0.0001)]),
......@@ -269,8 +272,7 @@ if __name__ == '__main__':
logger.warn("Without GPU this model will never learn! CPU is only useful for debug.")
nr_gpu = 0
PREDICTOR_THREAD = 1
predict_tower = [0]
train_tower = [0]
predict_tower, train_tower = [0], [0]
trainer = QueueInputTrainer
config = get_config()
if args.load:
......
......@@ -14,7 +14,7 @@ import six
from six.moves import map, range
from tensorpack import *
from tensorpack.tfutils.gradproc import *
from tensorpack.tfutils.gradproc import SummaryGradient, GlobalNormClip
from tensorpack.utils.globvars import globalns as param
import tensorpack.tfutils.symbolic_functions as symbf
from timitdata import TIMITBatch
......@@ -73,8 +73,11 @@ class Model(ModelDesc):
err = tf.reduce_mean(err, name='error')
summary.add_moving_summary(err, self.cost)
def get_gradient_processor(self):
return [gradproc.GlobalNormClip(5), gradproc.SummaryGradient()]
def _get_optimizer(self):
lr = symbolic_functions.get_scalar_var('learning_rate', 5e-3, summary=True)
opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)
return optimizer.apply_grad_processors(
opt, [GlobalNormClip(5), SummaryGradient()])
def get_data(path, isTrain, stat_file):
......@@ -88,13 +91,8 @@ def get_data(path, isTrain, stat_file):
def get_config(ds_train, ds_test):
steps_per_epoch = ds_train.size()
lr = symbolic_functions.get_scalar_var('learning_rate', 5e-3, summary=True)
return TrainConfig(
dataflow=ds_train,
optimizer=tf.train.AdamOptimizer(lr, epsilon=1e-3),
callbacks=[
ModelSaver(),
StatMonitorParamSetter('learning_rate', 'error',
......@@ -105,7 +103,6 @@ def get_config(ds_train, ds_test):
every_k_epochs=2),
],
model=Model(),
steps_per_epoch=steps_per_epoch,
max_epoch=70,
)
......
......@@ -14,7 +14,7 @@ import six
from six.moves import map, range
from tensorpack import *
from tensorpack.tfutils.gradproc import *
from tensorpack.tfutils.gradproc import GlobalNormClip
from tensorpack.utils.lut import LookUpTable
from tensorpack.utils.globvars import globalns as param
rnn = tf.contrib.rnn
......@@ -42,7 +42,7 @@ class CharRNNData(RNGDataFlow):
data = f.read()
if six.PY2:
data = bytearray(data)
data = [chr(c) for c in data if c < 128] # TODO this is Py2 only
data = [chr(c) for c in data if c < 128]
counter = Counter(data)
char_cnt = sorted(counter.items(), key=operator.itemgetter(1), reverse=True)
self.chars = [x[0] for x in char_cnt]
......@@ -105,8 +105,10 @@ class Model(ModelDesc):
summary.add_param_summary(('.*/W', ['histogram'])) # monitor histogram of all W
summary.add_moving_summary(self.cost)
def get_gradient_processor(self):
return [gradproc.GlobalNormClip(5)]
def _get_optimizer(self):
lr = symbolic_functions.get_scalar_var('learning_rate', 2e-3, summary=True)
opt = tf.train.AdamOptimizer(lr)
return optimizer.apply_grad_processors(opt, [GlobalNormClip(5)])
def get_config():
......@@ -116,11 +118,8 @@ def get_config():
ds = BatchData(ds, param.batch_size)
steps_per_epoch = ds.size()
lr = symbolic_functions.get_scalar_var('learning_rate', 2e-3, summary=True)
return TrainConfig(
dataflow=ds,
optimizer=tf.train.AdamOptimizer(lr),
callbacks=[
ModelSaver(),
ScheduledHyperParamSetter('learning_rate', [(25, 2e-4)])
......
......@@ -149,9 +149,11 @@ class Model(ModelDesc):
ops.append(v.assign(tf.get_default_graph().get_tensor_by_name(new_name + ':0')))
return tf.group(*ops, name='update_target_network')
def get_gradient_processor(self):
return [gradproc.GlobalNormalClip(10),
gradproc.SummaryGradient()]
def _get_optimizer(self):
lr = symbf.get_scalar_var('learning_rate', 1e-3, summary=True)
opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)
return optimizer.apply_grad_processors(
opt, [gradproc.GlobalNormClip(10), gradproc.SummaryGradient()])
def get_config():
......@@ -171,11 +173,8 @@ def get_config():
reward_clip=(-1, 1),
history_len=FRAME_HISTORY)
lr = symbf.get_scalar_var('learning_rate', 1e-3, summary=True)
return TrainConfig(
dataflow=dataset_train,
optimizer=tf.train.AdamOptimizer(lr, epsilon=1e-3),
callbacks=[
ModelSaver(),
ScheduledHyperParamSetter('learning_rate',
......@@ -186,7 +185,7 @@ def get_config():
# HumanHyperParamSetter('learning_rate', 'hyper.txt'),
# HumanHyperParamSetter(ObjAttrParam(dataset_train, 'exploration'), 'hyper.txt'),
],
# save memory for multiprocess evaluator
# save memory for multi-thread evaluator
session_config=get_default_sess_config(0.6),
model=M,
steps_per_epoch=STEP_PER_EPOCH,
......
......@@ -41,7 +41,6 @@ def play_model(cfg):
def eval_with_funcs(predict_funcs, nr_eval):
class Worker(StoppableThread):
def __init__(self, func, queue):
super(Worker, self).__init__()
self._func = func
......
......@@ -7,12 +7,12 @@ from tensorpack import ProxyDataFlow, get_rng
class DisturbLabel(ProxyDataFlow):
def __init__(self, ds, prob):
super(DisturbLabel, self).__init__(ds)
self.prob = prob
def reset_state(self):
super(DisturbLabel, self).reset_state()
self.rng = get_rng(self)
def get_data(self):
......
......@@ -161,6 +161,10 @@ class Model(ModelDesc):
self.cost = tf.add_n([cost, wd_cost], name='cost')
add_moving_summary(cost, wd_cost, self.cost)
def _get_optimizer(self):
lr = get_scalar_var('learning_rate', 1e-4, summary=True)
return tf.train.AdamOptimizer(lr, epsilon=1e-5)
def get_data(dataset_name):
isTrain = dataset_name == 'train'
......@@ -225,16 +229,11 @@ def get_data(dataset_name):
def get_config():
logger.auto_set_dir()
# prepare dataset
data_train = get_data('train')
data_test = get_data('val')
lr = get_scalar_var('learning_rate', 1e-4, summary=True)
return TrainConfig(
dataflow=data_train,
optimizer=tf.train.AdamOptimizer(lr, epsilon=1e-5),
callbacks=[
ModelSaver(),
# HumanHyperParamSetter('learning_rate'),
......
......@@ -125,6 +125,15 @@ class Model(ModelDesc):
self.cost = tf.add_n([cost, wd_cost], name='cost')
add_moving_summary(cost, wd_cost, self.cost)
def _get_optimizer(self):
lr = tf.train.exponential_decay(
learning_rate=1e-3,
global_step=get_global_step_var(),
decay_steps=4721 * 100,
decay_rate=0.5, staircase=True, name='learning_rate')
tf.summary.scalar('lr', lr)
return tf.train.AdamOptimizer(lr, epsilon=1e-5)
def get_config():
logger.auto_set_dir()
......@@ -146,29 +155,19 @@ def get_config():
data_train = AugmentImageComponent(data_train, augmentors)
data_train = BatchData(data_train, 128)
data_train = PrefetchDataZMQ(data_train, 5)
steps_per_epoch = data_train.size()
augmentors = [imgaug.Resize((40, 40))]
data_test = AugmentImageComponent(data_test, augmentors)
data_test = BatchData(data_test, 128, remainder=True)
lr = tf.train.exponential_decay(
learning_rate=1e-3,
global_step=get_global_step_var(),
decay_steps=data_train.size() * 100,
decay_rate=0.5, staircase=True, name='learning_rate')
tf.summary.scalar('lr', lr)
return TrainConfig(
dataflow=data_train,
optimizer=tf.train.AdamOptimizer(lr, epsilon=1e-5),
callbacks=[
ModelSaver(),
InferenceRunner(data_test,
[ScalarStats('cost'), ClassificationError()])
],
model=Model(),
steps_per_epoch=steps_per_epoch,
max_epoch=200,
)
......
......@@ -90,20 +90,21 @@ class Model(GANModelDesc):
self.build_losses(vecpos, vecneg)
self.collect_variables()
def _get_optimizer(self):
lr = symbf.get_scalar_var('learning_rate', 2e-4, summary=True)
return tf.train.AdamOptimizer(lr, beta1=0.5, epsilon=1e-3)
def get_data():
ds = ConcatData([dataset.Mnist('train'), dataset.Mnist('test')])
ds = BatchData(ds, BATCH)
return ds
return BatchData(ds, BATCH)
def get_config():
logger.auto_set_dir()
dataset = get_data()
lr = symbf.get_scalar_var('learning_rate', 2e-4, summary=True)
return TrainConfig(
dataflow=dataset,
optimizer=tf.train.AdamOptimizer(lr, beta1=0.5, epsilon=1e-3),
callbacks=[ModelSaver()],
session_config=get_default_sess_config(0.5),
model=Model(),
......
......@@ -88,6 +88,10 @@ class Model(GANModelDesc):
self.build_losses(vecpos, vecneg)
self.collect_variables()
def _get_optimizer(self):
lr = symbolic_functions.get_scalar_var('learning_rate', 2e-4, summary=True)
return tf.train.AdamOptimizer(lr, beta1=0.5, epsilon=1e-3)
def get_data():
global args
......@@ -104,10 +108,8 @@ def get_data():
def get_config():
logger.auto_set_dir()
dataset = get_data()
lr = symbolic_functions.get_scalar_var('learning_rate', 2e-4, summary=True)
return TrainConfig(
dataflow=dataset,
optimizer=tf.train.AdamOptimizer(lr, beta1=0.5, epsilon=1e-3),
callbacks=[ModelSaver()],
session_config=get_default_sess_config(0.5),
model=Model(),
......
......@@ -65,12 +65,6 @@ class GANModelDesc(ModelDesc):
add_moving_summary(self.g_loss, self.d_loss, self.d_accuracy, self.g_accuracy)
def get_gradient_processor_g(self):
return [CheckGradient()]
def get_gradient_processor_d(self):
return [CheckGradient()]
class GANTrainer(FeedfreeTrainerBase):
def __init__(self, config):
......@@ -86,16 +80,12 @@ class GANTrainer(FeedfreeTrainerBase):
# optimize G
grads = self.config.optimizer.compute_gradients(
self.model.g_loss, var_list=self.model.g_vars)
grads = apply_grad_processors(
grads, self.model.get_gradient_processor_g())
self.g_min = self.config.optimizer.apply_gradients(grads, name='g_op')
# optimize D
with tf.control_dependencies([self.g_min]):
grads = self.config.optimizer.compute_gradients(
self.model.d_loss, var_list=self.model.d_vars)
grads = apply_grad_processors(
grads, self.model.get_gradient_processor_d())
self.d_min = self.config.optimizer.apply_gradients(grads, name='d_op')
self.train_op = self.d_min
......
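With `get_gradient_processor_g()`/`get_gradient_processor_d()` removed from `GANModelDesc` and the `apply_grad_processors` calls dropped from `GANTrainer`, a model that still wants the old `CheckGradient` behaviour would fold it into its own optimizer. A sketch under that assumption; the subclass name is hypothetical, and `GANModelDesc` is the base class edited above:

```python
import tensorflow as tf
from tensorpack.tfutils import optimizer
from tensorpack.tfutils import symbolic_functions as symbf
from tensorpack.tfutils.gradproc import CheckGradient

from GAN import GANModelDesc  # the examples' base class shown above


class CheckedGAN(GANModelDesc):  # hypothetical subclass
    def _get_optimizer(self):
        lr = symbf.get_scalar_var('learning_rate', 2e-4, summary=True)
        opt = tf.train.AdamOptimizer(lr, beta1=0.5, epsilon=1e-3)
        # CheckGradient used to be applied by GANTrainer to both G and D grads;
        # now it is attached once, to the optimizer both updates share.
        return optimizer.apply_grad_processors(opt, [CheckGradient()])
```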
......@@ -130,6 +130,10 @@ class Model(GANModelDesc):
self.collect_variables()
def _get_optimizer(self):
lr = symbolic_functions.get_scalar_var('learning_rate', 2e-4, summary=True)
return tf.train.AdamOptimizer(lr, beta1=0.5, epsilon=1e-3)
def split_input(img):
"""
......@@ -167,10 +171,8 @@ def get_data():
def get_config():
logger.auto_set_dir()
dataset = get_data()
lr = symbolic_functions.get_scalar_var('learning_rate', 2e-4, summary=True)
return TrainConfig(
dataflow=dataset,
optimizer=tf.train.AdamOptimizer(lr, beta1=0.5, epsilon=1e-3),
callbacks=[
PeriodicTrigger(ModelSaver(), every_k_epochs=3),
ScheduledHyperParamSetter('learning_rate', [(200, 1e-4)])
......
......@@ -146,9 +146,12 @@ class Model(GANModelDesc):
# distinguish between variables of generator and discriminator updates
self.collect_variables()
def get_gradient_processor_g(self):
def _get_optimizer(self):
lr = symbf.get_scalar_var('learning_rate', 2e-4, summary=True)
opt = tf.train.AdamOptimizer(lr, beta1=0.5, epsilon=1e-6)
# generator learns 5 times faster
return [gradproc.ScaleGradient(('.*', 5), log=False)]
return optimizer.apply_grad_processors(
opt, [gradproc.ScaleGradient(('.*', 5), log=False)])
def get_data():
......@@ -159,11 +162,8 @@ def get_data():
def get_config():
logger.auto_set_dir()
dataset = get_data()
lr = symbf.get_scalar_var('learning_rate', 2e-4, summary=True)
return TrainConfig(
dataflow=dataset,
optimizer=tf.train.AdamOptimizer(lr, beta1=0.5, epsilon=1e-6),
dataflow=get_data(),
callbacks=[ModelSaver()],
session_config=get_default_sess_config(0.5),
model=Model(),
......
......@@ -92,9 +92,12 @@ class Model(ModelDesc):
self.cost = tf.add_n(costs, name='cost')
add_moving_summary(costs + [wrong, self.cost])
def get_gradient_processor(self):
return [gradproc.ScaleGradient([
('convfcweight.*', 0.1), ('conv5_.*', 5)])]
def _get_optimizer(self):
lr = get_scalar_var('learning_rate', 3e-5, summary=True)
opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)
return optimizer.apply_grad_processors(
opt, [gradproc.ScaleGradient(
[('convfcweight.*', 0.1), ('conv5_.*', 5)])])
def get_data(name):
......@@ -102,7 +105,6 @@ def get_data(name):
ds = dataset.BSDS500(name, shuffle=True)
class CropMultiple16(imgaug.ImageAugmentor):
def _get_augment_params(self, img):
newh = img.shape[0] // 16 * 16
neww = img.shape[1] // 16 * 16
......@@ -132,7 +134,7 @@ def get_data(name):
shape_aug = [imgaug.CenterCrop(IMAGE_SHAPE)]
ds = AugmentImageComponents(ds, shape_aug, (0, 1))
def f(m):
def f(m): # thresholding
m[m >= 0.50] = 1
m[m < 0.50] = 0
return m
......@@ -169,10 +171,8 @@ def get_config():
steps_per_epoch = dataset_train.size() * 40
dataset_val = get_data('val')
lr = get_scalar_var('learning_rate', 3e-5, summary=True)
return TrainConfig(
dataflow=dataset_train,
optimizer=tf.train.AdamOptimizer(lr, epsilon=1e-3),
callbacks=[
ModelSaver(),
ScheduledHyperParamSetter('learning_rate', [(30, 6e-6), (45, 1e-6), (60, 8e-7)]),
......
......@@ -120,6 +120,10 @@ class Model(ModelDesc):
self.cost = tf.add_n([cost, wd_cost], name='cost')
add_moving_summary(wd_cost, self.cost)
def _get_optimizer(self):
lr = get_scalar_var('learning_rate', 0.045, summary=True)
return tf.train.MomentumOptimizer(lr, 0.9)
def get_data(train_or_test):
isTrain = train_or_test == 'train'
......@@ -157,10 +161,8 @@ def get_config():
steps_per_epoch = 5000
dataset_val = get_data('val')
lr = get_scalar_var('learning_rate', 0.045, summary=True)
return TrainConfig(
dataflow=dataset_train,
optimizer=tf.train.MomentumOptimizer(lr, 0.9),
callbacks=[
ModelSaver(),
InferenceRunner(dataset_val, [
......
......@@ -8,13 +8,12 @@ import argparse
import numpy as np
import os
import tensorflow as tf
import multiprocessing
from tensorpack import *
from tensorpack.tfutils.symbolic_functions import *
from tensorpack.tfutils.summary import *
import multiprocessing
"""
InceptionV3 on ILSVRC12.
See "Rethinking the Inception Architecture for Computer Vision", arxiv:1512.00567
......@@ -195,6 +194,10 @@ class Model(ModelDesc):
self.cost = tf.add_n([0.4 * loss1, loss2, wd_cost], name='cost')
add_moving_summary(loss1, loss2, wd_cost, self.cost)
def _get_optimizer(self):
lr = get_scalar_var('learning_rate', 0.045, summary=True)
return tf.train.AdamOptimizer(lr, epsilon=1e-3)
def get_data(train_or_test):
isTrain = train_or_test == 'train'
......@@ -261,10 +264,8 @@ def get_config():
dataset_train = get_data('train')
dataset_val = get_data('val')
lr = get_scalar_var('learning_rate', 0.045, summary=True)
return TrainConfig(
dataflow=dataset_train,
optimizer=tf.train.AdamOptimizer(lr, epsilon=1e-3),
callbacks=[
ModelSaver(),
InferenceRunner(dataset_val, [
......
......@@ -100,8 +100,11 @@ class Model(ModelDesc):
s[1].c.assign(z),
s[1].h.assign(z))
def get_gradient_processor(self):
return [gradproc.GlobalNormClip(5)]
def _get_optimizer(self):
lr = symbolic_functions.get_scalar_var('learning_rate', 1, summary=True)
opt = tf.train.GradientDescentOptimizer(lr)
return optimizer.apply_grad_processors(
opt, [gradproc.GlobalNormClip(5)])
def get_config():
......@@ -120,12 +123,9 @@ def get_config():
(data3[1].shape[0] // BATCH - 1) // SEQ_LEN)
M = Model()
lr = symbolic_functions.get_scalar_var('learning_rate', 1, summary=True)
return TrainConfig(
data=train_data,
model=M,
optimizer=tf.train.GradientDescentOptimizer(lr),
callbacks=[
ModelSaver(),
HyperParamSetterWithFunc(
......
......@@ -12,8 +12,9 @@ Models can be [downloaded here](https://goo.gl/6XjK9V).
| ResNet 50 | 7.13% | 24.12% |
| ResNet 101 | 6.54% | 22.89% |
To train, just run:
```bash
./imagenet-resnet.py --data /path/to/ILSVRC --gpu 0,1,2,3 -d 18
./imagenet-resnet.py --data /path/to/original/ILSVRC --gpu 0,1,2,3 -d 18
```
![imagenet](imagenet-resnet.png)
......
......@@ -109,6 +109,11 @@ class Model(ModelDesc):
add_param_summary(('.*/W', ['histogram'])) # monitor W
self.cost = tf.add_n([cost, wd_cost], name='cost')
def _get_optimizer(self):
lr = get_scalar_var('learning_rate', 0.01, summary=True)
opt = tf.train.MomentumOptimizer(lr, 0.9)
return opt
def get_data(train_or_test):
isTrain = train_or_test == 'train'
......@@ -140,10 +145,8 @@ def get_config():
steps_per_epoch = dataset_train.size()
dataset_test = get_data('test')
lr = get_scalar_var('learning_rate', 0.01, summary=True)
return TrainConfig(
dataflow=dataset_train,
optimizer=tf.train.MomentumOptimizer(lr, 0.9),
callbacks=[
ModelSaver(),
InferenceRunner(dataset_test,
......
......@@ -114,6 +114,10 @@ class Model(ModelDesc):
add_moving_summary(loss, wd_cost)
self.cost = tf.add_n([loss, wd_cost], name='cost')
def _get_optimizer(self):
lr = get_scalar_var('learning_rate', 0.1, summary=True)
return tf.train.MomentumOptimizer(lr, 0.9, use_nesterov=True)
def get_data(train_or_test):
isTrain = train_or_test == 'train'
......@@ -176,14 +180,11 @@ def get_data(train_or_test):
def get_config():
# prepare dataset
dataset_train = get_data('train')
dataset_val = get_data('val')
lr = get_scalar_var('learning_rate', 0.1, summary=True)
return TrainConfig(
dataflow=dataset_train,
optimizer=tf.train.MomentumOptimizer(lr, 0.9, use_nesterov=True),
callbacks=[
ModelSaver(),
InferenceRunner(dataset_val, [
......
......@@ -66,10 +66,8 @@ def get_config():
steps_per_epoch = dataset_train.size()
dataset_test = get_data('test')
lr = get_scalar_var('learning_rate', 0.01, summary=True)
return TrainConfig(
dataflow=dataset_train,
optimizer=tf.train.MomentumOptimizer(lr, 0.9),
callbacks=[
ModelSaver(),
InferenceRunner(dataset_test,
......
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# File: embedding_data.py
# Author: Yuxin Wu <ppwwyyxxc@gmail.com>
# Author: tensorpack contributors
import numpy as np
from tensorpack.dataflow import dataset, BatchData
......
......@@ -53,6 +53,10 @@ class EmbeddingModel(ModelDesc):
return embeddings
def _get_optimizer(self):
lr = symbf.get_scalar_var('learning_rate', 1e-4, summary=True)
return tf.train.GradientDescentOptimizer(lr)
class SiameseModel(EmbeddingModel):
@staticmethod
......@@ -136,8 +140,6 @@ def get_config(model, algorithm_name):
dataset = model.get_data()
steps_per_epoch = dataset.size()
lr = symbf.get_scalar_var('learning_rate', 1e-4, summary=True)
extra_display = ["cost"]
if not algorithm_name == "cosine":
extra_display = extra_display + ["loss/pos-dist", "loss/neg-dist"]
......@@ -145,7 +147,6 @@ def get_config(model, algorithm_name):
return TrainConfig(
dataflow=dataset,
model=model(),
optimizer=tf.train.GradientDescentOptimizer(lr),
callbacks=[
ModelSaver(),
ScheduledHyperParamSetter('learning_rate', [(10, 1e-5), (20, 1e-6)])
......
......@@ -85,9 +85,13 @@ class Model(ModelDesc):
summary.add_moving_summary(cost, wd_cost)
self.cost = tf.add_n([wd_cost, cost], name='cost')
def get_gradient_processor(self):
return [gradproc.ScaleGradient(('STN.*', 0.1)),
gradproc.SummaryGradient()]
def _get_optimizer(self):
lr = symbf.get_scalar_var('learning_rate', 5e-4, summary=True)
opt = tf.train.AdamOptimizer(lr, epsilon=1e-3)
return optimizer.apply_grad_processors(
opt, [
gradproc.ScaleGradient(('STN.*', 0.1)),
gradproc.SummaryGradient()])
def get_data(isTrain):
......@@ -149,11 +153,8 @@ def get_config():
dataset_train, dataset_test = get_data(True), get_data(False)
steps_per_epoch = dataset_train.size() * 5
lr = symbf.get_scalar_var('learning_rate', 5e-4, summary=True)
return TrainConfig(
dataflow=dataset_train,
optimizer=tf.train.AdamOptimizer(lr, epsilon=1e-3),
callbacks=[
ModelSaver(),
InferenceRunner(dataset_test,
......
......@@ -71,6 +71,10 @@ class Model(ModelDesc):
add_param_summary(('.*/W', ['histogram'])) # monitor W
self.cost = tf.add_n([cost, wd_cost], name='cost')
def _get_optimizer(self):
lr = symbf.get_scalar_var('learning_rate', 1e-2, summary=True)
return tf.train.AdamOptimizer(lr, epsilon=1e-3)
def get_data(train_or_test, cifar_classnum):
isTrain = train_or_test == 'train'
......@@ -111,16 +115,12 @@ def get_config(cifar_classnum):
sess_config = get_default_sess_config(0.5)
lr = symbf.get_scalar_var('learning_rate', 1e-2, summary=True)
def lr_func(lr):
if lr < 3e-5:
raise StopTraining()
return lr * 0.31
return TrainConfig(
dataflow=dataset_train,
optimizer=tf.train.AdamOptimizer(lr, epsilon=1e-3),
callbacks=[
ModelSaver(),
InferenceRunner(dataset_test, ClassificationError()),
......
......@@ -23,7 +23,6 @@ Usage:
class Model(ModelDesc):
def _get_inputs(self):
return [InputDesc(tf.float32, (None, 227, 227, 3), 'input')]
......
......@@ -112,6 +112,17 @@ class Model(ModelDesc):
('.*/weights', ['histogram', 'rms']) # to also work with slim
)
def _get_optimizer(self):
lr = tf.train.exponential_decay(
learning_rate=1e-3,
global_step=get_global_step_var(),
decay_steps=468 * 10,
decay_rate=0.3, staircase=True, name='learning_rate')
# This will also put the summary in tensorboard, stat.json and print in terminal
# but this time without moving average
tf.summary.scalar('lr', lr)
return tf.train.AdamOptimizer(lr)
def get_data():
train = BatchData(dataset.Mnist('train'), 128)
......@@ -127,20 +138,9 @@ def get_config():
# how many iterations you want in each epoch
steps_per_epoch = dataset_train.size()
lr = tf.train.exponential_decay(
learning_rate=1e-3,
global_step=get_global_step_var(),
decay_steps=dataset_train.size() * 10,
decay_rate=0.3, staircase=True, name='learning_rate')
# This will also put the summary in tensorboard,stat.json and print in
# terminal, but without the moving average
tf.summary.scalar('lr', lr)
# get the config which contains everything necessary in a training
return TrainConfig(
dataflow=dataset_train, # the DataFlow instance for training
optimizer=tf.train.AdamOptimizer(lr),
callbacks=[
ModelSaver(), # save the model after every epoch
InferenceRunner( # run inference(for validation) after every epoch
......
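The decay schedule above now hardcodes what `get_config()` used to compute from the dataflow. A quick check of the constant, assuming the batch size of 128 shown above and MNIST's 60000 training images:

```python
# 60000 MNIST training images at batch size 128 -> 468 full batches per epoch,
# so decay_steps = 468 * 10 applies the 0.3 decay roughly every 10 epochs.
steps_per_epoch = 60000 // 128       # == 468
decay_steps = steps_per_epoch * 10   # == 4680, matching the hardcoded value
```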
......@@ -59,6 +59,15 @@ class Model(ModelDesc):
add_param_summary(('.*/W', ['histogram', 'rms'])) # monitor W
self.cost = tf.add_n([cost, wd_cost], name='cost')
def _get_optimizer(self):
lr = tf.train.exponential_decay(
learning_rate=1e-3,
global_step=get_global_step_var(),
decay_steps=4721 * 60,
decay_rate=0.2, staircase=True, name='learning_rate')
tf.summary.scalar('lr', lr)
return tf.train.AdamOptimizer(lr)
def get_data():
d1 = dataset.SVHNDigit('train')
......@@ -86,20 +95,11 @@ def get_data():
def get_config():
logger.auto_set_dir()
data_train, data_test = get_data()
steps_per_epoch = data_train.size()
lr = tf.train.exponential_decay(
learning_rate=1e-3,
global_step=get_global_step_var(),
decay_steps=data_train.size() * 60,
decay_rate=0.2, staircase=True, name='learning_rate')
tf.summary.scalar('lr', lr)
return TrainConfig(
dataflow=data_train,
optimizer=tf.train.AdamOptimizer(lr),
callbacks=[
ModelSaver(),
InferenceRunner(data_test,
......
......@@ -12,8 +12,6 @@ from ..utils import logger, INPUTS_KEY, deprecated, log_deprecated
from ..utils.argtools import memoized
from ..tfutils.modelutils import apply_slim_collections
from ..tfutils.gradproc import CheckGradient
__all__ = ['InputDesc', 'InputVar', 'ModelDesc', 'ModelFromMetaGraph']
# TODO "variable" is not the right name to use for input here.
......@@ -156,12 +154,7 @@ class ModelDesc(object):
raise NotImplementedError()
def get_gradient_processor(self):
""" (Deprecated) Return a list of :class:`tensorpack.tfutils.GradientProcessor`.
They will be executed by the trainer in the given order.
"""
return [ # SummaryGradient(),
CheckGradient()
]
return []
class ModelFromMetaGraph(ModelDesc):
......
......@@ -90,7 +90,7 @@ class PredictorWorkerThread(StoppableThread):
except tf.errors.CancelledError:
for f in futures:
f.cancel()
logger.warn("PredictorWorkerThread id={}, call was cancelled.".format(self.id))
logger.warn("In PredictorWorkerThread id={}, call was cancelled.".format(self.id))
return
# print "Worker {} batched {} Queue {}".format(
# self.id, len(futures), self.queue.qsize())
......
......@@ -120,6 +120,7 @@ class MapGradient(GradientProcessor):
_summaried_gradient = set()
# TODO let the maintain op depend on grad directly ?
class SummaryGradient(MapGradient):
"""
Summary histogram and RMS for each gradient variable.
......
......@@ -6,6 +6,8 @@
import tensorflow as tf
from .gradproc import apply_grad_processors as apply_gradproc
__all__ = ['apply_grad_processors', 'ProxyOptimizer']
class ProxyOptimizer(tf.train.Optimizer):
def __init__(self, opt):
......
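`tensorpack.tfutils.optimizer` now exports `apply_grad_processors` and `ProxyOptimizer`. The idea is that the returned object behaves like the wrapped optimizer, except that gradients are run through the processors between `compute_gradients` and `apply_gradients`. A rough sketch of that proxying idea only, not tensorpack's actual implementation, assuming the processors expose a `process(grads)` method:

```python
import tensorflow as tf


class GradProcOptimizerSketch(tf.train.Optimizer):
    """Sketch only; tensorpack's real ProxyOptimizer/apply_grad_processors
    differ in details."""

    def __init__(self, opt, gradprocs):
        super(GradProcOptimizerSketch, self).__init__(False, 'GradProcOptimizerSketch')
        self._opt = opt
        self._gradprocs = gradprocs

    def compute_gradients(self, *args, **kwargs):
        grads = self._opt.compute_gradients(*args, **kwargs)
        for proc in self._gradprocs:
            grads = proc.process(grads)  # assumed GradientProcessor interface
        return grads

    def apply_gradients(self, *args, **kwargs):
        return self._opt.apply_gradients(*args, **kwargs)
```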
......@@ -13,6 +13,7 @@ from ..models import ModelDesc
from ..utils import logger, log_deprecated
from ..tfutils import (JustCurrentSession,
get_default_sess_config, SessionInit)
from ..tfutils.optimizer import apply_grad_processors
from .input_data import InputData
__all__ = ['TrainConfig']
......@@ -130,6 +131,9 @@ class TrainConfig(object):
self.predict_tower = [self.predict_tower]
if 'optimizer' in kwargs:
log_deprecated("TrainConfig(optimizer=...)",
"Use ModelDesc._get_optimizer() instead.",
"2017-04-12")
self._optimizer = kwargs.pop('optimizer')
assert_type(self._optimizer, tf.train.Optimizer)
else:
......@@ -160,7 +164,14 @@ class TrainConfig(object):
def optimizer(self):
""" for back-compatibilty only. will remove in the future"""
if self._optimizer:
return self._optimizer
opt = self._optimizer
else:
opt = self.model.get_optimizer()
gradproc = self.model.get_gradient_processor()
if gradproc:
log_deprecated("ModelDesc.get_gradient_processor()",
"Use gradient processor to build an optimizer instead.", "2017-04-12")
opt = apply_grad_processors(opt, gradproc)
if not self._optimizer:
self._optimizer = opt
return opt
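During the deprecation window both call sites keep working: an explicit `TrainConfig(optimizer=...)` logs the warning and takes precedence, otherwise the property above pulls the optimizer from the model and still folds in any (deprecated) `get_gradient_processor()` output. A sketch with `my_dataflow` and `MyModel` as placeholders, not names from this commit:

```python
import tensorflow as tf
from tensorpack import TrainConfig, ModelSaver

df = my_dataflow()   # placeholder: any DataFlow
model = MyModel()    # placeholder: a ModelDesc defining _get_optimizer()

# old style: still accepted until 2017-04-12, but triggers the log_deprecated
# call added above; the explicit optimizer wins.
config_old = TrainConfig(dataflow=df, model=model,
                         optimizer=tf.train.AdamOptimizer(1e-3),
                         callbacks=[ModelSaver()])

# new style: no optimizer kwarg; the property above calls model.get_optimizer()
# and wraps it with any get_gradient_processor() output.
config_new = TrainConfig(dataflow=df, model=model,
                         callbacks=[ModelSaver()])
```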
......@@ -7,7 +7,6 @@ import tensorflow as tf
from ..utils import log_deprecated
from ..tfutils.tower import TowerContext
from ..tfutils.gradproc import apply_grad_processors
from .input_data import QueueInput, FeedfreeInput
from .base import Trainer
......@@ -98,8 +97,6 @@ class SimpleFeedfreeTrainer(
super(SimpleFeedfreeTrainer, self)._setup()
with TowerContext('', is_training=True):
cost, grads = self._get_cost_and_grad()
grads = apply_grad_processors(grads, self.model.get_gradient_processor())
self.train_op = self.config.optimizer.apply_gradients(grads, name='min_op')
# skip training
# self.train_op = tf.group(*self.dequed_inputs)
......
......@@ -155,8 +155,6 @@ class SyncMultiGPUTrainer(MultiGPUTrainer,
cost,
gate_gradients=tf.train.Optimizer.GATE_NONE,
colocate_gradients_with_ops=True)
grads = apply_grad_processors(grads, self.model.get_gradient_processor())
self.train_op = self.config.optimizer.apply_gradients(grads, name='min_op')
......@@ -198,12 +196,11 @@ class AsyncMultiGPUTrainer(MultiGPUTrainer,
super(AsyncMultiGPUTrainer, self)._setup()
grad_list = MultiGPUTrainer._multi_tower_grads(
self.config.tower, lambda: self._get_cost_and_grad()[1])
gradprocs = self.model.get_gradient_processor()
if self._scale_gradient and self.config.nr_tower > 1:
# pretend to average the grads, in order to make async and
# sync have consistent effective learning rate
gradprocs.insert(0, ScaleGradient(('.*', 1.0 / self.config.nr_tower), log=False))
grad_list = [apply_grad_processors(g, gradprocs) for g in grad_list]
gradproc = ScaleGradient(('.*', 1.0 / self.config.nr_tower), log=False)
grad_list = [apply_grad_processors(g, [gradproc]) for g in grad_list]
# use grad from the first tower for iteration in main thread
self.train_op = self.config.optimizer.apply_gradients(grad_list[0], name='min_op')
......
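Only the implicit 1/nr_tower scaling survives in the async trainer; model-defined gradient processors are no longer inserted here. A small hypothetical helper restating the logic above, assuming processors expose a `process(grads)` method:

```python
from tensorpack.tfutils.gradproc import ScaleGradient


def scale_for_async(grad_list, nr_tower):
    """grad_list: per-tower lists of (grad, var) pairs.
    With nr_tower towers applying updates asynchronously, each gradient is
    scaled by 1/nr_tower so the effective learning rate stays comparable to
    synchronous (gradient-averaged) training."""
    proc = ScaleGradient(('.*', 1.0 / nr_tower), log=False)
    return [proc.process(g) for g in grad_list]
```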
......@@ -10,7 +10,6 @@ from ..utils import SUMMARY_BACKUP_KEYS, PREDICT_TOWER
from ..tfutils import get_tensors_by_names, TowerContext
from ..tfutils.collection import freeze_collection
from ..predict import OnlinePredictor, build_prediction_graph
from ..tfutils.gradproc import apply_grad_processors
from .input_data import FeedInput
__all__ = ['SimpleTrainer', 'MultiPredictorTowerTrainer']
......@@ -86,9 +85,6 @@ class SimpleTrainer(Trainer):
opt = self.config.optimizer
grads = opt.compute_gradients(cost_var)
grads = apply_grad_processors(grads,
self.model.get_gradient_processor())
self.train_op = opt.apply_gradients(grads, name='min_op')
def _trigger_epoch(self):
......
......@@ -135,7 +135,7 @@ def log_deprecated(name="", text="", eos=""):
eos = "after " + datetime(*map(int, eos.split("-"))).strftime("%d %b")
if name:
if eos:
warn_msg = "%s will be deprecated on %s. %s" % (name, eos, text)
warn_msg = "%s will be deprecated %s. %s" % (name, eos, text)
else:
warn_msg = "%s was deprecated. %s" % (name, text)
else:
......
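With the reworded template, the `log_deprecated` call added in config.py above now produces a message like the one in the comment below (modulo the logger prefix):

```python
from tensorpack.utils import log_deprecated

# Logs roughly:
#   TrainConfig(optimizer=...) will be deprecated after 12 Apr.
#   Use ModelDesc._get_optimizer() instead.
log_deprecated("TrainConfig(optimizer=...)",
               "Use ModelDesc._get_optimizer() instead.",
               "2017-04-12")
```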