Commit 608ad4a9 authored by Yuxin Wu

use get_scalar_var for learning rate

parent 3facd518
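In short: every call site that built a non-trainable learning-rate variable by hand and summarized it separately is collapsed into one call to the new `get_scalar_var` helper (defined at the end of this diff), and several scripts drop their explicit `session_config` argument, presumably relying on the `get_default_sess_config` default bumped to 0.99 below. The recurring before/after pattern:

# before: build the scalar variable and its summary by hand
lr = tf.Variable(0.001, trainable=False, name='learning_rate')
tf.scalar_summary('learning_rate', lr)

# after: one helper call does both
lr = symbf.get_scalar_var('learning_rate', 1e-3, summary=True)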
@@ -158,8 +158,7 @@ def get_config():
         reward_clip=(-1, 1),
         history_len=FRAME_HISTORY)

-    lr = tf.Variable(0.001, trainable=False, name='learning_rate')
-    tf.scalar_summary('learning_rate', lr)
+    lr = symbf.get_scalar_var('learning_rate', 1e-3, summary=True)

     return TrainConfig(
         dataset=dataset_train,
...
@@ -219,8 +219,7 @@ def get_config():
     data_train = get_data('train')
     data_test = get_data('val')

-    lr = tf.Variable(1e-4, trainable=False, name='learning_rate')
-    tf.scalar_summary('learning_rate', lr)
+    lr = get_scalar_var('learning_rate', 1e-4, summary=True)

     return TrainConfig(
         dataset=data_train,
...
@@ -162,9 +162,7 @@ def get_config():
     step_per_epoch = dataset_train.size() * 40
     dataset_val = get_data('val')

-    lr = tf.Variable(3e-5, trainable=False, name='learning_rate')
-    tf.scalar_summary('learning_rate', lr)
+    lr = get_scalar_var('learning_rate', 3e-5, summary=True)

     return TrainConfig(
         dataset=dataset_train,
         optimizer=tf.train.AdamOptimizer(lr, epsilon=1e-3),
...
@@ -152,9 +152,7 @@ def get_config():
     step_per_epoch = 5000
     dataset_val = get_data('val')

-    lr = tf.Variable(0.045, trainable=False, name='learning_rate')
-    tf.scalar_summary('learning_rate', lr)
+    lr = get_scalar_var('learning_rate', 0.045, summary=True)

     return TrainConfig(
         dataset=dataset_train,
         optimizer=tf.train.MomentumOptimizer(lr, 0.9),
...
@@ -258,9 +258,7 @@ def get_config():
     dataset_train = get_data('train')
     dataset_val = get_data('val')

-    lr = tf.Variable(0.045, trainable=False, name='learning_rate')
-    tf.scalar_summary('learning_rate', lr)
+    lr = get_scalar_var('learning_rate', 0.045, summary=True)

     return TrainConfig(
         dataset=dataset_train,
         optimizer=tf.train.AdamOptimizer(lr, epsilon=1e-3),
...
@@ -188,9 +188,7 @@ def get_config():
     master = MySimulatorMaster(namec2s, names2c, M)
     dataflow = BatchData(DataFromQueue(master.queue), BATCH_SIZE)

-    lr = tf.Variable(0.001, trainable=False, name='learning_rate')
-    tf.scalar_summary('learning_rate', lr)
+    lr = symbf.get_scalar_var('learning_rate', 0.001, summary=True)

     return TrainConfig(
         dataset=dataflow,
         optimizer=tf.train.AdamOptimizer(lr, epsilon=1e-3),
@@ -200,9 +198,6 @@ def get_config():
             ScheduledHyperParamSetter('entropy_beta', [(80, 0.005)]),
             ScheduledHyperParamSetter('explore_factor',
                 [(80, 2), (100, 3), (120, 4), (140, 5)]),
-            HumanHyperParamSetter('learning_rate'),
-            HumanHyperParamSetter('entropy_beta'),
-            HumanHyperParamSetter('explore_factor'),
             master,
             StartProcOrThread(master),
             PeriodicCallback(Evaluator(EVAL_EPISODE, ['state'], ['logits']), 2),
...
@@ -142,9 +142,7 @@ def get_config():
     step_per_epoch = dataset_train.size()
     dataset_test = get_data('test')

-    lr = tf.Variable(0.01, trainable=False, name='learning_rate')
-    tf.scalar_summary('learning_rate', lr)
+    lr = get_scalar_var('learning_rate', 0.01, summary=True)

     return TrainConfig(
         dataset=dataset_train,
         optimizer=tf.train.MomentumOptimizer(lr, 0.9),
@@ -155,7 +153,6 @@ def get_config():
             ScheduledHyperParamSetter('learning_rate',
                 [(1, 0.1), (82, 0.01), (123, 0.001), (300, 0.0002)])
         ]),
-        session_config=get_default_sess_config(0.9),
         model=Model(n=18),
         step_per_epoch=step_per_epoch,
         max_epoch=400,
...
@@ -19,7 +19,7 @@ from tensorpack.tfutils.summary import *
 """
 Training code of Pre-Activation version of ResNet on ImageNet.
-Mainly follow the setup in fb.resnet.torch
+It mainly follows the setup in fb.resnet.torch, and gets similar performance.
 """

 TOTAL_BATCH_SIZE = 256
@@ -116,8 +116,7 @@ class Model(ModelDesc):
         wrong = prediction_incorrect(logits, label, 5, name='wrong-top5')
         add_moving_summary(tf.reduce_mean(wrong, name='train-error-top5'))

-        wd_w = 1e-4
-        wd_cost = tf.mul(wd_w, regularize_cost('.*/W', tf.nn.l2_loss), name='l2_regularize_loss')
+        wd_cost = tf.mul(1e-4, regularize_cost('.*/W', tf.nn.l2_loss), name='l2_regularize_loss')
         add_moving_summary(loss, wd_cost)
         self.cost = tf.add_n([loss, wd_cost], name='cost')
@@ -186,11 +185,7 @@ def get_config():
     dataset_train = get_data('train')
     dataset_val = get_data('val')

-    sess_config = get_default_sess_config(0.99)
-
-    lr = tf.Variable(0.1, trainable=False, name='learning_rate')
-    tf.scalar_summary('learning_rate', lr)
-
+    lr = get_scalar_var('learning_rate', 0.1, summary=True)
     return TrainConfig(
         dataset=dataset_train,
         optimizer=tf.train.MomentumOptimizer(lr, 0.9, use_nesterov=True),
@@ -203,7 +198,6 @@ def get_config():
                 [(30, 1e-2), (60, 1e-3), (85, 1e-4), (95, 1e-5)]),
             HumanHyperParamSetter('learning_rate'),
         ]),
-        session_config=sess_config,
         model=Model(),
         step_per_epoch=5000,
         max_epoch=110,
...
@@ -64,11 +64,7 @@ def get_config():
     step_per_epoch = dataset_train.size()
     dataset_test = get_data('test')

-    sess_config = get_default_sess_config(0.9)
-
-    lr = tf.Variable(0.1, trainable=False, name='learning_rate')
-    tf.scalar_summary('learning_rate', lr)
-
+    lr = get_scalar_var('learning_rate', 0.01, summary=True)
     return TrainConfig(
         dataset=dataset_train,
         optimizer=tf.train.MomentumOptimizer(lr, 0.9),
@@ -80,7 +76,6 @@ def get_config():
             ScheduledHyperParamSetter('learning_rate',
                 [(1, 0.1), (20, 0.01), (28, 0.001), (50, 0.0001)])
         ]),
-        session_config=sess_config,
         model=Model(n=18),
         step_per_epoch=step_per_epoch,
         max_epoch=500,
...
@@ -144,8 +144,7 @@ def get_config():
     dataset_train, dataset_test = get_data(True), get_data(False)
     step_per_epoch = dataset_train.size() * 5

-    lr = symbolic_functions.get_scalar_var('learning_rate', 5e-4)
-    tf.scalar_summary('learning_rate', lr)
+    lr = symbf.get_scalar_var('learning_rate', 5e-4, summary=True)

     return TrainConfig(
         dataset=dataset_train,
...
@@ -112,8 +112,7 @@ def get_config():
     ds = BatchData(ds, param.batch_size)
     step_per_epoch = ds.size()

-    lr = tf.Variable(2e-3, trainable=False, name='learning_rate')
-    tf.scalar_summary('learning_rate', lr)
+    lr = symbolic_functions.get_scalar_var('learning_rate', 2e-3, summary=True)

     return TrainConfig(
         dataset=ds,
...
@@ -110,9 +110,7 @@ def get_config(cifar_classnum):
     sess_config = get_default_sess_config(0.5)

-    lr = tf.Variable(1e-2, name='learning_rate',
-                     dtype=tf.float32, trainable=False)
-    tf.scalar_summary('learning_rate', lr)
+    lr = symbf.get_scalar_var('learning_rate', 1e-2, summary=True)

     def lr_func(lr):
         if lr < 3e-5:
             raise StopTraining()
...
@@ -87,9 +87,9 @@ class OfflinePredictor(OnlinePredictor):
     def __init__(self, config):
         self.graph = tf.Graph()
         with self.graph.as_default():
-            input_vars = config.model.get_input_vars()
+            input_placehdrs = config.model.get_input_vars()
             with TowerContext('', False):
-                config.model.build_graph(input_vars)
+                config.model.build_graph(input_placehdrs)

             input_vars = get_tensors_by_names(config.input_names)
             output_vars = get_tensors_by_names(config.output_names)
...
@@ -56,7 +56,11 @@ class SimpleDatasetPredictor(DatasetPredictorBase):
     def get_result(self):
         """ A generator to produce prediction for each data"""
-        with tqdm(total=self.dataset.size()) as pbar:
+        try:
+            sz = self.dataset.size()
+        except NotImplementedError:
+            sz = 0
+        with tqdm(total=sz) as pbar:
             for dp in self.dataset.get_data():
                 res = self.predictor(dp)
                 yield res
@@ -111,7 +115,11 @@ class MultiProcessDatasetPredictor(DatasetPredictorBase):
         ensure_proc_terminate(self.workers + [self.result_queue, self.inqueue_proc])

     def get_result(self):
-        with tqdm(total=self.dataset.size()) as pbar:
+        try:
+            sz = self.dataset.size()
+        except NotImplementedError:
+            sz = 0
+        with tqdm(total=sz) as pbar:
             die_cnt = 0
             while True:
                 res = self.result_queue.get()
...
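The two `get_result` changes above make the progress bar tolerate dataflows of unknown length. A minimal sketch of the pattern, using a hypothetical stand-in class (tensorpack's DataFlow base class raises NotImplementedError from size(), which is exactly what the new except clause relies on):

class StreamingData(object):
    """Hypothetical stand-in for a dataflow whose length is unknown."""
    def size(self):
        raise NotImplementedError   # mirrors the DataFlow base class
    def get_data(self):
        while True:
            yield [0]

ds = StreamingData()
try:
    sz = ds.size()
except NotImplementedError:
    sz = 0   # tqdm then renders a bar without a meaningful total
print(sz)    # 0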
@@ -21,12 +21,12 @@ __all__ = ['get_default_sess_config',
            'clear_collection',
            'freeze_collection']

-def get_default_sess_config(mem_fraction=0.9):
+def get_default_sess_config(mem_fraction=0.99):
     """
     Return a better session config to use as default.
     TensorFlow's default session config consumes too many resources.

-    :param mem_fraction: fraction of memory to use.
+    :param mem_fraction: fraction of memory to use. Defaults to 0.99.
     :returns: a `tf.ConfigProto` object.
     """
     conf = tf.ConfigProto()
...
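For reference, a minimal usage sketch of the changed helper (the `tensorpack.tfutils` import path is an assumption here; the flattened page no longer shows file names):

import tensorflow as tf
from tensorpack.tfutils import get_default_sess_config   # import path assumed

# cap the process at half of GPU memory, as the cifar-convnet hunk above does
sess = tf.Session(config=get_default_sess_config(0.5))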
@@ -104,7 +104,14 @@ def huber_loss(x, delta=1, name='huber_loss'):
                        abscost * delta - 0.5 * delta ** 2),
                   name=name)

-def get_scalar_var(name, init_value):
-    return tf.get_variable(name, shape=[],
-                           initializer=tf.constant_initializer(init_value),
-                           trainable=False)
+def get_scalar_var(name, init_value, summary=False, trainable=False):
+    """
+    Get a scalar variable with a certain initial value.
+
+    :param summary: whether to add a scalar summary for this variable
+    """
+    ret = tf.get_variable(name, shape=[],
+                          initializer=tf.constant_initializer(init_value),
+                          trainable=trainable)
+    if summary:
+        tf.scalar_summary(name, ret)
+    return ret
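And a minimal sketch of the new signature in use (TF pre-1.0 API, matching the tf.scalar_summary era; the symbolic_functions import path is assumed). `trainable` defaults to False, so existing call sites keep their semantics, while `summary=True` replaces the manual summary call:

import tensorflow as tf
from tensorpack.tfutils.symbolic_functions import get_scalar_var   # path assumed

lr = get_scalar_var('learning_rate', 0.1, summary=True)   # non-trainable scalar plus summary op

# callbacks such as ScheduledHyperParamSetter locate the variable by name and assign to it
with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    sess.run(lr.assign(0.01))
    print(sess.run(lr))   # 0.01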